[Bf-blender-cvs] [08b3426df9e] master: Cycles: Occupancy tuning for new higher end M2 machines
Michael Jones
noreply at git.blender.org
Thu Jan 19 18:56:41 CET 2023
Commit: 08b3426df9e5b5dd3c7cc042197bea3ea2398e75
Author: Michael Jones
Date: Thu Jan 19 17:55:53 2023 +0000
Branches: master
https://developer.blender.org/rB08b3426df9e5b5dd3c7cc042197bea3ea2398e75
Cycles: Occupancy tuning for new higher end M2 machines
This patch adds occupancy tuning for the newly announced high-end M2 machines, giving 10-15% render speedup over a pre-tuned build.
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D17037
===================================================================
M intern/cycles/device/metal/kernel.mm
M intern/cycles/device/metal/queue.mm
M intern/cycles/device/metal/util.h
M intern/cycles/device/metal/util.mm
===================================================================
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index e4ce5e19f63..48bdf2f0ef1 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -49,6 +49,18 @@ struct ShaderCache {
if (MetalInfo::get_device_vendor(mtlDevice) == METAL_GPU_APPLE) {
switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) {
default:
+ case APPLE_M2_BIG:
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {384, 128};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {640, 128};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {1024, 64};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {704, 704};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {640, 32};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {896, 768};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {512, 128};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {32, 32};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {768, 576};
+ occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {896, 768};
+ break;
case APPLE_M2:
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {32, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {832, 32};
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 837be0b0c23..f335844c3f9 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -278,7 +278,8 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
if (metal_device_->device_vendor == METAL_GPU_APPLE) {
result *= 4;
- if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) {
+ /* Increasing the state count doesn't notably benefit M1-family systems. */
+ if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
size_t system_ram = system_physical_ram();
size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];
diff --git a/intern/cycles/device/metal/util.h b/intern/cycles/device/metal/util.h
index a988d01d361..c30c4ccd9bc 100644
--- a/intern/cycles/device/metal/util.h
+++ b/intern/cycles/device/metal/util.h
@@ -29,6 +29,7 @@ enum AppleGPUArchitecture {
APPLE_UNKNOWN,
APPLE_M1,
APPLE_M2,
+ APPLE_M2_BIG,
};
/* Contains static Metal helper functions. */
diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm
index f47638fac15..984e7a70c76 100644
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -52,7 +52,7 @@ AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
return APPLE_M1;
}
else if (strstr(device_name, "M2")) {
- return APPLE_M2;
+ return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG;
}
return APPLE_UNKNOWN;
}
More information about the Bf-blender-cvs
mailing list