[Bf-blender-cvs] [08b3426df9e] master: Cycles: Occupancy tuning for new higher end M2 machines

Thu Jan 19 18:56:41 CET 2023

Commit: 08b3426df9e5b5dd3c7cc042197bea3ea2398e75
Author: Michael Jones
Date:   Thu Jan 19 17:55:53 2023 +0000
Branches: master
https://developer.blender.org/rB08b3426df9e5b5dd3c7cc042197bea3ea2398e75

Cycles: Occupancy tuning for new higher end M2 machines

This patch adds occupancy tuning for the newly announced high-end M2 machines, giving a 10-15% render speedup over a build without this tuning.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D17037

===================================================================

M	intern/cycles/device/metal/kernel.mm
M	intern/cycles/device/metal/queue.mm
M	intern/cycles/device/metal/util.h
M	intern/cycles/device/metal/util.mm

===================================================================

diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index e4ce5e19f63..48bdf2f0ef1 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -49,6 +49,18 @@ struct ShaderCache {
     if (MetalInfo::get_device_vendor(mtlDevice) == METAL_GPU_APPLE) {
       switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) {
         default:
+        case APPLE_M2_BIG:
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {384, 128};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {640, 128};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {1024, 64};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {704, 704};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {640, 32};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {896, 768};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {512, 128};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {32, 32};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {768, 576};
+          occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {896, 768};
+          break;
         case APPLE_M2:
           occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {32, 32};
           occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {832, 32};
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 837be0b0c23..f335844c3f9 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -278,7 +278,8 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
   if (metal_device_->device_vendor == METAL_GPU_APPLE) {
     result *= 4;
 
-    if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) {
+    /* Increasing the state count doesn't notably benefit M1-family systems.  */
+    if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
       size_t system_ram = system_physical_ram();
       size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
       size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];
diff --git a/intern/cycles/device/metal/util.h b/intern/cycles/device/metal/util.h
index a988d01d361..c30c4ccd9bc 100644
--- a/intern/cycles/device/metal/util.h
+++ b/intern/cycles/device/metal/util.h
@@ -29,6 +29,7 @@ enum AppleGPUArchitecture {
   APPLE_UNKNOWN,
   APPLE_M1,
   APPLE_M2,
+  APPLE_M2_BIG,
 };
 
 /* Contains static Metal helper functions. */
diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm
index f47638fac15..984e7a70c76 100644
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -52,7 +52,7 @@ AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
     return APPLE_M1;
   }
   else if (strstr(device_name, "M2")) {
-    return APPLE_M2;
+    return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG;
   }
   return APPLE_UNKNOWN;
 }