[Bf-blender-cvs] [f2d39b810b4] temp-pbvh-split: Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives

Michael Jones noreply at git.blender.org
Fri Jun 3 01:16:32 CEST 2022


Commit: f2d39b810b4902bb5accbac7c5b2e8ec1e60c679
Author: Michael Jones
Date:   Wed May 11 14:52:49 2022 +0100
Branches: temp-pbvh-split
https://developer.blender.org/rBf2d39b810b4902bb5accbac7c5b2e8ec1e60c679

Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives

This patch is the same as D14763, but with a fix for unit test failures caused by the ShaderCache fetch logic not working in the non-MetalRT case:

```
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index ad268ae7057..6aa1a56056e 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel)

   /* metalrt options */
   request.pipeline->use_metalrt = device->use_metalrt;
-  request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
-  request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
-  request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+  request.pipeline->metalrt_hair = device->use_metalrt &&
+                                   (device->kernel_features & KERNEL_FEATURE_HAIR);
+  request.pipeline->metalrt_hair_thick = device->use_metalrt &&
+                                         (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+  request.pipeline->metalrt_pointcloud = device->use_metalrt &&
+                                         (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);

   {
     thread_scoped_lock lock(cache_mutex);
@@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel)

   /* metalrt options */
   bool use_metalrt = device->use_metalrt;
-  bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
-  bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
-  bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+  bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
+  bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+  bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);

   MetalKernelPipeline *best_pipeline = nullptr;
   for (auto &pipeline : collection) {

```

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D14923

===================================================================

M	intern/cycles/device/metal/device_impl.h
M	intern/cycles/device/metal/device_impl.mm
M	intern/cycles/device/metal/kernel.h
M	intern/cycles/device/metal/kernel.mm
M	intern/cycles/device/metal/queue.mm
M	intern/cycles/kernel/device/metal/compat.h

===================================================================

diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 27c58ce6d2f..7506b9b069f 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -28,7 +28,8 @@ class MetalDevice : public Device {
   id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
   id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
       nil; /* encoder used for fetching device pointers from MTLBuffers */
-  string source_used_for_compile[PSO_NUM];
+  string source[PSO_NUM];
+  string source_md5[PSO_NUM];
 
   KernelParamsMetal launch_params = {0};
 
@@ -72,7 +73,6 @@ class MetalDevice : public Device {
   id<MTLBuffer> texture_bindings_3d = nil;
   std::vector<id<MTLTexture>> texture_slot_map;
 
-  MetalDeviceKernels kernels;
   bool use_metalrt = false;
   bool use_function_specialisation = false;
 
@@ -110,6 +110,8 @@ class MetalDevice : public Device {
 
   virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
 
+  id<MTLLibrary> compile(string const &source);
+
   /* ------------------------------------------------------------------ */
   /* low-level memory management */
 
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index c01f51fb506..e1438a9d6e2 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
    * active, but may still need to be rendered without motion blur if that isn't active as well. */
   motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
 
-  NSError *error = NULL;
+  source[PSO_GENERIC] = get_source(kernel_features);
+  mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
 
-  for (int i = 0; i < PSO_NUM; i++) {
-    if (mtlLibrary[i]) {
-      [mtlLibrary[i] release];
-      mtlLibrary[i] = nil;
-    }
-  }
+  MD5Hash md5;
+  md5.append(source[PSO_GENERIC]);
+  source_md5[PSO_GENERIC] = md5.get_hex();
+
+  metal_printf("Front-end compilation finished (generic)\n");
+
+  bool result = MetalDeviceKernels::load(this, false);
+
+  reserve_local_memory(kernel_features);
+
+  return result;
+}
 
+id<MTLLibrary> MetalDevice::compile(string const &source)
+{
   MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
 
   options.fastMathEnabled = YES;
   if (@available(macOS 12.0, *)) {
     options.languageVersion = MTLLanguageVersion2_4;
   }
-  else {
-    return false;
-  }
 
-  string metalsrc;
-
-  /* local helper: dump source to disk and return filepath */
-  auto dump_source = [&](int kernel_type) -> string {
-    string &source = source_used_for_compile[kernel_type];
-    string metalsrc = path_cache_get(path_join("kernels",
-                                               string_printf("%s.%s.metal",
-                                                             kernel_type_as_string(kernel_type),
-                                                             util_md5_string(source).c_str())));
-    path_write_text(metalsrc, source);
-    return metalsrc;
-  };
-
-  /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor,
-   * then compile it into a MTLLibrary */
-  auto fetch_and_compile_source = [&](int kernel_type) {
-    /* Record the source used to compile this library, for hash building later. */
-    string &source = source_used_for_compile[kernel_type];
-
-    switch (kernel_type) {
-      case PSO_GENERIC: {
-        source = get_source(kernel_features);
-        break;
-      }
-      case PSO_SPECIALISED: {
-        /* PSO_SPECIALISED derives from PSO_GENERIC */
-        string &generic_source = source_used_for_compile[PSO_GENERIC];
-        if (generic_source.empty()) {
-          generic_source = get_source(kernel_features);
-        }
-        source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source;
-        break;
-      }
-      default:
-        assert(0);
-    }
-
-    /* create MTLLibrary (front-end compilation) */
-    mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str())
+  NSError *error = NULL;
+  id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
                                                       options:options
                                                         error:&error];
 
-    bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr);
-
-    if (!mtlLibrary[kernel_type] || do_source_dump) {
-      string metalsrc = dump_source(kernel_type);
-
-      if (!mtlLibrary[kernel_type]) {
-        NSString *err = [error localizedDescription];
-        set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
-
-        return false;
-      }
-    }
-    return true;
-  };
-
-  fetch_and_compile_source(PSO_GENERIC);
-
-  if (use_function_specialisation) {
-    fetch_and_compile_source(PSO_SPECIALISED);
+  if (!mtlLibrary) {
+    NSString *err = [error localizedDescription];
+    set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
   }
 
-  metal_printf("Front-end compilation finished\n");
-
-  bool result = kernels.load(this, PSO_GENERIC);
-
   [options release];
-  reserve_local_memory(kernel_features);
 
-  return result;
+  return mtlLibrary;
 }
 
 void MetalDevice::reserve_local_memory(const uint kernel_features)
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index b12491d820d..69b2a686ecc 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -54,103 +54,41 @@ enum {
 const char *kernel_type_as_string(int kernel_type);
 
 struct MetalKernelPipeline {
-  void release()
-  {
-    if (pipeline) {
-      [pipeline release];
-      pipeline = nil;
-      if (@available(macOS 11.0, *)) {
-        for (int i = 0; i < METALRT_TABLE_NUM; i++) {
-          if (intersection_func_table[i]) {
-            [intersection_func_table[i] release];
-            intersection_func_table[i] = nil;
-          }
-        }
-      }
-    }
-    if (function) {
-      [function release];
-      function = nil;
-    }
-    if (@available(macOS 11.0, *)) {
-      for (int i = 0; i < METALRT_TABLE_NUM; i++) {
-        if (intersection_func_table[i]) {
-          [intersection_func_table[i] release];
-        }
-      }
-    }
-  }
 
-  bool loaded = false;
-  id<MTLFunction> function = nil;
-  id<MTLComputePipelineState> pipeline = nil;
-
-  API_AVAILABLE(macos(11.0))
-  id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
-};
-
-struct MetalKernelLoadDesc {
-  int pso_index = 0;
-  const char *function_name = nullptr;
-  int kernel_index = 0;
-  int threads_per_threadgroup = 0;
-  MTLFunctionConstantValues *constant_values = nullptr;
-  NSArray *linked_functions = nullptr;
-
-  struct IntersectorFunctions {
-    NSArray *defaults;
-    NSArray *shadow;
-    NSArray *local;
-    NSArray *operator[](int index) const
-    {
-      if (index == METALRT_TABLE_DEFAULT)
-        return defaults;
-      if (index == METALRT_TABLE_SHADOW)
-        return shadow;
-      return local;
-    }
-  } intersector_functions = {nullptr};
-};
-
-/* Metal kernel and associate occupancy information. */
-class MetalDeviceKernel {
- public:
-  ~MetalDeviceKernel();
+  void compile();
 
-  bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
+  id<MTLLibrary> mtlLibrary = nil;
+  bool scene_specialized;
+  string source_md5;
 
-  void mark_loaded(int pso_index)
-  {
-    pso[pso_index].loaded = true;
-  }
+  bool use_metalrt;
+  bool metalrt_hair;
+  bool metalrt_hair_thick;
+  bool metalrt_pointcloud;
 
-  int get_num_threads_per_block() const
-  {
-    return num_threads_per_block;
-  }
-  const MetalKernelPipeline &get_pso() const;
+  int threads_per_threadgroup;
 
-  double load_duration = 0.0;
+  DeviceKernel device_kernel;
+  bool loaded = false;
+  id<MTLDevice> mtlDevice = nil;
+  id<MTLFunction> function = nil;
+  id<MTLComputePipelineState> pipeline = nil;
+  int num_threads_per_block = 0;
 
- private:
-  MetalKernelPipeline pso[PSO_NUM];
+  string error_str;
 
-  int num_threads_per_block = 0;
+  API_AVAILABLE(macos(11.0))
+  id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
+  id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
 };
 
 /* Cache of Metal kernels for each DeviceKernel. */
-class MetalDeviceKernels {
- public:
-  bool load(MetalDevice *device, int kernel_type);
-  bool available(DeviceKernel kernel) const;
-  const MetalDeviceKernel &get(DeviceKernel kernel) const;
+namespace MetalDeviceKernels {
 
-  MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
+bool load(MetalDevice *device, bool scene_specialized);
+const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
 
-  id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
-
-  string loaded_md5[PSO_NUM];
-};
+} /* namespace MetalDeviceKernels */
 
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..fc9a8cecd75 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -9,6 +9,7 @@
 #  include "util/path.h"
 #  include "util/tbb.h"
 #  include "util/time.h"
+#  include "util/unique_ptr.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type)
   return "";
 }
 
-MetalDeviceKernel::~MetalDeviceKernel()
+bool kernel_has_intersection(DeviceKernel device_kernel)
 {
-  for (int i = 0; i < PSO_NUM; i++) {
-    pso[i].release();
+  return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+          device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
+          device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
+          device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
+          device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+}
+
+struct ShaderCache {
+  ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
+  {
+  }
+  ~ShaderCache();

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list