[Bf-blender-cvs] [77c3e67d3d7] master: Cycles: Improved render start/stop responsiveness on Metal

Michael Jones noreply at git.blender.org
Wed Jan 4 17:00:54 CET 2023


Commit: 77c3e67d3d7d8055619491bf09f0e7626afe33f9
Author: Michael Jones
Date:   Wed Jan 4 14:23:33 2023 +0000
Branches: master
https://developer.blender.org/rB77c3e67d3d7d8055619491bf09f0e7626afe33f9

Cycles: Improved render start/stop responsiveness on Metal

All kernel specialisation is now performed in the background regardless of kernel type, meaning that the first render will be visible a few seconds sooner. The only exception is during benchmark warm up, in which case we wait for all kernels to be cached. When stopping a render, we call a new `cancel()` method on the device which causes any outstanding compilation work to be cancelled, and we destroy the device in a detached thread so that any stale queued compilations can be safely purge [...]

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D16371

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/metal/device_impl.h
M	intern/cycles/device/metal/device_impl.mm
M	intern/cycles/device/metal/kernel.h
M	intern/cycles/device/metal/kernel.mm
M	intern/cycles/device/metal/queue.mm
M	intern/cycles/integrator/path_trace.cpp
M	intern/cycles/session/session.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index b9308dc8949..959939ddbb7 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -167,6 +167,17 @@ class Device {
     return true;
   }
 
+  /* Request cancellation of any long-running work. */
+  virtual void cancel()
+  {
+  }
+
+  /* Return true if device is ready for rendering, or report status if not. */
+  virtual bool is_ready(string &status) const
+  {
+    return true;
+  }
+
   /* GPU device only functions.
    * These may not be used on CPU or multi-devices. */
 
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index e57b8628023..526535ff132 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -76,7 +76,20 @@ class MetalDevice : public Device {
 
   bool use_metalrt = false;
   MetalPipelineType kernel_specialization_level = PSO_GENERIC;
-  std::atomic_bool async_compile_and_load = false;
+
+  int device_id = 0;
+
+  static thread_mutex existing_devices_mutex;
+  static std::map<int, MetalDevice *> active_device_ids;
+
+  static bool is_device_cancelled(int device_id);
+
+  static MetalDevice *get_device_by_ID(int device_idID,
+                                       thread_scoped_lock &existing_devices_mutex_lock);
+
+  virtual bool is_ready(string &status) const override;
+
+  virtual void cancel() override;
 
   virtual BVHLayoutMask get_bvh_layout_mask() const override;
 
@@ -92,14 +105,12 @@ class MetalDevice : public Device {
 
   bool use_adaptive_compilation();
 
+  bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
+
   void make_source(MetalPipelineType pso_type, const uint kernel_features);
 
   virtual bool load_kernels(const uint kernel_features) override;
 
-  void reserve_local_memory(const uint kernel_features);
-
-  void init_host_memory();
-
   void load_texture_info();
 
   void erase_allocation(device_memory &mem);
@@ -112,7 +123,7 @@ class MetalDevice : public Device {
 
   virtual void optimize_for_scene(Scene *scene) override;
 
-  bool compile_and_load(MetalPipelineType pso_type);
+  static void compile_and_load(int device_id, MetalPipelineType pso_type);
 
   /* ------------------------------------------------------------------ */
   /* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 95935ce2a3a..a6966bf167d 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -13,10 +13,32 @@
 #  include "util/path.h"
 #  include "util/time.h"
 
+#  include <crt_externs.h>
+
 CCL_NAMESPACE_BEGIN
 
 class MetalDevice;
 
+thread_mutex MetalDevice::existing_devices_mutex;
+std::map<int, MetalDevice *> MetalDevice::active_device_ids;
+
+/* Thread-safe device access for async work. Calling code must pass an appropriately scoped lock
+ * to existing_devices_mutex to safeguard against destruction of the returned instance. */
+MetalDevice *MetalDevice::get_device_by_ID(int ID, thread_scoped_lock &existing_devices_mutex_lock)
+{
+  auto it = active_device_ids.find(ID);
+  if (it != active_device_ids.end()) {
+    return it->second;
+  }
+  return nullptr;
+}
+
+bool MetalDevice::is_device_cancelled(int ID)
+{
+  thread_scoped_lock lock(existing_devices_mutex);
+  return get_device_by_ID(ID, lock) == nullptr;
+}
+
 BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
 {
   return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
@@ -40,6 +62,15 @@ void MetalDevice::set_error(const string &error)
 MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
 {
+  {
+    /* Assign an ID for this device which we can use to query whether async shader compilation
+     * requests are still relevant. */
+    thread_scoped_lock lock(existing_devices_mutex);
+    static int existing_devices_counter = 1;
+    device_id = existing_devices_counter++;
+    active_device_ids[device_id] = this;
+  }
+
   mtlDevId = info.num;
 
   /* select chosen device */
@@ -57,7 +88,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
   if (@available(macos 11.0, *)) {
     if ([mtlDevice hasUnifiedMemory]) {
       default_storage_mode = MTLResourceStorageModeShared;
-      init_host_memory();
     }
   }
 
@@ -181,6 +211,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
 
 MetalDevice::~MetalDevice()
 {
+  /* Cancel any async shader compilations that are in flight. */
+  cancel();
+
+  /* This lock safeguards against destruction during use (see other uses of
+   * existing_devices_mutex). */
+  thread_scoped_lock lock(existing_devices_mutex);
+
   for (auto &tex : texture_slot_map) {
     if (tex) {
       [tex release];
@@ -326,22 +363,67 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
    * active, but may still need to be rendered without motion blur if that isn't active as well. */
   motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
 
-  bool result = compile_and_load(PSO_GENERIC);
+  /* Only request generic kernels if they aren't cached in memory. */
+  if (make_source_and_check_if_compile_needed(PSO_GENERIC)) {
+    /* If needed, load them asynchronously in order to responsively report progress to the user. */
+    int this_device_id = this->device_id;
+    auto compile_kernels_fn = ^() {
+      compile_and_load(this_device_id, PSO_GENERIC);
+    };
+
+    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
+                   compile_kernels_fn);
+  }
 
-  reserve_local_memory(kernel_features);
-  return result;
+  return true;
 }
 
-bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
+bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
 {
-  make_source(pso_type, kernel_features);
+  if (this->source[pso_type].empty()) {
+    make_source(pso_type, kernel_features);
+  }
+  return MetalDeviceKernels::should_load_kernels(this, pso_type);
+}
+
+void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
+{
+  /* Thread-safe front-end compilation. Typically the MSL->AIR compilation can take a few seconds,
+   * so we avoid blocking device teardown if the user cancels a render immediately.
+   */
+
+  id<MTLDevice> mtlDevice;
+  string source;
+  MetalGPUVendor device_vendor;
+
+  /* Safely gather any state required for the MSL->AIR compilation. */
+  {
+    thread_scoped_lock lock(existing_devices_mutex);
 
-  if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
-    /* We already have a full set of matching pipelines which are cached or queued. */
-    metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
-    return true;
+    /* Check whether the device still exists. */
+    MetalDevice *instance = get_device_by_ID(device_id, lock);
+    if (!instance) {
+      metal_printf("Ignoring %s compilation request - device no longer exists\n",
+                   kernel_type_as_string(pso_type));
+      return;
+    }
+
+    if (!instance->make_source_and_check_if_compile_needed(pso_type)) {
+      /* We already have a full set of matching pipelines which are cached or queued. Return early
+       * to avoid redundant MTLLibrary compilation. */
+      metal_printf("Ignoreing %s compilation request - kernels already requested\n",
+                   kernel_type_as_string(pso_type));
+      return;
+    }
+
+    mtlDevice = instance->mtlDevice;
+    device_vendor = instance->device_vendor;
+    source = instance->source[pso_type];
   }
 
+  /* Perform the actual compilation using our cached context. The MetalDevice can safely destruct
+   * in this time. */
+
   MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
 
 #  if defined(MAC_OS_VERSION_13_0)
@@ -359,20 +441,15 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
 
   if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
     path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
-                    source[pso_type]);
+                    source);
   }
 
   const double starttime = time_dt();
 
   NSError *error = NULL;
-  mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
-                                                 options:options
-                                                   error:&error];
-
-  if (!mtlLibrary[pso_type]) {
-    NSString *err = [error localizedDescription];
-    set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
-  }
+  id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
+                                                      options:options
+                                                        error:&error];
 
   metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
                time_dt() - starttime,
@@ -380,17 +457,21 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
 
   [options release];
 
-  return MetalDeviceKernels::load(this, pso_type);
-}
-
-void MetalDevice::reserve_local_memory(const uint kernel_features)
-{
-  /* METAL_WIP - implement this */
-}
-
-void MetalDevice::init_host_memory()
-{
-  /* METAL_WIP - implement this */
+  /* Save the compiled MTLLibrary and trigger the AIR->PSO builds (if the MetalDevice still
+   * exists). */
+  {
+    thread_scoped_lock lock(existing_devices_mutex);
+    if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
+      if (mtlLibrary) {
+        instance->mtlLibrary[pso_type] = mtlLibrary;
+        MetalDeviceKernels::load(instance, pso_type);
+      }
+      else {
+        NSString *err = [error localizedDescription];
+        instance->set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
+      }
+    }
+  }
 }
 
 void MetalDevice::load_texture_info()
@@ -700,55 +781,74 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
   return 0;
 }
 
-void MetalDevice::optimize_

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list