[Bf-blender-cvs] [77c3e67d3d7] master: Cycles: Improved render start/stop responsiveness on Metal
Michael Jones
noreply at git.blender.org
Wed Jan 4 17:00:54 CET 2023
Commit: 77c3e67d3d7d8055619491bf09f0e7626afe33f9
Author: Michael Jones
Date: Wed Jan 4 14:23:33 2023 +0000
Branches: master
https://developer.blender.org/rB77c3e67d3d7d8055619491bf09f0e7626afe33f9
Cycles: Improved render start/stop responsiveness on Metal
All kernel specialisation is now performed in the background regardless of kernel type, meaning that the first render will be visible a few seconds sooner. The only exception is during benchmark warm up, in which case we wait for all kernels to be cached. When stopping a render, we call a new `cancel()` method on the device which causes any outstanding compilation work to be cancelled, and we destroy the device in a detached thread so that any stale queued compilations can be safely purge [...]
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D16371
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/metal/device_impl.h
M intern/cycles/device/metal/device_impl.mm
M intern/cycles/device/metal/kernel.h
M intern/cycles/device/metal/kernel.mm
M intern/cycles/device/metal/queue.mm
M intern/cycles/integrator/path_trace.cpp
M intern/cycles/session/session.cpp
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index b9308dc8949..959939ddbb7 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -167,6 +167,17 @@ class Device {
return true;
}
+ /* Request cancellation of any long-running work. */
+ virtual void cancel()
+ {
+ }
+
+ /* Return true if device is ready for rendering, or report status if not. */
+ virtual bool is_ready(string &status) const
+ {
+ return true;
+ }
+
/* GPU device only functions.
* These may not be used on CPU or multi-devices. */
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index e57b8628023..526535ff132 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -76,7 +76,20 @@ class MetalDevice : public Device {
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
- std::atomic_bool async_compile_and_load = false;
+
+ int device_id = 0;
+
+ static thread_mutex existing_devices_mutex;
+ static std::map<int, MetalDevice *> active_device_ids;
+
+ static bool is_device_cancelled(int device_id);
+
+ static MetalDevice *get_device_by_ID(int device_idID,
+ thread_scoped_lock &existing_devices_mutex_lock);
+
+ virtual bool is_ready(string &status) const override;
+
+ virtual void cancel() override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
@@ -92,14 +105,12 @@ class MetalDevice : public Device {
bool use_adaptive_compilation();
+ bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
+
void make_source(MetalPipelineType pso_type, const uint kernel_features);
virtual bool load_kernels(const uint kernel_features) override;
- void reserve_local_memory(const uint kernel_features);
-
- void init_host_memory();
-
void load_texture_info();
void erase_allocation(device_memory &mem);
@@ -112,7 +123,7 @@ class MetalDevice : public Device {
virtual void optimize_for_scene(Scene *scene) override;
- bool compile_and_load(MetalPipelineType pso_type);
+ static void compile_and_load(int device_id, MetalPipelineType pso_type);
/* ------------------------------------------------------------------ */
/* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 95935ce2a3a..a6966bf167d 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -13,10 +13,32 @@
# include "util/path.h"
# include "util/time.h"
+# include <crt_externs.h>
+
CCL_NAMESPACE_BEGIN
class MetalDevice;
+thread_mutex MetalDevice::existing_devices_mutex;
+std::map<int, MetalDevice *> MetalDevice::active_device_ids;
+
+/* Thread-safe device access for async work. Calling code must pass an appropriately scoped lock
+ * to existing_devices_mutex to safeguard against destruction of the returned instance. */
+MetalDevice *MetalDevice::get_device_by_ID(int ID, thread_scoped_lock &existing_devices_mutex_lock)
+{
+ auto it = active_device_ids.find(ID);
+ if (it != active_device_ids.end()) {
+ return it->second;
+ }
+ return nullptr;
+}
+
+bool MetalDevice::is_device_cancelled(int ID)
+{
+ thread_scoped_lock lock(existing_devices_mutex);
+ return get_device_by_ID(ID, lock) == nullptr;
+}
+
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
{
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
@@ -40,6 +62,15 @@ void MetalDevice::set_error(const string &error)
MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
+ {
+ /* Assign an ID for this device which we can use to query whether async shader compilation
+ * requests are still relevant. */
+ thread_scoped_lock lock(existing_devices_mutex);
+ static int existing_devices_counter = 1;
+ device_id = existing_devices_counter++;
+ active_device_ids[device_id] = this;
+ }
+
mtlDevId = info.num;
/* select chosen device */
@@ -57,7 +88,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
if (@available(macos 11.0, *)) {
if ([mtlDevice hasUnifiedMemory]) {
default_storage_mode = MTLResourceStorageModeShared;
- init_host_memory();
}
}
@@ -181,6 +211,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
MetalDevice::~MetalDevice()
{
+ /* Cancel any async shader compilations that are in flight. */
+ cancel();
+
+ /* This lock safeguards against destruction during use (see other uses of
+ * existing_devices_mutex). */
+ thread_scoped_lock lock(existing_devices_mutex);
+
for (auto &tex : texture_slot_map) {
if (tex) {
[tex release];
@@ -326,22 +363,67 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
- bool result = compile_and_load(PSO_GENERIC);
+ /* Only request generic kernels if they aren't cached in memory. */
+ if (make_source_and_check_if_compile_needed(PSO_GENERIC)) {
+ /* If needed, load them asynchronously in order to responsively message progress to the user. */
+ int this_device_id = this->device_id;
+ auto compile_kernels_fn = ^() {
+ compile_and_load(this_device_id, PSO_GENERIC);
+ };
+
+ dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
+ compile_kernels_fn);
+ }
- reserve_local_memory(kernel_features);
- return result;
+ return true;
}
-bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
+bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
{
- make_source(pso_type, kernel_features);
+ if (this->source[pso_type].empty()) {
+ make_source(pso_type, kernel_features);
+ }
+ return MetalDeviceKernels::should_load_kernels(this, pso_type);
+}
+
+void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
+{
+ /* Thread-safe front-end compilation. Typically the MSL->AIR compilation can take a few seconds,
+ * so we avoid blocking device teardown if the user cancels a render immediately.
+ */
+
+ id<MTLDevice> mtlDevice;
+ string source;
+ MetalGPUVendor device_vendor;
+
+ /* Safely gather any state required for the MSL->AIR compilation. */
+ {
+ thread_scoped_lock lock(existing_devices_mutex);
- if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
- /* We already have a full set of matching pipelines which are cached or queued. */
- metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
- return true;
+ /* Check whether the device still exists. */
+ MetalDevice *instance = get_device_by_ID(device_id, lock);
+ if (!instance) {
+ metal_printf("Ignoring %s compilation request - device no longer exists\n",
+ kernel_type_as_string(pso_type));
+ return;
+ }
+
+ if (!instance->make_source_and_check_if_compile_needed(pso_type)) {
+ /* We already have a full set of matching pipelines which are cached or queued. Return early
+ * to avoid redundant MTLLibrary compilation. */
+ metal_printf("Ignoreing %s compilation request - kernels already requested\n",
+ kernel_type_as_string(pso_type));
+ return;
+ }
+
+ mtlDevice = instance->mtlDevice;
+ device_vendor = instance->device_vendor;
+ source = instance->source[pso_type];
}
+ /* Perform the actual compilation using our cached context. The MetalDevice can safely destruct
+ * in this time. */
+
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
# if defined(MAC_OS_VERSION_13_0)
@@ -359,20 +441,15 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
- source[pso_type]);
+ source);
}
const double starttime = time_dt();
NSError *error = NULL;
- mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
- options:options
- error:&error];
-
- if (!mtlLibrary[pso_type]) {
- NSString *err = [error localizedDescription];
- set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
- }
+ id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
+ options:options
+ error:&error];
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
@@ -380,17 +457,21 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
[options release];
- return MetalDeviceKernels::load(this, pso_type);
-}
-
-void MetalDevice::reserve_local_memory(const uint kernel_features)
-{
- /* METAL_WIP - implement this */
-}
-
-void MetalDevice::init_host_memory()
-{
- /* METAL_WIP - implement this */
+ /* Save the compiled MTLLibrary and trigger the AIR->PSO builds (if the MetalDevice still
+ * exists). */
+ {
+ thread_scoped_lock lock(existing_devices_mutex);
+ if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
+ if (mtlLibrary) {
+ instance->mtlLibrary[pso_type] = mtlLibrary;
+ MetalDeviceKernels::load(instance, pso_type);
+ }
+ else {
+ NSString *err = [error localizedDescription];
+ instance->set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
+ }
+ }
+ }
}
void MetalDevice::load_texture_info()
@@ -700,55 +781,74 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
-void MetalDevice::optimize_
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list