[Bf-blender-cvs] [f2d39b810b4] temp-pbvh-split: Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives
Michael Jones
noreply at git.blender.org
Fri Jun 3 01:16:32 CEST 2022
Commit: f2d39b810b4902bb5accbac7c5b2e8ec1e60c679
Author: Michael Jones
Date: Wed May 11 14:52:49 2022 +0100
Branches: temp-pbvh-split
https://developer.blender.org/rBf2d39b810b4902bb5accbac7c5b2e8ec1e60c679
Enable inlining on Apple Silicon. Use the new process-wide ShaderCache in order to safely re-enable binary archives.
This patch is the same as D14763, but with a fix for unit test failures caused by ShaderCache fetch logic not working in the non-MetalRT case:
```
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index ad268ae7057..6aa1a56056e 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
/* metalrt options */
request.pipeline->use_metalrt = device->use_metalrt;
- request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
- request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
- request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+ request.pipeline->metalrt_hair = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR);
+ request.pipeline->metalrt_hair_thick = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ request.pipeline->metalrt_pointcloud = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
{
thread_scoped_lock lock(cache_mutex);
@@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
/* metalrt options */
bool use_metalrt = device->use_metalrt;
- bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
- bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
- bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+ bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
+ bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
MetalKernelPipeline *best_pipeline = nullptr;
for (auto &pipeline : collection) {
```
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D14923
===================================================================
M intern/cycles/device/metal/device_impl.h
M intern/cycles/device/metal/device_impl.mm
M intern/cycles/device/metal/kernel.h
M intern/cycles/device/metal/kernel.mm
M intern/cycles/device/metal/queue.mm
M intern/cycles/kernel/device/metal/compat.h
===================================================================
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 27c58ce6d2f..7506b9b069f 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -28,7 +28,8 @@ class MetalDevice : public Device {
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
- string source_used_for_compile[PSO_NUM];
+ string source[PSO_NUM];
+ string source_md5[PSO_NUM];
KernelParamsMetal launch_params = {0};
@@ -72,7 +73,6 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
- MetalDeviceKernels kernels;
bool use_metalrt = false;
bool use_function_specialisation = false;
@@ -110,6 +110,8 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+ id<MTLLibrary> compile(string const &source);
+
/* ------------------------------------------------------------------ */
/* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index c01f51fb506..e1438a9d6e2 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
- NSError *error = NULL;
+ source[PSO_GENERIC] = get_source(kernel_features);
+ mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
- for (int i = 0; i < PSO_NUM; i++) {
- if (mtlLibrary[i]) {
- [mtlLibrary[i] release];
- mtlLibrary[i] = nil;
- }
- }
+ MD5Hash md5;
+ md5.append(source[PSO_GENERIC]);
+ source_md5[PSO_GENERIC] = md5.get_hex();
+
+ metal_printf("Front-end compilation finished (generic)\n");
+
+ bool result = MetalDeviceKernels::load(this, false);
+
+ reserve_local_memory(kernel_features);
+
+ return result;
+}
+id<MTLLibrary> MetalDevice::compile(string const &source)
+{
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
}
- else {
- return false;
- }
- string metalsrc;
-
- /* local helper: dump source to disk and return filepath */
- auto dump_source = [&](int kernel_type) -> string {
- string &source = source_used_for_compile[kernel_type];
- string metalsrc = path_cache_get(path_join("kernels",
- string_printf("%s.%s.metal",
- kernel_type_as_string(kernel_type),
- util_md5_string(source).c_str())));
- path_write_text(metalsrc, source);
- return metalsrc;
- };
-
- /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor,
- * then compile it into a MTLLibrary */
- auto fetch_and_compile_source = [&](int kernel_type) {
- /* Record the source used to compile this library, for hash building later. */
- string &source = source_used_for_compile[kernel_type];
-
- switch (kernel_type) {
- case PSO_GENERIC: {
- source = get_source(kernel_features);
- break;
- }
- case PSO_SPECIALISED: {
- /* PSO_SPECIALISED derives from PSO_GENERIC */
- string &generic_source = source_used_for_compile[PSO_GENERIC];
- if (generic_source.empty()) {
- generic_source = get_source(kernel_features);
- }
- source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source;
- break;
- }
- default:
- assert(0);
- }
-
- /* create MTLLibrary (front-end compilation) */
- mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str())
+ NSError *error = NULL;
+ id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
- bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr);
-
- if (!mtlLibrary[kernel_type] || do_source_dump) {
- string metalsrc = dump_source(kernel_type);
-
- if (!mtlLibrary[kernel_type]) {
- NSString *err = [error localizedDescription];
- set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
-
- return false;
- }
- }
- return true;
- };
-
- fetch_and_compile_source(PSO_GENERIC);
-
- if (use_function_specialisation) {
- fetch_and_compile_source(PSO_SPECIALISED);
+ if (!mtlLibrary) {
+ NSString *err = [error localizedDescription];
+ set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
- metal_printf("Front-end compilation finished\n");
-
- bool result = kernels.load(this, PSO_GENERIC);
-
[options release];
- reserve_local_memory(kernel_features);
- return result;
+ return mtlLibrary;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index b12491d820d..69b2a686ecc 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -54,103 +54,41 @@ enum {
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
- void release()
- {
- if (pipeline) {
- [pipeline release];
- pipeline = nil;
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- intersection_func_table[i] = nil;
- }
- }
- }
- }
- if (function) {
- [function release];
- function = nil;
- }
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- }
- }
- }
- }
- bool loaded = false;
- id<MTLFunction> function = nil;
- id<MTLComputePipelineState> pipeline = nil;
-
- API_AVAILABLE(macos(11.0))
- id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
-};
-
-struct MetalKernelLoadDesc {
- int pso_index = 0;
- const char *function_name = nullptr;
- int kernel_index = 0;
- int threads_per_threadgroup = 0;
- MTLFunctionConstantValues *constant_values = nullptr;
- NSArray *linked_functions = nullptr;
-
- struct IntersectorFunctions {
- NSArray *defaults;
- NSArray *shadow;
- NSArray *local;
- NSArray *operator[](int index) const
- {
- if (index == METALRT_TABLE_DEFAULT)
- return defaults;
- if (index == METALRT_TABLE_SHADOW)
- return shadow;
- return local;
- }
- } intersector_functions = {nullptr};
-};
-
-/* Metal kernel and associate occupancy information. */
-class MetalDeviceKernel {
- public:
- ~MetalDeviceKernel();
+ void compile();
- bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
+ id<MTLLibrary> mtlLibrary = nil;
+ bool scene_specialized;
+ string source_md5;
- void mark_loaded(int pso_index)
- {
- pso[pso_index].loaded = true;
- }
+ bool use_metalrt;
+ bool metalrt_hair;
+ bool metalrt_hair_thick;
+ bool metalrt_pointcloud;
- int get_num_threads_per_block() const
- {
- return num_threads_per_block;
- }
- const MetalKernelPipeline &get_pso() const;
+ int threads_per_threadgroup;
- double load_duration = 0.0;
+ DeviceKernel device_kernel;
+ bool loaded = false;
+ id<MTLDevice> mtlDevice = nil;
+ id<MTLFunction> function = nil;
+ id<MTLComputePipelineState> pipeline = nil;
+ int num_threads_per_block = 0;
- private:
- MetalKernelPipeline pso[PSO_NUM];
+ string error_str;
- int num_threads_per_block = 0;
+ API_AVAILABLE(macos(11.0))
+ id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
+ id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
};
/* Cache of Metal kernels for each DeviceKernel. */
-class MetalDeviceKernels {
- public:
- bool load(MetalDevice *device, int kernel_type);
- bool available(DeviceKernel kernel) const;
- const MetalDeviceKernel &get(DeviceKernel kernel) const;
+namespace MetalDeviceKernels {
- MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
+bool load(MetalDevice *device, bool scene_specialized);
+const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
- id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
-
- string loaded_md5[PSO_NUM];
-};
+} /* namespace MetalDeviceKernels */
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..fc9a8cecd75 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -9,6 +9,7 @@
# include "util/path.h"
# include "util/tbb.h"
# include "util/time.h"
+# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type)
return "";
}
-MetalDeviceKernel::~MetalDeviceKernel()
+bool kernel_has_intersection(DeviceKernel device_kernel)
{
- for (int i = 0; i < PSO_NUM; i++) {
- pso[i].release();
+ return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+}
+
+struct ShaderCache {
+ ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
+ {
+ }
+ ~ShaderCache();
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list