[Bf-blender-cvs] [52a5f685626] master: Revert "Cycles: Enable inlining on Apple Silicon for 1.1x speedup"
Brecht Van Lommel
noreply at git.blender.org
Thu Apr 28 00:50:13 CEST 2022
Commit: 52a5f68562680c0ccd6d4e525098bb5e2af7d0bd
Author: Brecht Van Lommel
Date: Thu Apr 28 00:46:14 2022 +0200
Branches: master
https://developer.blender.org/rB52a5f68562680c0ccd6d4e525098bb5e2af7d0bd
Revert "Cycles: Enable inlining on Apple Silicon for 1.1x speedup"
This reverts commit b82de02e7ce857e20b842a074c0068b146a9fd79. It is causing
crashes in various regression tests.
Ref D14763
===================================================================
M intern/cycles/device/metal/device_impl.h
M intern/cycles/device/metal/device_impl.mm
M intern/cycles/device/metal/kernel.h
M intern/cycles/device/metal/kernel.mm
M intern/cycles/device/metal/queue.mm
M intern/cycles/kernel/device/metal/compat.h
===================================================================
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index d7311ee985f..27c58ce6d2f 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -28,8 +28,7 @@ class MetalDevice : public Device {
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
- string source[PSO_NUM];
- string source_md5[PSO_NUM];
+ string source_used_for_compile[PSO_NUM];
KernelParamsMetal launch_params = {0};
@@ -111,12 +110,6 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
- id<MTLLibrary> compile(string const &source);
-
- const MetalKernelPipeline &get_best_pipeline(DeviceKernel kernel) const;
-
- bool kernel_available(DeviceKernel kernel) const;
-
/* ------------------------------------------------------------------ */
/* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 7d1212cb37c..c01f51fb506 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -275,44 +275,96 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
- source[PSO_GENERIC] = get_source(kernel_features);
- mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
-
- MD5Hash md5;
- md5.append(source[PSO_GENERIC]);
- source_md5[PSO_GENERIC] = md5.get_hex();
-
- metal_printf("Front-end compilation finished (generic)\n");
-
- bool result = kernels.load(this, false);
-
- reserve_local_memory(kernel_features);
+ NSError *error = NULL;
- return result;
-}
+ for (int i = 0; i < PSO_NUM; i++) {
+ if (mtlLibrary[i]) {
+ [mtlLibrary[i] release];
+ mtlLibrary[i] = nil;
+ }
+ }
-id<MTLLibrary> MetalDevice::compile(string const &source)
-{
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
}
+ else {
+ return false;
+ }
- NSError *error = NULL;
- id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
+ string metalsrc;
+
+ /* local helper: dump source to disk and return filepath */
+ auto dump_source = [&](int kernel_type) -> string {
+ string &source = source_used_for_compile[kernel_type];
+ string metalsrc = path_cache_get(path_join("kernels",
+ string_printf("%s.%s.metal",
+ kernel_type_as_string(kernel_type),
+ util_md5_string(source).c_str())));
+ path_write_text(metalsrc, source);
+ return metalsrc;
+ };
+
+ /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor,
+ * then compile it into a MTLLibrary */
+ auto fetch_and_compile_source = [&](int kernel_type) {
+ /* Record the source used to compile this library, for hash building later. */
+ string &source = source_used_for_compile[kernel_type];
+
+ switch (kernel_type) {
+ case PSO_GENERIC: {
+ source = get_source(kernel_features);
+ break;
+ }
+ case PSO_SPECIALISED: {
+ /* PSO_SPECIALISED derives from PSO_GENERIC */
+ string &generic_source = source_used_for_compile[PSO_GENERIC];
+ if (generic_source.empty()) {
+ generic_source = get_source(kernel_features);
+ }
+ source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source;
+ break;
+ }
+ default:
+ assert(0);
+ }
+
+ /* create MTLLibrary (front-end compilation) */
+ mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
- if (!mtlLibrary) {
- NSString *err = [error localizedDescription];
- set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
+ bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr);
+
+ if (!mtlLibrary[kernel_type] || do_source_dump) {
+ string metalsrc = dump_source(kernel_type);
+
+ if (!mtlLibrary[kernel_type]) {
+ NSString *err = [error localizedDescription];
+ set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
+
+ return false;
+ }
+ }
+ return true;
+ };
+
+ fetch_and_compile_source(PSO_GENERIC);
+
+ if (use_function_specialisation) {
+ fetch_and_compile_source(PSO_SPECIALISED);
}
+ metal_printf("Front-end compilation finished\n");
+
+ bool result = kernels.load(this, PSO_GENERIC);
+
[options release];
+ reserve_local_memory(kernel_features);
- return mtlLibrary;
+ return result;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
@@ -619,11 +671,6 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
-const MetalKernelPipeline &MetalDevice::get_best_pipeline(DeviceKernel kernel) const
-{
- return kernels.get_best_pipeline(this, kernel);
-}
-
void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
{
if (strcmp(name, "__data") == 0) {
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index 7e398d1cf41..b12491d820d 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -54,41 +54,98 @@ enum {
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
+ void release()
+ {
+ if (pipeline) {
+ [pipeline release];
+ pipeline = nil;
+ if (@available(macOS 11.0, *)) {
+ for (int i = 0; i < METALRT_TABLE_NUM; i++) {
+ if (intersection_func_table[i]) {
+ [intersection_func_table[i] release];
+ intersection_func_table[i] = nil;
+ }
+ }
+ }
+ }
+ if (function) {
+ [function release];
+ function = nil;
+ }
+ if (@available(macOS 11.0, *)) {
+ for (int i = 0; i < METALRT_TABLE_NUM; i++) {
+ if (intersection_func_table[i]) {
+ [intersection_func_table[i] release];
+ }
+ }
+ }
+ }
- void compile();
-
- id<MTLLibrary> mtlLibrary = nil;
- bool scene_specialized;
- string source_md5;
-
- bool use_metalrt;
- bool metalrt_hair;
- bool metalrt_hair_thick;
- bool metalrt_pointcloud;
-
- int threads_per_threadgroup;
-
- DeviceKernel device_kernel;
bool loaded = false;
- id<MTLDevice> mtlDevice = nil;
id<MTLFunction> function = nil;
id<MTLComputePipelineState> pipeline = nil;
- int num_threads_per_block = 0;
-
- string error_str;
API_AVAILABLE(macos(11.0))
id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
- id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
+};
+
+struct MetalKernelLoadDesc {
+ int pso_index = 0;
+ const char *function_name = nullptr;
+ int kernel_index = 0;
+ int threads_per_threadgroup = 0;
+ MTLFunctionConstantValues *constant_values = nullptr;
+ NSArray *linked_functions = nullptr;
+
+ struct IntersectorFunctions {
+ NSArray *defaults;
+ NSArray *shadow;
+ NSArray *local;
+ NSArray *operator[](int index) const
+ {
+ if (index == METALRT_TABLE_DEFAULT)
+ return defaults;
+ if (index == METALRT_TABLE_SHADOW)
+ return shadow;
+ return local;
+ }
+ } intersector_functions = {nullptr};
+};
+
+/* Metal kernel and associate occupancy information. */
+class MetalDeviceKernel {
+ public:
+ ~MetalDeviceKernel();
+
+ bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
+
+ void mark_loaded(int pso_index)
+ {
+ pso[pso_index].loaded = true;
+ }
+
+ int get_num_threads_per_block() const
+ {
+ return num_threads_per_block;
+ }
+ const MetalKernelPipeline &get_pso() const;
+
+ double load_duration = 0.0;
+
+ private:
+ MetalKernelPipeline pso[PSO_NUM];
+
+ int num_threads_per_block = 0;
};
/* Cache of Metal kernels for each DeviceKernel. */
class MetalDeviceKernels {
public:
- bool load(MetalDevice *device, bool scene_specialized);
- bool available(const MetalDevice *device, DeviceKernel kernel) const;
- const MetalKernelPipeline &get_best_pipeline(const MetalDevice *device,
- DeviceKernel kernel) const;
+ bool load(MetalDevice *device, int kernel_type);
+ bool available(DeviceKernel kernel) const;
+ const MetalDeviceKernel &get(DeviceKernel kernel) const;
+
+ MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 44a5e23d00f..9555ca03c8e 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -9,7 +9,6 @@
# include "util/path.h"
# include "util/tbb.h"
# include "util/time.h"
-# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -29,370 +28,82 @@ const char *kernel_type_as_string(int kernel_type)
return "";
}
-bool kernel_has_intersection(DeviceKernel device_kernel)
+MetalDeviceKernel::~MetalDeviceKernel()
{
- return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
- device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
- device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
- device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
-}
-
-struct ShaderCache {
- ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
- {
- }
- ~ShaderCache();
-
- /* Get the fastest a
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list