[Bf-blender-cvs] [eb7827e7970] master: Cycles: Fix film convert address space mismatch on Metal

Michael Jones noreply at git.blender.org
Fri Nov 26 14:59:03 CET 2021


Commit: eb7827e7970cca8e3fb0e0bf39e8742e69f0b2b6
Author: Michael Jones
Date:   Wed Nov 24 20:34:27 2021 +0000
Branches: master
https://developer.blender.org/rBeb7827e7970cca8e3fb0e0bf39e8742e69f0b2b6

Cycles: Fix film convert address space mismatch on Metal

This patch fixes an address space mismatch in the film convert kernels on Metal. The `film_get_pass_pixel_...` functions take a `ccl_private` result pointer, but the film convert kernels pass a `ccl_global` memory pointer. Specialising the pass-fetch functions with templates results in compilation errors on Visual Studio, so instead this patch just adds an intermediate local on Metal.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D13350

===================================================================

M	intern/cycles/kernel/device/gpu/kernel.h

===================================================================

diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 22e2a61a06d..24702de496c 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -547,6 +547,33 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
 #endif
 }
 
+#ifdef __KERNEL_METAL__
+
+/* film_get_pass_pixel_... takes a ccl_private result but `pixel` is ccl_global, so fetch into
+ * a local and copy out. Templating them works on MSL, but not on other compilers. */
+#  define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \
+    float local_pixel[4]; \
+    film_get_pass_pixel_##variant(&kfilm_convert, buffer, local_pixel); \
+    if (input_channel_count >= 1) { \
+      pixel[0] = local_pixel[0]; \
+    } \
+    if (input_channel_count >= 2) { \
+      pixel[1] = local_pixel[1]; \
+    } \
+    if (input_channel_count >= 3) { \
+      pixel[2] = local_pixel[2]; \
+    } \
+    if (input_channel_count >= 4) { \
+      pixel[3] = local_pixel[3]; \
+    }
+
+#else
+
+#  define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \
+    film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel);
+
+#endif
+
 #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
   ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
       ccl_gpu_kernel_signature(film_convert_##variant, \
@@ -574,7 +601,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
     ccl_global float *pixel = pixels + \
                               (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
 \
-    film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
+    FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \
   } \
 \
   ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \



More information about the Bf-blender-cvs mailing list