[Bf-blender-cvs] [6b0008129e6] blender-v3.0-release: Fix T92972: Cycles HIP wrong render display after a recent refactor

Brecht Van Lommel noreply at git.blender.org
Wed Nov 10 20:22:00 CET 2021


Commit: 6b0008129e6370866808bd937161579a2cb5cb51
Author: Brecht Van Lommel
Date:   Wed Nov 10 19:43:19 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB6b0008129e6370866808bd937161579a2cb5cb51

Fix T92972: Cycles HIP wrong render display after a recent refactor

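It's unclear why storing the display pixel as a single half4 fails on HIP.
Maybe the size of half4 is not the expected 8 bytes and adjacent pixels are
overwritten. Or there is some bug in the HIP compiler when writing a struct
into global memory, which we probably don't do elsewhere in the kernel.
As a workaround, the HIP code path now writes the four half components
individually; other devices keep the single half4 store.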

Thanks to Thomas, William and Jeroen for helping investigate this.

===================================================================

M	intern/cycles/kernel/device/gpu/kernel.h

===================================================================

diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 5848ba5df9d..844bbf90f67 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -486,6 +486,26 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
   processor(kfilm_convert, buffer, pixel);
 }
 
+ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
+                                                          const int rgba_offset,
+                                                          const int rgba_stride,
+                                                          const int x,
+                                                          const int y,
+                                                          const half4 half_pixel)
+{
+  /* Work around HIP issue with half float display, see T92972. */
+#ifdef __KERNEL_HIP__
+  ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
+  out[0] = half_pixel.x;
+  out[1] = half_pixel.y;
+  out[2] = half_pixel.z;
+  out[3] = half_pixel.w;
+#else
+  ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
+  *out = half_pixel;
+#endif
+}
+
 /* Common implementation for half4 destination and 4-channel input pass. */
 template<typename Processor>
 ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
@@ -516,8 +536,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
 
   film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
 
-  ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
-  *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+  const half4 half_pixel = float4_to_half4_display(
+      make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+  kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
 }
 
 /* Common implementation for half4 destination and 3-channel input pass. */



More information about the Bf-blender-cvs mailing list