[Bf-blender-cvs] [80a90f6c1c9] cycles-x: Fix unaligned address CUDA error in Cycles X

Sergey Sharybin noreply at git.blender.org
Wed Aug 18 12:16:00 CEST 2021


Commit: 80a90f6c1c96254c056c1dd5aa2002e5ae891aed
Author: Sergey Sharybin
Date:   Wed Aug 18 12:11:37 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB80a90f6c1c96254c056c1dd5aa2002e5ae891aed

Fix unaligned address CUDA error in Cycles X

Was reproducible with Italian Flat demo file.

===================================================================

M	intern/cycles/kernel/kernel_id_passes.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index 7bbda2df6c5..ed01f494f98 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -18,6 +18,14 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Element of ID pass stored in the render buffers.
+ * It is `float2` semantically, but it must not carry `float2` alignment requirements, since the
+ * offset of ID passes in the render buffers might not meet the alignment the compiler expects. */
+typedef struct IDPassBufferElement {
+  float x; /* ID stored in the slot; equals ID_NONE while the slot is still empty. */
+  float y; /* Value kernel_sort_id_slots() orders the slots by, largest first. */
+} IDPassBufferElement;
+
 ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
                                              int num_slots,
                                              float id,
@@ -29,7 +37,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
   }
 
   for (int slot = 0; slot < num_slots; slot++) {
-    ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
+    ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
 #ifdef __ATOMIC_PASS_WRITE__
     /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
     if (id_buffer[slot].x == ID_NONE) {
@@ -67,7 +75,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
 
 ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
 {
-  ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
+  ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
   for (int slot = 1; slot < num_slots; ++slot) {
     if (id_buffer[slot].x == ID_NONE) {
       return;
@@ -75,7 +83,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
     /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
     int i = slot;
     while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
-      float2 swap = id_buffer[i];
+      const IDPassBufferElement swap = id_buffer[i];
       id_buffer[i] = id_buffer[i - 1];
       id_buffer[i - 1] = swap;
       --i;



More information about the Bf-blender-cvs mailing list