[Bf-blender-cvs] [80a90f6c1c9] cycles-x: Fix unaligned address CUDA error in Cycles X
Sergey Sharybin
noreply at git.blender.org
Wed Aug 18 12:16:00 CEST 2021
Commit: 80a90f6c1c96254c056c1dd5aa2002e5ae891aed
Author: Sergey Sharybin
Date: Wed Aug 18 12:11:37 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB80a90f6c1c96254c056c1dd5aa2002e5ae891aed
Fix unaligned address CUDA error in Cycles X
The error was reproducible with the Italian Flat demo file.
===================================================================
M intern/cycles/kernel/kernel_id_passes.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index 7bbda2df6c5..ed01f494f98 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -18,6 +18,14 @@
CCL_NAMESPACE_BEGIN
+/* Element of ID pass stored in the render buffers.
+ * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the
+ * render buffers might not meet the alignment expected by the compiler. */
+typedef struct IDPassBufferElement {
+ float x;
+ float y;
+} IDPassBufferElement;
+
ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
int num_slots,
float id,
@@ -29,7 +37,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
}
for (int slot = 0; slot < num_slots; slot++) {
- ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
+ ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
#ifdef __ATOMIC_PASS_WRITE__
/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
if (id_buffer[slot].x == ID_NONE) {
@@ -67,7 +75,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
{
- ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
+ ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
for (int slot = 1; slot < num_slots; ++slot) {
if (id_buffer[slot].x == ID_NONE) {
return;
@@ -75,7 +83,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
/* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
int i = slot;
while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
- float2 swap = id_buffer[i];
+ const IDPassBufferElement swap = id_buffer[i];
id_buffer[i] = id_buffer[i - 1];
id_buffer[i - 1] = swap;
--i;
More information about the Bf-blender-cvs
mailing list