[Bf-blender-cvs] [040fa75] master: Fix Cycles CUDA adaptive kernel not working correctly after recent closure changes.
Brecht Van Lommel
noreply at git.blender.org
Tue Aug 9 01:37:15 CEST 2016
Commit: 040fa75d7b9d796d2818d731934caf77e4288b7f
Author: Brecht Van Lommel
Date: Tue Aug 9 01:00:57 2016 +0200
Branches: master
https://developer.blender.org/rB040fa75d7b9d796d2818d731934caf77e4288b7f
Fix Cycles CUDA adaptive kernel not working correctly after recent closure changes.
===================================================================
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/kernel_compat_opencl.h
M intern/cycles/kernel/kernel_types.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index a039b41..063220b 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -47,6 +47,7 @@
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
+#define ccl_align(n) __align__(n)
/* No assert supported for CUDA */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 8505cb8..2ae89dd 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -40,6 +40,7 @@
#define ccl_local __local
#define ccl_private __private
#define ccl_restrict restrict
+#define ccl_align(n) __attribute__((aligned(n)))
#ifdef __SPLIT_KERNEL__
# define ccl_addr_space __global
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 3923804..f3b10c2 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -656,23 +656,18 @@ typedef struct AttributeDescriptor {
* ShaderClosure has a fixed size, and any extra space must be allocated
* with closure_alloc_extra().
*
- * float3 is 12 bytes on CUDA and 16 bytes on CPU/OpenCL, we set the data
- * size to ensure ShaderClosure is 80 bytes total everywhere. */
+ * We pad the struct to 80 bytes and ensure it is aligned to 16 bytes, which
+ * we assume to be the maximum required alignment for any struct. */
#define SHADER_CLOSURE_BASE \
float3 weight; \
ClosureType type; \
float sample_weight \
-typedef ccl_addr_space struct ShaderClosure {
+typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
SHADER_CLOSURE_BASE;
- /* pad to 80 bytes, data types are aligned to own size */
-#ifdef __KERNEL_CUDA__
- float data[15];
-#else
- float data[14];
-#endif
+ float data[14]; /* pad to 80 bytes */
} ShaderClosure;
/* Shader Context
More information about the Bf-blender-cvs mailing list