[Bf-blender-cvs] [1cad64900e] master: Cycles: Define ccl_local variables in kernel functions

Sergey Sharybin noreply at git.blender.org
Thu Mar 16 11:29:52 CET 2017


Commit: 1cad64900e3f052fa895a4ac2a994d87b0c3fce1
Author: Sergey Sharybin
Date:   Wed Mar 8 13:34:29 2017 +0100
Branches: master
https://developer.blender.org/rB1cad64900e3f052fa895a4ac2a994d87b0c3fce1

Cycles: Define ccl_local variables in kernel functions

Declaring ccl_local in a device function is not supported
by certain compilers.

===================================================================

M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/kernel_split.cu
M	intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
M	intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M	intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
M	intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M	intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
M	intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
M	intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
M	intern/cycles/kernel/split/kernel_buffer_update.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_indirect_background.h
M	intern/cycles/kernel/split/kernel_next_iteration_setup.h
M	intern/cycles/kernel/split/kernel_queue_enqueue.h
M	intern/cycles/kernel/split/kernel_shader_eval.h
M	intern/cycles/kernel/split/kernel_split_data_types.h
M	intern/cycles/kernel/split/kernel_subsurface_scatter.h

===================================================================

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index ba6b103391..e220d85738 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -168,21 +168,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 		kernel_##name(kg); \
 	}
 
+#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
+	{ \
+		ccl_local type locals; \
+		kernel_##name(kg, &locals); \
+	}
+
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
 
 void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
 {
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index fbdf79697d..4479a04492 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -93,21 +93,30 @@ kernel_cuda_path_trace_data_init(
 		kernel_##name(NULL); \
 	}
 
+#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+	extern "C" __global__ void \
+	CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
+	kernel_cuda_##name() \
+	{ \
+		ccl_local type locals; \
+		kernel_##name(NULL, &locals); \
+	}
+
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
index 3c25d1d85a..b61f1cda33 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_buffer_update(
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_buffer_update((KernelGlobals*)kg);
+	ccl_local unsigned int local_queue_atomics;
+	kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
index 942a80f94f..374be6cbd0 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_direct_lighting((KernelGlobals*)kg);
+	ccl_local unsigned int local_queue_atomics;
+	kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
index 209080fecd..351687e203 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
@@ -22,5 +22,8 @@ __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_holdout_emission_blurring_pathtermination_ao((KernelGlobals*)kg);
+	ccl_local BackgroundAOLocals locals;
+	kernel_holdout_emission_blurring_pathtermination_ao(
+	        (KernelGlobals*)kg,
+	        &locals);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
index 2a007e39c3..fd49ed5def 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_next_iteration_setup(
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_next_iteration_setup((KernelGlobals*)kg);
+	ccl_local unsigned int local_queue_atomics;
+	kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
index 19074db1b8..6dd9d39c4e 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_queue_enqueue(
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_queue_enqueue((KernelGlobals*)kg);
+	ccl_local QueueEnqueueLocals locals;
+	kernel_queue_enqueue((KernelGlobals*)kg, &locals);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
index 534d37f695..71ac288697 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_shader_eval(
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_shader_eval((KernelGlobals*)kg);
+	ccl_local unsigned int local_queue_atomics;
+	kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 34a01bbdfe..853bba2efc 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_subsurface_scatter(
         ccl_global char *kg,
         ccl_constant KernelData *data)
 {
-	kernel_subsurface_scatter((KernelGlobals*)kg);
+	ccl_local unsigned int local_queue_atomics;
+	kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics);
 }
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index e8f574c554..f36899b884 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -38,11 +38,11 @@ CCL_NAMESPACE_BEGIN
  *     RAY_REGENERATED rays.
  *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.
  */
-ccl_device void kernel_buffer_update(KernelGlobals *kg)
+ccl_device void kernel_buffer_update(KernelGlobals *kg,
+                                     ccl_local_param unsigned int *local_queue_atomics)
 {
-	ccl_local unsigned int local_queue_atomics;
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		local_queue_atomics = 0;
+		*local_queue_atomics = 0;
 	}
 	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
@@ -188,7 +188,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg)
 	                        QUEUE_ACTIVE_AND_REGENERATED_RAYS,
 	                        enqueue_flag,
 	                        kernel_split_params.queue_size,
-	                        &local_queue_atomics,
+	                        local_queue_atomics,
 	                        kernel_split_state.queue_data,
 	                        kernel_split_params.queue_index);
 }
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index dfe461fb35..3d062cf0e2 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -40,11 +40,11 @@ CCL_NAMESPACE_BEGIN
  *   shadow_blocked function must be executed, after this kernel call
  *    Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty.
  */
-ccl_device void kernel_direct_lighting(KernelGlobals *kg)
+ccl_device void kernel_direct_lighting(KernelGlobals *kg,
+                                       ccl_local_param unsigned int

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list