[Bf-blender-cvs] [86f3d22] cycles_split_kernel: Cycles: Refactor so all split kernels have same signature

Mai Lavelle noreply at git.blender.org
Mon Oct 17 14:28:31 CEST 2016


Commit: 86f3d223fd4356af549057a70cd90674c6b69a75
Author: Mai Lavelle
Date:   Mon Oct 17 13:47:19 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB86f3d223fd4356af549057a70cd90674c6b69a75

Cycles: Refactor so all split kernels have same signature

This is to set things up for supporting the split kernel on cpu and other
devices. By having the same signature for each kernel we can deduplicate
a lot of code without needing any trickery. The only kernel that doesn't
share this signature is the `data_init` kernel, which might end up being
different for each device type.

===================================================================

M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/kernel/kernel_globals.h
M	intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
M	intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
M	intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M	intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
M	intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M	intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M	intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
M	intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
M	intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
M	intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
M	intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
M	intern/cycles/kernel/split/kernel_data_init.h
M	intern/cycles/kernel/split/kernel_split_data.h

===================================================================

diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index ce0c702..fc80173 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -223,6 +223,7 @@ public:
 			void *sd_input;
 			void *isect_shadow;
 			SplitData split_data;
+			SplitParams split_param_data;
 		} KernelGlobals;
 
 		return sizeof(KernelGlobals);
@@ -422,6 +423,7 @@ public:
 			kernel_set_args(program_data_init(),
 			                start_arg_index,
 			                start_sample,
+			                end_sample,
 			                d_x,
 			                d_y,
 			                d_w,
@@ -438,128 +440,24 @@ public:
 			                work_pool_wgs,
 			                num_samples,
 #endif
-			                num_parallel_samples);
-
-		//printf("kernel_set_args scene_intersect\n");
-		kernel_set_args(program_scene_intersect(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                d_w,
-		                d_h,
-		                Queue_index,
-		                dQueue_size,
-		                use_queues_flag,
-		                num_parallel_samples);
-
-		//printf("kernel_set_args lamp_emission\n");
-		kernel_set_args(program_lamp_emission(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                d_w,
-		                d_h,
-		                Queue_index,
-		                dQueue_size,
-		                use_queues_flag,
-		                num_parallel_samples);
-
-		//printf("kernel_set_args queue_enqueue\n");
-		kernel_set_args(program_queue_enqueue(),
-		                0,
-		                kgbuffer,
-			            d_data,
-		                Queue_index,
-		                dQueue_size);
-
-		//printf("kernel_set_args background_buffer_update\n");
-		kernel_set_args(program_background_buffer_update(),
-		                 0,
-		                 kgbuffer,
-		                 d_data,
-		                 d_rng_state,
-		                 d_w,
-		                 d_h,
-		                 d_x,
-		                 d_y,
-		                 d_stride,
-		                 rtile.rng_state_offset_x,
-		                 rtile.rng_state_offset_y,
-		                 rtile.buffer_rng_state_stride,
-		                 Queue_index,
-		                 dQueue_size,
-		                 end_sample,
-		                 start_sample,
-#ifdef __WORK_STEALING__
-		                 work_pool_wgs,
-		                 num_samples,
-#endif
-		                 num_parallel_samples);
-
-		//printf("kernel_set_args shader_eval\n");
-		kernel_set_args(program_shader_eval(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                Queue_index,
-		                dQueue_size);
-
-		//printf("kernel_set_args holdout_emission_blurring_pathtermination_ao\n");
-		kernel_set_args(program_holdout_emission_blurring_pathtermination_ao(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                d_w,
-		                d_h,
-		                d_x,
-		                d_y,
-		                d_stride,
-		                Queue_index,
-		                dQueue_size,
-#ifdef __WORK_STEALING__
-		                start_sample,
-#endif
-		                num_parallel_samples);
-
-		//printf("kernel_set_args direct_lighting\n");
-		kernel_set_args(program_direct_lighting(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                Queue_index,
-		                dQueue_size);
-
-		//printf("kernel_set_args shadow_blocked\n");
-		kernel_set_args(program_shadow_blocked(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                Queue_index,
-		                dQueue_size);
-
-		//printf("kernel_set_args next_iteration_setup\n");
-		kernel_set_args(program_next_iteration_setup(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                Queue_index,
-		                dQueue_size,
-		                use_queues_flag);
-
-		//printf("kernel_set_args sum_all_radiance\n");
-		kernel_set_args(program_sum_all_radiance(),
-		                0,
-		                kgbuffer,
-		                d_data,
-		                d_buffer,
-		                num_parallel_samples,
-		                d_w,
-		                d_h,
-		                d_stride,
-		                rtile.buffer_offset_x,
-		                rtile.buffer_offset_y,
-		                rtile.buffer_rng_state_stride,
-		                start_sample);
+			                num_parallel_samples,
+			                rtile.buffer_offset_x,
+			                rtile.buffer_offset_y,
+			                rtile.buffer_rng_state_stride,
+							d_buffer);
+
+#define KERNEL_SET_ARGS(name) kernel_set_args(program_##name(), 0, kgbuffer, d_data);
+		KERNEL_SET_ARGS(scene_intersect);
+		KERNEL_SET_ARGS(lamp_emission);
+		KERNEL_SET_ARGS(queue_enqueue);
+		KERNEL_SET_ARGS(background_buffer_update);
+		KERNEL_SET_ARGS(shader_eval);
+		KERNEL_SET_ARGS(holdout_emission_blurring_pathtermination_ao);
+		KERNEL_SET_ARGS(direct_lighting);
+		KERNEL_SET_ARGS(shadow_blocked);
+		KERNEL_SET_ARGS(next_iteration_setup);
+		KERNEL_SET_ARGS(sum_all_radiance);
+#undef KERNEL_SET_ARGS
 
 		/* Macro for Enqueuing split kernels. */
 #define GLUE(a, b) a ## b
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 87ee551..e40ed05 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -103,6 +103,7 @@ typedef ccl_addr_space struct KernelGlobals {
 	ShaderData *sd_input;
 	Intersection *isect_shadow;
 	SplitData split_data;
+	SplitParams split_param_data;
 #  endif
 } KernelGlobals;
 
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
index 5fcfd82..bb8217e 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
@@ -18,21 +18,7 @@
 
 __kernel void kernel_ocl_path_trace_background_buffer_update(
         KernelGlobals *kg,
-        ccl_constant KernelData *data,
-        ccl_global uint *rng_state,
-        int sw, int sh, int sx, int sy, int stride,
-        int rng_state_offset_x,
-        int rng_state_offset_y,
-        int rng_state_stride,
-        ccl_global int *Queue_index,           /* Tracks the number of elements in each queue */
-        int queuesize,                         /* Size (capacity) of each queue */
-        int end_sample,
-        int start_sample,
-#ifdef __WORK_STEALING__
-        ccl_global unsigned int *work_pool_wgs,
-        unsigned int num_samples,
-#endif
-        int parallel_samples)                  /* Number of samples to be processed in parallel */
+        ccl_constant KernelData *data)
 {
 	ccl_local unsigned int local_queue_atomics;
 	if(get_local_id(0) == 0 && get_local_id(1) == 0) {
@@ -43,13 +29,13 @@ __kernel void kernel_ocl_path_trace_background_buffer_update(
 	int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
 	if(ray_index == 0) {
 		/* We will empty this queue in this kernel. */
-		Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+		split_params->queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
 	}
 	char enqueue_flag = 0;
 	ray_index = get_ray_index(ray_index,
 	                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
 	                          split_state->queue_data,
-	                          queuesize,
+	                          split_params->queue_size,
 	                          1);
 
 #ifdef __COMPUTE_DEVICE_GPU__
@@ -70,18 +56,22 @@ __kernel void kernel_ocl_path_trace_background_buffer_update(
 #endif
 		enqueue_flag =
 			kernel_background_buffer_update(kg,
-			                                rng_state,
-			                                sw, sh, sx, sy, stride,
-			                                rng_state_offset_x,
-			                                rng_state_offset_y,
-			                                rng_state_stride,
-			                                end_sample,
-			                                start_sample,
+			                                split_params->rng_state,
+			                                split_params->w,
+			                                split_params->h,
+			                                split_params->x,
+			                                split_params->y,
+			                                split_params->stride,
+			                                split_params->rng_offset_x,
+			                                split_params->rng_offset_y,
+			                                split_params->rng_stride,
+			                                split_params->end_sample,
+			                                split_params->start_sample,
 #ifdef __WORK_STEALING__
-			                                work_pool_wgs,
-			                                num_samples,
+			                                split_params->work_pool_wgs,
+			                                split_params->num_samples,
 #endif
-			                                parallel_samples,
+			                                split_params->parallel_samples,
 			                                ray_index);
 #ifndef __COMPUTE_DEVICE_GPU__
 	}
@@ -93,8 +83,8 @@ __kernel void kernel_ocl_path_trace_background_buffer_update(
 	enqueue_ray_index_local(ray_index,
 	                        QUEUE_ACTIVE_AND_REGENERATED_RAYS,
 	                        enqueue_flag,
-	                        queuesize,
+	                        split_params->queue_size,
 	                        &local_queue_atomics,
 	                        split_state->queue_data,
-	                        Queue_index);
+	                        split_params->queue_index);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index f4f0f0d..205e65a 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -28,7 +28,9 @@ __kernel void kernel_ocl_path_tr

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list