[Bf-blender-cvs] [436552d] cycles_kernel_split: Support__KERNEL_DEBUG__ flag

varunsundar08 noreply at git.blender.org
Thu Apr 30 23:25:24 CEST 2015


Commit: 436552d2ffcb4699adc419ccdaf0a1ed92d635b9
Author: varunsundar08
Date:   Wed Apr 29 23:11:17 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rB436552d2ffcb4699adc419ccdaf0a1ed92d635b9

Support__KERNEL_DEBUG__ flag

===================================================================

M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/kernel/kernel_Background_BufferUpdate.cl
M	intern/cycles/kernel/kernel_DataInit.cl
M	intern/cycles/kernel/kernel_SceneIntersect.cl
M	intern/cycles/kernel/kernel_debug.h

===================================================================

diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 6d4cf1c..5e8b171 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -130,7 +130,6 @@ static string opencl_kernel_build_options(const string& platform, const string *
 			build_options += "-g -s \"" + *debug_src + "\"";
 	}
 
-	/* TODO : support KERNEL_DEBUG for split kernel */
 	if(opencl_kernel_use_debug())
 		build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
 
@@ -1280,6 +1279,11 @@ public:
 	cl_mem Intersection_coop_AO;
 	cl_mem Intersection_coop_DL;
 
+#ifdef WITH_CYCLES_DEBUG
+	/* DebugData memory */
+	cl_mem debugdata_coop;
+#endif
+
 	/* Global state array that tracks ray state */
 	cl_mem ray_state;
 
@@ -1320,6 +1324,10 @@ public:
 	size_t Intersection_coop_AO_size;
 	size_t Intersection_coop_DL_size;
 
+#ifdef WITH_CYCLES_DEBUG
+	size_t debugdata_size;
+#endif
+
 	/* Amount of memory in output buffer associated with one pixel/thread */
 	size_t per_thread_output_buffer_size;
 
@@ -1528,6 +1536,10 @@ public:
 		Intersection_coop_AO = NULL;
 		Intersection_coop_DL = NULL;
 
+#ifdef WITH_CYCLES_DEBUG
+		debugdata_coop = NULL;
+#endif
+
 		work_array = NULL;
 
 		/* Queue */
@@ -1567,6 +1579,10 @@ public:
 		Intersection_coop_AO_size = sizeof(Intersection);
 		Intersection_coop_DL_size = sizeof(Intersection);
 
+#ifdef WITH_CYCLES_DEBUG
+		debugdata_size = sizeof(DebugData);
+#endif
+
 		per_thread_output_buffer_size = 0;
 		hostRayStateArray = NULL;
 		PathIteration_times = PATH_ITER_INC_FACTOR;
@@ -2685,6 +2701,11 @@ public:
 		if (Intersection_coop_DL != NULL)
 			clReleaseMemObject(Intersection_coop_DL);
 
+#ifdef WITH_CYCLES_DEBUG
+		if(debugdata_coop != NULL)
+			clReleaseMemObject(debugdata_coop);
+#endif
+
 		if (use_queues_flag != NULL)
 			clReleaseMemObject(use_queues_flag);
 
@@ -3174,6 +3195,11 @@ public:
 			Intersection_coop_DL = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, num_global_elements * Intersection_coop_DL_size, NULL, &ciErr);
 			assert(ciErr == CL_SUCCESS && "Can't create Intersection_coop_DL_memory");
 
+#ifdef WITH_CYCLES_DEBUG
+			debugdata_coop = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, num_global_elements * debugdata_size, NULL, &ciErr);
+			assert(ciErr == CL_SUCCESS && "Can't create debugdata_coop memory");
+#endif
+
 			ray_state = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, num_global_elements * rayState_size, NULL, &ciErr);
 			assert(ciErr == CL_SUCCESS && "Can't create ray_state memory");
 
@@ -3312,6 +3338,9 @@ public:
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(work_pool_wgs), (void*)&work_pool_wgs));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(num_samples), (void*)&num_samples));
 #endif
+#ifdef WITH_CYCLES_DEBUG
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(debugdata_coop), (void*)&debugdata_coop));
+#endif
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(num_parallel_samples), (void*)&num_parallel_samples));
 
 		/* Set arguments for ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL */;
@@ -3330,6 +3359,9 @@ public:
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(Queue_index), (void*)&Queue_index));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(dQueue_size), (void*)&dQueue_size));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(use_queues_flag), (void*)&use_queues_flag));
+#ifdef WITH_CYCLES_DEBUG
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(debugdata_coop), (void*)&debugdata_coop));
+#endif
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(num_parallel_samples), (void*)&num_parallel_samples));
 
 		/* Set arguments for ckPathTracekernel_LampEmission_SPLIT_KERNEL kernel */
@@ -3392,6 +3424,9 @@ public:
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(work_pool_wgs), (void*)&work_pool_wgs));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(num_samples), (void*)&num_samples));
 #endif
+#ifdef WITH_CYCLES_DEBUG
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(debugdata_coop), (void*)&debugdata_coop));
+#endif
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(num_parallel_samples), (void*)&num_parallel_samples));
 
 		/* Set arguments for ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL */
diff --git a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
index b797edc..effef4c 100644
--- a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
+++ b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
@@ -122,6 +122,9 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 	ccl_global unsigned int *work_pool_wgs,
 	unsigned int num_samples,
 #endif
+#ifdef __KERNEL_DEBUG__
+	ccl_global DebugData *debugdata_coop,
+#endif
 	int parallel_samples                         /* Number of samples to be processed in parallel */
 	)
 {
@@ -156,6 +159,9 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 		ccl_global KernelGlobals *kg = (ccl_global KernelGlobals *)globals;
 		ccl_global ShaderData *sd = (ccl_global ShaderData *)shader_data;
 
+#ifdef __KERNEL_DEBUG__
+		ccl_global DebugData *debug_data = &debugdata_coop[ray_index];
+#endif
 		ccl_global PathState *state = &PathState_coop[ray_index];
 		ccl_global PathRadiance *L = L = &PathRadiance_coop[ray_index];
 		ccl_global Ray *ray = &Ray_coop[ray_index];
@@ -218,7 +224,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 			float3 L_sum = path_radiance_clamp_and_sum(kg, L);
 			kernel_write_light_passes(kg, per_sample_output_buffers, L, sample);
 #ifdef __KERNEL_DEBUG__
-			kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
+			kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample);
 #endif
 			float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent));
 
@@ -275,8 +281,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 					path_radiance_init(L, kernel_data.film.use_light_pass);
 					path_state_init(kg, state, rng, sample, ray);
 #ifdef __KERNEL_DEBUG__
-					DebugData debug_data;
-					debug_data_init(&debug_data);
+					debug_data_init(debug_data);
 #endif
 					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
 					enqueue_flag = 1;
diff --git a/intern/cycles/kernel/kernel_DataInit.cl b/intern/cycles/kernel/kernel_DataInit.cl
index d852410..423dcd0 100644
--- a/intern/cycles/kernel/kernel_DataInit.cl
+++ b/intern/cycles/kernel/kernel_DataInit.cl
@@ -189,6 +189,9 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL(
 	ccl_global unsigned int *work_pool_wgs,      /* Work pool for each work group */
 	unsigned int num_samples,                    /* Total number of samples per pixel */
 #endif
+#ifdef __KERNEL_DEBUG__
+	ccl_global DebugData *debugdata_coop,
+#endif
 	int parallel_samples                         /* Number of samples to be processed in parallel */
 	)
 {
@@ -384,10 +387,8 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL(
 			L_transparent_coop[ray_index] = 0.0f;
 			path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
 			path_state_init(kg, &PathState_coop[ray_index], &rng_coop[ray_index], my_sample, &Ray_coop[ray_index]);
-			/* __KERNEL_DEBUG__ is disabled */
 #ifdef __KERNEL_DEBUG__
-			DebugData debug_data;
-			debug_data_init(&debug_data);
+			debug_data_init(&debugdata_coop[ray_index]);
 #endif
 		} else {
 			/*These rays do not participate in path-iteration */
diff --git a/intern/cycles/kernel/kernel_SceneIntersect.cl b/intern/cycles/kernel/kernel_SceneIntersect.cl
index 58ce02b..3db25af 100644
--- a/intern/cycles/kernel/kernel_SceneIntersect.cl
+++ b/intern/cycles/kernel/kernel_SceneIntersect.cl
@@ -76,6 +76,9 @@ __kernel void kernel_ocl_path_trace_SceneIntersect_SPLIT_KERNEL(
 	ccl_global int *Queue_index,                /* Tracks the number of elements in queues */
 	int queuesize,                              /* Size (capacity) of queues */
 	ccl_global char *use_queues_flag,           /* used to decide if this kernel should use queues to fetch ray index */
+#ifdef __KERNEL_DEBUG__
+	ccl_global DebugData *debugdata_coop,
+#endif
 	int parallel_samples                        /* Number of samples to be processed in parallel */
 	)
 {
@@ -115,6 +118,9 @@ __kernel void kernel_ocl_path_trace_SceneIntersect_SPLIT_KERNEL(
 	/* Load kernel globals structure */
 	ccl_global KernelGlobals *kg = (ccl_global KernelGlobals *)globals;
 
+#ifdef __KERNEL_DEBUG__
+	ccl_global DebugData *debug_data = &debugdata_coop[ray_index];
+#endif
 	ccl_global Intersection *isect = &Intersection_coop[ray_index];
 	PathState state = PathState_coop[ray_index];
 	Ray ray = Ray_coop[ray_index];
@@ -145,7 +151,7 @@ __kernel void kernel_ocl_path_trace_SceneIntersect_SPLIT_KERNEL(
 
 #ifdef __KERNEL_DEBUG__
 	if(state.flag & PATH_RAY_CAMERA) {
-		debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+		debug_data->num_bvh_traversal_steps += isect->num_traversal_steps;
 	}
 #endif
 
diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h
index f532442..7ecb097 100644
--- a/intern/cycles/kernel/kernel_debug.h
+++ b/intern/cycles/kernel/kernel_debug.h
@@ -16,15 +16,15 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void debug_data_init(DebugData *debug_data)
+ccl_device_inline void debug_data_init(__ADDR_SPACE__ DebugData *debug_data)
 {
 	debug_data->num_bvh_traversal_steps = 0;
 }
 
-ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
+ccl_device_inline void kernel_write_debug_passes(__ADDR_SPACE__ KernelGlobals *kg,
                                                  ccl_global float *bu

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list