[Bf-blender-cvs] [436552d] cycles_kernel_split: Support__KERNEL_DEBUG__ flag
varunsundar08
noreply at git.blender.org
Thu Apr 30 23:25:24 CEST 2015
Commit: 436552d2ffcb4699adc419ccdaf0a1ed92d635b9
Author: varunsundar08
Date: Wed Apr 29 23:11:17 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rB436552d2ffcb4699adc419ccdaf0a1ed92d635b9
Support__KERNEL_DEBUG__ flag
===================================================================
M intern/cycles/device/device_opencl.cpp
M intern/cycles/kernel/kernel_Background_BufferUpdate.cl
M intern/cycles/kernel/kernel_DataInit.cl
M intern/cycles/kernel/kernel_SceneIntersect.cl
M intern/cycles/kernel/kernel_debug.h
===================================================================
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 6d4cf1c..5e8b171 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -130,7 +130,6 @@ static string opencl_kernel_build_options(const string& platform, const string *
build_options += "-g -s \"" + *debug_src + "\"";
}
- /* TODO : support KERNEL_DEBUG for split kernel */
if(opencl_kernel_use_debug())
build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
@@ -1280,6 +1279,11 @@ public:
cl_mem Intersection_coop_AO;
cl_mem Intersection_coop_DL;
+#ifdef WITH_CYCLES_DEBUG
+ /* DebugData memory */
+ cl_mem debugdata_coop;
+#endif
+
/* Global state array that tracks ray state */
cl_mem ray_state;
@@ -1320,6 +1324,10 @@ public:
size_t Intersection_coop_AO_size;
size_t Intersection_coop_DL_size;
+#ifdef WITH_CYCLES_DEBUG
+ size_t debugdata_size;
+#endif
+
/* Amount of memory in output buffer associated with one pixel/thread */
size_t per_thread_output_buffer_size;
@@ -1528,6 +1536,10 @@ public:
Intersection_coop_AO = NULL;
Intersection_coop_DL = NULL;
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop = NULL;
+#endif
+
work_array = NULL;
/* Queue */
@@ -1567,6 +1579,10 @@ public:
Intersection_coop_AO_size = sizeof(Intersection);
Intersection_coop_DL_size = sizeof(Intersection);
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_size = sizeof(DebugData);
+#endif
+
per_thread_output_buffer_size = 0;
hostRayStateArray = NULL;
PathIteration_times = PATH_ITER_INC_FACTOR;
@@ -2685,6 +2701,11 @@ public:
if (Intersection_coop_DL != NULL)
clReleaseMemObject(Intersection_coop_DL);
+#ifdef WITH_CYCLES_DEBUG
+ if(debugdata_coop != NULL)
+ clReleaseMemObject(debugdata_coop);
+#endif
+
if (use_queues_flag != NULL)
clReleaseMemObject(use_queues_flag);
@@ -3174,6 +3195,11 @@ public:
Intersection_coop_DL = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, num_global_elements * Intersection_coop_DL_size, NULL, &ciErr);
assert(ciErr == CL_SUCCESS && "Can't create Intersection_coop_DL_memory");
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, num_global_elements * debugdata_size, NULL, &ciErr);
+ assert(ciErr == CL_SUCCESS && "Can't create debugdata_coop memory");
+#endif
+
ray_state = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, num_global_elements * rayState_size, NULL, &ciErr);
assert(ciErr == CL_SUCCESS && "Can't create ray_state memory");
@@ -3312,6 +3338,9 @@ public:
opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(work_pool_wgs), (void*)&work_pool_wgs));
opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(num_samples), (void*)&num_samples));
#endif
+#ifdef WITH_CYCLES_DEBUG
+ opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(debugdata_coop), (void*)&debugdata_coop));
+#endif
opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(num_parallel_samples), (void*)&num_parallel_samples));
/* Set arguments for ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL */;
@@ -3330,6 +3359,9 @@ public:
opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(Queue_index), (void*)&Queue_index));
opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(dQueue_size), (void*)&dQueue_size));
opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(use_queues_flag), (void*)&use_queues_flag));
+#ifdef WITH_CYCLES_DEBUG
+ opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(debugdata_coop), (void*)&debugdata_coop));
+#endif
opencl_assert(clSetKernelArg(ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL, narg++, sizeof(num_parallel_samples), (void*)&num_parallel_samples));
/* Set arguments for ckPathTracekernel_LampEmission_SPLIT_KERNEL kernel */
@@ -3392,6 +3424,9 @@ public:
opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(work_pool_wgs), (void*)&work_pool_wgs));
opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(num_samples), (void*)&num_samples));
#endif
+#ifdef WITH_CYCLES_DEBUG
+ opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(debugdata_coop), (void*)&debugdata_coop));
+#endif
opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(num_parallel_samples), (void*)&num_parallel_samples));
/* Set arguments for ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL */
diff --git a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
index b797edc..effef4c 100644
--- a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
+++ b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
@@ -122,6 +122,9 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
ccl_global unsigned int *work_pool_wgs,
unsigned int num_samples,
#endif
+#ifdef __KERNEL_DEBUG__
+ ccl_global DebugData *debugdata_coop,
+#endif
int parallel_samples /* Number of samples to be processed in parallel */
)
{
@@ -156,6 +159,9 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
ccl_global KernelGlobals *kg = (ccl_global KernelGlobals *)globals;
ccl_global ShaderData *sd = (ccl_global ShaderData *)shader_data;
+#ifdef __KERNEL_DEBUG__
+ ccl_global DebugData *debug_data = &debugdata_coop[ray_index];
+#endif
ccl_global PathState *state = &PathState_coop[ray_index];
ccl_global PathRadiance *L = L = &PathRadiance_coop[ray_index];
ccl_global Ray *ray = &Ray_coop[ray_index];
@@ -218,7 +224,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
float3 L_sum = path_radiance_clamp_and_sum(kg, L);
kernel_write_light_passes(kg, per_sample_output_buffers, L, sample);
#ifdef __KERNEL_DEBUG__
- kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
+ kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample);
#endif
float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent));
@@ -275,8 +281,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
path_radiance_init(L, kernel_data.film.use_light_pass);
path_state_init(kg, state, rng, sample, ray);
#ifdef __KERNEL_DEBUG__
- DebugData debug_data;
- debug_data_init(&debug_data);
+ debug_data_init(debug_data);
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
enqueue_flag = 1;
diff --git a/intern/cycles/kernel/kernel_DataInit.cl b/intern/cycles/kernel/kernel_DataInit.cl
index d852410..423dcd0 100644
--- a/intern/cycles/kernel/kernel_DataInit.cl
+++ b/intern/cycles/kernel/kernel_DataInit.cl
@@ -189,6 +189,9 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL(
ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */
unsigned int num_samples, /* Total number of samples per pixel */
#endif
+#ifdef __KERNEL_DEBUG__
+ ccl_global DebugData *debugdata_coop,
+#endif
int parallel_samples /* Number of samples to be processed in parallel */
)
{
@@ -384,10 +387,8 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL(
L_transparent_coop[ray_index] = 0.0f;
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg, &PathState_coop[ray_index], &rng_coop[ray_index], my_sample, &Ray_coop[ray_index]);
- /* __KERNEL_DEBUG__ is disabled */
#ifdef __KERNEL_DEBUG__
- DebugData debug_data;
- debug_data_init(&debug_data);
+ debug_data_init(&debugdata_coop[ray_index]);
#endif
} else {
/*These rays do not participate in path-iteration */
diff --git a/intern/cycles/kernel/kernel_SceneIntersect.cl b/intern/cycles/kernel/kernel_SceneIntersect.cl
index 58ce02b..3db25af 100644
--- a/intern/cycles/kernel/kernel_SceneIntersect.cl
+++ b/intern/cycles/kernel/kernel_SceneIntersect.cl
@@ -76,6 +76,9 @@ __kernel void kernel_ocl_path_trace_SceneIntersect_SPLIT_KERNEL(
ccl_global int *Queue_index, /* Tracks the number of elements in queues */
int queuesize, /* Size (capacity) of queues */
ccl_global char *use_queues_flag, /* used to decide if this kernel should use queues to fetch ray index */
+#ifdef __KERNEL_DEBUG__
+ ccl_global DebugData *debugdata_coop,
+#endif
int parallel_samples /* Number of samples to be processed in parallel */
)
{
@@ -115,6 +118,9 @@ __kernel void kernel_ocl_path_trace_SceneIntersect_SPLIT_KERNEL(
/* Load kernel globals structure */
ccl_global KernelGlobals *kg = (ccl_global KernelGlobals *)globals;
+#ifdef __KERNEL_DEBUG__
+ ccl_global DebugData *debug_data = &debugdata_coop[ray_index];
+#endif
ccl_global Intersection *isect = &Intersection_coop[ray_index];
PathState state = PathState_coop[ray_index];
Ray ray = Ray_coop[ray_index];
@@ -145,7 +151,7 @@ __kernel void kernel_ocl_path_trace_SceneIntersect_SPLIT_KERNEL(
#ifdef __KERNEL_DEBUG__
if(state.flag & PATH_RAY_CAMERA) {
- debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ debug_data->num_bvh_traversal_steps += isect->num_traversal_steps;
}
#endif
diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h
index f532442..7ecb097 100644
--- a/intern/cycles/kernel/kernel_debug.h
+++ b/intern/cycles/kernel/kernel_debug.h
@@ -16,15 +16,15 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void debug_data_init(DebugData *debug_data)
+ccl_device_inline void debug_data_init(__ADDR_SPACE__ DebugData *debug_data)
{
debug_data->num_bvh_traversal_steps = 0;
}
-ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
+ccl_device_inline void kernel_write_debug_passes(__ADDR_SPACE__ KernelGlobals *kg,
ccl_global float *bu
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list