[Bf-blender-cvs] [0069b48] cycles_kernel_split: Record buffer and rng_state offsets in RenderTile

varunsundar08 noreply at git.blender.org
Wed Apr 15 17:36:54 CEST 2015


Commit: 0069b484b31b647a19a220761c34db550d6eb5dc
Author: varunsundar08
Date:   Tue Apr 7 19:30:06 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rB0069b484b31b647a19a220761c34db550d6eb5dc

Record buffer and rng_state offsets in RenderTile

===================================================================

M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/kernel/kernel_Background_BufferUpdate.cl
M	intern/cycles/kernel/kernel_DataInit.cl
M	intern/cycles/kernel/kernel_SumAllRadiance.cl
M	intern/cycles/render/buffers.h

===================================================================

diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 6051782..1fc765c 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -3049,6 +3049,8 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y -
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(d_h), (void*)&d_h));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(d_offset), (void*)&d_offset));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(d_stride), (void*)&d_stride));
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_x), (void*)&(rtile.rng_state_offset_x)));
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_y), (void*)&(rtile.rng_state_offset_y)));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(Queue_data), (void*)&Queue_data));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(Queue_index), (void*)&Queue_index));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_DataInit_SPLIT_KERNEL, narg++, sizeof(dQueue_size), (void*)&dQueue_size));
@@ -3125,6 +3127,8 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y -
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(d_x), (void*)&d_x));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(d_y), (void*)&d_y));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(d_stride), (void*)&d_stride));
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_x), (void*)&(rtile.rng_state_offset_x)));
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(rtile.rng_state_offset_y), (void*)&(rtile.rng_state_offset_y)));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(work_array), (void*)&work_array));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(Queue_data), (void*)&Queue_data));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL, narg++, sizeof(Queue_index), (void*)&Queue_index));
@@ -3266,6 +3270,8 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y -
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(d_w), (void *)&d_w));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(d_h), (void *)&d_h));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(d_stride), (void *)&d_stride));
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(rtile.buffer_offset_x), (void *)&(rtile.buffer_offset_x)));
+		opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(rtile.buffer_offset_y), (void *)&(rtile.buffer_offset_y)));
 		opencl_assert(clSetKernelArg(ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL, narg++, sizeof(start_sample), (void*)&start_sample));
 
 		/* Enqueue ckPathTraceKernel_DataInit_SPLIT_KERNEL kernel */
@@ -3684,6 +3690,10 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y -
 			for (int tile_iter_x = 0; tile_iter_x < num_tiles_x; tile_iter_x++) {
 				int rtile_index = tile_iter_y * num_tiles_x + tile_iter_x;
 
+				to_path_trace_rtile[rtile_index].rng_state_offset_x = tile_iter_x * render_feasible_tile_size.x;
+				to_path_trace_rtile[rtile_index].rng_state_offset_y = tile_iter_y * render_feasible_tile_size.y;
+				to_path_trace_rtile[rtile_index].buffer_offset_x = tile_iter_x * render_feasible_tile_size.x;
+				to_path_trace_rtile[rtile_index].buffer_offset_y = tile_iter_y * render_feasible_tile_size.y;
 				to_path_trace_rtile[rtile_index].start_sample = rtile.start_sample;
 				to_path_trace_rtile[rtile_index].num_samples = rtile.num_samples;
 				to_path_trace_rtile[rtile_index].sample = rtile.sample;
@@ -3723,6 +3733,11 @@ One possible tile size is %zux%zu \n", tile_max_x - local_size[0] , tile_max_y -
 			while(task->acquire_tile(this, tile)) {
 
 #ifdef __SPLIT_KERNEL__
+				tile.buffer_offset_x = 0;
+				tile.buffer_offset_y = 0;
+				tile.rng_state_offset_x = 0;
+				tile.rng_state_offset_y = 0;
+
 				/* The second argument is dummy */
 				path_trace(tile, 0);
 				tile.sample = tile.start_sample + tile.num_samples;
diff --git a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
index 7c9999b..bb32791 100644
--- a/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
+++ b/intern/cycles/kernel/kernel_Background_BufferUpdate.cl
@@ -109,6 +109,8 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 	ccl_global float *L_transparent_coop,        /* Required for background hit processing and buffer Update */
 	ccl_global char *ray_state,                  /* Stores information on the current state of a ray */
 	int sw, int sh, int sx, int sy, int stride,
+	int rng_state_offset_x,
+	int rng_state_offset_y,
 	ccl_global unsigned int *work_array,         /* Denotes work of each ray */
 	ccl_global int *Queue_data,                  /* Queues memory */
 	ccl_global int *Queue_index,                 /* Tracks the number of elements in each queue */
@@ -187,7 +189,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 		tile_y = tile_index / sw;
 		my_sample_tile = ray_index - (tile_index * parallel_samples);
 #endif
-		rng_state += tile_x + tile_y * stride;
+		rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * stride;
 		per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
 
 		if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
@@ -249,7 +251,7 @@ __kernel void kernel_ocl_path_trace_Background_BufferUpdate_SPLIT_KERNEL(
 				my_sample_tile = 0;
 
 				/* Remap rng_state according to the current work */
-				rng_state = initial_rng + (tile_x + tile_y * stride);
+				rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * stride);
 				/* Remap per_sample_output_buffers according to the current work */
 				per_sample_output_buffers = initial_per_sample_output_buffers
 											+ (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
diff --git a/intern/cycles/kernel/kernel_DataInit.cl b/intern/cycles/kernel/kernel_DataInit.cl
index f1c9001..9e86b9b 100644
--- a/intern/cycles/kernel/kernel_DataInit.cl
+++ b/intern/cycles/kernel/kernel_DataInit.cl
@@ -204,6 +204,8 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL(
 #include "kernel_textures.h"
 
 	int start_sample, int sx, int sy, int sw, int sh, int offset, int stride,
+	int rng_state_offset_x,
+	int rng_state_offset_y,
 	ccl_global int *Queue_data,                  /* Memory for queues */
 	ccl_global int *Queue_index,                 /* Tracks the number of elements in queues */
 	int queuesize,                               /* size (capacity) of the queue */
@@ -417,7 +419,7 @@ __kernel void kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL(
 		pixel_y = sy + tile_y;
 #endif // __WORK_STEALING__
 
-		rng_state += tile_x + tile_y * stride;
+		rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * stride;
 
 		/* Initialise per_sample_output_buffers to all zeros */
 		per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
diff --git a/intern/cycles/kernel/kernel_SumAllRadiance.cl b/intern/cycles/kernel/kernel_SumAllRadiance.cl
index 440bc6f..a9a43ed 100644
--- a/intern/cycles/kernel/kernel_SumAllRadiance.cl
+++ b/intern/cycles/kernel/kernel_SumAllRadiance.cl
@@ -30,13 +30,15 @@ __kernel void kernel_ocl_path_trace_SumAllRadiance_SPLIT_KERNEL(
 	ccl_global float *buffer,                    /* Output buffer of RenderTile */
 	ccl_global float *per_sample_output_buffer,  /* Radiance contributed by all samples */
 	int parallel_samples, int sw, int sh, int stride,
+	int buffer_offset_x,
+	int buffer_offset_y,
 	int start_sample)
 {
 	int x = get_global_id(0);
 	int y = get_global_id(1);
 
 	if(x < sw && y < sh) {
-		buffer += (x + y * stride) * (data->film.pass_stride);
+		buffer += ((buffer_offset_x + x) + (buffer_offset_y + y) * stride) * (data->film.pass_stride);
 		per_sample_output_buffer += ((x + y * stride) * parallel_samples) * (data->film.pass_stride);
 
 		int sample_stride = (data->film.pass_stride);
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index afff012..8f224f1 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -141,6 +141,11 @@ public:
 
 	/* user set tile-size */
 	int2 tile_size;
+	/* Used in split kernel */
+	int buffer_offset_x;
+	int buffer_offset_y;
+	int rng_state_offset_x;
+	int rng_state_offset_y;
 
 	device_ptr buffer;
 	device_ptr rng_state;




More information about the Bf-blender-cvs mailing list