[Bf-blender-cvs] [0161dd62b2] cycles_split_kernel: Cycles: Zero split kernel state memory outside of kernel

Mai Lavelle noreply at git.blender.org
Thu Jan 26 06:36:59 CET 2017


Commit: 0161dd62b21a040b990b11e7c2afafe199bcdf14
Author: Mai Lavelle
Date:   Thu Jan 26 00:05:55 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB0161dd62b21a040b990b11e7c2afafe199bcdf14

Cycles: Zero split kernel state memory outside of kernel

Fixes a problem seen with CUDA. The global work size wasn't large enough for the
data_init kernel to zero the full buffers, so now it's done from the host.

===================================================================

M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/kernel/split/kernel_data_init.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index e47b65b459..57f2809f08 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -194,6 +194,12 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 			return false;
 		}
 
+		/* reset state memory here as global size for data_init
+		 * kernel might not be large enough to do in kernel
+		 */
+		device->mem_zero(work_pool_wgs);
+		device->mem_zero(split_data);
+
 		if(!device->enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
 		                                           subtile,
 		                                           num_global_elements,
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 92dfb7f099..7c9db7fbd1 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -50,7 +50,7 @@ private:
 	 * memory.
 	 */
 	device_memory kgbuffer;
-	device_memory split_data;
+	device_vector<uchar> split_data;
 	device_vector<uchar> ray_state;
 	device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
 
@@ -61,7 +61,7 @@ private:
 	double avg_time_per_sample;
 
 	/* Work pool with respect to each work group. */
-	device_memory work_pool_wgs;
+	device_vector<uchar> work_pool_wgs;
 
 	/* clos_max value for which the kernels have been loaded currently. */
 	int current_max_closure;
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 56a334d384..10f90c5196 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -127,12 +127,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 
 	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 
-	/* Initialize work_pools */
-	if(thread_index < kernel_num_work_pools(kg)) {
-		work_pools[thread_index] = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
 	/* Initialize queue data and queue index. */
 	if(thread_index < queuesize) {
 		/* Initialize active ray queue. */
@@ -193,19 +187,9 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 
 	rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
 
-
-	/* Initialise per_sample_output_buffers to all zeros. */
 	ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
 	per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
 
-	int per_sample_output_buffers_iterator = 0;
-	for(per_sample_output_buffers_iterator = 0;
-	    per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
-	    per_sample_output_buffers_iterator++)
-	{
-		per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
-	}
-
 	/* Initialize random numbers and ray. */
 	kernel_path_trace_setup(kg,
 	                        rng_state,




More information about the Bf-blender-cvs mailing list