[Bf-blender-cvs] [0161dd62b2] cycles_split_kernel: Cycles: Zero split kernel state memory outside of kernel
Mai Lavelle
noreply at git.blender.org
Thu Jan 26 06:36:59 CET 2017
Commit: 0161dd62b21a040b990b11e7c2afafe199bcdf14
Author: Mai Lavelle
Date: Thu Jan 26 00:05:55 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB0161dd62b21a040b990b11e7c2afafe199bcdf14
Cycles: Zero split kernel state memory outside of kernel
Fixes a problem seen with CUDA. The global work size wasn't large enough for the
data_init kernel to zero the full buffers, so now it's done from the host.
===================================================================
M intern/cycles/device/device_split_kernel.cpp
M intern/cycles/device/device_split_kernel.h
M intern/cycles/kernel/split/kernel_data_init.h
===================================================================
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index e47b65b459..57f2809f08 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -194,6 +194,12 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
return false;
}
+ /* reset state memory here as global size for data_init
+ * kernel might not be large enough to do in kernel
+ */
+ device->mem_zero(work_pool_wgs);
+ device->mem_zero(split_data);
+
if(!device->enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile,
num_global_elements,
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 92dfb7f099..7c9db7fbd1 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -50,7 +50,7 @@ private:
* memory.
*/
device_memory kgbuffer;
- device_memory split_data;
+ device_vector<uchar> split_data;
device_vector<uchar> ray_state;
device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
@@ -61,7 +61,7 @@ private:
double avg_time_per_sample;
/* Work pool with respect to each work group. */
- device_memory work_pool_wgs;
+ device_vector<uchar> work_pool_wgs;
/* clos_max value for which the kernels have been loaded currently. */
int current_max_closure;
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 56a334d384..10f90c5196 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -127,12 +127,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- /* Initialize work_pools */
- if(thread_index < kernel_num_work_pools(kg)) {
- work_pools[thread_index] = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
/* Initialize queue data and queue index. */
if(thread_index < queuesize) {
/* Initialize active ray queue. */
@@ -193,19 +187,9 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
-
- /* Initialise per_sample_output_buffers to all zeros. */
ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
- int per_sample_output_buffers_iterator = 0;
- for(per_sample_output_buffers_iterator = 0;
- per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
- per_sample_output_buffers_iterator++)
- {
- per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
- }
-
/* Initialize random numbers and ray. */
kernel_path_trace_setup(kg,
rng_state,
More information about the Bf-blender-cvs
mailing list