[Bf-blender-cvs] [70501a4] cycles_split_kernel: Cycles: Rearrange split kernel code to move all logic out of OpenCL kernel files
Mai Lavelle
noreply at git.blender.org
Tue Oct 25 15:21:06 CEST 2016
Commit: 70501a43c663b62a20a53f0c67b385c60bb1ffba
Author: Mai Lavelle
Date: Tue Oct 25 15:03:53 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB70501a43c663b62a20a53f0c67b385c60bb1ffba
Cycles: Rearrange split kernel code to move all logic out of OpenCL kernel files
We need all logic in a place that can be used when building for other devices
besides OpenCL. This leaves things a little messy at the moment, code
deduplication and general clean up can happen later.
===================================================================
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
M intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
M intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
M intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
M intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
M intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
M intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_next_iteration_setup.h
A intern/cycles/kernel/split/kernel_queue_enqueue.h
M intern/cycles/kernel/split/kernel_scene_intersect.h
M intern/cycles/kernel/split/kernel_shader_eval.h
M intern/cycles/kernel/split/kernel_shadow_blocked.h
M intern/cycles/kernel/split/kernel_sum_all_radiance.h
===================================================================
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d346467..f5ec9be 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -192,6 +192,7 @@ set(SRC_SPLIT_HEADERS
split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_lamp_emission.h
split/kernel_next_iteration_setup.h
+ split/kernel_queue_enqueue.h
split/kernel_scene_intersect.h
split/kernel_shader_eval.h
split/kernel_shadow_blocked.h
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
index bb8217e..5d543a7 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
@@ -14,77 +14,12 @@
* limitations under the License.
*/
+#include "split/kernel_split_common.h"
#include "split/kernel_background_buffer_update.h"
__kernel void kernel_ocl_path_trace_background_buffer_update(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- if(get_local_id(0) == 0 && get_local_id(1) == 0) {
- local_queue_atomics = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
- if(ray_index == 0) {
- /* We will empty this queue in this kernel. */
- split_params->queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
- char enqueue_flag = 0;
- ray_index = get_ray_index(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- split_state->queue_data,
- split_params->queue_size,
- 1);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
- enqueue_flag =
- kernel_background_buffer_update(kg,
- split_params->rng_state,
- split_params->w,
- split_params->h,
- split_params->x,
- split_params->y,
- split_params->stride,
- split_params->rng_offset_x,
- split_params->rng_offset_y,
- split_params->rng_stride,
- split_params->end_sample,
- split_params->start_sample,
-#ifdef __WORK_STEALING__
- split_params->work_pool_wgs,
- split_params->num_samples,
-#endif
- split_params->parallel_samples,
- ray_index);
-#ifndef __COMPUTE_DEVICE_GPU__
- }
-#endif
-
- /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- * These rays will be made active during next SceneIntersectkernel.
- */
- enqueue_ray_index_local(ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- enqueue_flag,
- split_params->queue_size,
- &local_queue_atomics,
- split_state->queue_data,
- split_params->queue_index);
+ kernel_background_buffer_update(kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
index 648938b..dc83784 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
@@ -14,56 +14,12 @@
* limitations under the License.
*/
+#include "split/kernel_split_common.h"
#include "split/kernel_direct_lighting.h"
__kernel void kernel_ocl_path_trace_direct_lighting(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- if(get_local_id(0) == 0 && get_local_id(1) == 0) {
- local_queue_atomics = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- char enqueue_flag = 0;
- int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
- ray_index = get_ray_index(ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- split_state->queue_data,
- split_params->queue_size,
- 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
- enqueue_flag = kernel_direct_lighting(kg, ray_index);
-
-#ifndef __COMPUTE_DEVICE_GPU__
- }
-#endif
-
-#ifdef __EMISSION__
- /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- enqueue_flag,
- split_params->queue_size,
- &local_queue_atomics,
- split_state->queue_data,
- split_params->queue_index);
-#endif
+ kernel_direct_lighting(kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
index 6698407..5ee9037 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
@@ -14,76 +14,12 @@
* limitations under the License.
*/
+#include "split/kernel_split_common.h"
#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics_bg;
- ccl_local unsigned int local_queue_atomics_ao;
- if(get_local_id(0) == 0 && get_local_id(1) == 0) {
- local_queue_atomics_bg = 0;
- local_queue_atomics_ao = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- char enqueue_flag = 0;
- char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
- int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
- ray_index = get_ray_index(ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- split_state->queue_data,
- split_params->queue_size,
- 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif /* __COMPUTE_DEVICE_GPU__ */
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
- kernel_holdout_emission_blurring_pathtermination_ao(
- kg,
- split_params->w, split_params->h, split_params->x, split_params->y, split_params->stride,
-#ifdef __WORK_STEALING__
- split_params->start_sample,
-#endif
- split_params->parallel_samples,
- ray_index,
- &enqueue_flag,
- &enqueue_flag_AO_SHADOW_RAY_CAST);
-#ifndef __COMPUTE_DEVICE_GPU__
- }
-#endif
-
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- split_params->queue_size,
- &local_queue_atomics_bg,
- split_state->queue_data,
- split_params->queue_index);
-
-#ifdef __AO__
- /* Enqueue to-shadow-ray-cast rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag_AO_SHADOW_RAY_CAST,
- split_params->queue_size,
- &local_queue_atomics_ao,
- split_state->queue_data,
- split_params->queue_index);
-#endif
+ kernel_holdout_emission_blurring_pathtermination_ao(kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index b80692c..33a9c6b 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -14,47 +14,12 @@
* limitations under the License.
*/
+#include "split/kernel_split_common.h"
#include "split/kernel_lamp_emission.h"
__kernel void kernel_ocl_path_trace_lamp_emission(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- /* We will empty this queue in this kernel. */
- if(get_global_id(0) == 0 && get_global_id(1) == 0) {
- split_params->queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- }
- /* Fetch use_queues_flag. */
- ccl_local char local_use_queues_flag;
- if(get_local_id(0) == 0 && get_local_id(1) == 0) {
- local_use_queues_flag = split_params->use_queues_flag[0];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int ray_index;
- if(local_use_queues_flag) {
- int thread_inde
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list