[Bf-blender-cvs] [d75d262318c] split-kernel-faster-building: Cycles: Split shader_eval out of the kernel_lamp_emission kernel
Mai Lavelle
noreply at git.blender.org
Mon Nov 27 08:52:19 CET 2017
Commit: d75d262318c845058a6410fb4c764dadbaa20b6b
Author: Mai Lavelle
Date: Mon Nov 27 02:21:06 2017 -0500
Branches: split-kernel-faster-building
https://developer.blender.org/rBd75d262318c845058a6410fb4c764dadbaa20b6b
Cycles: Split shader_eval out of the kernel_lamp_emission kernel
With this change, kernels for the BMW and classroom scenes build in half the
time they take on master. Render times are 1% faster as well.
===================================================================
M intern/cycles/device/device_split_kernel.cpp
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel_split.cu
M intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M intern/cycles/kernel/split/kernel_do_volume.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_shader_eval.h
===================================================================
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 9697411a23e..e8ea556bd4f 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -239,6 +239,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
for(int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index ed756096ebf..e1fdd1340db 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1462,6 +1462,8 @@ enum RayState {
RAY_VOLUME_INDIRECT_NEXT_ITER,
RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+ RAY_STATE_ANY, /* Special, never assigned to a ray */
+
/* Ray flags */
/* Flags to denote that the ray is currently evaluating the branched indirect loop */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 6bcefe39ae5..98aaf6b7770 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -211,7 +211,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index d64c8c66458..3f3915be981 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -104,7 +104,7 @@ kernel_cuda_path_trace_data_init(
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index c314dc96c33..0792fdc3171 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -19,6 +19,8 @@
#include "kernel/split/kernel_lamp_emission.h"
#define KERNEL_NAME lamp_emission
+#define LOCALS_TYPE uint
#include "kernel/kernels/opencl/kernel_split_function.h"
#undef KERNEL_NAME
+#undef LOCALS_TYPE
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..7296bb60846 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -111,6 +111,28 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
+ /* Finish up kernel_path_lamp_emission from kernel_lamp_emission kernel. */
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ ray_index = get_ray_index(kg, ray_index,
+ QUEUE_SHADER_EVAL,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ LightSample ls = kernel_split_state.light_sample[ray_index];
+
+ kernel_path_lamp_emission_finish(kg, state, &ray, throughput, isect, sd, L, &ls);
+ }
+
#ifdef __VOLUME__
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
@@ -120,7 +142,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
# endif /* __BRANCHED_PATH__ */
}
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
if(*kernel_split_params.use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index c14f66f664f..a94ef87aaed 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -20,14 +20,22 @@ CCL_NAMESPACE_BEGIN
* It processes rays of state RAY_ACTIVE and RAY_HIT_BACKGROUND.
* We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel.
*/
-ccl_device void kernel_lamp_emission(KernelGlobals *kg)
+ccl_device void kernel_lamp_emission(KernelGlobals *kg, ccl_local_param uint *local_queue_atomics)
{
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.shader_eval_queue = QUEUE_SHADER_EVAL;
+ kernel_split_params.shader_eval_state = RAY_STATE_ANY;
#ifndef __VOLUME__
/* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- }
#endif
+ }
+
/* Fetch use_queues_flag. */
char local_use_queues_flag = *kernel_split_params.use_queues_flag;
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -49,19 +57,33 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
}
}
+ ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+ ShaderEvalIntent intent = SHADER_EVAL_INTENT_SKIP;
+
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND))
{
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *sd = kernel_split_sd(sd, ray_index);
+ LightSample ls = kernel_split_state.light_sample[ray_index];
- kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
+ intent = kernel_path_lamp_emission_setup(kg, state, &ray, isect, sd, &ls);
+ if(intent) {
+ shader_eval_task_setup(kg, eval_task, sd, intent);
+ kernel_split_state.light_sample[ray_index] = ls;
+ }
}
+
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADER_EVAL,
+ intent != SHADER_EVAL_INTENT_SKIP,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index c53807f4e09..b75608f61f7 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -40,7 +40,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
return;
}
- if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state)) {
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state) || shade_state == RAY_STATE_ANY) {
ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
ShaderData *sd = (ShaderData*)(kernel_split_state.data + eval_task->sd_offset);
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
More information about the Bf-blender-cvs
mailing list