[Bf-blender-cvs] [d75d262318c] split-kernel-faster-building: Cycles: Split shader_eval out of the kernel_lamp_emission kernel

Mai Lavelle noreply at git.blender.org
Mon Nov 27 08:52:19 CET 2017


Commit: d75d262318c845058a6410fb4c764dadbaa20b6b
Author: Mai Lavelle
Date:   Mon Nov 27 02:21:06 2017 -0500
Branches: split-kernel-faster-building
https://developer.blender.org/rBd75d262318c845058a6410fb4c764dadbaa20b6b

Cycles: Split shader_eval out of the kernel_lamp_emission kernel

With this, kernels for the BMW and classroom scenes build in half the
time they take on master. Render times are 1% faster as well.

===================================================================

M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/kernel_split.cu
M	intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M	intern/cycles/kernel/split/kernel_do_volume.h
M	intern/cycles/kernel/split/kernel_lamp_emission.h
M	intern/cycles/kernel/split/kernel_shader_eval.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 9697411a23e..e8ea556bd4f 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -239,6 +239,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 			for(int PathIter = 0; PathIter < 16; PathIter++) {
 				ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index ed756096ebf..e1fdd1340db 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1462,6 +1462,8 @@ enum RayState {
 	RAY_VOLUME_INDIRECT_NEXT_ITER,
 	RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
 
+	RAY_STATE_ANY, /* Special, never assigned to a ray */
+
 	/* Ray flags */
 
 	/* Flags to denote that the ray is currently evaluating the branched indirect loop */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 6bcefe39ae5..98aaf6b7770 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -211,7 +211,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index d64c8c66458..3f3915be981 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -104,7 +104,7 @@ kernel_cuda_path_trace_data_init(
 
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(lamp_emission, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index c314dc96c33..0792fdc3171 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -19,6 +19,8 @@
 #include "kernel/split/kernel_lamp_emission.h"
 
 #define KERNEL_NAME lamp_emission
+#define LOCALS_TYPE uint
 #include "kernel/kernels/opencl/kernel_split_function.h"
 #undef KERNEL_NAME
+#undef LOCALS_TYPE
 
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..7296bb60846 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -111,6 +111,28 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
 
 ccl_device void kernel_do_volume(KernelGlobals *kg)
 {
+	/* Finish up kernel_path_lamp_emission from kernel_lamp_emission kernel. */
+	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+	ray_index = get_ray_index(kg, ray_index,
+	                          QUEUE_SHADER_EVAL,
+	                          kernel_split_state.queue_data,
+	                          kernel_split_params.queue_size,
+	                          1);
+
+	if(ray_index != QUEUE_EMPTY_SLOT) {
+		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+		float3 throughput = kernel_split_state.throughput[ray_index];
+		Ray ray = kernel_split_state.ray[ray_index];
+		ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+		ShaderData *sd = kernel_split_sd(sd, ray_index);
+		LightSample ls = kernel_split_state.light_sample[ray_index];
+
+		kernel_path_lamp_emission_finish(kg, state, &ray, throughput, isect, sd, L, &ls);
+	}
+
 #ifdef __VOLUME__
 	/* We will empty this queue in this kernel. */
 	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
@@ -120,7 +142,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
 #  endif  /* __BRANCHED_PATH__ */
 	}
 
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+	ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 
 	if(*kernel_split_params.use_queues_flag) {
 		ray_index = get_ray_index(kg, ray_index,
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index c14f66f664f..a94ef87aaed 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -20,14 +20,22 @@ CCL_NAMESPACE_BEGIN
  * It processes rays of state RAY_ACTIVE and RAY_HIT_BACKGROUND.
  * We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel.
  */
-ccl_device void kernel_lamp_emission(KernelGlobals *kg)
+ccl_device void kernel_lamp_emission(KernelGlobals *kg, ccl_local_param uint *local_queue_atomics)
 {
+	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+		*local_queue_atomics = 0;
+	}
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+		kernel_split_params.shader_eval_queue = QUEUE_SHADER_EVAL;
+		kernel_split_params.shader_eval_state = RAY_STATE_ANY;
 #ifndef __VOLUME__
 	/* We will empty this queue in this kernel. */
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
 		kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
-	}
 #endif
+	}
+
 	/* Fetch use_queues_flag. */
 	char local_use_queues_flag = *kernel_split_params.use_queues_flag;
 	ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -49,19 +57,33 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
 		}
 	}
 
+	ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+	ShaderEvalIntent intent = SHADER_EVAL_INTENT_SKIP;
+
 	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
 	   IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND))
 	{
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
 		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
 
-		float3 throughput = kernel_split_state.throughput[ray_index];
 		Ray ray = kernel_split_state.ray[ray_index];
 		ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
 		ShaderData *sd = kernel_split_sd(sd, ray_index);
+		LightSample ls = kernel_split_state.light_sample[ray_index];
 
-		kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
+		intent = kernel_path_lamp_emission_setup(kg, state, &ray, isect, sd, &ls);
+		if(intent) {
+			shader_eval_task_setup(kg, eval_task, sd, intent);
+			kernel_split_state.light_sample[ray_index] = ls;
+		}
 	}
+
+	enqueue_ray_index_local(ray_index,
+	                        QUEUE_SHADER_EVAL,
+	                        intent != SHADER_EVAL_INTENT_SKIP,
+	                        kernel_split_params.queue_size,
+	                        local_queue_atomics,
+	                        kernel_split_state.queue_data,
+	                        kernel_split_params.queue_index);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index c53807f4e09..b75608f61f7 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -40,7 +40,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
 		return;
 	}
 
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state)) {
+	if(IS_STATE(kernel_split_state.ray_state, ray_index, shade_state) || shade_state == RAY_STATE_ANY) {
 		ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
 		ShaderData *sd = (ShaderData*)(kernel_split_state.data + eval_task->sd_offset);
 		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];



More information about the Bf-blender-cvs mailing list