[Bf-blender-cvs] [9c88fe08a00] split-kernel-faster-building: Cycles: Speed up split kernel builds by using split direct_emission functions

Mai Lavelle noreply at git.blender.org
Sat Nov 11 12:48:32 CET 2017


Commit: 9c88fe08a00b5662eb84e1b40e06bb9ff6c4a510
Author: Mai Lavelle
Date:   Sat Nov 11 06:36:07 2017 -0500
Branches: split-kernel-faster-building
https://developer.blender.org/rB9c88fe08a00b5662eb84e1b40e06bb9ff6c4a510

Cycles: Speed up split kernel builds by using split direct_emission functions

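This removes the indirect call to `svm_eval_nodes` from the
`kernel_direct_lighting` kernel, which shortens kernel build times
by 4-5 seconds at the cost of ~1% slower renders. To do so,
`kernel_direct_lighting` now only calls `direct_emission_setup`, the
`shader_eval` kernel is enqueued after it, and
`kernel_shadow_blocked_dl` calls `direct_emission_finish`. It should
be possible to apply the same technique to other areas of the kernel
to reduce build times even further.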

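Note: this breaks branched path tracing and the shadow catcher in the
split kernel (the affected branches are marked with `// XXX` in
`kernel_direct_lighting.h`). This should be easy to fix and will be
done in a follow-up commit.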

===================================================================

M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/kernel/kernel_light.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
M	intern/cycles/kernel/split/kernel_split_data_types.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 74135a1f3c8..bcc438c86c2 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -252,6 +252,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 				ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index dfa3150dc92..1a3ae4c1f32 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -16,23 +16,6 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Light Sample result */
-
-typedef struct LightSample {
-	float3 P;			/* position on light, or direction for distant light */
-	float3 Ng;			/* normal on light */
-	float3 D;			/* direction from shading point to light */
-	float t;			/* distance to light (FLT_MAX for distant light) */
-	float u, v;			/* parametric coordinate on primitive */
-	float pdf;			/* light sampling probability density function */
-	float eval_fac;		/* intensity multiplier */
-	int object;			/* object id for triangle/curve lights */
-	int prim;			/* primitive id for triangle/curve lights */
-	int shader;			/* shader id */
-	int lamp;			/* lamp id */
-	LightType type;		/* type of light */
-} LightSample;
-
 /* Area light sampling */
 
 /* Uses the following paper:
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 6b0b86ad257..9032f847ec3 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1513,6 +1513,23 @@ typedef struct WorkTile {
 	ccl_global float *buffer;
 } WorkTile;
 
+/* Light Sample result */
+
+typedef struct LightSample {
+	float3 P;			/* position on light, or direction for distant light */
+	float3 Ng;			/* normal on light */
+	float3 D;			/* direction from shading point to light */
+	float t;			/* distance to light (FLT_MAX for distant light) */
+	float u, v;			/* parametric coordinate on primitive */
+	float pdf;			/* light sampling probability density function */
+	float eval_fac;		/* intensity multiplier */
+	int object;			/* object id for triangle/curve lights */
+	int prim;			/* primitive id for triangle/curve lights */
+	int shader;			/* shader id */
+	int lamp;			/* lamp id */
+	LightType type;		/* type of light */
+} LightSample;
+
 /* Utility macro to get a pointer to an object that can be used locally, while avoiding
  * address space issues of the split kernel. `name` must exist in split data entries.
  */
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ca79602c565..176c0ec16ad 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -68,14 +68,14 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
 #  ifdef __BRANCHED_PATH__
 		if(flag && kernel_data.integrator.branched) {
 			flag = false;
-			enqueue_flag = 1;
+			enqueue_flag = 1; // XXX
 		}
 #  endif  /* __BRANCHED_PATH__ */
 
 #  ifdef __SHADOW_TRICKS__
 		if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
 			flag = false;
-			enqueue_flag = 1;
+			enqueue_flag = 1; // XXX
 		}
 #  endif  /* __SHADOW_TRICKS__ */
 
@@ -83,7 +83,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
 			/* Sample illumination from lights to find path contribution. */
 			float light_u, light_v;
 			path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-			float terminate = path_state_rng_light_termination(kg, state);
 
 			LightSample ls;
 			if(light_sample(kg,
@@ -91,29 +90,17 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
 			                sd->time,
 			                sd->P,
 			                state->bounce,
-			                &ls)) {
-
-				Ray light_ray;
-				light_ray.time = sd->time;
-
-				BsdfEval L_light;
-				bool is_lamp;
-				if(direct_emission(kg,
-				                   sd,
-				                   AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
-				                   &ls,
-				                   state,
-				                   &light_ray,
-				                   &L_light,
-				                   &is_lamp,
-				                   terminate))
-				{
+			                &ls))
+			{
+				ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+				ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+
+				if(direct_emission_setup(kg, sd, emission_sd, &ls, state, eval_task)) {
 					/* Write intermediate data to global memory to access from
 					 * the next kernel.
 					 */
-					kernel_split_state.light_ray[ray_index] = light_ray;
-					kernel_split_state.bsdf_eval[ray_index] = L_light;
-					kernel_split_state.is_lamp[ray_index] = is_lamp;
+					kernel_split_state.light_sample[ray_index] = ls;
+
 					/* Mark ray state for next shadow kernel. */
 					enqueue_flag = 1;
 				}
@@ -131,6 +118,10 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
 	                        local_queue_atomics,
 	                        kernel_split_state.queue_data,
 	                        kernel_split_params.queue_index);
+
+	if(ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0) == 0) {
+		kernel_split_params.shader_eval_queue = QUEUE_SHADOW_RAY_CAST_DL_RAYS;
+	}
 #endif
 
 #ifdef __BRANCHED_PATH__
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index da072fd5f1a..cde46d2250d 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -41,14 +41,24 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
 		return;
 
 	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	Ray ray = kernel_split_state.light_ray[ray_index];
 	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
 	ShaderData *sd = kernel_split_sd(sd, ray_index);
 	float3 throughput = kernel_split_state.throughput[ray_index];
-
-	BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
 	ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-	bool is_lamp = kernel_split_state.is_lamp[ray_index];
+	LightSample ls = kernel_split_state.light_sample[ray_index];
+	ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+
+	float terminate = path_state_rng_light_termination(kg, state);
+
+	Ray ray;
+	ray.time = sd->time;
+
+	BsdfEval L_light;
+	bool is_lamp;
+
+	if(!direct_emission_finish(kg, sd, emission_sd, &ls, state, &ray, &L_light, &is_lamp, terminate, eval_task)) {
+		return;
+	}
 
 #  if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
 	bool use_branched = false;
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index ae5b7d15692..8698ae7e9d5 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -110,9 +110,7 @@ typedef ccl_global struct SplitBranchedState {
 	SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
 	SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
 	SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
-	SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
-	SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+	SPLIT_DATA_ENTRY(ccl_global LightSample, light_sample, 1) \
 	SPLIT_DATA_ENTRY(ShaderEvalTask, shader_eval_task, 1) \
 	SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
 	SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
@@ -129,9 +127,6 @@ typedef ccl_global struct SplitBranchedState {
 	SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
 	SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
 	SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
-	SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
-	SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
 	SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
 	SPLIT_DATA_SUBSURFACE_ENTRIES \
 	SPLIT_DATA_VOLUME_ENTRIES \



More information about the Bf-blender-cvs mailing list