[Bf-blender-cvs] [2cacb36c77e] split-kernel-faster-building: Cycles: Modify kernel_indirect_background to remove call to svm_eval_nodes

Mai Lavelle noreply at git.blender.org
Tue Nov 14 07:54:20 CET 2017


Commit: 2cacb36c77e56026990644beb6b84ab58b204e10
Author: Mai Lavelle
Date:   Tue Nov 14 01:36:39 2017 -0500
Branches: split-kernel-faster-building
https://developer.blender.org/rB2cacb36c77e56026990644beb6b84ab58b204e10

Cycles: Modify kernel_indirect_background to remove call to svm_eval_nodes

Speeds up kernel building by a further 2-3 seconds, at the cost of
rendering being about 1% slower again.

===================================================================

M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/kernel_split.cu
M	intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_indirect_background.h
M	intern/cycles/kernel/split/kernel_queue_enqueue.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index bcc438c86c2..7135169f238 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -245,6 +245,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 				ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index bc822523b06..a861d5ec30a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1447,6 +1447,9 @@ typedef enum QueueNumber {
 #  endif
 #endif  /* __BRANCHED_PATH__ */
 
+	/* For temporarily holding rays that need to be shaded. */
+	QUEUE_SHADER_EVAL,
+
 	NUM_QUEUES
 } QueueNumber;
 
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index fdeb7dcd3e4..ecc2bb13eba 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -215,7 +215,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 43b3d0aa0e6..41044b65347 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -108,7 +108,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(indirect_background, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
index 192d01444ba..77d25aee295 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
@@ -19,6 +19,8 @@
 #include "kernel/split/kernel_indirect_background.h"
 
 #define KERNEL_NAME indirect_background
+#define LOCALS_TYPE unsigned int
 #include "kernel/kernels/opencl/kernel_split_function.h"
 #undef KERNEL_NAME
+#undef LOCALS_TYPE
 
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index f638a230e12..df8ebfc6480 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -66,8 +66,31 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 #ifdef __AO__
 	char enqueue_flag = 0;
 #endif
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	ray_index = get_ray_index(kg, ray_index,
+	int ray_index;
+
+	ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __BACKGROUND__
+	ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+	                          QUEUE_SHADER_EVAL,
+	                          kernel_split_state.queue_data,
+	                          kernel_split_params.queue_size,
+	                          1);
+
+	if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+		float3 throughput = kernel_split_state.throughput[ray_index];
+		ShaderData *sd = kernel_split_sd(sd, ray_index);
+		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+		ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+
+		kernel_path_background_finish(kg, state, ray, throughput, sd, L, eval_task);
+		kernel_split_path_end(kg, ray_index);
+	}
+#endif  /* __BACKGROUND__ */
+
+	ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
 	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
 	                          kernel_split_state.queue_data,
 	                          kernel_split_params.queue_size,
@@ -93,7 +116,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 	ccl_global PathState *state = 0x0;
 	float3 throughput;
 
-	ccl_global char *ray_state = kernel_split_state.ray_state;
 	ShaderData *sd = kernel_split_sd(sd, ray_index);
 
 	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 4cf88a02590..517876c6546 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -16,8 +16,14 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_indirect_background(KernelGlobals *kg)
+ccl_device void kernel_indirect_background(KernelGlobals *kg, ccl_local_param unsigned int *local_queue_atomics)
 {
+	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+		*local_queue_atomics = 0;
+	}
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+	char enqueue_flag = 0;
+
 	ccl_global char *ray_state = kernel_split_state.ray_state;
 
 	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
@@ -40,6 +46,11 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
 		}
 	}
 
+	if(ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0) == 0) {
+		kernel_split_params.shader_eval_queue = QUEUE_SHADER_EVAL;
+		kernel_split_params.shader_eval_state = RAY_HIT_BACKGROUND;
+	}
+
 	ray_index = get_ray_index(kg, thread_index,
 	                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
 	                          kernel_split_state.queue_data,
@@ -56,10 +67,23 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
 		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
 		float3 throughput = kernel_split_state.throughput[ray_index];
 		ShaderData *sd = kernel_split_sd(sd, ray_index);
+		ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
 
-		kernel_path_background(kg, state, ray, throughput, sd, L);
-		kernel_split_path_end(kg, ray_index);
+		if(kernel_path_background_setup(kg, state, ray, throughput, sd, L, eval_task)) {
+			enqueue_flag = 1;
+		}
+		else {
+			kernel_split_path_end(kg, ray_index);
+		}
 	}
+
+	enqueue_ray_index_local(ray_index,
+	                        QUEUE_SHADER_EVAL,
+	                        enqueue_flag,
+	                        kernel_split_params.queue_size,
+	                        local_queue_atomics,
+	                        kernel_split_state.queue_data,
+	                        kernel_split_params.queue_index);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index 66ce2dfb6f1..33b0eff461f 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -86,6 +86,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
 		                                  locals->queue_atomics);
 		kernel_split_state.queue_data[my_gqidx] = ray_index;
 	}
+
+	kernel_split_params.queue_index[QUEUE_SHADER_EVAL] = 0;
 }
 
 CCL_NAMESPACE_END



More information about the Bf-blender-cvs mailing list