[Bf-blender-cvs] [06c051363b5] master: Cycles: split kernel_shadow_blocked to AO & DL parts

Hristo Gueorguiev noreply at git.blender.org
Thu Mar 9 17:09:43 CET 2017


Commit: 06c051363b509f7c3c40a803b87739fe0e2a8576
Author: Hristo Gueorguiev
Date:   Wed Mar 8 17:39:40 2017 +0100
Branches: master
https://developer.blender.org/rB06c051363b509f7c3c40a803b87739fe0e2a8576

Cycles: split kernel_shadow_blocked to AO & DL parts

Reduces memory allocation for split kernel.

This allows for faster rendering due to bigger global size,
especially when GPU memory is limited.

Performance results:

                         R9 290 total render time
                        Before    After   Change
BMW                      4:37      4:34   -1.1 %
Classroom               14:43     14:30   -1.5 %
Fishy Cat               11:20     11:04   -2.4 %
Koro                    12:11     12:04   -1.0 %
Pabellon Barcelona      22:01     20:44   -5.8 %
Pabellon Barcelona(*)   15:32     15:09   -2.5 %

(*) without glossy connected to volume

===================================================================

M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
A	intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
R085	intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl	intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
M	intern/cycles/kernel/kernels/opencl/kernel_split.cl
R065	intern/cycles/kernel/split/kernel_shadow_blocked.h	intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
A	intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
M	intern/cycles/kernel/split/kernel_split_data_types.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 6ab0b3c5777..10a642ed4d0 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -42,7 +42,8 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
 	kernel_holdout_emission_blurring_pathtermination_ao = NULL;
 	kernel_subsurface_scatter = NULL;
 	kernel_direct_lighting = NULL;
-	kernel_shadow_blocked = NULL;
+	kernel_shadow_blocked_ao = NULL;
+	kernel_shadow_blocked_dl = NULL;
 	kernel_next_iteration_setup = NULL;
 	kernel_indirect_subsurface = NULL;
 	kernel_buffer_update = NULL;
@@ -66,7 +67,8 @@ DeviceSplitKernel::~DeviceSplitKernel()
 	delete kernel_holdout_emission_blurring_pathtermination_ao;
 	delete kernel_subsurface_scatter;
 	delete kernel_direct_lighting;
-	delete kernel_shadow_blocked;
+	delete kernel_shadow_blocked_ao;
+	delete kernel_shadow_blocked_dl;
 	delete kernel_next_iteration_setup;
 	delete kernel_indirect_subsurface;
 	delete kernel_buffer_update;
@@ -90,7 +92,8 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
 	LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
 	LOAD_KERNEL(subsurface_scatter);
 	LOAD_KERNEL(direct_lighting);
-	LOAD_KERNEL(shadow_blocked);
+	LOAD_KERNEL(shadow_blocked_ao);
+	LOAD_KERNEL(shadow_blocked_dl);
 	LOAD_KERNEL(next_iteration_setup);
 	LOAD_KERNEL(indirect_subsurface);
 	LOAD_KERNEL(buffer_update);
@@ -222,12 +225,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 		bool activeRaysAvailable = true;
 
 		while(activeRaysAvailable) {
-			/* Twice the global work size of other kernels for
-			 * ckPathTraceKernel_shadow_blocked_direct_lighting. */
-			size_t global_size_shadow_blocked[2];
-			global_size_shadow_blocked[0] = global_size[0] * 2;
-			global_size_shadow_blocked[1] = global_size[1];
-
 			/* Do path-iteration in host [Enqueue Path-iteration kernels. */
 			for(int PathIter = 0; PathIter < 16; PathIter++) {
 				ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
@@ -239,7 +236,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 				ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
-				ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
+				ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
 				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index abaf350cbbb..ae61f9e38c1 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -65,7 +65,8 @@ private:
 	SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
 	SplitKernelFunction *kernel_subsurface_scatter;
 	SplitKernelFunction *kernel_direct_lighting;
-	SplitKernelFunction *kernel_shadow_blocked;
+	SplitKernelFunction *kernel_shadow_blocked_ao;
+	SplitKernelFunction *kernel_shadow_blocked_dl;
 	SplitKernelFunction *kernel_next_iteration_setup;
 	SplitKernelFunction *kernel_indirect_subsurface;
 	SplitKernelFunction *kernel_buffer_update;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 30b3a6b52f7..b468e4e08a5 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -27,7 +27,8 @@ set(SRC
 	kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
 	kernels/opencl/kernel_subsurface_scatter.cl
 	kernels/opencl/kernel_direct_lighting.cl
-	kernels/opencl/kernel_shadow_blocked.cl
+	kernels/opencl/kernel_shadow_blocked_ao.cl
+	kernels/opencl/kernel_shadow_blocked_dl.cl
 	kernels/opencl/kernel_next_iteration_setup.cl
 	kernels/opencl/kernel_indirect_subsurface.cl
 	kernels/opencl/kernel_buffer_update.cl
@@ -214,7 +215,8 @@ set(SRC_SPLIT_HEADERS
 	split/kernel_queue_enqueue.h
 	split/kernel_scene_intersect.h
 	split/kernel_shader_eval.h
-	split/kernel_shadow_blocked.h
+	split/kernel_shadow_blocked_ao.h
+	split/kernel_shadow_blocked_dl.h
 	split/kernel_split_common.h
 	split/kernel_split_data.h
 	split/kernel_split_data_types.h
@@ -422,7 +424,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.c
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 8ce420d8a48..896b80d783e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -81,7 +81,8 @@ DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
 DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
 DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
 DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
-DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
+DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
+DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
 DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
 DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
 DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 8c519a21d95..ba6b1033915 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -48,7 +48,8 @@
 #  include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
 #  include "split/kernel_subsurface_scatter.h"
 #  include "split/kernel_direct_lighting.h"
-#  include "split/kernel_shadow_blocked.h"
+#  include "split/kernel_shadow_blocked_ao.h"
+#  include "split/kernel_shadow_blocked_dl.h"
 #  include "split/kernel_next_iteration_setup.h"
 #  include "split/kernel_indirect_subsurface.h"
 #  include "split/kernel_buffer_update.h"
@@ -177,7 +178,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
 DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
 DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
 DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
+DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
+DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
 DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
 DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
@@ -204,7 +206,8 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
 	REGISTER(holdout_emission_blurring_pathtermination_ao);
 	REGISTER(subsurface_scatter);
 	REGISTER(direct_lighting);
-	REGISTER(shadow_blocked);
+	REGISTER(shadow_blocked_ao);
+	REGISTER(shadow_blocked_dl);
 	REGISTER(next_iteration_setup);
 	REGISTER(indirect_subsurface);
 	REGISTER(buffer_update);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
similarity index 85%
copy from intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
copy to intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
index 3693f7f9c9d..1c96d67fec2 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
@@ -16,11 +16,11 @@
 
 #include "kernel_compat_opencl.h"
 #include "split/kernel_split_common.h"
-#include "split/kernel_shadow_blocked.h"
+#include "split/kernel_shadow_blocked_ao.h"
 
-__kernel void kernel_ocl_path_trace_shadow_blocked(
+__kernel void kernel_ocl_path_trace_shadow_blocked_ao(
         KernelGlobals *kg,
         ccl_constant KernelData *data)
 {
-	kernel_shadow_blocked(kg);
+	kernel_shadow_blocked_ao(kg);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
similarity index 85%
rename from intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
rename to intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
index 3693f7f9c9d..2231f767c0c 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
@@ -16,11 +16,11 @@
 
 #include "kernel_compat_opencl.h"
 #include "split/kernel_split_common.h"
-#include "split/kernel_shadow_blocked.h"
+#include "split/kernel_shadow_blocked_dl.h"
 
-__kernel void kernel_ocl_path_trace_shadow_blocked(
+__kernel void kernel_ocl_path_trace_shadow_blocked_dl(
         KernelGlobals *kg,
         ccl_constant KernelData *data)
 {
-	kernel_shadow_blocked(kg);
+	kernel_shadow_blocked_dl(kg);
 }
diff --git a/intern/cycles/kernel/kernels/open

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list