[Bf-blender-cvs] [15edda3a8e0] master: T61463: Separate Baking kernels

Jeroen Bakker noreply at git.blender.org
Tue Feb 19 16:34:37 CET 2019


Commit: 15edda3a8e07003bef695cca939744bbea80ad18
Author: Jeroen Bakker
Date:   Tue Feb 19 16:31:31 2019 +0100
Branches: master
https://developer.blender.org/rB15edda3a8e07003bef695cca939744bbea80ad18

T61463: Separate Baking kernels

Cycles OpenCL: Split baking kernels in own program

Fix T61463. Before this patch baking was part of the base kernels. There
are 3 baking kernels that and all 3 uses shader evaluation. Only for one
of these kernels the functionality was wrapped in the __NO_BAKING__
compile directive.

When you start baking this leads to long compile times. By separating
in individual programs will reduce the compile times.

Also wrapped all baking kernels with __NO_BAKING__ to reduce the
compilation times.

Impact on compilation time

    job   |   scene_name    | previous |  new  | percentage
  --------+-----------------+----------+-------+------------
   T61463 | empty           |    10.63 |  7.27 |         32%
   T61463 | bmw             |    17.91 | 14.24 |         20%
   T61463 | fishycat        |    19.57 | 15.08 |         23%
   T61463 | barbershop      |    54.10 | 48.18 |         11%
   T61463 | classroom       |    17.55 | 14.42 |         18%
   T61463 | koro            |    18.92 | 17.15 |          9%
   T61463 | pavillion       |    17.43 | 14.23 |         18%
   T61463 | splash279       |    16.48 | 15.33 |          7%
   T61463 | volume_emission |    36.22 | 34.19 |          6%

Impact on render time

    job   |   scene_name    | previous |   new   | percentage
  --------+-----------------+----------+---------+------------
   T61463 | empty           |    21.06 |   20.54 |          2%
   T61463 | bmw             |   198.44 |  189.59 |          4%
   T61463 | fishycat        |   394.20 |  388.50 |          1%
   T61463 | barbershop      |  1188.16 | 1185.49 |          0%
   T61463 | classroom       |   341.08 |  339.27 |          1%
   T61463 | koro            |   472.43 |  360.70 |         24%
   T61463 | pavillion       |   905.77 |  902.14 |          0%
   T61463 | splash279       |    55.26 |   54.92 |          1%
   T61463 | volume_emission |    62.59 |   39.09 |         38%

I don't have a grounded explanation why koro and volume_emission is this much
faster; I have done several tests though...

Maniphest Tasks: T61463

Differential Revision: https://developer.blender.org/D4376

===================================================================

M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/kernels/opencl/kernel.cl
A	intern/cycles/kernel/kernels/opencl/kernel_background.cl
A	intern/cycles/kernel/kernels/opencl/kernel_bake.cl
A	intern/cycles/kernel/kernels/opencl/kernel_displace.cl

===================================================================

diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index a2c0e53b3e7..766b9e4bf1a 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -325,7 +325,11 @@ public:
 		map<ustring, cl_kernel> kernels;
 	};
 
-	OpenCLProgram base_program, denoising_program;
+	OpenCLProgram base_program;
+	OpenCLProgram bake_program;
+	OpenCLProgram displace_program;
+	OpenCLProgram background_program;
+	OpenCLProgram denoising_program;
 
 	typedef map<string, device_vector<uchar>*> ConstMemMap;
 	typedef map<string, device_ptr> MemMap;
@@ -571,7 +575,7 @@ protected:
 	        ustring key,
 	        thread_scoped_lock& cache_locker);
 
-	virtual string build_options_for_base_program(
+	virtual string build_options_for_bake_program(
 	        const DeviceRequestedFeatures& /*requested_features*/);
 
 private:
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index d8f9a242ac8..6a47a60e915 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
 	}
 
 	base_program.release();
+	bake_program.release();
+	displace_program.release();
+	background_program.release();
 	if(cqCommandQueue)
 		clReleaseCommandQueue(cqCommandQueue);
 	if(cxContext)
@@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	if(!opencl_version_check())
 		return false;
 
-	base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
+	base_program = OpenCLProgram(this, "base", "kernel.cl", "");
 	base_program.add_kernel(ustring("convert_to_byte"));
 	base_program.add_kernel(ustring("convert_to_half_float"));
-	base_program.add_kernel(ustring("displace"));
-	base_program.add_kernel(ustring("background"));
-	base_program.add_kernel(ustring("bake"));
 	base_program.add_kernel(ustring("zero_buffer"));
 
+	bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features));
+	bake_program.add_kernel(ustring("bake"));
+
+	displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features));
+	displace_program.add_kernel(ustring("displace"));
+
+	background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features));
+	background_program.add_kernel(ustring("background"));
+
 	denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
 	denoising_program.add_kernel(ustring("filter_divide_shadow"));
 	denoising_program.add_kernel(ustring("filter_get_feature"));
@@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	denoising_program.add_kernel(ustring("filter_finalize"));
 
 	vector<OpenCLProgram*> programs;
-	programs.push_back(&base_program);
-	programs.push_back(&denoising_program);
+	programs.push_back(&bake_program);
+	programs.push_back(&displace_program);
+	programs.push_back(&background_program);
 	/* Call actual class to fill the vector with its programs. */
 	if(!add_kernel_programs(requested_features, programs)) {
 		return false;
 	}
+	programs.push_back(&base_program);
+	programs.push_back(&denoising_program);
 
 	/* Parallel compilation of Cycles kernels, this launches multiple
 	 * processes to workaround OpenCL frameworks serializing the calls
@@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
 	cl_kernel kernel;
 
 	if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
-		kernel = base_program(ustring("bake"));
+		kernel = bake_program(ustring("bake"));
 	}
 	else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
-		kernel = base_program(ustring("displace"));
+		kernel = displace_program(ustring("displace"));
 	}
 	else {
-		kernel = base_program(ustring("background"));
+		kernel = background_program(ustring("background"));
 	}
 
 	cl_uint start_arg_index =
@@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel(
 	                           cache_locker);
 }
 
-string OpenCLDeviceBase::build_options_for_base_program(
+string OpenCLDeviceBase::build_options_for_bake_program(
         const DeviceRequestedFeatures& requested_features)
 {
 	/* TODO(sergey): By default we compile all features, meaning
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index b759f69d3ab..c9d3eb2eb8c 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -327,7 +327,7 @@ public:
 protected:
 	/* ** Those guys are for workign around some compiler-specific bugs ** */
 
-	string build_options_for_base_program(
+	string build_options_for_bake_program(
 	        const DeviceRequestedFeatures& requested_features)
 	{
 		return requested_features.get_build_options();
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f7041ee2783..0a2acd3f669 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -37,6 +37,9 @@ set(SRC_CUDA_KERNELS
 
 set(SRC_OPENCL_KERNELS
 	kernels/opencl/kernel.cl
+	kernels/opencl/kernel_bake.cl
+	kernels/opencl/kernel_displace.cl
+	kernels/opencl/kernel_background.cl
 	kernels/opencl/kernel_state_buffer_size.cl
 	kernels/opencl/kernel_split.cl
 	kernels/opencl/kernel_split_bundle.cl
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index de1f5088629..aa837e2ae87 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -81,78 +81,6 @@ __kernel void kernel_ocl_path_trace(
 
 #else  /* __COMPILE_ONLY_MEGAKERNEL__ */
 
-__kernel void kernel_ocl_displace(
-	ccl_constant KernelData *data,
-	ccl_global uint4 *input,
-	ccl_global float4 *output,
-
-	KERNEL_BUFFER_PARAMS,
-
-	int type, int sx, int sw, int offset, int sample)
-{
-	KernelGlobals kglobals, *kg = &kglobals;
-
-	kg->data = data;
-
-	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-	kernel_set_buffer_info(kg);
-
-	int x = sx + ccl_global_id(0);
-
-	if(x < sx + sw) {
-		kernel_displace_evaluate(kg, input, output, x);
-	}
-}
-__kernel void kernel_ocl_background(
-	ccl_constant KernelData *data,
-	ccl_global uint4 *input,
-	ccl_global float4 *output,
-
-	KERNEL_BUFFER_PARAMS,
-
-	int type, int sx, int sw, int offset, int sample)
-{
-	KernelGlobals kglobals, *kg = &kglobals;
-
-	kg->data = data;
-
-	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-	kernel_set_buffer_info(kg);
-
-	int x = sx + ccl_global_id(0);
-
-	if(x < sx + sw) {
-		kernel_background_evaluate(kg, input, output, x);
-	}
-}
-
-__kernel void kernel_ocl_bake(
-	ccl_constant KernelData *data,
-	ccl_global uint4 *input,
-	ccl_global float4 *output,
-
-	KERNEL_BUFFER_PARAMS,
-
-	int type, int filter, int sx, int sw, int offset, int sample)
-{
-	KernelGlobals kglobals, *kg = &kglobals;
-
-	kg->data = data;
-
-	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-	kernel_set_buffer_info(kg);
-
-	int x = sx + ccl_global_id(0);
-
-	if(x < sx + sw) {
-#ifdef __NO_BAKING__
-		output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-#else
-		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
-#endif
-	}
-}
-
 __kernel void kernel_ocl_convert_to_byte(
 	ccl_constant KernelData *data,
 	ccl_global uchar4 *rgba,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_background.cl
new file mode 100644
index 00000000000..c7c709c0ad7
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_background.cl
@@ -0,0 +1,39 @@
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/kernel_math.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_color.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_path_branched.h"
+
+#include "kernel/kernel_bake.h"
+
+__kernel void kernel_ocl_background(
+	ccl_constant KernelData *data,
+	ccl_global uint4 *input,
+	ccl_global float4 *output,
+
+	KERNEL_BUFFER_PARAMS,
+
+	int type, int sx, int sw, int offset, int sample)
+{
+	KernelGlobals kglobals, *kg = &kglobals;
+
+	kg->data = data;
+
+	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+	kernel_set_buffer_info(kg);
+
+	int x = sx + ccl_global_id(0);
+
+	if(x < sx + sw) {
+#ifdef __NO_BAKING__
+		output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+		kernel_background_evaluate(kg, input, output, x);
+#endif
+	}
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl
new file mode 100644
index 00000000000..041312b53cb
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl
@@ -0,0 +1,38 @@
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/kernel_math.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_color.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_path_branched.h"
+
+#include "kernel/kernel_bake.h"
+
+__kernel void kernel_ocl_bake(
+	ccl_constant KernelData *data,
+	ccl_global uint4 *input,
+	ccl_global float4 *output,
+
+	KERNEL_BUFFER_PARAMS,
+
+	int type, int filter, int sx, int sw, int offset, int sample)
+{
+	KernelGlobals kglobals, *kg = &kglobals;
+
+	kg->data = data;
+
+	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+	kernel_set_buffer_info(kg);
+
+	int x = sx + ccl_global_id(0);
+
+	if(x < sx + sw) {
+#ifdef __NO_BAKING__
+		output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
+#endif
+	}
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl
new file mode 100644
index 00000000000..288bfd5eadc
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl
@@ -0,0 +1,40 @@
+
+#include "kernel/kernel_compat_opencl.h"
+

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list