[Bf-blender-cvs] [4ce9785e015] master: Cycles: Support multithreaded compilation of kernels

Brecht Van Lommel noreply at git.blender.org
Fri Feb 15 08:50:29 CET 2019


Commit: 4ce9785e01587638ae26256fe23315e436c658ff
Author: Brecht Van Lommel
Date:   Fri Feb 15 08:18:38 2019 +0100
Branches: master
https://developer.blender.org/rB4ce9785e01587638ae26256fe23315e436c658ff

Cycles: Support multithreaded compilation of kernels

This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.

Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97

Reviewed By: brecht

Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli

Differential Revision: https://developer.blender.org/D2264

===================================================================

M	intern/cycles/blender/CMakeLists.txt
M	intern/cycles/blender/blender_python.cpp
M	intern/cycles/device/device_intern.h
M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/device/opencl/opencl_mega.cpp
M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/device/opencl/opencl_util.cpp
M	intern/cycles/kernel/CMakeLists.txt
A	intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
M	intern/cycles/util/util_system.cpp
M	intern/cycles/util/util_system.h

===================================================================

diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 84e2690333e..f8720de366f 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -51,6 +51,10 @@ set(ADDON_FILES
 
 add_definitions(${GL_DEFINITIONS})
 
+if(WITH_CYCLES_DEVICE_OPENCL)
+    add_definitions(-DWITH_OPENCL)
+endif()
+
 if(WITH_CYCLES_NETWORK)
 	add_definitions(-DWITH_NETWORK)
 endif()
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index de702337f98..a720a60c05b 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -40,6 +40,10 @@
 #include <OSL/oslconfig.h>
 #endif
 
+#ifdef WITH_OPENCL
+#include "device/device_intern.h"
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 namespace {
@@ -628,6 +632,31 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
 	DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
 	Py_RETURN_NONE;
 }
+
+static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
+{
+	PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
+	if(sequence == NULL) {
+		Py_RETURN_FALSE;
+	}
+
+	vector<string> parameters;
+	for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
+		PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
+		PyObject *item_as_string = PyObject_Str(item);
+		const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
+		parameters.push_back(parameter_string);
+		Py_DECREF(item_as_string);
+	}
+	Py_DECREF(sequence);
+
+	if (device_opencl_compile_kernel(parameters)) {
+		Py_RETURN_TRUE;
+	}
+	else {
+		Py_RETURN_FALSE;
+	}
+}
 #endif
 
 static bool denoise_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepaths)
@@ -903,6 +932,7 @@ static PyMethodDef methods[] = {
 	{"system_info", system_info_func, METH_NOARGS, ""},
 #ifdef WITH_OPENCL
 	{"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
+ 	{"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
 #endif
 
 	/* Standalone denoising */
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 0b26057c3ba..94df1e009eb 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -24,6 +24,7 @@ class Device;
 Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
 bool device_opencl_init();
 Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string>& parameters);
 bool device_cuda_init();
 Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
 Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 9b763167459..a2c0e53b3e7 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -268,6 +268,7 @@ public:
 	cl_platform_id cpPlatform;
 	cl_device_id cdDevice;
 	cl_int ciErr;
+	int device_num;
 
 	class OpenCLProgram {
 	public:
@@ -293,7 +294,15 @@ public:
 
 	private:
 		bool build_kernel(const string *debug_src);
+		/* Build the program in a separate process by launching our own executable.
+		 * This is required for multithreaded OpenCL compilation, since most frameworks serialize
+		 * build calls internally if they come from the same process.
+		 * If that is not supported, this function just returns false.
+		 */
+		bool compile_separate(const string& clbin);
+		/* Build the program by calling OpenCL directly. */
 		bool compile_kernel(const string *debug_src);
+		/* Loading and saving the program from/to disk. */
 		bool load_binary(const string& clbin, const string *debug_src = NULL);
 		bool save_binary(const string& clbin);
 
@@ -342,12 +351,17 @@ public:
 	bool opencl_version_check();
 
 	string device_md5_hash(string kernel_custom_build_options = "");
-	bool load_kernels(const DeviceRequestedFeatures& requested_features);
+	virtual bool load_kernels(const DeviceRequestedFeatures& requested_features);
 
 	/* Has to be implemented by the real device classes.
 	 * The base device will then load all these programs. */
-	virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
-	                          vector<OpenCLProgram*> &programs) = 0;
+	virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
+	                                 vector<OpenCLProgram*> &programs) = 0;
+
+	/* Get the name of the opencl program for the given kernel */
+	virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0;
+	/* Get the program file name to compile (*.cl) for the given kernel */
+	virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0;
 
 	void mem_alloc(device_memory& mem);
 	void mem_copy_to(device_memory& mem);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 4417065bb7f..d8f9a242ac8 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
 	}
 	assert(info.num < usable_devices.size());
 	OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+	device_num = info.num;
 	cpPlatform = platform_device.platform_id;
 	cdDevice = platform_device.device_id;
 	platform_name = platform_device.platform_name;
@@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
 	texture_info.resize(1);
 	memory_manager.alloc("texture_info", texture_info);
 
-	fprintf(stderr, "Device init success\n");
 	device_initialized = true;
 }
 
@@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	programs.push_back(&base_program);
 	programs.push_back(&denoising_program);
 	/* Call actual class to fill the vector with its programs. */
-	if(!load_kernels(requested_features, programs)) {
+	if(!add_kernel_programs(requested_features, programs)) {
 		return false;
 	}
 
-	/* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
-	 * serialize the calls internally, so it's not much use right now.
-	 * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
-	 * should be set to false as well. */
-#if 0
+	/* Parallel compilation of Cycles kernels. This launches multiple
+	 * processes to work around OpenCL frameworks serializing the calls
+	 * internally within a single process. */
 	TaskPool task_pool;
 	foreach(OpenCLProgram *program, programs) {
 		task_pool.push(function_bind(&OpenCLProgram::load, program));
@@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 			return false;
 		}
 	}
-#else
-	foreach(OpenCLProgram *program, programs) {
-		program->load();
-		if(!program->is_loaded()) {
-			return false;
-		}
-	}
-#endif
 
 	return true;
 }
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 0a7bf96fed7..c0b9e81d4d3 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -35,19 +35,35 @@ public:
 
 	OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
 	: OpenCLDeviceBase(info, stats, profiler, background_),
-	  path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+	  path_trace_program(this,
+	                     get_opencl_program_name(false, "megakernel"),
+	                     get_opencl_program_filename(false, "megakernel"),
+	                     "-D__COMPILE_ONLY_MEGAKERNEL__ ")
 	{
 	}
 
-	virtual bool show_samples() const {
+
+	virtual bool show_samples() const
+	{
 		return true;
 	}
 
-	virtual BVHLayoutMask get_bvh_layout_mask() const {
+	virtual BVHLayoutMask get_bvh_layout_mask() const
+	{
 		return BVH_LAYOUT_BVH2;
 	}
 
-	virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+	const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
+	{
+		return kernel_name;
+	}
+
+	const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
+	{
+		return "kernel.cl";
+	}
+
+	virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
 	                          vector<OpenCLProgram*> &programs)
 	{
 		path_trace_program.add_kernel(ustring("path_trace"));
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 5a2555f9f80..b759f69d3ab 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -79,6 +79,27 @@ public:
 	OpenCLProgram program_data_init;
 	OpenCLProgram program_state_buffer_size;
 
+	OpenCLProgram program_split;
+
+	OpenCLProgram program_path_init;
+	OpenCLProgram program_scene_intersect;
+	OpenCLProgram program_lamp_emission;
+	OpenCLProgram program_do_volume;
+	OpenCLProgram program_queue_enqueue;
+	OpenCLProgram program_indirect_background;
+	OpenCLProgram program_shader_setup;
+	OpenCLProgram program_shader_sort;
+	OpenCLProgram program_shader_eval;
+	OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+	OpenCLProgram program_subsurface_scatter;
+	OpenCLProgram program_direct_lighting;
+	OpenCLProgram program_shadow_blocked_ao;
+	OpenCLProgram program_shadow_blocked_dl;
+	OpenCLProgram program_enqueue_inactive;
+	OpenCLProgram program_next_iteration_setup;
+	OpenCLProgram program_indirect_subsurface;
+	OpenCLProgram program_buffer_update;
+
 	OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
 
 	~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
 		return BVH_LAYOUT_BVH2;
 	}
 
-	virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+	virtual bool load_kernels(const DeviceRequestedFeatures& requested_featur

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list