[Bf-blender-cvs] [4ce9785e015] master: Cycles: Support multithreaded compilation of kernels
Brecht Van Lommel
noreply at git.blender.org
Fri Feb 15 08:50:29 CET 2019
Commit: 4ce9785e01587638ae26256fe23315e436c658ff
Author: Brecht Van Lommel
Date: Fri Feb 15 08:18:38 2019 +0100
Branches: master
https://developer.blender.org/rB4ce9785e01587638ae26256fe23315e436c658ff
Cycles: Support multithreaded compilation of kernels
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.
Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97
Reviewed By: brecht
Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli
Differential Revision: https://developer.blender.org/D2264
===================================================================
M intern/cycles/blender/CMakeLists.txt
M intern/cycles/blender/blender_python.cpp
M intern/cycles/device/device_intern.h
M intern/cycles/device/opencl/opencl.h
M intern/cycles/device/opencl/opencl_base.cpp
M intern/cycles/device/opencl/opencl_mega.cpp
M intern/cycles/device/opencl/opencl_split.cpp
M intern/cycles/device/opencl/opencl_util.cpp
M intern/cycles/kernel/CMakeLists.txt
A intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
M intern/cycles/util/util_system.cpp
M intern/cycles/util/util_system.h
===================================================================
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 84e2690333e..f8720de366f 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -51,6 +51,10 @@ set(ADDON_FILES
add_definitions(${GL_DEFINITIONS})
+if(WITH_CYCLES_DEVICE_OPENCL)
+ add_definitions(-DWITH_OPENCL)
+endif()
+
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index de702337f98..a720a60c05b 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -40,6 +40,10 @@
#include <OSL/oslconfig.h>
#endif
+#ifdef WITH_OPENCL
+#include "device/device_intern.h"
+#endif
+
CCL_NAMESPACE_BEGIN
namespace {
@@ -628,6 +632,31 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
Py_RETURN_NONE;
}
+
+/* Python entry point for `opencl_compile`.
+ *
+ * Converts every positional argument to its `str()` representation (UTF-8
+ * encoded) and forwards the resulting list to device_opencl_compile_kernel().
+ *
+ * Returns True when device_opencl_compile_kernel() reports success and False
+ * otherwise. If an argument can not be converted to a string, NULL is returned
+ * with the Python exception left set, so the error is raised in the caller
+ * instead of being hidden behind a boolean result.
+ *
+ * NOTE(review): the meaning and order of the parameters is defined by
+ * device_opencl_compile_kernel(), which is not visible here. */
+static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
+{
+	PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
+	if(sequence == NULL) {
+		/* PySequence_Fast() has already set a TypeError. Returning a valid
+		 * object while an exception is pending is an API violation, so
+		 * propagate the failure. */
+		return NULL;
+	}
+
+	const Py_ssize_t num_items = PySequence_Fast_GET_SIZE(sequence);
+	vector<string> parameters;
+	parameters.reserve(num_items);
+
+	for(Py_ssize_t i = 0; i < num_items; i++) {
+		/* Borrowed reference, owned by `sequence`. */
+		PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
+
+		/* New reference; NULL if the object's __str__ raised. */
+		PyObject *item_as_string = PyObject_Str(item);
+		if(item_as_string == NULL) {
+			Py_DECREF(sequence);
+			return NULL;
+		}
+
+		/* The buffer is owned by `item_as_string`, so copy it into the vector
+		 * before releasing the reference. NULL means the UTF-8 encoding failed
+		 * and must not be used to construct a string. */
+		const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
+		if(parameter_string == NULL) {
+			Py_DECREF(item_as_string);
+			Py_DECREF(sequence);
+			return NULL;
+		}
+
+		parameters.push_back(parameter_string);
+		Py_DECREF(item_as_string);
+	}
+	Py_DECREF(sequence);
+
+	if(device_opencl_compile_kernel(parameters)) {
+		Py_RETURN_TRUE;
+	}
+	else {
+		Py_RETURN_FALSE;
+	}
+}
#endif
static bool denoise_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepaths)
@@ -903,6 +932,7 @@ static PyMethodDef methods[] = {
{"system_info", system_info_func, METH_NOARGS, ""},
#ifdef WITH_OPENCL
{"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
+ {"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
#endif
/* Standalone denoising */
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 0b26057c3ba..94df1e009eb 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -24,6 +24,7 @@ class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string>& parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 9b763167459..a2c0e53b3e7 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -268,6 +268,7 @@ public:
cl_platform_id cpPlatform;
cl_device_id cdDevice;
cl_int ciErr;
+ int device_num;
class OpenCLProgram {
public:
@@ -293,7 +294,15 @@ public:
private:
bool build_kernel(const string *debug_src);
+ /* Build the program by invoking our own executable as a separate process.
+ * This is required for multithreaded OpenCL compilation, since most frameworks serialize
+ * build calls internally if they come from the same process.
+ * If that is not supported, this function just returns false.
+ */
+ bool compile_separate(const string& clbin);
+ /* Build the program by calling OpenCL directly. */
bool compile_kernel(const string *debug_src);
+ /* Loading and saving the program from/to disk. */
bool load_binary(const string& clbin, const string *debug_src = NULL);
bool save_binary(const string& clbin);
@@ -342,12 +351,17 @@ public:
bool opencl_version_check();
string device_md5_hash(string kernel_custom_build_options = "");
- bool load_kernels(const DeviceRequestedFeatures& requested_features);
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features);
/* Has to be implemented by the real device classes.
* The base device will then load all these programs. */
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
- vector<OpenCLProgram*> &programs) = 0;
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
+ vector<OpenCLProgram*> &programs) = 0;
+
+ /* Get the name of the opencl program for the given kernel */
+ virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0;
+ /* Get the program file name to compile (*.cl) for the given kernel */
+ virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0;
void mem_alloc(device_memory& mem);
void mem_copy_to(device_memory& mem);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 4417065bb7f..d8f9a242ac8 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
}
assert(info.num < usable_devices.size());
OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+ device_num = info.num;
cpPlatform = platform_device.platform_id;
cdDevice = platform_device.device_id;
platform_name = platform_device.platform_name;
@@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
texture_info.resize(1);
memory_manager.alloc("texture_info", texture_info);
- fprintf(stderr, "Device init success\n");
device_initialized = true;
}
@@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
programs.push_back(&base_program);
programs.push_back(&denoising_program);
/* Call actual class to fill the vector with its programs. */
- if(!load_kernels(requested_features, programs)) {
+ if(!add_kernel_programs(requested_features, programs)) {
return false;
}
- /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
- * serialize the calls internally, so it's not much use right now.
- * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
- * should be set to false as well. */
-#if 0
+ /* Parallel compilation of Cycles kernels. This launches multiple
+ * processes to work around OpenCL frameworks serializing the calls
+ * internally within a single process. */
TaskPool task_pool;
foreach(OpenCLProgram *program, programs) {
task_pool.push(function_bind(&OpenCLProgram::load, program));
@@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
return false;
}
}
-#else
- foreach(OpenCLProgram *program, programs) {
- program->load();
- if(!program->is_loaded()) {
- return false;
- }
- }
-#endif
return true;
}
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 0a7bf96fed7..c0b9e81d4d3 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -35,19 +35,35 @@ public:
OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
: OpenCLDeviceBase(info, stats, profiler, background_),
- path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+ path_trace_program(this,
+ get_opencl_program_name(false, "megakernel"),
+ get_opencl_program_filename(false, "megakernel"),
+ "-D__COMPILE_ONLY_MEGAKERNEL__ ")
{
}
- virtual bool show_samples() const {
+
+ virtual bool show_samples() const
+ {
return true;
}
- virtual BVHLayoutMask get_bvh_layout_mask() const {
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+ const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
+ {
+ return kernel_name;
+ }
+
+ const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
+ {
+ return "kernel.cl";
+ }
+
+ virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
path_trace_program.add_kernel(ustring("path_trace"));
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 5a2555f9f80..b759f69d3ab 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -79,6 +79,27 @@ public:
OpenCLProgram program_data_init;
OpenCLProgram program_state_buffer_size;
+ OpenCLProgram program_split;
+
+ OpenCLProgram program_path_init;
+ OpenCLProgram program_scene_intersect;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_do_volume;
+ OpenCLProgram program_queue_enqueue;
+ OpenCLProgram program_indirect_background;
+ OpenCLProgram program_shader_setup;
+ OpenCLProgram program_shader_sort;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_subsurface_scatter;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked_ao;
+ OpenCLProgram program_shadow_blocked_dl;
+ OpenCLProgram program_enqueue_inactive;
+ OpenCLProgram program_next_iteration_setup;
+ OpenCLProgram program_indirect_subsurface;
+ OpenCLProgram program_buffer_update;
+
OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
return BVH_LAYOUT_BVH2;
}
- virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_featur
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list