[Bf-blender-cvs] [70c659b] cycles_split_kernel: Cycles: Add SplitKernelFunction with OpenCL implementation
Mai Lavelle
noreply at git.blender.org
Tue Oct 18 17:15:55 CEST 2016
Commit: 70c659b77e11ac339775fdc4f5dc30947d7f9815
Author: Mai Lavelle
Date: Tue Oct 18 16:51:02 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB70c659b77e11ac339775fdc4f5dc30947d7f9815
Cycles: Add SplitKernelFunction with OpenCL implementation
SplitKernelFunction can represent a split kernel function for any device its
been implemented for. Currently this is only for OpenCL to simplify the
enqueueing of the split kernels and move another step closer to a split
kernel that can run on any device.
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/opencl/opencl.h
M intern/cycles/device/opencl/opencl_mega.cpp
M intern/cycles/device/opencl/opencl_split.cpp
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 014e5fc..f79678d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -190,6 +190,28 @@ public:
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features);
+/* Types used for split kernel */
+
+class KernelDimensions {
+public:
+ size_t global_size[2];
+ size_t local_size[2];
+
+ KernelDimensions(size_t global_size_[2], size_t local_size_[2])
+ {
+ memcpy(global_size, global_size_, 2*sizeof(size_t));
+ memcpy(local_size, local_size_, 2*sizeof(size_t));
+ }
+};
+
+class SplitKernelFunction {
+public:
+ virtual ~SplitKernelFunction() {}
+
+ /* enqueue the kernel, returns false if there is an error */
+ virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
+};
+
/* Device */
struct DeviceDrawParams {
@@ -273,6 +295,18 @@ public:
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
+ /* split kernel */
+ virtual bool enqueue_split_kernel_data_init()
+ {
+ assert(!"not implemented for this device");
+ return false;
+ }
+ virtual SplitKernelFunction* get_split_kernel_function(string /*kernel_name*/, const DeviceRequestedFeatures&)
+ {
+ assert(!"not implemented for this device");
+ return NULL;
+ }
+
/* tasks */
virtual int get_split_task_count(DeviceTask& task) = 0;
virtual void task_add(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 83603b2..dc2a5b2 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -261,7 +261,7 @@ public:
/* Has to be implemented by the real device classes.
* The base device will then load all these programs. */
- virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
vector<OpenCLProgram*> &programs) = 0;
void mem_alloc(device_memory& mem, MemoryType type);
@@ -417,6 +417,8 @@ protected:
virtual string build_options_for_base_program(
const DeviceRequestedFeatures& /*requested_features*/);
+
+ friend class OpenCLSplitKernelFunction;
};
Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background);
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 369c086..65feba8 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -39,11 +39,12 @@ public:
{
}
- virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+ virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
path_trace_program.add_kernel(ustring("path_trace"));
programs.push_back(&path_trace_program);
+ return true;
}
~OpenCLDeviceMegaKernel()
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index e1e1f54..7d8dd95 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -85,22 +85,57 @@ public:
int buffer_rng_state_stride;
};
+class OpenCLSplitKernelFunction : public SplitKernelFunction {
+public:
+ OpenCLDeviceBase* device;
+ OpenCLDeviceBase::OpenCLProgram program;
+
+ OpenCLSplitKernelFunction(OpenCLDeviceBase* device) : device(device) {}
+ ~OpenCLSplitKernelFunction() { program.release(); }
+
+ virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
+ {
+ device->kernel_set_args(program(), 0, kg, data);
+
+ device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
+ program(),
+ 2,
+ NULL,
+ dim.global_size,
+ dim.local_size,
+ 0,
+ NULL,
+ NULL);
+
+ device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
+
+ if(device->ciErr != CL_SUCCESS) {
+ string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
+ clewErrorString(device->ciErr));
+ device->opencl_error(message);
+ return false;
+ }
+
+ return true;
+ }
+};
+
/* OpenCLDeviceSplitKernel's declaration/definition. */
class OpenCLDeviceSplitKernel : public OpenCLDeviceBase
{
public:
/* Kernel declaration. */
OpenCLProgram program_data_init;
- OpenCLProgram program_scene_intersect;
- OpenCLProgram program_lamp_emission;
- OpenCLProgram program_queue_enqueue;
- OpenCLProgram program_background_buffer_update;
- OpenCLProgram program_shader_eval;
- OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
- OpenCLProgram program_direct_lighting;
- OpenCLProgram program_shadow_blocked;
- OpenCLProgram program_next_iteration_setup;
- OpenCLProgram program_sum_all_radiance;
+ SplitKernelFunction* program_scene_intersect;
+ SplitKernelFunction* program_lamp_emission;
+ SplitKernelFunction* program_queue_enqueue;
+ SplitKernelFunction* program_background_buffer_update;
+ SplitKernelFunction* program_shader_eval;
+ SplitKernelFunction* program_holdout_emission_blurring_pathtermination_ao;
+ SplitKernelFunction* program_direct_lighting;
+ SplitKernelFunction* program_shadow_blocked;
+ SplitKernelFunction* program_next_iteration_setup;
+ SplitKernelFunction* program_sum_all_radiance;
/* Global memory variables [porting]; These memory is used for
* co-operation between different kernels; Data written by one
@@ -206,8 +241,7 @@ public:
return sizeof(KernelGlobals);
}
- virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
- vector<OpenCLProgram*> &programs)
+ string get_build_options(const DeviceRequestedFeatures& requested_features)
{
string build_options = "-D__SPLIT_KERNEL__ ";
#ifdef __WORK_STEALING__
@@ -227,15 +261,25 @@ public:
build_options += " -D__COMPUTE_DEVICE_GPU__";
}
-#define GLUE(a, b) a ## b
+ return build_options;
+ }
+
+ virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+ vector<OpenCLProgram*> &programs)
+ {
+ program_data_init = OpenCLProgram(this,
+ "split_data_init",
+ "kernel_data_init.cl",
+ get_build_options(requested_features));
+ program_data_init.add_kernel(ustring("path_trace_data_init"));
+ programs.push_back(&program_data_init);
+
#define LOAD_KERNEL(name) \
- do { \
- GLUE(program_, name) = OpenCLProgram(this, "split_" #name, "kernel_" #name ".cl", build_options); \
- GLUE(program_, name).add_kernel(ustring("path_trace_" #name)); \
- programs.push_back(&GLUE(program_, name)); \
- } while(false)
+ program_##name = get_split_kernel_function(#name, requested_features); \
+ if(!program_##name) { \
+ return false;\
+ }
- LOAD_KERNEL(data_init);
LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission);
LOAD_KERNEL(queue_enqueue);
@@ -247,10 +291,31 @@ public:
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(sum_all_radiance);
-#undef FIND_KERNEL
-#undef GLUE
+#undef LOAD_KERNEL
current_max_closure = requested_features.max_closure;
+
+ return true;
+ }
+
+ virtual SplitKernelFunction* get_split_kernel_function(string kernel_name,
+ const DeviceRequestedFeatures& requested_features)
+ {
+ OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(this);
+
+ kernel->program = OpenCLProgram(this,
+ "split_" + kernel_name,
+ "kernel_" + kernel_name + ".cl",
+ get_build_options(requested_features));
+ kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
+ kernel->program.load();
+
+ if(!kernel->program.is_loaded()) {
+ delete kernel;
+ return NULL;
+ }
+
+ return kernel;
}
~OpenCLDeviceSplitKernel()
@@ -259,16 +324,16 @@ public:
/* Release kernels */
program_data_init.release();
- program_scene_intersect.release();
- program_lamp_emission.release();
- program_queue_enqueue.release();
- program_background_buffer_update.release();
- program_shader_eval.release();
- program_holdout_emission_blurring_pathtermination_ao.release();
- program_direct_lighting.release();
- program_shadow_blocked.release();
- program_next_iteration_setup.release();
- program_sum_all_radiance.release();
+ delete program_scene_intersect;
+ delete program_lamp_emission;
+ delete program_queue_enqueue;
+ delete program_background_buffer_update;
+ delete program_shader_eval;
+ delete program_holdout_emission_blurring_pathtermination_ao;
+ delete program_direct_lighting;
+ delete program_shadow_blocked;
+ delete program_next_iteration_setup;
+ delete program_sum_all_radiance;
/* Release global memory */
mem_free(kgbuffer);
@@ -418,51 +483,37 @@ public:
rtile.buffer_rng_state_stride,
d_buffer);
-#define KERNEL_SET_ARGS(name) kernel_set_args(program_##name(), 0, kgbuffer, d_data);
- KERNEL_SET_ARGS(scene_intersect);
- KERNEL_SET_ARGS(lamp_emission);
- KERNEL_SET_ARGS(queue_enqueue);
- KERNEL_SET_ARGS(background_buffer_update);
- KERNEL_SET_ARGS(shader_eval);
- KERNEL_SET_ARGS(holdout_emission_blurring_pathtermination_ao);
- KERNEL_SET_ARGS(direct_lighting);
- KERNEL_SET_ARGS(shadow_blocked);
- KERNEL_SET_ARGS(next_iteration_setup);
- KERNEL_SET_ARGS(sum_all_radiance);
-#undef KERNEL_SET_ARGS
-
- /* Macro for Enqueuing split kernels. */
-#define GLUE(a, b) a ## b
-#define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \
- { \
- ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \
- GLUE(program_, \
-
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list