[Bf-blender-cvs] [70c659b] cycles_split_kernel: Cycles: Add SplitKernelFunction with OpenCL implementation

Mai Lavelle noreply at git.blender.org
Tue Oct 18 17:15:55 CEST 2016


Commit: 70c659b77e11ac339775fdc4f5dc30947d7f9815
Author: Mai Lavelle
Date:   Tue Oct 18 16:51:02 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB70c659b77e11ac339775fdc4f5dc30947d7f9815

Cycles: Add SplitKernelFunction with OpenCL implementation

SplitKernelFunction can represent a split kernel function for any device its
been implemented for. Currently this is only for OpenCL to simplify the
enqueueing of the split kernels and move another step closer to a split
kernel that can run on any device.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_mega.cpp
M	intern/cycles/device/opencl/opencl_split.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 014e5fc..f79678d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -190,6 +190,28 @@ public:
 std::ostream& operator <<(std::ostream &os,
                           const DeviceRequestedFeatures& requested_features);
 
+/* Types used for split kernel */
+
+class KernelDimensions {
+public:
+	size_t global_size[2];
+	size_t local_size[2];
+
+	KernelDimensions(size_t global_size_[2], size_t local_size_[2])
+	{
+		memcpy(global_size, global_size_, 2*sizeof(size_t));
+		memcpy(local_size, local_size_, 2*sizeof(size_t));
+	}
+};
+
+class SplitKernelFunction {
+public:
+	virtual ~SplitKernelFunction() {}
+
+	/* enqueue the kernel, returns false if there is an error */
+	virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
+};
+
 /* Device */
 
 struct DeviceDrawParams {
@@ -273,6 +295,18 @@ public:
 	        const DeviceRequestedFeatures& /*requested_features*/)
 	{ return true; }
 
+	/* split kernel */
+	virtual bool enqueue_split_kernel_data_init()
+	{
+		assert(!"not implemented for this device");
+		return false;
+	}
+	virtual SplitKernelFunction* get_split_kernel_function(string /*kernel_name*/, const DeviceRequestedFeatures&)
+	{
+		assert(!"not implemented for this device");
+		return NULL;
+	}
+
 	/* tasks */
 	virtual int get_split_task_count(DeviceTask& task) = 0;
 	virtual void task_add(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 83603b2..dc2a5b2 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -261,7 +261,7 @@ public:
 
 	/* Has to be implemented by the real device classes.
 	 * The base device will then load all these programs. */
-	virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
+	virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
 	                          vector<OpenCLProgram*> &programs) = 0;
 
 	void mem_alloc(device_memory& mem, MemoryType type);
@@ -417,6 +417,8 @@ protected:
 
 	virtual string build_options_for_base_program(
 	        const DeviceRequestedFeatures& /*requested_features*/);
+
+	friend class OpenCLSplitKernelFunction;
 };
 
 Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background);
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 369c086..65feba8 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -39,11 +39,12 @@ public:
 	{
 	}
 
-	virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+	virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
 	                          vector<OpenCLProgram*> &programs)
 	{
 		path_trace_program.add_kernel(ustring("path_trace"));
 		programs.push_back(&path_trace_program);
+		return true;
 	}
 
 	~OpenCLDeviceMegaKernel()
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index e1e1f54..7d8dd95 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -85,22 +85,57 @@ public:
 	int buffer_rng_state_stride;
 };
 
+class OpenCLSplitKernelFunction : public SplitKernelFunction {
+public:
+	OpenCLDeviceBase* device;
+	OpenCLDeviceBase::OpenCLProgram program;
+
+	OpenCLSplitKernelFunction(OpenCLDeviceBase* device) : device(device) {}
+	~OpenCLSplitKernelFunction() { program.release(); }
+
+	virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
+	{
+		device->kernel_set_args(program(), 0, kg, data);
+
+		device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
+		                                       program(),
+		                                       2,
+		                                       NULL,
+		                                       dim.global_size,
+		                                       dim.local_size,
+		                                       0,
+		                                       NULL,
+		                                       NULL);
+
+		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
+
+		if(device->ciErr != CL_SUCCESS) {
+			string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()",
+			                               clewErrorString(device->ciErr));
+			device->opencl_error(message);
+			return false;
+		}
+
+		return true;
+	}
+};
+
 /* OpenCLDeviceSplitKernel's declaration/definition. */
 class OpenCLDeviceSplitKernel : public OpenCLDeviceBase
 {
 public:
 	/* Kernel declaration. */
 	OpenCLProgram program_data_init;
-	OpenCLProgram program_scene_intersect;
-	OpenCLProgram program_lamp_emission;
-	OpenCLProgram program_queue_enqueue;
-	OpenCLProgram program_background_buffer_update;
-	OpenCLProgram program_shader_eval;
-	OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
-	OpenCLProgram program_direct_lighting;
-	OpenCLProgram program_shadow_blocked;
-	OpenCLProgram program_next_iteration_setup;
-	OpenCLProgram program_sum_all_radiance;
+	SplitKernelFunction* program_scene_intersect;
+	SplitKernelFunction* program_lamp_emission;
+	SplitKernelFunction* program_queue_enqueue;
+	SplitKernelFunction* program_background_buffer_update;
+	SplitKernelFunction* program_shader_eval;
+	SplitKernelFunction* program_holdout_emission_blurring_pathtermination_ao;
+	SplitKernelFunction* program_direct_lighting;
+	SplitKernelFunction* program_shadow_blocked;
+	SplitKernelFunction* program_next_iteration_setup;
+	SplitKernelFunction* program_sum_all_radiance;
 
 	/* Global memory variables [porting]; These memory is used for
 	 * co-operation between different kernels; Data written by one
@@ -206,8 +241,7 @@ public:
 		return sizeof(KernelGlobals);
 	}
 
-	virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
-	                          vector<OpenCLProgram*> &programs)
+	string get_build_options(const DeviceRequestedFeatures& requested_features)
 	{
 		string build_options = "-D__SPLIT_KERNEL__ ";
 #ifdef __WORK_STEALING__
@@ -227,15 +261,25 @@ public:
 			build_options += " -D__COMPUTE_DEVICE_GPU__";
 		}
 
-#define GLUE(a, b) a ## b
+		return build_options;
+	}
+
+	virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+	                          vector<OpenCLProgram*> &programs)
+	{
+		program_data_init = OpenCLProgram(this,
+		                                  "split_data_init",
+		                                  "kernel_data_init.cl",
+		                                  get_build_options(requested_features));
+		program_data_init.add_kernel(ustring("path_trace_data_init"));
+		programs.push_back(&program_data_init);
+
 #define LOAD_KERNEL(name) \
-	do { \
-		GLUE(program_, name) = OpenCLProgram(this, "split_" #name, "kernel_" #name ".cl", build_options); \
-		GLUE(program_, name).add_kernel(ustring("path_trace_" #name)); \
-		programs.push_back(&GLUE(program_, name)); \
-	} while(false)
+			program_##name = get_split_kernel_function(#name, requested_features); \
+			if(!program_##name) { \
+				return false;\
+			}
 
-		LOAD_KERNEL(data_init);
 		LOAD_KERNEL(scene_intersect);
 		LOAD_KERNEL(lamp_emission);
 		LOAD_KERNEL(queue_enqueue);
@@ -247,10 +291,31 @@ public:
 		LOAD_KERNEL(next_iteration_setup);
 		LOAD_KERNEL(sum_all_radiance);
 
-#undef FIND_KERNEL
-#undef GLUE
+#undef LOAD_KERNEL
 
 		current_max_closure = requested_features.max_closure;
+
+		return true;
+	}
+
+	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name,
+	                                                       const DeviceRequestedFeatures& requested_features)
+	{
+		OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(this);
+
+		kernel->program = OpenCLProgram(this,
+		                                "split_" + kernel_name,
+		                                "kernel_" + kernel_name + ".cl",
+		                                get_build_options(requested_features));
+		kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
+		kernel->program.load();
+
+		if(!kernel->program.is_loaded()) {
+			delete kernel;
+			return NULL;
+		}
+
+		return kernel;
 	}
 
 	~OpenCLDeviceSplitKernel()
@@ -259,16 +324,16 @@ public:
 
 		/* Release kernels */
 		program_data_init.release();
-		program_scene_intersect.release();
-		program_lamp_emission.release();
-		program_queue_enqueue.release();
-		program_background_buffer_update.release();
-		program_shader_eval.release();
-		program_holdout_emission_blurring_pathtermination_ao.release();
-		program_direct_lighting.release();
-		program_shadow_blocked.release();
-		program_next_iteration_setup.release();
-		program_sum_all_radiance.release();
+		delete program_scene_intersect;
+		delete program_lamp_emission;
+		delete program_queue_enqueue;
+		delete program_background_buffer_update;
+		delete program_shader_eval;
+		delete program_holdout_emission_blurring_pathtermination_ao;
+		delete program_direct_lighting;
+		delete program_shadow_blocked;
+		delete program_next_iteration_setup;
+		delete program_sum_all_radiance;
 
 		/* Release global memory */
 		mem_free(kgbuffer);
@@ -418,51 +483,37 @@ public:
 			                rtile.buffer_rng_state_stride,
 							d_buffer);
 
-#define KERNEL_SET_ARGS(name) kernel_set_args(program_##name(), 0, kgbuffer, d_data);
-		KERNEL_SET_ARGS(scene_intersect);
-		KERNEL_SET_ARGS(lamp_emission);
-		KERNEL_SET_ARGS(queue_enqueue);
-		KERNEL_SET_ARGS(background_buffer_update);
-		KERNEL_SET_ARGS(shader_eval);
-		KERNEL_SET_ARGS(holdout_emission_blurring_pathtermination_ao);
-		KERNEL_SET_ARGS(direct_lighting);
-		KERNEL_SET_ARGS(shadow_blocked);
-		KERNEL_SET_ARGS(next_iteration_setup);
-		KERNEL_SET_ARGS(sum_all_radiance);
-#undef KERNEL_SET_ARGS
-
-		/* Macro for Enqueuing split kernels. */
-#define GLUE(a, b) a ## b
-#define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \
-		{ \
-			ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \
-			                               GLUE(program_, \
-			                    

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list