[Bf-blender-cvs] [a644f2b46c] cycles_split_kernel: Cycles: Refactor device split kernel code

Mai Lavelle noreply at git.blender.org
Sat Feb 11 13:06:30 CET 2017


Commit: a644f2b46c54259809bf2c88ee26465aa33e7b37
Author: Mai Lavelle
Date:   Fri Feb 3 22:40:59 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rBa644f2b46c54259809bf2c88ee26465aa33e7b37

Cycles: Refactor device split kernel code

Moved all split kernel related stuff out of `Device` as it doesn't belong
there. Those functions are now a part of `DeviceSplitKernel`, which now
must be implemented for each device type supporting the split kernel. No
functional changes.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/device/opencl/opencl_split.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 6da7767ae2..c740cada98 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -33,7 +33,6 @@ CCL_NAMESPACE_BEGIN
 
 class Progress;
 class RenderTile;
-class DeviceSplitKernel;
 
 /* Device Types */
 
@@ -205,28 +204,6 @@ public:
 std::ostream& operator <<(std::ostream &os,
                           const DeviceRequestedFeatures& requested_features);
 
-/* Types used for split kernel */
-
-class KernelDimensions {
-public:
-	size_t global_size[2];
-	size_t local_size[2];
-
-	KernelDimensions(size_t global_size_[2], size_t local_size_[2])
-	{
-		memcpy(global_size, global_size_, sizeof(global_size));
-		memcpy(local_size, local_size_, sizeof(local_size));
-	}
-};
-
-class SplitKernelFunction {
-public:
-	virtual ~SplitKernelFunction() {}
-
-	/* enqueue the kernel, returns false if there is an error */
-	virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
-};
-
 /* Device */
 
 struct DeviceDrawParams {
@@ -292,44 +269,6 @@ public:
 	        const DeviceRequestedFeatures& /*requested_features*/)
 	{ return true; }
 
-private:
-	/* split kernel */
-	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& /*dim*/,
-	                                            RenderTile& /*rtile*/,
-	                                            int /*num_global_elements*/,
-	                                            device_memory& /*kernel_globals*/,
-	                                            device_memory& /*kernel_data*/,
-	                                            device_memory& /*split_data*/,
-	                                            device_memory& /*ray_state*/,
-	                                            device_memory& /*queue_index*/,
-	                                            device_memory& /*use_queues_flag*/,
-	                                            device_memory& /*work_pool_wgs*/)
-	{
-		assert(!"not implemented for this device");
-		return false;
-	}
-
-	virtual SplitKernelFunction* get_split_kernel_function(string /*kernel_name*/, const DeviceRequestedFeatures&)
-	{
-		assert(!"not implemented for this device");
-		return NULL;
-	}
-
-	virtual int2 split_kernel_local_size()
-	{
-		assert(!"not implemented for this device");
-		return make_int2(0, 0);
-	}
-
-	virtual int2 split_kernel_global_size(DeviceTask */*task*/, DeviceSplitKernel& /*split_kernel*/)
-	{
-		assert(!"not implemented for this device");
-		return make_int2(64, 64);
-	}
-
-	friend class DeviceSplitKernel;
-
-public:
 	/* tasks */
 	virtual int get_split_task_count(DeviceTask& task) = 0;
 	virtual void task_add(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index f423fdf60b..722274d042 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -53,33 +53,25 @@ CCL_NAMESPACE_BEGIN
 
 class CPUDevice;
 
-class CPUSplitKernelFunction : public SplitKernelFunction {
+class CPUSplitKernel : public DeviceSplitKernel {
+	CPUDevice *device;
 public:
-	CPUDevice* device;
-	void (*func)(KernelGlobals *kg, KernelData *data);
-
-	CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {}
-	~CPUSplitKernelFunction() {}
-
-	virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data)
-	{
-		if(!func) {
-			return false;
-		}
-
-		KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
-		kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
+	explicit CPUSplitKernel(CPUDevice *device);
 
-		for(int y = 0; y < dim.global_size[1]; y++) {
-			for(int x = 0; x < dim.global_size[0]; x++) {
-				kg->global_id = make_int2(x, y);
-
-				func(kg, (KernelData*)data.device_pointer);
-			}
-		}
+	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
+	                                            RenderTile& rtile,
+	                                            int num_global_elements,
+	                                            device_memory& kernel_globals,
+	                                            device_memory& kernel_data_,
+	                                            device_memory& split_data,
+	                                            device_memory& ray_state,
+	                                            device_memory& queue_index,
+	                                            device_memory& use_queues_flag,
+	                                            device_memory& work_pool_wgs);
 
-		return true;
-	}
+	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
+	virtual int2 split_kernel_local_size();
+	virtual int2 split_kernel_global_size(DeviceTask *task);
 };
 
 class CPUDevice : public Device
@@ -143,6 +135,8 @@ class CPUDevice : public Device
 		return (F)it->second;
 	}
 
+	friend class CPUSplitKernel;
+
 public:
 	TaskPool task_pool;
 	KernelGlobals kernel_globals;
@@ -444,7 +438,7 @@ public:
 
 		RenderTile tile;
 
-		DeviceSplitKernel split_kernel(this);
+		CPUSplitKernel split_kernel(this);
 
 		/* allocate buffer for kernel globals */
 		device_memory kgbuffer;
@@ -709,75 +703,22 @@ protected:
 
 		return true;
 	}
+};
 
-	/* split kernel */
-	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
-	                                            RenderTile& rtile,
-	                                            int num_global_elements,
-	                                            device_memory& kernel_globals,
-	                                            device_memory& data,
-	                                            device_memory& split_data,
-	                                            device_memory& ray_state,
-	                                            device_memory& queue_index,
-	                                            device_memory& use_queues_flags,
-	                                            device_memory& work_pool_wgs)
-	{
-		typedef void(*data_init_t)(KernelGlobals *kg,
-                                   ccl_constant KernelData *data,
-                                   ccl_global void *split_data_buffer,
-                                   int num_elements,
-                                   ccl_global char *ray_state,
-                                   ccl_global uint *rng_state,
-                                   int start_sample,
-                                   int end_sample,
-                                   int sx, int sy, int sw, int sh, int offset, int stride,
-                                   int rng_state_offset_x,
-                                   int rng_state_offset_y,
-                                   int rng_state_stride,
-                                   ccl_global int *Queue_index,
-                                   int queuesize,
-                                   ccl_global char *use_queues_flag,
-                                   ccl_global unsigned int *work_pool_wgs,
-                                   unsigned int num_samples,
-                                   int buffer_offset_x,
-                                   int buffer_offset_y,
-                                   int buffer_stride,
-                                   ccl_global float *buffer);
-
-		data_init_t data_init;
+/* split kernel */
 
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-		if(system_cpu_support_avx2()) {
-			data_init = kernel_cpu_avx2_data_init;
-		}
-		else
-#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-		if(system_cpu_support_avx()) {
-			data_init = kernel_cpu_avx_data_init;
-		}
-		else
-#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-		if(system_cpu_support_sse41()) {
-			data_init = kernel_cpu_sse41_data_init;
-		}
-		else
-#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-		if(system_cpu_support_sse3()) {
-			data_init = kernel_cpu_sse3_data_init;
-		}
-		else
-#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-		if(system_cpu_support_sse2()) {
-			data_init = kernel_cpu_sse2_data_init;
-		}
-		else
-#endif
-		{
-			data_init = kernel_cpu_data_init;
+class CPUSplitKernelFunction : public SplitKernelFunction {
+public:
+	CPUDevice* device;
+	void (*func)(KernelGlobals *kg, KernelData *data);
+
+	CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {}
+	~CPUSplitKernelFunction() {}
+
+	virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data)
+	{
+		if(!func) {
+			return false;
 		}
 
 		KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
@@ -787,62 +728,148 @@ protected:
 			for(int x = 0; x < dim.global_size[0]; x++) {
 				kg->global_id = make_int2(x, y);
 
-				data_init((KernelGlobals*)kernel_globals.device_pointer,
-						  (KernelData*)data.device_pointer,
-						  (void*)split_data.device_pointer,
-						  num_global_elements,
-						  (char*)ray_state.device_pointer,
-						  (uint*)rtile.rng_state,
-						  rtile.start_sample,
-						  rtile.start_sample + rtile.num_samples,
-						  rtile.x,
-						  rtile.y,
-						  rtile.w,
-						  rtile.h,
-						  rtile.offset,
-						  rtile.stride,
-						  rtile.rng_state_offset_x,
-						  rtile.rng_state_offset_y,
-						  rtile.buffer_rng_state_stride,
-						  (int*)queue_index.device_pointer,
-						  dim.global_size[0] * dim.global_size[1],
-						  (char*)use_queues_flags.device_pointer,
-						  (uint*)work_pool_wgs.device_pointer,
-						  rtile.num_samples,
-						  rtile.buffer_offset_x,
-						  rtile.buffer_offset_y,
-						  rtile.buffer_rng_state_stride,
-						  (float*)rtile.buffer);
+				func(kg, (KernelData*)data.device_pointer);
 			}
 		}
 
 		return true;
 	}
+};
 
-	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
-	{
-		CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(this);
+CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
 
-		kernel->func = get_kernel_function<void(*)(KernelGlobals*, KernelData*)>(kernel_name);
-		if(!kernel->func) {
-			delete kernel;
-			return NULL;
-		}
+bool CPUSplitKernel::enqueue_split_kernel

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list