[Bf-blender-cvs] [e3bb3ed] cycles_split_kernel: Cycles: Add function so each device can specify its ideal local work size

Mai Lavelle noreply at git.blender.org
Thu Oct 27 20:22:36 CEST 2016


Commit: e3bb3ed5ba3dc8911af52bad4f0355f0caff93e4
Author: Mai Lavelle
Date:   Thu Oct 27 18:52:04 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rBe3bb3ed5ba3dc8911af52bad4f0355f0caff93e4

Cycles: Add function so each device can specify its ideal local work size

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/device/opencl/opencl_split.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 71e9a02..c832e19 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -328,6 +328,12 @@ public:
 		assert(!"not implemented for this device");
 	}
 
+	virtual int2 split_kernel_local_size()
+	{
+		assert(!"not implemented for this device");
+		return make_int2(0, 0);
+	}
+
 	/* tasks */
 	virtual int get_split_task_count(DeviceTask& task) = 0;
 	virtual void task_add(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 1955952..50b6ad1 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -713,6 +713,11 @@ protected:
 	{
 		thread_kernel_globals_free((KernelGlobals*)mem.device_pointer);
 	}
+
+	virtual int2 split_kernel_local_size()
+	{
+		return make_int2(1, 1);
+	}
 };
 
 unordered_map<string, void*> CPUDevice::kernel_functions;
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index b9cec54..4bb3c5d 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -84,13 +84,22 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 	tile.buffer_rng_state_stride = tile.stride;
 	tile.stride = tile.w;
 
+	size_t global_size[2];
+	size_t local_size[2];
+
+	{
+		int2 lsize = device->split_kernel_local_size();
+		local_size[0] = lsize[0];
+		local_size[1] = lsize[1];
+	}
+
 	/* Make sure that set render feasible tile size is a multiple of local
 	 * work size dimensions.
 	 */
 	int2 max_render_feasible_tile_size;
 	const int2 tile_size = task->requested_tile_size;
-	max_render_feasible_tile_size.x = ROUND_UP(tile_size.x, SPLIT_KERNEL_LOCAL_SIZE_X);
-	max_render_feasible_tile_size.y = ROUND_UP(tile_size.y, SPLIT_KERNEL_LOCAL_SIZE_Y);
+	max_render_feasible_tile_size.x = ROUND_UP(tile_size.x, local_size[0]);
+	max_render_feasible_tile_size.y = ROUND_UP(tile_size.y, local_size[1]);
 
 	/* Calculate per_thread_output_buffer_size. */
 	size_t per_thread_output_buffer_size;
@@ -116,10 +125,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 			                      tile.buffers->params.height);
 	}
 
-	size_t global_size[2];
-	size_t local_size[2] = {SPLIT_KERNEL_LOCAL_SIZE_X,
-	                        SPLIT_KERNEL_LOCAL_SIZE_Y};
-
 	int d_w = tile.w;
 	int d_h = tile.h;
 
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 643a2fd..619702c 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -23,11 +23,6 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Macro declarations used with split kernel */
-
-#define SPLIT_KERNEL_LOCAL_SIZE_X 64
-#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
-
 /* This value may be tuned according to the scene we are rendering.
  *
  * Modifying PATH_ITER_INC_FACTOR value proportional to number of expected
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 7f50968..2ab78f8 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -257,6 +257,11 @@ public:
 		return true;
 	}
 
+	virtual int2 split_kernel_local_size()
+	{
+		return make_int2(64, 1);
+	}
+
 	void thread_run(DeviceTask *task)
 	{
 		if(task->type == DeviceTask::FILM_CONVERT) {




More information about the Bf-blender-cvs mailing list