[Bf-blender-cvs] [83ef38068b] cycles_split_kernel: Cycles: Move kgbuffer allocation out of split kernel code

Mai Lavelle noreply at git.blender.org
Sat Feb 11 12:33:59 CET 2017


Commit: 83ef38068b71ae4453f94d566fd0da02e5912ad1
Author: Mai Lavelle
Date:   Sat Feb 11 06:25:01 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB83ef38068b71ae4453f94d566fd0da02e5912ad1

Cycles: Move kgbuffer allocation out of split kernel code

Allocating the buffer is the job of the device implementation, not the split
kernel, so it makes more sense to separate that code.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/device/opencl/opencl_split.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index dd176c7577..6da7767ae2 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -315,16 +315,6 @@ private:
 		return NULL;
 	}
 
-	virtual void alloc_kernel_globals(device_memory& /*mem*/)
-	{
-		assert(!"not implemented for this device");
-	}
-
-	virtual void free_kernel_globals(device_memory& /*mem*/)
-	{
-		assert(!"not implemented for this device");
-	}
-
 	virtual int2 split_kernel_local_size()
 	{
 		assert(!"not implemented for this device");
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 69a5169613..f423fdf60b 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -446,14 +446,25 @@ public:
 
 		DeviceSplitKernel split_kernel(this);
 
+		/* allocate buffer for kernel globals */
+		device_memory kgbuffer;
+		kgbuffer.resize(sizeof(KernelGlobals));
+		mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+
+		KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
+		*kg = thread_kernel_globals_init();
+
 		requested_features.max_closure = MAX_CLOSURE;
 		if(!split_kernel.load_kernels(requested_features)) {
+			thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
+			mem_free(kgbuffer);
+
 			return;
 		}
 
 		while(task.acquire_tile(this, tile)) {
 			device_memory data;
-			split_kernel.path_trace(&task, tile, data);
+			split_kernel.path_trace(&task, tile, kgbuffer, data);
 
 			task.release_tile(tile);
 
@@ -462,6 +473,9 @@ public:
 					break;
 			}
 		}
+
+		thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
+		mem_free(kgbuffer);
 	}
 
 	void thread_film_convert(DeviceTask& task)
@@ -818,21 +832,6 @@ protected:
 		return kernel;
 	}
 
-	virtual void alloc_kernel_globals(device_memory& mem)
-	{
-		mem.resize(sizeof(KernelGlobals));
-		mem_alloc("kernel_globals", mem, MEM_READ_WRITE);
-
-		KernelGlobals *kg = (KernelGlobals*)mem.device_pointer;
-		*kg = thread_kernel_globals_init();
-	}
-
-	virtual void free_kernel_globals(device_memory& mem)
-	{
-		thread_kernel_globals_free((KernelGlobals*)mem.device_pointer);
-		mem_free(mem);
-	}
-
 	virtual int2 split_kernel_global_size(DeviceTask *task, DeviceSplitKernel& /*split_kernel*/)
 	{
 		/* TODO(mai): this needs investigation but cpu gives incorrect render if global size doesnt match tile size */
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 39bf2de780..8985c27c7a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1313,8 +1313,8 @@ public:
 				split_kernel.load_kernels(requested_features);
 
 				while(task->acquire_tile(this, tile)) {
-					device_memory data;
-					split_kernel.path_trace(task, tile, data);
+					device_memory void_buffer;
+					split_kernel.path_trace(task, tile, void_buffer, void_buffer);
 
 					task->release_tile(tile);
 
@@ -1527,14 +1527,6 @@ public:
 		return new CUDASplitKernelFunction(this, func);
 	}
 
-	void alloc_kernel_globals(device_memory& /*mem*/)
-	{
-	}
-
-	void free_kernel_globals(device_memory& /*mem*/)
-	{
-	}
-
 	int2 split_kernel_local_size()
 	{
 		return make_int2(32, 1);
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 57f2809f08..7b897593f7 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -35,7 +35,6 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
 
 DeviceSplitKernel::~DeviceSplitKernel()
 {
-	device->free_kernel_globals(kgbuffer);
 	device->mem_free(split_data);
 	device->mem_free(ray_state);
 	device->mem_free(use_queues_flag);
@@ -88,6 +87,7 @@ size_t DeviceSplitKernel::max_elements_for_max_buffer_size(size_t max_buffer_siz
 
 bool DeviceSplitKernel::path_trace(DeviceTask *task,
                                    RenderTile& tile,
+                                   device_memory& kgbuffer,
                                    device_memory& kernel_data)
 {
 	if(device->have_error()) {
@@ -155,8 +155,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 		use_queues_flag.resize(sizeof(char));
 		device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
 
-		device->alloc_kernel_globals(kgbuffer);
-
 		ray_state.resize(num_global_elements);
 		device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
 
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 92dfb7f099..0c9c5984b4 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -49,7 +49,6 @@ private:
 	 * kernel will be available to another kernel via this global
 	 * memory.
 	 */
-	device_memory kgbuffer;
 	device_memory split_data;
 	device_vector<uchar> ray_state;
 	device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
@@ -76,6 +75,7 @@ public:
 	bool load_kernels(const DeviceRequestedFeatures& requested_features);
 	bool path_trace(DeviceTask *task,
 	                RenderTile& rtile,
+	                device_memory& kgbuffer,
 	                device_memory& kernel_data);
 
 	size_t max_elements_for_max_buffer_size(size_t max_buffer_size, size_t passes_size);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index cd2c3f25b5..6b404e342b 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -82,32 +82,6 @@ public:
 		background = background_;
 	}
 
-	virtual void alloc_kernel_globals(device_memory& mem)
-	{
-		/* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
-		 * fetch its size.
-		 */
-		typedef struct KernelGlobals {
-			ccl_constant KernelData *data;
-#define KERNEL_TEX(type, ttype, name) \
-	ccl_global type *name;
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-			void *sd_input;
-			void *isect_shadow;
-			SplitData split_data;
-			SplitParams split_param_data;
-		} KernelGlobals;
-
-		mem.resize(sizeof(KernelGlobals));
-		mem_alloc("kernel_globals", mem, MEM_READ_WRITE);
-	}
-
-	virtual void free_kernel_globals(device_memory& mem)
-	{
-		mem_free(mem);
-	}
-
 	string get_build_options(const DeviceRequestedFeatures& requested_features)
 	{
 		string build_options = "-D__SPLIT_KERNEL__ ";
@@ -280,10 +254,31 @@ public:
 		else if(task->type == DeviceTask::PATH_TRACE) {
 			RenderTile tile;
 
+			/* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
+			 * fetch its size.
+			 */
+			typedef struct KernelGlobals {
+				ccl_constant KernelData *data;
+#define KERNEL_TEX(type, ttype, name) \
+				ccl_global type *name;
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+				void *sd_input;
+				void *isect_shadow;
+				SplitData split_data;
+				SplitParams split_param_data;
+			} KernelGlobals;
+
+			/* Allocate buffer for kernel globals */
+			device_memory kgbuffer;
+			kgbuffer.resize(sizeof(KernelGlobals));
+			mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+
 			/* Keep rendering tiles until done. */
 			while(task->acquire_tile(this, tile)) {
 				split_kernel->path_trace(task,
 		                                 tile,
+		                                 kgbuffer,
 		                                 *const_mem_map["__data"]);
 
 				/* Complete kernel execution before release tile. */
@@ -298,7 +293,10 @@ public:
 				clFinish(cqCommandQueue);
 
 				task->release_tile(tile);
+
 			}
+
+			mem_free(kgbuffer);
 		}
 	}




More information about the Bf-blender-cvs mailing list