[Bf-blender-cvs] [83ef38068b] cycles_split_kernel: Cycles: Move kgbuffer allocation out of split kernel code
Mai Lavelle
noreply at git.blender.org
Sat Feb 11 12:33:59 CET 2017
Commit: 83ef38068b71ae4453f94d566fd0da02e5912ad1
Author: Mai Lavelle
Date: Sat Feb 11 06:25:01 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB83ef38068b71ae4453f94d566fd0da02e5912ad1
Cycles: Move kgbuffer allocation out of split kernel code
Allocating the buffer is the job of the device implementation, not the split
kernel, so it makes more sense to separate that code.
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_split_kernel.cpp
M intern/cycles/device/device_split_kernel.h
M intern/cycles/device/opencl/opencl_split.cpp
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index dd176c7577..6da7767ae2 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -315,16 +315,6 @@ private:
return NULL;
}
- virtual void alloc_kernel_globals(device_memory& /*mem*/)
- {
- assert(!"not implemented for this device");
- }
-
- virtual void free_kernel_globals(device_memory& /*mem*/)
- {
- assert(!"not implemented for this device");
- }
-
virtual int2 split_kernel_local_size()
{
assert(!"not implemented for this device");
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 69a5169613..f423fdf60b 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -446,14 +446,25 @@ public:
DeviceSplitKernel split_kernel(this);
+ /* allocate buffer for kernel globals */
+ device_memory kgbuffer;
+ kgbuffer.resize(sizeof(KernelGlobals));
+ mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+
+ KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
+ *kg = thread_kernel_globals_init();
+
requested_features.max_closure = MAX_CLOSURE;
if(!split_kernel.load_kernels(requested_features)) {
+ thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
+ mem_free(kgbuffer);
+
return;
}
while(task.acquire_tile(this, tile)) {
device_memory data;
- split_kernel.path_trace(&task, tile, data);
+ split_kernel.path_trace(&task, tile, kgbuffer, data);
task.release_tile(tile);
@@ -462,6 +473,9 @@ public:
break;
}
}
+
+ thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
+ mem_free(kgbuffer);
}
void thread_film_convert(DeviceTask& task)
@@ -818,21 +832,6 @@ protected:
return kernel;
}
- virtual void alloc_kernel_globals(device_memory& mem)
- {
- mem.resize(sizeof(KernelGlobals));
- mem_alloc("kernel_globals", mem, MEM_READ_WRITE);
-
- KernelGlobals *kg = (KernelGlobals*)mem.device_pointer;
- *kg = thread_kernel_globals_init();
- }
-
- virtual void free_kernel_globals(device_memory& mem)
- {
- thread_kernel_globals_free((KernelGlobals*)mem.device_pointer);
- mem_free(mem);
- }
-
virtual int2 split_kernel_global_size(DeviceTask *task, DeviceSplitKernel& /*split_kernel*/)
{
/* TODO(mai): this needs investigation but cpu gives incorrect render if global size doesnt match tile size */
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 39bf2de780..8985c27c7a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1313,8 +1313,8 @@ public:
split_kernel.load_kernels(requested_features);
while(task->acquire_tile(this, tile)) {
- device_memory data;
- split_kernel.path_trace(task, tile, data);
+ device_memory void_buffer;
+ split_kernel.path_trace(task, tile, void_buffer, void_buffer);
task->release_tile(tile);
@@ -1527,14 +1527,6 @@ public:
return new CUDASplitKernelFunction(this, func);
}
- void alloc_kernel_globals(device_memory& /*mem*/)
- {
- }
-
- void free_kernel_globals(device_memory& /*mem*/)
- {
- }
-
int2 split_kernel_local_size()
{
return make_int2(32, 1);
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 57f2809f08..7b897593f7 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -35,7 +35,6 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
DeviceSplitKernel::~DeviceSplitKernel()
{
- device->free_kernel_globals(kgbuffer);
device->mem_free(split_data);
device->mem_free(ray_state);
device->mem_free(use_queues_flag);
@@ -88,6 +87,7 @@ size_t DeviceSplitKernel::max_elements_for_max_buffer_size(size_t max_buffer_siz
bool DeviceSplitKernel::path_trace(DeviceTask *task,
RenderTile& tile,
+ device_memory& kgbuffer,
device_memory& kernel_data)
{
if(device->have_error()) {
@@ -155,8 +155,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
use_queues_flag.resize(sizeof(char));
device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
- device->alloc_kernel_globals(kgbuffer);
-
ray_state.resize(num_global_elements);
device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 92dfb7f099..0c9c5984b4 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -49,7 +49,6 @@ private:
* kernel will be available to another kernel via this global
* memory.
*/
- device_memory kgbuffer;
device_memory split_data;
device_vector<uchar> ray_state;
device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
@@ -76,6 +75,7 @@ public:
bool load_kernels(const DeviceRequestedFeatures& requested_features);
bool path_trace(DeviceTask *task,
RenderTile& rtile,
+ device_memory& kgbuffer,
device_memory& kernel_data);
size_t max_elements_for_max_buffer_size(size_t max_buffer_size, size_t passes_size);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index cd2c3f25b5..6b404e342b 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -82,32 +82,6 @@ public:
background = background_;
}
- virtual void alloc_kernel_globals(device_memory& mem)
- {
- /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
- * fetch its size.
- */
- typedef struct KernelGlobals {
- ccl_constant KernelData *data;
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name;
-#include "kernel_textures.h"
-#undef KERNEL_TEX
- void *sd_input;
- void *isect_shadow;
- SplitData split_data;
- SplitParams split_param_data;
- } KernelGlobals;
-
- mem.resize(sizeof(KernelGlobals));
- mem_alloc("kernel_globals", mem, MEM_READ_WRITE);
- }
-
- virtual void free_kernel_globals(device_memory& mem)
- {
- mem_free(mem);
- }
-
string get_build_options(const DeviceRequestedFeatures& requested_features)
{
string build_options = "-D__SPLIT_KERNEL__ ";
@@ -280,10 +254,31 @@ public:
else if(task->type == DeviceTask::PATH_TRACE) {
RenderTile tile;
+ /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
+ * fetch its size.
+ */
+ typedef struct KernelGlobals {
+ ccl_constant KernelData *data;
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name;
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+ void *sd_input;
+ void *isect_shadow;
+ SplitData split_data;
+ SplitParams split_param_data;
+ } KernelGlobals;
+
+ /* Allocate buffer for kernel globals */
+ device_memory kgbuffer;
+ kgbuffer.resize(sizeof(KernelGlobals));
+ mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+
/* Keep rendering tiles until done. */
while(task->acquire_tile(this, tile)) {
split_kernel->path_trace(task,
tile,
+ kgbuffer,
*const_mem_map["__data"]);
/* Complete kernel execution before release tile. */
@@ -298,7 +293,10 @@ public:
clFinish(cqCommandQueue);
task->release_tile(tile);
+
}
+
+ mem_free(kgbuffer);
}
}
More information about the Bf-blender-cvs
mailing list