[Bf-blender-cvs] [5911f960c3] cycles_split_kernel: Cycles: Add OpenCL kernel for zeroing memory buffers

Mai Lavelle noreply at git.blender.org
Thu Jan 26 09:01:06 CET 2017


Commit: 5911f960c3d71918ff1c630c17d858f1f227ae14
Author: Mai Lavelle
Date:   Thu Jan 26 01:41:48 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB5911f960c3d71918ff1c630c17d858f1f227ae14

Cycles: Add OpenCL kernel for zeroing memory buffers

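Zeroing a buffer used to clear the host copy and then transfer it to the
device, which was very slow. The transfer is unnecessary when the buffer is
only being zeroed, so a small kernel now clears it directly on the device.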

===================================================================

M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/kernel/kernels/opencl/kernel.cl

===================================================================

diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 2d42fd05ba..924641c124 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -206,6 +206,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	base_program.add_kernel(ustring("convert_to_half_float"));
 	base_program.add_kernel(ustring("shader"));
 	base_program.add_kernel(ustring("bake"));
+	base_program.add_kernel(ustring("zero_buffer"));
 
 	vector<OpenCLProgram*> programs;
 	programs.push_back(&base_program);
@@ -319,8 +320,52 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
+/* Zero the contents of a buffer directly on the device.
+ *
+ * The zero_buffer kernel is launched over the buffer in chunks of
+ * global_size float4 elements, so no data is uploaded from the host.
+ * The host-side copy, when there is one, is cleared as well. Nothing
+ * happens if the buffer has no device allocation. */
 void OpenCLDeviceBase::mem_zero(device_memory& mem)
 {
 	if(mem.device_pointer) {
-		memset((void*)mem.data_pointer, 0, mem.memory_size());
-		mem_copy_to(mem);
+		cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
+
+		/* One launch uses as many work items as the device's per-dimension
+		 * work item limits multiply out to. */
+		size_t max_work_items[3];
+		ciErr = clGetDeviceInfo(cdDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
+		opencl_assert_err(ciErr, "clGetDeviceInfo");
+		size_t global_size = max_work_items[0] * max_work_items[1] * max_work_items[2];
+
+		/* cl_ulong is the host-side counterpart of the kernel's ulong arguments. */
+		cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
+		cl_ulong d_offset = 0;
+
+		/* Test only the running offset: including the size of the previous
+		 * chunk in the condition would end the loop one chunk early and leave
+		 * the end of any buffer spanning several chunks uncleared. */
+		while(d_offset < mem.memory_size()) {
+			/* Each work item clears one float4; the last chunk may be shorter. */
+			cl_ulong d_size = std::min<cl_ulong>(global_size*sizeof(float4), mem.memory_size() - d_offset);
+
+			kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
+
+			ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
+				                           ckZeroBuffer,
+				                           1,
+				                           NULL,
+				                           &global_size,
+				                           NULL,
+				                           0,
+				                           NULL,
+				                           NULL);
+			opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
+
+			d_offset += d_size;
+		}
+
+		/* Keep the host-side copy consistent with the device. */
+		if(mem.data_pointer) {
+			memset((void*)mem.data_pointer, 0, mem.memory_size());
+		}
 	}
 }
 
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 03a27c875e..52406d2f54 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -193,4 +193,30 @@ __kernel void kernel_ocl_convert_to_half_float(
 		kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
 }
 
+/* Zero `size` bytes of `buffer`, starting `offset` bytes from its start.
+ *
+ * Every work item clears one float4. If `size` is not a multiple of
+ * sizeof(float4), the work item whose index equals the number of whole
+ * float4 elements clears the leftover bytes individually. `offset` is only
+ * used divided by sizeof(float4), so any remainder in it is ignored. */
+__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset)
+{
+	const size_t work_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+	const ulong num_whole = size / sizeof(float4);
+	const ulong element = work_index + offset / sizeof(float4);
+
+	if(work_index < num_whole) {
+		buffer[element] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	}
+	else if(work_index == num_whole) {
+		/* Partial float4 at the end of the range: clear it byte by byte. */
+		ccl_global uchar *tail = (ccl_global uchar*)&buffer[element];
+		const ulong num_tail_bytes = size % sizeof(float4);
+
+		for(ulong k = 0; k < num_tail_bytes; k++) {
+			tail[k] = 0;
+		}
+	}
+}
+
 #endif  /* __COMPILE_ONLY_MEGAKERNEL__ */




More information about the Bf-blender-cvs mailing list