[Bf-blender-cvs] [520b53364c] master: Cycles: Add OpenCL kernel for zeroing memory buffers

Mai Lavelle noreply at git.blender.org
Wed Mar 8 07:53:14 CET 2017


Commit: 520b53364c73c75c4ff400d639dad13630f0e6fc
Author: Mai Lavelle
Date:   Thu Jan 26 01:41:48 2017 -0500
Branches: master
https://developer.blender.org/rB520b53364c73c75c4ff400d639dad13630f0e6fc

Cycles: Add OpenCL kernel for zeroing memory buffers

Transferring memory to the device was very slow and there's really no
need when only zeroing a buffer.

===================================================================

M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/kernel/kernels/opencl/kernel.cl

===================================================================

diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index c3f099b795..7fa14eee70 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -206,6 +206,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	base_program.add_kernel(ustring("convert_to_half_float"));
 	base_program.add_kernel(ustring("shader"));
 	base_program.add_kernel(ustring("bake"));
+	base_program.add_kernel(ustring("zero_buffer"));
 
 	vector<OpenCLProgram*> programs;
 	programs.push_back(&base_program);
@@ -311,10 +312,61 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
 void OpenCLDeviceBase::mem_zero(device_memory& mem)
 {
 	if(mem.device_pointer) {
+		if(base_program.is_loaded()) {
+			cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
+
+			size_t global_size[] = {1024, 1024};
+			size_t num_threads = global_size[0] * global_size[1];
+
+			cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
+			unsigned long long d_offset = 0;
+			unsigned long long d_size = 0;
+
+			while(d_offset < mem.memory_size()) {
+				d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
+
+				kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
+
+				ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
+						                       ckZeroBuffer,
+						                       2,
+						                       NULL,
+						                       global_size,
+						                       NULL,
+						                       0,
+						                       NULL,
+						                       NULL);
+				opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
+
+				d_offset += d_size;
+			}
+		}
+
 		if(mem.data_pointer) {
 			memset((void*)mem.data_pointer, 0, mem.memory_size());
 		}
-		mem_copy_to(mem);
+
+		if(!base_program.is_loaded()) {
+			void* zero = (void*)mem.data_pointer;
+
+			if(!mem.data_pointer) {
+				zero = util_aligned_malloc(mem.memory_size(), 16);
+				memset(zero, 0, mem.memory_size());
+			}
+
+			opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
+                                   CL_MEM_PTR(mem.device_pointer),
+                                   CL_TRUE,
+                                   0,
+                                   mem.memory_size(),
+                                   zero,
+                                   0,
+                                   NULL, NULL));
+
+			if(!mem.data_pointer) {
+				util_aligned_free(zero);
+			}
+		}
 	}
 }
 
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index a68f97857b..e501fd4f01 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -193,4 +193,20 @@ __kernel void kernel_ocl_convert_to_half_float(
 		kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
 }
 
+__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset)
+{
+	size_t i = get_global_id(0) + get_global_id(1) * get_global_size(0);
+
+	if(i < size / sizeof(float4)) {
+		buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	}
+	else if(i == size / sizeof(float4)) {
+		ccl_global uchar *b = (ccl_global uchar*)&buffer[i+offset/sizeof(float4)];
+
+		for(i = 0; i < size % sizeof(float4); i++) {
+			*(b++) = 0;
+		}
+	}
+}
+
 #endif  /* __COMPILE_ONLY_MEGAKERNEL__ */




More information about the Bf-blender-cvs mailing list