[Bf-blender-cvs] [c9608047472] master: Cycles Denoising: Pass tile buffers to every OpenCL kernel to conform to standard and get rid of set_tile_info

Lukas Stockner noreply at git.blender.org
Wed Jul 4 14:45:46 CEST 2018


Commit: c9608047472ae2b08d2d4b188fca5211a6c0b925
Author: Lukas Stockner
Date:   Wed Jul 4 14:02:38 2018 +0200
Branches: master
https://developer.blender.org/rBc9608047472ae2b08d2d4b188fca5211a6c0b925

Cycles Denoising: Pass tile buffers to every OpenCL kernel to conform to the OpenCL standard and get rid of set_tile_info

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_denoising.cpp
M	intern/cycles/device/device_denoising.h
M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/kernel/filter/filter_defines.h
M	intern/cycles/kernel/filter/filter_prefilter.h
M	intern/cycles/kernel/kernels/opencl/filter.cl

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c54ac1e5933..be0dcc20755 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -459,18 +459,6 @@ public:
 		}
 	};
 
-	bool denoising_set_tile_info(device_ptr *buffers, DenoisingTask *task)
-	{
-		TileInfo *tile_info = (TileInfo*) task->tile_info_mem.host_pointer;
-		for(int i = 0; i < 9; i++) {
-			tile_info->buffers[i] = buffers[i];
-		}
-
-		task->tile_info_mem.copy_to_device();
-
-		return true;
-	}
-
 	bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
 	                               DenoisingTask *task)
 	{
@@ -722,7 +710,6 @@ public:
 		denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
 		denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
-		denoising.functions.set_tile_info = function_bind(&CPUDevice::denoising_set_tile_info, this, _1, &denoising);
 
 		denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
 		denoising.render_buffer.samples = tile.sample;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index c8e0ea4d896..8294af340e8 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1251,18 +1251,6 @@ public:
 		}
 	}
 
-	bool denoising_set_tile_info(device_ptr *buffers, DenoisingTask *task)
-	{
-		TileInfo *tile_info = (TileInfo*) task->tile_info_mem.host_pointer;
-		for(int i = 0; i < 9; i++) {
-			tile_info->buffers[i] = buffers[i];
-		}
-
-		task->tile_info_mem.copy_to_device();
-
-		return !have_error();
-	}
-
 #define CUDA_GET_BLOCKSIZE(func, w, h)                                                                          \
 			int threads_per_block;                                                                              \
 			cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
@@ -1622,7 +1610,6 @@ public:
 		denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
 		denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
-		denoising.functions.set_tile_info = function_bind(&CUDADevice::denoising_set_tile_info, this, _1, &denoising);
 
 		denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
 		denoising.render_buffer.samples = rtile.sample;
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 318b14ab499..c0d4634262d 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -62,11 +62,10 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
 {
 	tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int));
 
-	device_ptr buffers[9];
 	for(int i = 0; i < 9; i++) {
-		buffers[i] = rtiles[i].buffer;
 		tile_info->offsets[i] = rtiles[i].offset;
 		tile_info->strides[i] = rtiles[i].stride;
+		tile_info->buffers[i] = rtiles[i].buffer;
 	}
 	tile_info->x[0] = rtiles[3].x;
 	tile_info->x[1] = rtiles[4].x;
@@ -81,7 +80,7 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
 	target_buffer.stride = rtiles[9].stride;
 	target_buffer.ptr    = rtiles[9].buffer;
 
-	functions.set_tile_info(buffers);
+	tile_info_mem.copy_to_device();
 }
 
 void DenoisingTask::setup_denoising_buffer()
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 1063d43d039..e0da52867f1 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -89,7 +89,6 @@ public:
 		              device_ptr depth_ptr,
 		              device_ptr output_ptr
 		              )> detect_outliers;
-		function<bool(device_ptr*)> set_tile_info;
 		function<void(RenderTile *rtiles)> map_neighbor_tiles;
 		function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
 	} functions;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index c550d738bd5..22e0503365c 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -436,8 +436,6 @@ protected:
 	                               device_ptr depth_ptr,
 	                               device_ptr output_ptr,
 	                               DenoisingTask *task);
-	bool denoising_set_tile_info(device_ptr *buffers,
-	                             DenoisingTask *task);
 
 	device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
 	void mem_free_sub_ptr(device_ptr ptr);
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 50d371e4c55..9a50d217321 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -246,7 +246,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	denoising_program.add_kernel(ustring("filter_nlm_normalize"));
 	denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
 	denoising_program.add_kernel(ustring("filter_finalize"));
-	denoising_program.add_kernel(ustring("filter_set_tile_info"));
 
 	vector<OpenCLProgram*> programs;
 	programs.push_back(&base_program);
@@ -981,9 +980,16 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
 
 	cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow"));
 
-	kernel_set_args(ckFilterDivideShadow, 0,
-	                task->render_buffer.samples,
-	                tile_info_mem,
+	int arg_ofs = kernel_set_args(ckFilterDivideShadow, 0,
+	                              task->render_buffer.samples,
+	                              tile_info_mem);
+	cl_mem buffers[9];
+	for(int i = 0; i < 9; i++) {
+		buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
+		arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs,
+		                           buffers[i]);
+	}
+	kernel_set_args(ckFilterDivideShadow, arg_ofs,
 	                a_mem,
 	                b_mem,
 	                sample_variance_mem,
@@ -1012,9 +1018,16 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
 
 	cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature"));
 
-	kernel_set_args(ckFilterGetFeature, 0,
-	                task->render_buffer.samples,
-	                tile_info_mem,
+	int arg_ofs = kernel_set_args(ckFilterGetFeature, 0,
+	                              task->render_buffer.samples,
+	                              tile_info_mem);
+	cl_mem buffers[9];
+	for(int i = 0; i < 9; i++) {
+		buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
+		arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs,
+		                           buffers[i]);
+	}
+	kernel_set_args(ckFilterGetFeature, arg_ofs,
 	                mean_offset,
 	                variance_offset,
 	                mean_mem,
@@ -1056,29 +1069,8 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
 	return true;
 }
 
-bool OpenCLDeviceBase::denoising_set_tile_info(device_ptr *buffers,
-                                               DenoisingTask *task)
-{
-	task->tile_info_mem.copy_to_device();
-
-	cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer);
-
-	cl_kernel ckFilterSetTileInfo = denoising_program(ustring("filter_set_tile_info"));
-
-	kernel_set_args(ckFilterSetTileInfo, 0, tile_info_mem);
-	for(int i = 0; i < 9; i++) {
-		cl_mem buffer_mem = CL_MEM_PTR(buffers[i]);
-		kernel_set_args(ckFilterSetTileInfo, i+1, buffer_mem);
-	}
-
-	enqueue_kernel(ckFilterSetTileInfo, 1, 1);
-
-	return true;
-}
-
 void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising)
 {
-	denoising.functions.set_tile_info = function_bind(&OpenCLDeviceBase::denoising_set_tile_info, this, _1, &denoising);
 	denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
 	denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising);
 	denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
index 57d3d90594f..d48ea3ac1d6 100644
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ b/intern/cycles/kernel/filter/filter_defines.h
@@ -35,4 +35,29 @@ typedef struct TileInfo {
 #endif
 } TileInfo;
 
+#ifdef __KERNEL_OPENCL__
+#  define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info,  \
+                               ccl_global float *tile_buffer_1, \
+                               ccl_global float *tile_buffer_2, \
+                               ccl_global float *tile_buffer_3, \
+                               ccl_global float *tile_buffer_4, \
+                               ccl_global float *tile_buffer_5, \
+                               ccl_global float *tile_buffer_6, \
+                               ccl_global float *tile_buffer_7, \
+                               ccl_global float *tile_buffer_8, \
+                               ccl_global float *tile_buffer_9
+#  define CCL_FILTER_TILE_INFO_ARG tile_info, \
+                                   tile_buffer_1, tile_buffer_2, tile_buffer_3, \
+                                   tile_buffer_4, tile_buffer_5, tile_buffer_6, \
+                                   tile_buffer_7, tile_buffer_8, tile_buffer_9
+#  define ccl_get_tile_buffer(id) (tile_buffer_ ## id)
+#else
+#  ifdef __KERNEL_CUDA__
+#    define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info
+#  else
+#    define CCL_FILTER_TILE_INFO TileInfo* tile_info
+#  endif
+#  define ccl_get_tile_bu

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list