[Bf-blender-cvs] [d4bc2bf8b9] temp-cycles-denoising: Cycles Denoising: Add separate set_tiles device callback

Lukas Stockner noreply at git.blender.org
Fri Mar 24 20:18:40 CET 2017


Commit: d4bc2bf8b9876c55c75405ef062c789c37a2f74b
Author: Lukas Stockner
Date:   Thu Mar 23 23:56:20 2017 +0100
Branches: temp-cycles-denoising
https://developer.blender.org/rBd4bc2bf8b9876c55c75405ef062c789c37a2f74b

Cycles Denoising: Add separate set_tiles device callback

For CPU and CUDA, it is possible to determine the pointers to the tile buffers on the host and just fill the TilesInfo there.
However, for OpenCL the actual pointers are only known inside the kernel, so a separate kernel is needed to set them.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_denoising.cpp
M	intern/cycles/device/device_denoising.h
M	intern/cycles/filter/filter_defines.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index d71ff23093..426dda8742 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -381,6 +381,18 @@ public:
 		}
 	};
 
+	bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
+	{
+		mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
+
+		TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
+		for(int i = 0; i < 9; i++) {
+			tiles->buffers[i] = buffers[i];
+		}
+
+		return true;
+	}
+
 	bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
 	                               DenoisingTask *task)
 	{
@@ -622,6 +634,15 @@ public:
 		tile.sample = tile.start_sample + tile.num_samples;
 
 		DenoisingTask denoising(this);
+
+		denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
+		denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
+		denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+		denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+		denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
+
 		denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
 		denoising.render_buffer.samples = tile.sample;
 		denoising.use_split_variance = use_split_kernel;
@@ -632,12 +653,6 @@ public:
 		denoising.tiles_from_rendertiles(rtiles);
 
 		denoising.init_from_devicetask(task);
-		denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
-		denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
-		denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
-		denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
-		denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
-		denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
 
 		denoising.run_denoising();
 
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 5445e5e7b2..f7cd2db50f 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -907,6 +907,20 @@ public:
 		}
 	}
 
+	bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
+	{
+		mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
+
+		TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
+		for(int i = 0; i < 9; i++) {
+			tiles->buffers[i] = buffers[i];
+		}
+
+		mem_copy_to(task->tiles_mem);
+
+		return !have_error();
+	}
+
 #define CUDA_GET_BLOCKSIZE(func, w, h)                                                                          \
 			int threads_per_block;                                                                              \
 			cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
@@ -1231,6 +1245,14 @@ public:
 	{
 		DenoisingTask denoising(this);
 
+		denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
+		denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
+		denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+		denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+		denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
+
 		denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
 		denoising.render_buffer.samples = rtile.sample;
 
@@ -1241,12 +1263,6 @@ public:
 
 		denoising.init_from_devicetask(task);
 
-		denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
-		denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
-		denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
-		denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
-		denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
-		denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
 
 		denoising.run_denoising();
 	}
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 149154c6ed..008808ddd1 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -31,9 +31,6 @@ void DenoisingTask::init_from_devicetask(const DeviceTask &task)
 	render_buffer.pass_stride = task.pass_stride;
 	render_buffer.denoising_data_offset  = task.pass_denoising_data;
 	render_buffer.denoising_clean_offset = task.pass_denoising_clean;
-	render_buffer.offset = tiles->offsets[4];
-	render_buffer.stride = tiles->strides[4];
-	render_buffer.ptr    = tiles->buffers[4];
 
 	/* Expand filter_area by half_window pixels and clamp the result to the extent of the neighboring tiles */
 	rect = make_int4(max(tiles->x[0], filter_area.x - half_window),
@@ -46,8 +43,9 @@ void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
 {
 	tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int));
 
+	device_ptr buffers[9];
 	for(int i = 0; i < 9; i++) {
-		tiles->buffers[i] = rtiles[i].buffer;
+		buffers[i] = rtiles[i].buffer;
 		tiles->offsets[i] = rtiles[i].offset;
 		tiles->strides[i] = rtiles[i].stride;
 	}
@@ -59,6 +57,12 @@ void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
 	tiles->y[1] = rtiles[4].y;
 	tiles->y[2] = rtiles[7].y;
 	tiles->y[3] = rtiles[7].y + rtiles[7].h;
+
+	render_buffer.offset = rtiles[4].offset;
+	render_buffer.stride = rtiles[4].stride;
+	render_buffer.ptr    = rtiles[4].buffer;
+
+	functions.set_tiles(buffers);
 }
 
 bool DenoisingTask::run_denoising()
@@ -75,9 +79,6 @@ bool DenoisingTask::run_denoising()
 	buffer.mem.resize(buffer.pass_stride * buffer.passes);
 	device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE);
 
-	device->mem_alloc("Denoising Tile Info", tiles_mem, MEM_READ_ONLY);
-	device->mem_copy_to(tiles_mem);
-
 	device_ptr null_ptr = (device_ptr) 0;
 
 	/* Prefilter shadow feature. */
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index d61c0ba880..6a761258ab 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -84,6 +84,7 @@ public:
 		              device_ptr mean_ptr,
 		              device_ptr variance_ptr
 		              )> get_feature;
+		function<bool(device_ptr*)> set_tiles;
 	} functions;
 
 	/* Stores state of the current Reconstruction operation,
diff --git a/intern/cycles/filter/filter_defines.h b/intern/cycles/filter/filter_defines.h
index 35b0b079e1..e98596d21e 100644
--- a/intern/cycles/filter/filter_defines.h
+++ b/intern/cycles/filter/filter_defines.h
@@ -26,10 +26,10 @@
 typedef struct TilesInfo {
 	int offsets[9];
 	int strides[9];
-	/* TODO(lukas): CUDA doesn't have uint64_t... */
-	long long int buffers[9];
 	int x[4];
 	int y[4];
+	/* TODO(lukas): CUDA doesn't have uint64_t... */
+	long long int buffers[9];
 } TilesInfo;
 
 #endif /* __FILTER_DEFINES_H__*/




More information about the Bf-blender-cvs mailing list