[Bf-blender-cvs] [740cd287485] master: Cycles Denoising: Add more robust outlier heuristic to avoid artifacts

Lukas Stockner noreply at git.blender.org
Fri May 19 02:12:56 CEST 2017


Commit: 740cd287485d919452fa4cd56a700cc0070f0c6a
Author: Lukas Stockner
Date:   Thu May 18 03:03:18 2017 +0200
Branches: master
https://developer.blender.org/rB740cd287485d919452fa4cd56a700cc0070f0c6a

Cycles Denoising: Add more robust outlier heuristic to avoid artifacts

Extremely bright pixels in the rendered image cause the denoising algorithm
to produce extremely noticeable artifacts. Therefore, a heuristic is needed
to exclude these pixels from the filtering process.

The new approach calculates the 75th percentile of the 5x5 neighborhood of
each pixel and flags the pixel if it is more than twice as bright as that value.

During the reconstruction process, flagged pixels are skipped. Therefore,
they don't cause any problems for neighboring pixels, and the outlier pixels
themselves are replaced by a prediction of their actual value based on their
feature pass values and the neighboring pixels.

Therefore, the denoiser now also works as a smarter despeckling filter that
uses a more accurate prediction of the pixel instead of a simple average.
This can be used even if denoising isn't wanted by setting the denoising
radius to 1.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_denoising.cpp
M	intern/cycles/device/device_denoising.h
M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/kernel/filter/filter_features.h
M	intern/cycles/kernel/filter/filter_features_sse.h
M	intern/cycles/kernel/filter/filter_prefilter.h
M	intern/cycles/kernel/filter/filter_reconstruction.h
M	intern/cycles/kernel/kernels/cpu/filter_cpu.h
M	intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/filter.cu
M	intern/cycles/kernel/kernels/opencl/filter.cl

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index e219ce3cb8e..fea55049049 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -176,6 +176,7 @@ public:
 
 	KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
 	KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)>               filter_get_feature_kernel;
+	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                                     filter_detect_outliers_kernel;
 	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                                     filter_combine_halves_kernel;
 
 	KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@@ -210,6 +211,7 @@ public:
 	  REGISTER_KERNEL(shader),
 	  REGISTER_KERNEL(filter_divide_shadow),
 	  REGISTER_KERNEL(filter_get_feature),
+	  REGISTER_KERNEL(filter_detect_outliers),
 	  REGISTER_KERNEL(filter_combine_halves),
 	  REGISTER_KERNEL(filter_nlm_calc_difference),
 	  REGISTER_KERNEL(filter_nlm_blur),
@@ -594,6 +596,26 @@ public:
 		return true;
 	}
 
+	bool denoising_detect_outliers(device_ptr image_ptr,
+	                               device_ptr variance_ptr,
+	                               device_ptr depth_ptr,
+	                               device_ptr output_ptr,
+	                               DenoisingTask *task)
+	{
+		for(int y = task->rect.y; y < task->rect.w; y++) {
+			for(int x = task->rect.x; x < task->rect.z; x++) {
+				filter_detect_outliers_kernel()(x, y,
+				                                (float*) image_ptr,
+				                                (float*) variance_ptr,
+				                                (float*) depth_ptr,
+				                                (float*) output_ptr,
+				                                &task->rect.x,
+				                                task->buffer.pass_stride);
+			}
+		}
+		return true;
+	}
+
 	void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
 	{
 		float *render_buffer = (float*)tile.buffer;
@@ -632,6 +654,7 @@ public:
 		denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
 		denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
 
 		denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 968ee5bc487..4d8ca487799 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1248,6 +1248,38 @@ public:
 		return !have_error();
 	}
 
+	bool denoising_detect_outliers(device_ptr image_ptr,
+	                               device_ptr variance_ptr,
+	                               device_ptr depth_ptr,
+	                               device_ptr output_ptr,
+	                               DenoisingTask *task)
+	{
+		if(have_error())
+			return false;
+
+		cuda_push_context();
+
+		CUfunction cuFilterDetectOutliers;
+		cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+		cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+		CUDA_GET_BLOCKSIZE(cuFilterDetectOutliers,
+		                   task->rect.z-task->rect.x,
+		                   task->rect.w-task->rect.y);
+
+		void *args[] = {&image_ptr,
+		                &variance_ptr,
+		                &depth_ptr,
+		                &output_ptr,
+		                &task->rect,
+		                &task->buffer.pass_stride};
+
+		CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+		cuda_assert(cuCtxSynchronize());
+
+		cuda_pop_context();
+		return !have_error();
+	}
+
 	void denoise(RenderTile &rtile, const DeviceTask &task)
 	{
 		DenoisingTask denoising(this);
@@ -1258,6 +1290,7 @@ public:
 		denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
 		denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
 
 		denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index f5543d7c82d..613bd9112cf 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -159,11 +159,25 @@ bool DenoisingTask::run_denoising()
 		int mean_to[]       = { 8,  9, 10};
 		int variance_to[]   = {11, 12, 13};
 		int num_color_passes = 3;
+
+		device_only_memory<float> temp_color;
+		temp_color.resize(3*buffer.pass_stride);
+		device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
+
 		for(int pass = 0; pass < num_color_passes; pass++) {
-			device_sub_ptr color_pass    (device, buffer.mem,     mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+			device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
 			device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
 			functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
 		}
+
+		{
+			device_sub_ptr depth_pass    (device, buffer.mem,                                 0,   buffer.pass_stride, MEM_READ_WRITE);
+			device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+			device_sub_ptr output_pass   (device, buffer.mem,     mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+			functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
+		}
+
+		device->mem_free(temp_color);
 	}
 
 	storage.w = filter_area.z;
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 86d8eb64386..25b93c2ad74 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -82,6 +82,11 @@ public:
 		              device_ptr mean_ptr,
 		              device_ptr variance_ptr
 		              )> get_feature;
+		function<bool(device_ptr image_ptr,
+		              device_ptr variance_ptr,
+		              device_ptr depth_ptr,
+		              device_ptr output_ptr
+		              )> detect_outliers;
 		function<bool(device_ptr*)> set_tiles;
 	} functions;
 
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index a458ca6bf64..27e196d1e68 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -411,6 +411,11 @@ protected:
 	                           device_ptr mean_ptr,
 	                           device_ptr variance_ptr,
 	                           DenoisingTask *task);
+	bool denoising_detect_outliers(device_ptr image_ptr,
+	                               device_ptr variance_ptr,
+	                               device_ptr depth_ptr,
+	                               device_ptr output_ptr,
+	                               DenoisingTask *task);
 	bool denoising_set_tiles(device_ptr *buffers,
 	                         DenoisingTask *task);
 
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index ae1a7b917c3..8f33014a00e 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -216,6 +216,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
 	denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
 	denoising_program.add_kernel(ustring("filter_divide_shadow"));
 	denoising_program.add_kernel(ustring("filter_get_feature"));
+	denoising_program.add_kernel(ustring("filter_detect_outliers"));
 	denoising_program.add_kernel(ustring("filter_combine_halves"));
 	denoising_program.add_kernel(ustring("filter_construct_transform"));
 	denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
@@ -910,6 +911,33 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
 	return true;
 }
 
+bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
+                                                 device_ptr variance_ptr,
+                                                 device_ptr depth_ptr,
+                                                 device_ptr output_ptr,
+                                                 DenoisingTask *task)
+{
+	cl_mem image_mem = CL_MEM_PTR(image_ptr);
+	cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
+	cl_mem depth_mem = CL_MEM_PTR(depth_ptr);
+	cl_mem output_mem = CL_MEM_PTR(output_ptr);
+
+	cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers"));
+
+	kernel_set_args(ckFilterDetectOutliers, 0,
+	                image_mem,
+	                variance_mem,
+	                depth_mem,
+	                output_mem,
+	                task->rect,
+	                task->buffer.pass_stride);
+	enqueue_kernel(ckFilterDetectOutliers,
+	               task->rect.z-task->rect.x,
+	               task->rect.w-task->rect.y);
+
+	return true;
+}
+
 bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buff

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list