[Bf-blender-cvs] [fccf506ed7f] blender2.7: Cycles: animation denoising support in the kernel.

Lukas Stockner noreply at git.blender.org
Wed Feb 6 15:20:18 CET 2019


Commit: fccf506ed7fd96f8a8f5edda7b99f564a386321a
Author: Lukas Stockner
Date:   Wed Feb 6 14:19:20 2019 +0100
Branches: blender2.7
https://developer.blender.org/rBfccf506ed7fd96f8a8f5edda7b99f564a386321a

Cycles: animation denoising support in the kernel.

This is the internal implementation, not available from the API or
interface yet. The algorithm takes into account past and future frames,
both to get more coherent animation and reduce noise.

Ref D3889.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_denoising.cpp
M	intern/cycles/device/device_denoising.h
M	intern/cycles/device/device_task.h
M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/kernel/filter/filter_defines.h
M	intern/cycles/kernel/filter/filter_features.h
M	intern/cycles/kernel/filter/filter_features_sse.h
M	intern/cycles/kernel/filter/filter_nlm_cpu.h
M	intern/cycles/kernel/filter/filter_nlm_gpu.h
M	intern/cycles/kernel/filter/filter_reconstruction.h
M	intern/cycles/kernel/filter/filter_transform.h
M	intern/cycles/kernel/filter/filter_transform_gpu.h
M	intern/cycles/kernel/filter/filter_transform_sse.h
M	intern/cycles/kernel/kernels/cpu/filter_cpu.h
M	intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/filter.cu
M	intern/cycles/kernel/kernels/opencl/filter.cl

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 6668acc9cbe..93c63b92a55 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -186,15 +186,15 @@ public:
 	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                               filter_detect_outliers_kernel;
 	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                               filter_combine_halves_kernel;
 
-	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, float, float)>      filter_nlm_calc_difference_kernel;
+	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, int, float, float)> filter_nlm_calc_difference_kernel;
 	KernelFunctions<void(*)(float*, float*, int*, int, int)>                                              filter_nlm_blur_kernel;
 	KernelFunctions<void(*)(float*, float*, int*, int, int)>                                              filter_nlm_calc_weight_kernel;
 	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, int, int, int)>       filter_nlm_update_output_kernel;
 	KernelFunctions<void(*)(float*, float*, int*, int)>                                                   filter_nlm_normalize_kernel;
 
-	KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)>                         filter_construct_transform_kernel;
-	KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
-	KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)>                            filter_finalize_kernel;
+	KernelFunctions<void(*)(float*, TileInfo*, int, int, int, float*, int*, int*, int, int, bool, int, float)>                   filter_construct_transform_kernel;
+	KernelFunctions<void(*)(int, int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int, bool)> filter_nlm_construct_gramian_kernel;
+	KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)>                                            filter_finalize_kernel;
 
 	KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
 	                       int, int, int, int, int, int, int, int, ccl_global int*, int,
@@ -512,7 +512,7 @@ public:
 			                                    difference,
 			                                    local_rect,
 			                                    w, channel_offset,
-			                                    a, k_2);
+			                                    0, a, k_2);
 
 			filter_nlm_blur_kernel()       (difference, blurDifference, local_rect, w, f);
 			filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
@@ -542,6 +542,7 @@ public:
 		for(int y = 0; y < task->filter_area.w; y++) {
 			for(int x = 0; x < task->filter_area.z; x++) {
 				filter_construct_transform_kernel()((float*) task->buffer.mem.device_pointer,
+				                                    task->tile_info,
 				                                    x + task->filter_area.x,
 				                                    y + task->filter_area.y,
 				                                    y*task->filter_area.z + x,
@@ -549,6 +550,8 @@ public:
 				                                    (int*)   task->storage.rank.device_pointer,
 				                                    &task->rect.x,
 				                                    task->buffer.pass_stride,
+				                                    task->buffer.frame_stride,
+				                                    task->buffer.use_time,
 				                                    task->radius,
 				                                    task->pca_threshold);
 			}
@@ -559,6 +562,7 @@ public:
 	bool denoising_accumulate(device_ptr color_ptr,
 	                          device_ptr color_variance_ptr,
 	                          device_ptr scale_ptr,
+	                          int frame,
 	                          DenoisingTask *task)
 	{
 		ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT);
@@ -568,6 +572,7 @@ public:
 		float *blurDifference = temporary_mem + task->buffer.pass_stride;
 
 		int r = task->radius;
+		int frame_offset = frame * task->buffer.frame_stride;
 		for(int i = 0; i < (2*r+1)*(2*r+1); i++) {
 			int dy = i / (2*r+1) - r;
 			int dx = i % (2*r+1) - r;
@@ -583,12 +588,14 @@ public:
 			                                    local_rect,
 			                                    task->buffer.stride,
 			                                    task->buffer.pass_stride,
+			                                    frame_offset,
 			                                    1.0f,
 			                                    task->nlm_k_2);
 			filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
 			filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, task->buffer.stride, 4);
 			filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
 			filter_nlm_construct_gramian_kernel()(dx, dy,
+			                                      task->tile_info->frames[frame],
 			                                      blurDifference,
 			                                      (float*)  task->buffer.mem.device_pointer,
 			                                      (float*)  task->storage.transform.device_pointer,
@@ -599,7 +606,9 @@ public:
 			                                      &task->reconstruction_state.filter_window.x,
 			                                      task->buffer.stride,
 			                                      4,
-			                                      task->buffer.pass_stride);
+			                                      task->buffer.pass_stride,
+			                                      frame_offset,
+			                                      task->buffer.use_time);
 		}
 
 		return true;
@@ -787,7 +796,7 @@ public:
 		tile.sample = tile.start_sample + tile.num_samples;
 
 		denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
-		denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, &denoising);
+		denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
 		denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising);
 		denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
 		denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index cb7d8bbb224..e21d974ebbe 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1301,6 +1301,7 @@ public:
 		int pass_stride = task->buffer.pass_stride;
 		int num_shifts = (2*r+1)*(2*r+1);
 		int channel_offset = task->nlm_state.is_color? task->buffer.pass_stride : 0;
+		int frame_offset = 0;
 
 		if(have_error())
 			return false;
@@ -1327,7 +1328,7 @@ public:
 
 			CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w*h, num_shifts);
 
-			void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &a, &k_2};
+			void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &frame_offset, &a, &k_2};
 			void *blur_args[]            = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
 			void *calc_weight_args[]     = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
 			void *update_output_args[]   = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, &channel_offset, &r, &f};
@@ -1367,13 +1368,16 @@ public:
 		                   task->storage.h);
 
 		void *args[] = {&task->buffer.mem.device_pointer,
+		                &task->tile_info_mem.device_pointer,
 		                &task->storage.transform.device_pointer,
 		                &task->storage.rank.device_pointer,
 		                &task->filter_area,
 		                &task->rect,
 		                &task->radius,
 		                &task->pca_threshold,
-		                &task->buffer.pass_stride};
+		                &task->buffer.pass_stride,
+		                &task->buffer.frame_stride,
+		                &task->buffer.use_time};
 		CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
 		cuda_assert(cuCtxSynchronize());
 
@@ -1383,6 +1387,7 @@ public:
 	bool denoising_accumulate(device_ptr color_ptr,
 	                          device_ptr color_variance_ptr,
 	                          device_ptr scale_ptr,
+	                          int frame,
 	                          DenoisingTask *task)
 	{
 		if(have_error())
@@ -1398,6 +1403,8 @@ public:
 		int w = task->reconstruction_state.source_w;
 		int h = task->reconstruction_state.source_h;
 		int stride = task->buffer.stride;
+		int frame_offset = frame * task->buffer.frame_stride;
+		int t = task->tile_info->frames[frame];
 
 		int pass_stride = task->buffer.pass_stride;
 		int num_shifts = (2*r+1)*(2*r+1);
@@ -1430,10 +1437,12 @@ public:
 		                                &w, &h,
 		                                &stride, &pass_stride,
 		                                &r, &pass_stride,
+		                                &frame_offset,
 		                                &a, &k_2};
 		void *blur_args[]            = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
 		void *calc_weight_args[]     = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
-		void *construct_gramian_args[] = {&blurDifference,
+		void *construct_gramian_args[] = {&t,
+		                                  &blurDifference,
 		                                  &task->buffer.mem.device_pointer,
 		                                  &task->storage.transform.device_pointer,
 		        

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list