[Bf-blender-cvs] [f487a15] soc-2016-cycles_denoising: Cycles: Use the prefiltered shadow feature for denoising

Lukas Stockner noreply at git.blender.org
Sun Jul 24 03:46:11 CEST 2016


Commit: f487a153359ef2d37aa993f5d294fd6c4e86abdb
Author: Lukas Stockner
Date:   Sun Jul 24 02:18:18 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBf487a153359ef2d37aa993f5d294fd6c4e86abdb

Cycles: Use the prefiltered shadow feature for denoising

This commit finally adds the prefiltered shadow feature to the main denoising algorithm.
Doing so improves detail preservation a lot: although the main focus is sharp shadow edges, it also helps with ambient-occlusion-like and geometric details.

The only issue is that some geometric edges might be a bit noisier after denoising, but that will be fixed in the future by downweighting the shadow feature
when the geometric changes (normals and depth features) are strong.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/kernel_filter.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 4a8acfd..47e977c 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -361,8 +361,8 @@ public:
 		RenderTile tile;
 
 		void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
-		void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, int4);
-		void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, int4);
+		void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
+		void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
 		if(system_cpu_support_avx2()) {
@@ -442,21 +442,26 @@ public:
 					int offsets[9] = {0, 0, 0, 0, tile.offset, 0, 0, 0, 0};
 					int strides[9] = {0, 0, 0, 0, tile.stride, 0, 0, 0, 0};
 					float *buffers[9] = {NULL, NULL, NULL, NULL, (float*) tile.buffer, NULL, NULL, NULL, NULL};
-					FilterStorage *storages = new FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
 
 					int overscan = tile.buffers->params.overscan;
 					int4 filter_rect = make_int4(tile.x + overscan, tile.y + overscan, tile.x + tile.w - overscan, tile.y + tile.h - overscan);
+					int4 prefilter_rect = make_int4(tile.x, tile.y, tile.x + tile.w, tile.y + tile.h);
+
+					float2* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, end_sample, buffers, tile_x, tile_y, offsets, strides);
+					FilterStorage *storages = new FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
+
 					for(int y = filter_rect.y; y < filter_rect.w; y++) {
 						for(int x = filter_rect.x; x < filter_rect.z; x++) {
-							filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+							filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
 						}
 					}
 					for(int y = filter_rect.y; y < filter_rect.w; y++) {
 						for(int x = filter_rect.x; x < filter_rect.z; x++) {
-							filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+							filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
 						}
 					}
 
+					delete[] prefiltered;
 #ifdef WITH_CYCLES_DEBUG_FILTER
 #define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, name).c_str(), &storages[0].var, tile.buffers->params.final_width, tile.buffers->params.final_height, sizeof(FilterStorage)/sizeof(float), tile.buffers->params.final_width);
 					for(int i = 0; i < DENOISE_FEATURES; i++) {
@@ -494,16 +499,25 @@ public:
 				FilterStorage *storages = new FilterStorage[tile.w*tile.h];
 
 				int4 filter_rect = make_int4(tile.x, tile.y, tile.x + tile.w, tile.y + tile.h);
+				int hw = kg.__data.integrator.half_window;
+				int4 prefilter_rect = make_int4(max(tile.x - hw, tile_x[0]), max(tile.y - hw, tile_y[0]), min(tile.x + tile.w + hw+1, tile_x[3]), min(tile.y + tile.h + hw+1, tile_y[3]));
+
+				float2* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, sample, buffers, tile_x, tile_y, offsets, strides);
+
 				for(int y = filter_rect.y; y < filter_rect.w; y++) {
 					for(int x = filter_rect.x; x < filter_rect.z; x++) {
-						filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+						filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
 					}
 				}
 				for(int y = filter_rect.y; y < filter_rect.w; y++) {
 					for(int x = filter_rect.x; x < filter_rect.z; x++) {
-						filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+						filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
 					}
 				}
+				delete[] prefiltered;
+
+
+
 #ifdef WITH_CYCLES_DEBUG_FILTER
 #define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, name).c_str(), &storages[0].var, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
 				for(int i = 0; i < DENOISE_FEATURES; i++) {
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 87b360b..dc649bb 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -18,18 +18,21 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define FOR_PIXEL_WINDOW for(int py = low.y; py < high.y; py++) { \
+#define FOR_PIXEL_WINDOW pre_buffer = prefiltered + (low.y - prefilter_rect.y)*prefilter_w + (low.x - prefilter_rect.x); \
+                         for(int py = low.y; py < high.y; py++) { \
                              int ytile = (py < tile_y[1])? 0: ((py < tile_y[2])? 1: 2); \
-                                 for(int px = low.x; px < high.x; px++) { \
-                                     int xtile = (px < tile_x[1])? 0: ((px < tile_x[2])? 1: 2); \
-                                     int tile = ytile*3+xtile; \
-                                     buffer = buffers[tile] + (offset[tile] + py*stride[tile] + px)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
+                             for(int px = low.x; px < high.x; px++, pre_buffer++) { \
+                                 int xtile = (px < tile_x[1])? 0: ((px < tile_x[2])? 1: 2); \
+                                 int tile = ytile*3+xtile; \
+                                 buffer = buffers[tile] + (offset[tile] + py*stride[tile] + px)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
 
-#define END_FOR_PIXEL_WINDOW }}
+#define END_FOR_PIXEL_WINDOW } \
+                             pre_buffer += prefilter_w - (high.x - low.x); \
+                         }
 
-#define FEATURE_PASSES 7 /* Normals, Albedo, Depth */
+#define FEATURE_PASSES 8 /* Normals, Albedo, Depth */
 
-ccl_device_inline void filter_get_features(int x, int y, float *buffer, float sample, float *features, float *mean)
+ccl_device_inline void filter_get_features(int x, int y, float *buffer, float2 *pre_buffer, float sample, float *features, float *mean)
 {
 	float sample_scale = 1.0f/sample;
 	features[0] = x;
@@ -38,21 +41,22 @@ ccl_device_inline void filter_get_features(int x, int y, float *buffer, float sa
 	features[3] = buffer[0] * sample_scale;
 	features[4] = buffer[1] * sample_scale;
 	features[5] = buffer[2] * sample_scale;
-	features[6] = buffer[6] * sample_scale;
-	features[7] = buffer[7] * sample_scale;
-	features[8] = buffer[8] * sample_scale;
+	features[6] = pre_buffer->x;
+	features[7] = buffer[6] * sample_scale;
+	features[8] = buffer[7] * sample_scale;
+	features[9] = buffer[8] * sample_scale;
 	if(mean) {
 		for(int i = 0; i < DENOISE_FEATURES; i++)
 			features[i] -= mean[i];
 	}
 #ifdef DENOISE_SECOND_ORDER_SCREEN
-	features[9] = features[0]*features[0];
-	features[10] = features[1]*features[1];
-	features[11] = features[0]*features[1];
+	features[10] = features[0]*features[0];
+	features[11] = features[1]*features[1];
+	features[12] = features[0]*features[1];
 #endif
 }
 
-ccl_device_inline void filter_get_feature_variance(int x, int y, float *buffer, float sample, float *features, float *scale)
+ccl_device_inline void filter_get_feature_variance(int x, int y, float *buffer, float2 *pre_buffer, float sample, float *features, float *scale)
 {
 	float sample_scale = 1.0f/sample;
 	float sample_scale_var = 1.0f/(sample - 1.0f);
@@ -62,13 +66,14 @@ ccl_device_inline void filter_get_feature_variance(int x, int y, float *buffer,
 	features[3] = saturate(buffer[3] * sample_scale_var) * sample_scale;
 	features[4] = saturate(buffer[4] * sample_scale_var) * sample_scale;
 	features[5] = saturate(buffer[5] * sample_scale_var) * sample_scale;
-	features[6] = saturate(buffer[9] * sample_scale_var) * sample_scale;
-	features[7] = saturate(buffer[10] * sample_scale_var) * sample_scale;
-	features[8] = saturate(buffer[11] * sample_scale_var) * sample_scale;
+	features[6] = saturate(pre_buffer->y);
+	features[7] = saturate(buffer[9] * sample_scale_var) * sample_scale;
+	features[8] = saturate(buffer[10] * sample_scale_var) * sample_scale;
+	features[9] = saturate(buffer[11] * sample_scale_var) * sample_scale;
 #ifdef DENOISE_SECOND_ORDER_SCREEN
-	features[9] = 0.0f;
 	features[10] = 0.0f;
 	features[11] = 0.0f;
+	features[12] = 0.0f;
 #endif
 	for(int i = 0; i < DENOISE_FEATURES; i++)
 		features[i] *= scale[i]*scale[i];
@@ -224,16 +229,19 @@ ccl_device void kernel_filter_combine_halves(int x, int y, float *mean, float *v
  * - Start of the next upper/right neighbor (not accessed)
  * buffers contains the nine buffer pointers (y-major ordering, starting with the lower left tile), offset and stride the respective parameters of the tile.
  */
-ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage, int4 filter_rect)
+ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage, float2 *prefiltered, int4 filter_rect, int4 prefilter_rect)
 {
 	storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + (x-filter_rect.x);
+	int prefilter_w = (prefilter_rect.z - prefilter_rect.x);
 
 	/* Temporary storage, used in different steps of the algorithm. */
 	float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], tempvector[4*DENOISE_FEATURES+1];
 	float *buffer, features[DENOISE_FEATURES];
+	float2 *pre_buffer;
 
 	/* === Get center pixel color and variance. === */
 	float *center_buffer = buffers[4] + (offset[4] + y*stride[4] + x)*kernel

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list