[Bf-blender-cvs] [2af9026] soc-2016-cycles_denoising: Cycles: Implement the multi-frame denoising kernel

Lukas Stockner noreply at git.blender.org
Sat Aug 13 05:12:39 CEST 2016


Commit: 2af90268949d399180c83f6791f9f14cee886845
Author: Lukas Stockner
Date:   Sat Aug 13 04:07:59 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB2af90268949d399180c83f6791f9f14cee886845

Cycles: Implement the multi-frame denoising kernel

This commit changes the denoising kernel to actually use the additional frames.
The required changes are surprisingly small: one additional feature holds the
frame to which the pixel belongs, and the per-pixel loop now iterates over
frames first.

===================================================================

M	intern/cycles/kernel/kernel_filter.h
M	intern/cycles/kernel/kernel_filter_util.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/film.cpp
M	intern/cycles/render/session.cpp

===================================================================

diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index ffb7760..f4db090 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -27,7 +27,10 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float *buffer, int x, int y, FilterStorage *storage, int4 rect)
 {
 	int buffer_w = align_up(rect.z - rect.x, 4);
-	int pass_stride = (rect.w - rect.y) * buffer_w;
+	int buffer_h = (rect.w - rect.y);
+	int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
+	int num_frames = kernel_data.film.num_frames;
+	int prev_frames = kernel_data.film.prev_frames;
 
 	__m128 features[DENOISE_FEATURES];
 	float *pixel_buffer;
@@ -39,7 +42,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 
 	__m128 feature_means[DENOISE_FEATURES] = {_mm_setzero_ps()};
 	FOR_PIXEL_WINDOW_SSE {
-		filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, NULL, pass_stride);
+		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, NULL, pass_stride);
 		math_add_vector_sse(feature_means, DENOISE_FEATURES, features);
 	} END_FOR_PIXEL_WINDOW_SSE
 
@@ -50,7 +53,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 
 	__m128 feature_scale[DENOISE_FEATURES] = {_mm_setzero_ps()};
 	FOR_PIXEL_WINDOW_SSE {
-		filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
+		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
 		for(int i = 0; i < DENOISE_FEATURES; i++)
 			feature_scale[i] = _mm_max_ps(feature_scale[i], _mm_fabs_ps(features[i]));
 	} END_FOR_PIXEL_WINDOW_SSE
@@ -62,7 +65,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	__m128 feature_matrix_norm = _mm_setzero_ps();
 	math_matrix_zero_lower_sse(feature_matrix_sse, DENOISE_FEATURES);
 	FOR_PIXEL_WINDOW_SSE {
-		filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
+		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
 		math_mul_vector_sse(features, DENOISE_FEATURES, feature_scale);
 		math_add_gramian_sse(feature_matrix_sse, DENOISE_FEATURES, features, _mm_set1_ps(1.0f));
 
@@ -97,7 +100,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	/* From here on, the mean of the features will be shifted to the central pixel's values. */
 	float feature_means_scalar[DENOISE_FEATURES];
 	float *center_buffer = buffer + (y - rect.y) * buffer_w + (x - rect.x);
-	filter_get_features(x, y, center_buffer, feature_means_scalar, NULL, pass_stride);
+	filter_get_features(x, y, 0, center_buffer, feature_means_scalar, NULL, pass_stride);
 	for(int i = 0; i < DENOISE_FEATURES; i++)
 		feature_means[i] = _mm_set1_ps(feature_means_scalar[i]);
 
@@ -114,7 +117,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	math_matrix_zero_lower_sse(XtX_sse, matrix_size);
 	math_vec3_zero(XtY, matrix_size);
 	FOR_PIXEL_WINDOW_SSE {
-		filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
+		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
 		__m128 weight = filter_fill_design_row_sse(features, active_pixels, rank, design_row, feature_transform_sse, NULL);
 		active_pixels = _mm_and_ps(active_pixels, _mm_cmpneq_ps(weight, _mm_setzero_ps()));
 
@@ -178,7 +181,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 			active_pixels = _mm_and_ps(active_pixels, filter_firefly_rejection_sse(color, variance, center_color_sse, sqrt_center_variance_sse));
 			if(!_mm_movemask_ps(active_pixels)) continue;
 
-			filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
+			filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
 			__m128 weight = filter_fill_design_row_sse(features, active_pixels, rank, design_row, feature_transform_sse, g_bandwidth_factor);
 			active_pixels = _mm_and_ps(active_pixels, _mm_cmpneq_ps(weight, _mm_setzero_ps()));
 			if(!_mm_movemask_ps(active_pixels)) continue;
@@ -211,7 +214,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 			__m128 variance = filter_get_pixel_variance_sse(pixel_buffer, active_pixels, pass_stride);
 			active_pixels = _mm_and_ps(active_pixels, filter_firefly_rejection_sse(color, variance, center_color_sse, sqrt_center_variance_sse));
 
-			filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
+			filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
 			__m128 weight = filter_fill_design_row_sse(features, active_pixels, rank, design_row, feature_transform_sse, g_bandwidth_factor);
 			active_pixels = _mm_and_ps(active_pixels, _mm_cmpneq_ps(weight, _mm_setzero_ps()));
 
@@ -275,7 +278,10 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float *buffer, int x, int y, FilterStorage *storage, int4 rect)
 {
 	int buffer_w = align_up(rect.z - rect.x, 4);
-	int pass_stride = (rect.w - rect.y) * buffer_w;
+	int buffer_h = (rect.w - rect.y);
+	int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
+	int num_frames = kernel_data.film.num_frames;
+	int prev_frames = kernel_data.film.prev_frames;
 
 	/* Temporary storage, used in different steps of the algorithm. */
 	float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], tempvector[4*DENOISE_FEATURES+1];
@@ -302,7 +308,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	/* === Shift feature passes to have mean 0. === */
 	float feature_means[DENOISE_FEATURES] = {0.0f};
 	FOR_PIXEL_WINDOW {
-		filter_get_features(px, py, pixel_buffer, features, NULL, pass_stride);
+		filter_get_features(px, py, pt, pixel_buffer, features, NULL, pass_stride);
 		for(int i = 0; i < DENOISE_FEATURES; i++)
 			feature_means[i] += features[i];
 	} END_FOR_PIXEL_WINDOW
@@ -316,7 +322,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	math_vector_zero(feature_scale, DENOISE_FEATURES);
 
 	FOR_PIXEL_WINDOW {
-		filter_get_features(px, py, pixel_buffer, features, feature_means, pass_stride);
+		filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
 		for(int i = 0; i < DENOISE_FEATURES; i++)
 			feature_scale[i] = max(feature_scale[i], fabsf(features[i]));
 	} END_FOR_PIXEL_WINDOW
@@ -337,7 +343,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	math_matrix_zero_lower(perturbation_matrix, NORM_FEATURE_NUM);
 #endif
 	FOR_PIXEL_WINDOW {
-		filter_get_features(px, py, pixel_buffer, features, feature_means, pass_stride);
+		filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
 		for(int i = 0; i < DENOISE_FEATURES; i++)
 			features[i] *= feature_scale[i];
 		math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f);
@@ -385,7 +391,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 #endif
 
 	/* From here on, the mean of the features will be shifted to the central pixel's values. */
-	filter_get_features(x, y, center_buffer, feature_means, NULL, pass_stride);
+	filter_get_features(x, y, 0, center_buffer, feature_means, NULL, pass_stride);
 
 
 
@@ -402,7 +408,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 	math_matrix_zero_lower(XtX, matrix_size);
 	math_vec3_zero(XtY, matrix_size);
 	FOR_PIXEL_WINDOW {
-		filter_get_features(px, py, pixel_buffer, features, feature_means, pass_stride);
+		filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
 		float weight = filter_fill_design_row(features, rank, design_row, feature_transform, NULL);
 	
 		if(weight == 0.0f) continue;
@@ -450,7 +456,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 			float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
 			if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) continue;
 
-			filter_get_features(px, py, pixel_buffer, features, feature_means, pass_stride);
+			filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
 			float weight = filter_fill_design_row(features, rank, design_row, feature_transform, g_bandwidth_factor);
 
 			if(weight == 0.0f) continue;
@@ -478,7 +484,7 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 			float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
 			if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) continue;
 
-			filter_get_features(px, py, pixel_buffer, features, feature_means, pass_stride);
+			filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
 			float weight = filter_fill_design_row(features, rank, design_row, feature_transform, g_bandwidth_factor);
 
 			if(weight == 0.0f) continue;
@@ -535,7 +541,11 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float *buffer, int x, int y, int offset, int stride, float *buffers, FilterStorage *storage, int4 filter_area, int4 rect)
 {
 	int buffer_w = align_up(rect.z - rect.x, 4);
-	int pass_stride = (rect.w - rect.y) * buffer_w;
+	int buffer_h = (rect.w - rect.y);
+	int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
+	int num_frames = kernel_data.film.num_frames;
+	int prev_frames = kernel_data.film.prev_frames;
+
 	float features[DENOISE_FEATURES];
 	float *pixel_buffer;
 
@@ -545,7 +555,7 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float *b
 	float sqrt_center_variance = s

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list