[Bf-blender-cvs] [5c9eaeb676] soc-2016-cycles_denoising: Cycles Denoising: Use different heuristic for feature space dimensionality reduction

Wed Feb 1 05:19:02 CET 2017

Commit: 5c9eaeb6762b1760537962d29215583df9ae5f42
Author: Lukas Stockner
Date:   Sun Jan 15 18:04:27 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB5c9eaeb6762b1760537962d29215583df9ae5f42

Cycles Denoising: Use different heuristic for feature space dimensionality reduction

Since the features that are used for denoising may be highly correlated (for example, with a greyscale texture the three albedo channels will be identical), using them directly for fitting would be rather unstable.
Therefore, before performing the actual fit a transformation into a reduced feature space is peformed using Principal Component Analysis by calculating the eigendecomposition of X^t*X, where X is the feature matrix.
After doing that, the eigenvectors are the basis vectors of the new feature space, and the eigenvalues specify their "importance". Therefore, by discarding eigenvectors whose eigenvalues are low, its possible to get rid of unneccessary dimensions.

Now, the question is which dimensions should be removed. The original WLR algorithm calculates a threshold based on the variance of the feature passes, with the goal of discarding noisy features. However, this implementation already prefilters the feature passes, so the (original) variance passes overestimate the actual variance a lot and discarding them isn't actually needed anymore.
Therefore, this commit replaces it with two simpler heuristics - either removing all eigenvalues below a certain threshold, or removing until a certain fraction of the energy in the eigenvalues is gone.
Which heuristic is used is chosen based on the sign of the filter strength, positive values choose the energy heuristic and negative values the absolute heuristic. In both cases, the threshold value is 10^(2*abs(filter strength)). If the default of zero is used, it uses the energy heuristic with a fraction of 10^-3.

Note that in some cases, especially motion blur and depth of field, this might cause new artifacts. These can be solved and I'll commit that soon. On the positive side, this change makes the denoiser handle hair/fur much better.

===================================================================

M	intern/cycles/blender/blender_session.cpp
M	intern/cycles/kernel/filter/filter_wlr.h
M	intern/cycles/kernel/filter/filter_wlr_cuda.h
M	intern/cycles/kernel/filter/filter_wlr_sse.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/integrator.cpp

===================================================================

diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 3c3ad659f3..b98db64eff 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -448,7 +448,7 @@ void BlenderSession::render()
 		buffer_params.selective_denoising = scene->film->selective_denoising;
 		buffer_params.cross_denoising = scene->film->cross_denoising;
 		scene->integrator->half_window = b_layer_iter->half_window();
-		scene->integrator->filter_strength = powf(2.0f, b_layer_iter->filter_strength());
+		scene->integrator->filter_strength = (b_layer_iter->filter_strength() == 0.0f)? 1e-3f : copysignf(powf(10.0f, -fabsf(b_layer_iter->filter_strength())*2.0f), b_layer_iter->filter_strength());
 		scene->integrator->weighting_adjust = powf(2.0f, b_layer_iter->filter_weighting_adjust() - 1.0f);
 		scene->integrator->use_gradients = b_layer_iter->filter_gradients();
 
@@ -1363,7 +1363,7 @@ void BlenderSession::denoise(BL::RenderResult& b_rr)
 
 		session->params.half_window = half_window;
 		session->params.samples = get_int(cscene, "samples");
-		session->params.filter_strength = powf(2.0f, filter_strength);
+		session->params.filter_strength = (filter_strength == 0.0f)? 1e-3f : copysignf(powf(10.0f, -fabsf(filter_strength)*2.0f), filter_strength);
 		session->params.filter_weight_adjust = powf(2.0f, weight_adjust - 1.0f);
 		session->params.filter_gradient = filter_gradient;
 
diff --git a/intern/cycles/kernel/filter/filter_wlr.h b/intern/cycles/kernel/filter/filter_wlr.h
index 95e4272ea2..bff8547aa3 100644
--- a/intern/cycles/kernel/filter/filter_wlr.h
+++ b/intern/cycles/kernel/filter/filter_wlr.h
@@ -70,51 +70,45 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
 	/* === Generate the feature transformation. ===
 	 * This transformation maps the DENOISE_FEATURES-dimentional feature space to a reduced feature (r-feature) space
 	 * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
-	float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f;
+	float* feature_matrix = tempmatrix;
 	math_trimatrix_zero(feature_matrix, DENOISE_FEATURES);
-#ifdef FULL_EIGENVALUE_NORM
-	float perturbation_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
-	math_trimatrix_zero(perturbation_matrix, NORM_FEATURE_NUM);
-#endif
 	FOR_PIXEL_WINDOW {
 		filter_get_features(pixel, pixel_buffer, features, feature_means, pass_stride);
 		math_vector_mul(features, feature_scale, DENOISE_FEATURES);
 		math_trimatrix_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f);
-
-		filter_get_feature_variance(pixel_buffer, features, feature_scale, pass_stride);
-#ifdef FULL_EIGENVALUE_NORM
-		math_trimatrix_add_gramian(perturbation_matrix, NORM_FEATURE_NUM, features, kernel_data.integrator.filter_strength);
-#else
-		for(int i = 0; i < NORM_FEATURE_NUM; i++)
-			feature_matrix_norm += features[i + NORM_FEATURE_OFFSET]*kernel_data.integrator.filter_strength;
-#endif
 	} END_FOR_PIXEL_WINDOW
 
 	float *feature_transform = &storage->transform[0];
-	int rank = math_trimatrix_jacobi_eigendecomposition(feature_matrix, feature_transform, DENOISE_FEATURES, 1);
-
-#ifdef FULL_EIGENVALUE_NORM
-	float tempvector_2[2*DENOISE_FEATURES];
-	for(int i = 0; i < DENOISE_FEATURES; i++)
-		tempvector_2[i] = 1.0f;
-	float singular_threshold = 0.01f + 2.0f * sqrtf(math_largest_eigenvalue(perturbation_matrix, NORM_FEATURE_NUM, tempvector_2, tempvector_2 + DENOISE_FEATURES));
-#else
-	float singular_threshold = 0.01f + 2.0f * (sqrtf(feature_matrix_norm) / (sqrtf(rank) * 0.5f));
-	singular_threshold *= singular_threshold;
-#endif
-
-	rank = 0;
-	for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
-		float s = feature_matrix[i*DENOISE_FEATURES+i];
-		if(i >= 2 && s < singular_threshold)
-			break;
-		/* Bake the feature scaling into the transformation matrix. */
-		math_vector_mul(feature_transform + rank*DENOISE_FEATURES, feature_scale, DENOISE_FEATURES);
+	math_trimatrix_jacobi_eigendecomposition(feature_matrix, feature_transform, DENOISE_FEATURES, 1);
+	int rank = 0;
+	if(kernel_data.integrator.filter_strength > 0.0f) {
+		float threshold_energy = 0.0f;
+		for(int i = 0; i < DENOISE_FEATURES; i++) {
+			threshold_energy += feature_matrix[i*DENOISE_FEATURES+i];
+		}
+		threshold_energy *= 1.0f-kernel_data.integrator.filter_strength;
+
+		float reduced_energy = 0.0f;
+		for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
+			float s = feature_matrix[i*DENOISE_FEATURES+i];
+			if(i >= 2 && reduced_energy >= threshold_energy)
+				break;
+			reduced_energy += s;
+			/* Bake the feature scaling into the transformation matrix. */
+			math_vector_mul(feature_transform + rank*DENOISE_FEATURES, feature_scale, DENOISE_FEATURES);
+		}
+	}
+	else {
+		for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
+			float s = feature_matrix[i*DENOISE_FEATURES+i];
+			if(i >= 2 && sqrtf(s) < -kernel_data.integrator.filter_strength)
+				break;
+			/* Bake the feature scaling into the transformation matrix. */
+			math_vector_mul(feature_transform + rank*DENOISE_FEATURES, feature_scale, DENOISE_FEATURES);
+		}
 	}
 
 #ifdef WITH_CYCLES_DEBUG_FILTER
-	storage->feature_matrix_norm = feature_matrix_norm;
-	storage->singular_threshold = singular_threshold;
 	for(int i = 0; i < DENOISE_FEATURES; i++) {
 		storage->means[i] = feature_means[i];
 		storage->scales[i] = feature_scale[i];
diff --git a/intern/cycles/kernel/filter/filter_wlr_cuda.h b/intern/cycles/kernel/filter/filter_wlr_cuda.h
index 841d84d710..4a451f295b 100644
--- a/intern/cycles/kernel/filter/filter_wlr_cuda.h
+++ b/intern/cycles/kernel/filter/filter_wlr_cuda.h
@@ -64,36 +64,46 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
 	/* === Generate the feature transformation. ===
 	 * This transformation maps the DENOISE_FEATURES-dimentional feature space to a reduced feature (r-feature) space
 	 * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
-	float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES], feature_matrix_norm = 0.0f;
+	float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
 	math_trimatrix_zero(feature_matrix, DENOISE_FEATURES);
 	FOR_PIXEL_WINDOW {
 		filter_get_features(pixel, pixel_buffer, features, feature_means, pass_stride);
 		math_vector_mul(features, feature_scale, DENOISE_FEATURES);
 		math_trimatrix_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f);
-
-		filter_get_feature_variance(pixel_buffer, features, feature_scale, pass_stride);
-		for(int i = 0; i < NORM_FEATURE_NUM; i++)
-			feature_matrix_norm += features[i + NORM_FEATURE_OFFSET]*kernel_data.integrator.filter_strength;
 	} END_FOR_PIXEL_WINDOW
 
-	int rank = math_trimatrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, transform_stride);
-
-	float singular_threshold = 0.01f + 2.0f * (sqrtf(feature_matrix_norm) / (sqrtf(rank) * 0.5f));
-	singular_threshold *= singular_threshold;
-
-	rank = 0;
-	for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
-		float s = feature_matrix[i*DENOISE_FEATURES+i];
-		if(i >= 2 && s < singular_threshold)
-			break;
-		/* Bake the feature scaling into the transformation matrix. */
-		math_vector_mul_strided(transform + rank*DENOISE_FEATURES*transform_stride, feature_scale, transform_stride, DENOISE_FEATURES);
+	math_trimatrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, transform_stride);
+	int rank = 0;
+	if(kernel_data.integrator.filter_strength > 0.0f) {
+		float threshold_energy = 0.0f;
+		for(int i = 0; i < DENOISE_FEATURES; i++) {
+			threshold_energy += feature_matrix[i*DENOISE_FEATURES+i];
+		}
+		threshold_energy *= 1.0f-kernel_data.integrator.filter_strength;
+
+		float reduced_energy = 0.0f;
+		for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
+			float s = feature_matrix[i*DENOISE_FEATURES+i];
+			if(i >= 2 && reduced_energy >= threshold_energy)
+				break;
+			reduced_energy += s;
+			/* Bake the feature scaling into the transformation matrix. */
+			math_vector_mul_strided(transform + rank*DENOISE_FEATURES*transform_stride, feature_scale, transform_stride, DENOISE_FEATURES);
+		}
 	}
+	else {
+		for(int i = 0; i < DENOISE_FEATURES; i++, rank++) {
+			float s = feature_matrix[i*DENOISE_FEATURES+i];
+			if(i >= 2 && sqrtf(s) < -kernel_data.integrator.filter_strength)
+				break;
+			/* Bake the feature scaling into the transformation matrix. */
+			math_vector_mul_strided(transform + rank*DENOISE_FEATURES*transform_stride, feature_scale, transform_stride, DENOISE_FEATURES);
+		}
+	}
+
 	storage->rank = rank;
 
 #ifdef WITH_CYCLES_DEBUG_FILTER
-	storage->feature_matrix_norm = feature_matrix_norm;
-	storage->singular_threshold = singular_threshold;
 	for(int i = 0; i < DENOISE_FEATURES; i++) {
 		storage->means[i] = feature_means[i];
 		storage->scales[i] = feature_scale[i];
diff --git a/intern/cycles/kernel/filter/filter_wlr_sse.h b/intern/cycles/kernel/filter/filter_wlr_sse.h
index 4051cf5d0c..268f898cc2 100644
--- a/intern/cycles/kernel/filter/filter_wlr_sse.h
+++ b/intern/cycles/kernel/filter/filter_wlr_sse.h
@@ -56,41 +56,52 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
 	filter_calculate_scale_sse(feature_scale);
 
 	__m128 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES];
-	__m128 feature_matrix_norm = _mm_setzero_ps();
 	math_trimatrix_zero_sse(feature_matrix_sse, DENOISE_FEATURES);
 	FOR_PIXEL_WINDOW_SSE {
 		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
 		math_vector_mul_sse(features, DENOISE_FEATURES, feature_scale);
 		math_trimatrix_add_gramian_sse(feature_matrix_sse, DENOISE_FEATURES, features, _mm_set1_ps(1.0f));
-
-		filter_get_feature_variance_sse(x4, y4, active_pixels, pixel_buffer, features, feature_scale, pass_stride);
-		math_vector_scale_sse(features, DENOISE_FEATURES, _mm_set1_ps(kernel_data.integrator.filter_strength));
-		for(int i = 0; i < NORM_FEATURE_NUM; i++)
-			feature_matrix_norm = _mm_add_ps(feature_matrix_norm, features[i + NORM_FEATURE_OFFSET]);
 	} END_FOR_PIXEL_WINDOW_SSE
 
 	float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
 	math_trimatrix_hsum(fe

@@ Diff output truncated at 10240 characters. @@