[Bf-blender-cvs] [a7220bf] soc-2016-cycles_denoising: Cycles: Move normal equation solver into a separate function and rename a few variables for clarity
Lukas Stockner
noreply at git.blender.org
Tue Dec 20 16:06:55 CET 2016
Commit: a7220bffbfa5b3d5726e39397ff382984c5be499
Author: Lukas Stockner
Date: Tue Dec 20 05:22:50 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBa7220bffbfa5b3d5726e39397ff382984c5be499
Cycles: Move normal equation solver into a separate function and rename a few variables for clarity
===================================================================
M intern/cycles/kernel/filter/filter_final_pass_impl.h
M intern/cycles/kernel/filter/filter_wlr.h
M intern/cycles/kernel/filter/filter_wlr_cuda.h
M intern/cycles/kernel/filter/filter_wlr_sse.h
M intern/cycles/util/util_math_matrix.h
===================================================================
diff --git a/intern/cycles/kernel/filter/filter_final_pass_impl.h b/intern/cycles/kernel/filter/filter_final_pass_impl.h
index 19c4633..20b500b 100644
--- a/intern/cycles/kernel/filter/filter_final_pass_impl.h
+++ b/intern/cycles/kernel/filter/filter_final_pass_impl.h
@@ -104,7 +104,7 @@ ccl_device void FUNCTION_NAME(KernelGlobals *kg, int sample, float ccl_readonly_
math_matrix_zero(XtWX, matrix_size);
math_vec3_zero(solution, matrix_size);
- /* Construct Xt*W*X matrix (and fill weight cache, if used). */
+ /* Construct Xt*W*X matrix and Xt*W*y vector (and fill weight cache, if used). */
FOR_PIXEL_WINDOW {
float3 color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
float variance = filter_get_pixel_variance(pixel_buffer + color_passes.x, pass_stride);
@@ -141,17 +141,7 @@ ccl_device void FUNCTION_NAME(KernelGlobals *kg, int sample, float ccl_readonly_
math_add_vec3(solution, matrix_size, design_row, weight * color);
} END_FOR_PIXEL_WINDOW
- /* Solve S = inv(Xt*W*X)*Xt*W*y.
- * Instead of explicitly inverting Xt*W*X, we rearrange to:
- * (Xt*W*X)*S = Wt*W*y
- * Xt*W*X is per definition symmetric positive-semidefinite, so we can apply Cholesky decomposition to find a lower triangular L so that L*Lt = Xt*W*X.
- * With, that we get (L*Lt)*S = L*(Lt*S) = L*b = Wt*W*y.
- * Since L is lower triangular, finding b (=Lt*S) is relatively easy.
- * Then, the remaining problem is Lt*S = b, which also can be solved easily. */
- math_matrix_add_diagonal(XtWX, matrix_size, 1e-4f); /* Improve the numerical stability. */
- math_cholesky(XtWX, matrix_size); /* Find L so that L*Lt = Xt*W*X. */
- math_substitute_forward_vec3(XtWX, matrix_size, solution); /* Solve L*b = X^T*y, replacing X^T*y by b. */
- math_substitute_back_vec3(XtWX, matrix_size, solution); /* Solve L^T*S = b, replacing b by S. */
+ math_solve_normal_equation(XtWX, solution, matrix_size);
if(kernel_data.integrator.use_gradients) {
FOR_PIXEL_WINDOW {
diff --git a/intern/cycles/kernel/filter/filter_wlr.h b/intern/cycles/kernel/filter/filter_wlr.h
index 2239a11..c2a7f25 100644
--- a/intern/cycles/kernel/filter/filter_wlr.h
+++ b/intern/cycles/kernel/filter/filter_wlr.h
@@ -176,11 +176,11 @@ ccl_device void kernel_filter_estimate_wlr_params(KernelGlobals *kg, int sample,
* both the r-feature vector z as well as z^T*z and using the resulting parameter for
* that dimension of the z^T*z vector times two as the derivative. */
int matrix_size = 2*rank + 1; /* Constant term (1 dim) + z (rank dims) + z^T*z (rank dims) */
- float *XtX = tempmatrix, *design_row = tempvector;
- float3 XtY[2*DENOISE_FEATURES+1];
+ float *XtWX = tempmatrix, *design_row = tempvector;
+ float3 XtWy[2*DENOISE_FEATURES+1];
- math_matrix_zero_lower(XtX, matrix_size);
- math_vec3_zero(XtY, matrix_size);
+ math_matrix_zero_lower(XtWX, matrix_size);
+ math_vec3_zero(XtWy, matrix_size);
FOR_PIXEL_WINDOW {
float weight = filter_get_design_row_transform_weight(pixel, pixel_buffer, feature_means, pass_stride, features, rank, design_row, feature_transform, 1, NULL);
@@ -188,23 +188,16 @@ ccl_device void kernel_filter_estimate_wlr_params(KernelGlobals *kg, int sample,
weight /= max(1.0f, filter_get_pixel_variance(pixel_buffer, pass_stride));
filter_extend_design_row_quadratic(rank, design_row);
- math_add_gramian(XtX, matrix_size, design_row, weight);
- math_add_vec3(XtY, matrix_size, design_row, weight * filter_get_pixel_color(pixel_buffer, pass_stride));
+ math_add_gramian(XtWX, matrix_size, design_row, weight);
+ math_add_vec3(XtWy, matrix_size, design_row, weight * filter_get_pixel_color(pixel_buffer, pass_stride));
} END_FOR_PIXEL_WINDOW
- /* Solve the normal equation of the linear least squares system: Decompose A = X^T*X into L
- * so that L is a lower triangular matrix and L*L^T = A. Then, solve
- * A*x = (L*L^T)*x = L*(L^T*x) = X^T*y by first solving L*b = X^T*y and then L^T*x = b through
- * forward- and backsubstitution. */
- math_matrix_add_diagonal(XtX, matrix_size, 1e-3f); /* Improve the numerical stability. */
- math_cholesky(XtX, matrix_size); /* Decompose A=X^T*x to L. */
- math_substitute_forward_vec3(XtX, matrix_size, XtY); /* Solve L*b = X^T*y. */
- math_substitute_back_vec3(XtX, matrix_size, XtY); /* Solve L^T*x = b. */
+ math_solve_normal_equation(XtWX, XtWy, matrix_size);
/* Calculate the inverse of the r-feature bandwidths. */
float *bandwidth_factor = &storage->bandwidth[0];
for(int i = 0; i < rank; i++)
- bandwidth_factor[i] = sqrtf(2.0f * average(fabs(XtY[1+rank+i])) + 0.16f);
+ bandwidth_factor[i] = sqrtf(2.0f * average(fabs(XtWy[1+rank+i])) + 0.16f);
for(int i = rank; i < DENOISE_FEATURES; i++)
bandwidth_factor[i] = 0.0f;
@@ -221,7 +214,7 @@ ccl_device void kernel_filter_estimate_wlr_params(KernelGlobals *kg, int sample,
g_bandwidth_factor[i] = bandwidth_factor[i]/candidate_bw[g];
matrix_size = rank+1;
- math_matrix_zero_lower(XtX, matrix_size);
+ math_matrix_zero_lower(XtWX, matrix_size);
FOR_PIXEL_WINDOW {
float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
@@ -233,18 +226,18 @@ ccl_device void kernel_filter_estimate_wlr_params(KernelGlobals *kg, int sample,
if(weight == 0.0f) continue;
weight /= max(1.0f, variance);
- math_add_gramian(XtX, matrix_size, design_row, weight);
+ math_add_gramian(XtWX, matrix_size, design_row, weight);
} END_FOR_PIXEL_WINDOW
- math_matrix_add_diagonal(XtX, matrix_size, 1e-4f); /* Improve the numerical stability. */
- math_cholesky(XtX, matrix_size);
- math_inverse_lower_tri_inplace(XtX, matrix_size);
+ math_matrix_add_diagonal(XtWX, matrix_size, 1e-4f); /* Improve the numerical stability. */
+ math_cholesky(XtWX, matrix_size);
+ math_inverse_lower_tri_inplace(XtWX, matrix_size);
float r_feature_weight[DENOISE_FEATURES+1];
math_vector_zero(r_feature_weight, matrix_size);
for(int col = 0; col < matrix_size; col++)
for(int row = col; row < matrix_size; row++)
- r_feature_weight[col] += XtX[row]*XtX[col*matrix_size+row];
+ r_feature_weight[col] += XtWX[row]*XtWX[col*matrix_size+row];
float3 est_color = make_float3(0.0f, 0.0f, 0.0f), est_pos_color = make_float3(0.0f, 0.0f, 0.0f);
float est_variance = 0.0f, est_pos_variance = 0.0f;
diff --git a/intern/cycles/kernel/filter/filter_wlr_cuda.h b/intern/cycles/kernel/filter/filter_wlr_cuda.h
index 81454f5..0322d89 100644
--- a/intern/cycles/kernel/filter/filter_wlr_cuda.h
+++ b/intern/cycles/kernel/filter/filter_wlr_cuda.h
@@ -136,11 +136,11 @@ ccl_device void kernel_filter_estimate_bandwidths(KernelGlobals *kg, int sample,
* both the r-feature vector z as well as z^T*z and using the resulting parameter for
* that dimension of the z^T*z vector times two as the derivative. */
int matrix_size = 2*rank + 1; /* Constant term (1 dim) + z (rank dims) + z^T*z (rank dims) */
- float XtX[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], design_row[2*DENOISE_FEATURES+1];
- float3 XtY[2*DENOISE_FEATURES+1];
+ float XtWX[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], design_row[2*DENOISE_FEATURES+1];
+ float3 XtWy[2*DENOISE_FEATURES+1];
- math_matrix_zero_lower(XtX, matrix_size);
- math_vec3_zero(XtY, matrix_size);
+ math_matrix_zero_lower(XtWX, matrix_size);
+ math_vec3_zero(XtWy, matrix_size);
FOR_PIXEL_WINDOW {
float weight = filter_get_design_row_transform_weight(pixel, pixel_buffer, feature_means, pass_stride, features, rank, design_row, transform, transform_stride, NULL);
@@ -148,22 +148,15 @@ ccl_device void kernel_filter_estimate_bandwidths(KernelGlobals *kg, int sample,
weight /= max(1.0f, filter_get_pixel_variance(pixel_buffer, pass_stride));
filter_extend_design_row_quadratic(rank, design_row);
- math_add_gramian(XtX, matrix_size, design_row, weight);
- math_add_vec3(XtY, matrix_size, design_row, weight * filter_get_pixel_color(pixel_buffer, pass_stride));
+ math_add_gramian(XtWX, matrix_size, design_row, weight);
+ math_add_vec3(XtWy, matrix_size, design_row, weight * filter_get_pixel_color(pixel_buffer, pass_stride));
} END_FOR_PIXEL_WINDOW
- /* Solve the normal equation of the linear least squares system: Decompose A = X^T*X into L
- * so that L is a lower triangular matrix and L*L^T = A. Then, solve
- * A*x = (L*L^T)*x = L*(L^T*x) = X^T*y by first solving L*b = X^T*y and then L^T*x = b through
- * forward- and backsubstitution. */
- math_matrix_add_diagonal(XtX, matrix_size, 1e-3f); /* Improve the numerical stability. */
- math_cholesky(XtX, matrix_size); /* Decompose A=X^T*x to L. */
- math_substitute_forward_vec3(XtX, matrix_size, XtY); /* Solve L*b = X^T*y. */
- math_substitute_back_vec3(XtX, matrix_size, XtY); /* Solve L^T*x = b. */
+ math_solve_normal_equation(XtWX, XtWy, matrix_size);
/* Calculate the inverse of the r-feature bandwidths. */
for(int i = 0; i < rank; i++)
- storage->bandwidth[i] = sqrtf(2.0f * average(fabs(XtY[1+rank+i])) + 0.16f);
+ storage->bandwidth[i] = sqrtf(2.0f * average(fabs(XtWy[1+rank+i])) + 0.16f);
for(int i = rank; i < DENOISE_FEATURES; i++)
storage->bandwidth[i] = 0.0f;
}
@@ -202,8 +195,8 @@ ccl_device void kernel_filter_estimate_bias_variance(KernelGlobals *kg, int samp
g_bandwidth_factor[i] = storage->bandwidth[i]/candidate_bw[candidate];
int matrix_size = rank+1;
- float XtX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
- math_matrix_zero_lower(XtX, matrix_size);
+ float XtWX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
+ math_matrix_zero_lower(XtWX, matrix_size);
FOR_PIXEL_WINDOW {
float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
@@ -215,18 +208,18 @@ ccl_device void kernel_filter_estimate_bias_variance(KernelGlobals *kg, int samp
if(weight == 0.0f) continue;
weight /= max(1.0f, variance);
- math_add_gramian(XtX, matrix_size, design_row, weight);
+ math_add_gramian(XtWX, matrix_size, design_row, weight);
} END_FOR_PIXEL_WINDOW
- math_matrix_add_diagonal(XtX, matrix_size, 1e-4f); /* Improve the numerical stability. */
- math_cholesky(XtX, matrix_size);
- math_inverse_lower_tri_inplace(XtX, matrix_size);
+ math_matrix_add_diagonal(XtWX, matrix_size, 1e-4f);
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list