[Bf-blender-cvs] [abac5736e4] soc-2016-cycles_denoising: Cycles: Rename and add new matrix math helper functions to clean up the actual filter code
Lukas Stockner
noreply at git.blender.org
Thu Jan 12 05:14:06 CET 2017
Commit: abac5736e42f476c05bbbab3b8ca8d4bccee8f58
Author: Lukas Stockner
Date: Wed Dec 21 05:09:28 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBabac5736e42f476c05bbbab3b8ca8d4bccee8f58
Cycles: Rename and add new matrix math helper functions to clean up the actual filter code
===================================================================
M intern/cycles/kernel/filter/filter_features.h
M intern/cycles/kernel/filter/filter_features_sse.h
M intern/cycles/kernel/filter/filter_final_pass_impl.h
M intern/cycles/kernel/filter/filter_wlr.h
M intern/cycles/kernel/filter/filter_wlr_cuda.h
M intern/cycles/kernel/filter/filter_wlr_sse.h
M intern/cycles/util/util_math.h
M intern/cycles/util/util_math_matrix.h
===================================================================
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index f43dbada0f..604b7fac03 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -174,9 +174,9 @@ ccl_device_inline float filter_get_design_row_transform_weight(int3 pixel, float
float weight = 1.0f;
for(int d = 0; d < rank; d++) {
#ifdef __KERNEL_CUDA__
- float x = math_dot_cuda(features, feature_transform + d*DENOISE_FEATURES*transform_stride, transform_stride, DENOISE_FEATURES);
+ float x = math_vector_dot_strided(features, feature_transform + d*DENOISE_FEATURES*transform_stride, transform_stride, DENOISE_FEATURES);
#else
- float x = math_dot(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
+ float x = math_vector_dot(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
#endif
float x2 = x;
if(bandwidth_factor) x2 *= bandwidth_factor[d];
@@ -209,9 +209,9 @@ ccl_device_inline void filter_get_design_row_transform(int3 pixel, float ccl_rea
design_row[0] = 1.0f;
for(int d = 0; d < rank; d++) {
#ifdef __KERNEL_CUDA__
- float x = math_dot_cuda(features, feature_transform + d*DENOISE_FEATURES*transform_stride, transform_stride, DENOISE_FEATURES);
+ float x = math_vector_dot_strided(features, feature_transform + d*DENOISE_FEATURES*transform_stride, transform_stride, DENOISE_FEATURES);
#else
- float x = math_dot(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
+ float x = math_vector_dot(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
#endif
design_row[1+d] = x;
}
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 5d57093ec7..51d8347d90 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -188,7 +188,7 @@ ccl_device_inline __m128 filter_fill_design_row_sse(__m128 *features, __m128 act
__m128 weight = _mm_mask_ps(_mm_set1_ps(1.0f), active_pixels);
design_row[0] = weight;
for(int d = 0; d < rank; d++) {
- __m128 x = math_dot_sse(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
+ __m128 x = math_vector_dot_sse(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
__m128 x2 = _mm_mul_ps(x, bandwidth_factor[d]);
x2 = _mm_mul_ps(x2, x2);
weight = _mm_mask_ps(_mm_mul_ps(weight, _mm_mul_ps(_mm_set1_ps(0.75f), _mm_sub_ps(_mm_set1_ps(1.0f), x2))), _mm_and_ps(_mm_cmplt_ps(x2, _mm_set1_ps(1.0f)), active_pixels));
@@ -202,7 +202,7 @@ ccl_device_inline __m128 filter_fill_design_row_quadratic_sse(__m128 *features,
__m128 weight = _mm_mask_ps(_mm_set1_ps(1.0f), active_pixels);
design_row[0] = weight;
for(int d = 0; d < rank; d++) {
- __m128 x = math_dot_sse(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
+ __m128 x = math_vector_dot_sse(features, feature_transform + d*DENOISE_FEATURES, DENOISE_FEATURES);
__m128 x2 = _mm_mul_ps(x, x);
weight = _mm_mask_ps(_mm_mul_ps(weight, _mm_mul_ps(_mm_set1_ps(0.75f), _mm_sub_ps(_mm_set1_ps(1.0f), x2))), _mm_and_ps(_mm_cmplt_ps(x2, _mm_set1_ps(1.0f)), active_pixels));
design_row[1+d] = x;
diff --git a/intern/cycles/kernel/filter/filter_final_pass_impl.h b/intern/cycles/kernel/filter/filter_final_pass_impl.h
index 7ab0a856f9..708cfe65b5 100644
--- a/intern/cycles/kernel/filter/filter_final_pass_impl.h
+++ b/intern/cycles/kernel/filter/filter_final_pass_impl.h
@@ -102,7 +102,7 @@ ccl_device void FUNCTION_NAME(KernelGlobals *kg, int sample, float ccl_readonly_
float XtWX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
float3 solution[(DENOISE_FEATURES+1)];
- math_matrix_zero_lower(XtWX, matrix_size);
+ math_trimatrix_zero(XtWX, matrix_size);
math_vec3_zero(solution, matrix_size);
/* Construct Xt*W*X matrix and Xt*W*y vector (and fill weight cache, if used). */
FOR_PIXEL_WINDOW {
@@ -137,11 +137,11 @@ ccl_device void FUNCTION_NAME(KernelGlobals *kg, int sample, float ccl_readonly_
weight /= max(1.0f, variance);
weight_cache[cache_idx] = weight;
- math_add_gramian(XtWX, matrix_size, design_row, weight);
- math_add_vec3(solution, matrix_size, design_row, weight * color);
+ math_trimatrix_add_gramian(XtWX, matrix_size, design_row, weight);
+ math_vec3_add(solution, matrix_size, design_row, weight * color);
} END_FOR_PIXEL_WINDOW
- math_solve_normal_equation(XtWX, solution, matrix_size);
+ math_trimatrix_vec3_solve(XtWX, solution, matrix_size);
if(kernel_data.integrator.use_gradients) {
FOR_PIXEL_WINDOW {
@@ -184,7 +184,7 @@ ccl_device void FUNCTION_NAME(KernelGlobals *kg, int sample, float ccl_readonly_
}
#endif
- float3 reconstruction = math_dot_vec3(design_row, solution, matrix_size);
+ float3 reconstruction = math_vector_vec3_dot(design_row, solution, matrix_size);
#ifdef OUTPUT_RENDERBUFFER
if(pixel.y >= filter_area.y && pixel.y < filter_area.y+filter_area.w && pixel.x >= filter_area.x && pixel.x < filter_area.x+filter_area.z) {
float *combined_buffer = buffers + (offset + pixel.y*stride + pixel.x)*kernel_data.film.pass_stride;
diff --git a/intern/cycles/kernel/filter/filter_wlr.h b/intern/cycles/kernel/filter/filter_wlr.h
index 7e7f2b6465..ff520d336d 100644
--- a/intern/cycles/kernel/filter/filter_wlr.h
+++ b/intern/cycles/kernel/filter/filter_wlr.h
@@ -45,16 +45,15 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
/* === Shift feature passes to have mean 0. === */
- float feature_means[DENOISE_FEATURES] = {0.0f};
+ float feature_means[DENOISE_FEATURES];
+ math_vector_zero(feature_means, DENOISE_FEATURES);
FOR_PIXEL_WINDOW {
filter_get_features(pixel, pixel_buffer, features, NULL, pass_stride);
- for(int i = 0; i < DENOISE_FEATURES; i++)
- feature_means[i] += features[i];
+ math_vector_add(feature_means, features, DENOISE_FEATURES);
} END_FOR_PIXEL_WINDOW
float pixel_scale = 1.0f / ((high.y - low.y) * (high.x - low.x));
- for(int i = 0; i < DENOISE_FEATURES; i++)
- feature_means[i] *= pixel_scale;
+ math_vector_scale(feature_means, pixel_scale, DENOISE_FEATURES);
/* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
float *feature_scale = tempvector;
@@ -62,8 +61,7 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
FOR_PIXEL_WINDOW {
filter_get_feature_scales(pixel, pixel_buffer, features, feature_means, pass_stride);
- for(int i = 0; i < DENOISE_FEATURES; i++)
- feature_scale[i] = max(feature_scale[i], features[i]);
+ math_vector_max(feature_scale, features, DENOISE_FEATURES);
} END_FOR_PIXEL_WINDOW
filter_calculate_scale(feature_scale);
@@ -73,20 +71,19 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
* This transformation maps the DENOISE_FEATURES-dimensional feature space to a reduced feature (r-feature) space
* which generally has fewer dimensions. This mainly helps to prevent overfitting. */
float* feature_matrix = tempmatrix, feature_matrix_norm = 0.0f;
- math_matrix_zero_lower(feature_matrix, DENOISE_FEATURES);
+ math_trimatrix_zero(feature_matrix, DENOISE_FEATURES);
#ifdef FULL_EIGENVALUE_NORM
float perturbation_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
- math_matrix_zero_lower(perturbation_matrix, NORM_FEATURE_NUM);
+ math_trimatrix_zero(perturbation_matrix, NORM_FEATURE_NUM);
#endif
FOR_PIXEL_WINDOW {
filter_get_features(pixel, pixel_buffer, features, feature_means, pass_stride);
- for(int i = 0; i < DENOISE_FEATURES; i++)
- features[i] *= feature_scale[i];
- math_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f);
+ math_vector_mul(features, feature_scale, DENOISE_FEATURES);
+ math_trimatrix_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f);
filter_get_feature_variance(pixel_buffer, features, feature_scale, pass_stride);
#ifdef FULL_EIGENVALUE_NORM
- math_add_gramian(perturbation_matrix, NORM_FEATURE_NUM, features, kernel_data.integrator.filter_strength);
+ math_trimatrix_add_gramian(perturbation_matrix, NORM_FEATURE_NUM, features, kernel_data.integrator.filter_strength);
#else
for(int i = 0; i < NORM_FEATURE_NUM; i++)
feature_matrix_norm += features[i + NORM_FEATURE_OFFSET]*kernel_data.integrator.filter_strength;
@@ -94,7 +91,7 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
} END_FOR_PIXEL_WINDOW
float *feature_transform = &storage->transform[0];
- int rank = math_jacobi_eigendecomposition(feature_matrix, feature_transform, DENOISE_FEATURES, 1);
+ int rank = math_trimatrix_jacobi_eigendecomposition(feature_matrix, feature_transform, DENOISE_FEATURES, 1);
#ifdef FULL_EIGENVALUE_NORM
float tempvector_2[2*DENOISE_FEATURES];
@@ -112,8 +109,7 @@ ccl_device void kernel_filter_construct_transform(KernelGlobals *kg, int sample,
if(i >= 2 && s < singular_threshold)
break;
/* Bake the feature scaling into the transformation matrix. */
- for(int j = 0; j < DENOISE_FEATURES; j++)
- feature_transform[rank*DENOISE_FEATURES + j] *= feature_scale[j];
+ math_vector_mul(feature_transform + rank*DENOISE_FEATURES, feature_scale, DENOISE_FEATURES);
}
#ifdef WITH_CYCLES_DEBUG_FILTER
@@ -162,7 +158,8 @@ ccl_device void kernel_filter_estimate_wlr_params(KernelGlobals *kg, int sample,
/* From here on, the mean of the features will be shifted to the central pixel's values. */
- float feature_means[DENOISE_FEATURES] = {0.0f};
+ float feature_means[DENOISE_FEATURES];
+ math_vector_zero(feature_means, DENOISE_FEATURES);
filter_get_features(make_int3(x, y, 0), center_buffer, feature_means, NULL, pass_stride);
int rank = storage->rank;
float *feature_transform = &storage->transform[0];
@@ -179,7 +176,7 @@ ccl_device void kernel_filter_estimate_wlr_params(KernelGlobals *kg, int sample,
float *XtWX = tempmatrix, *design_row = tempvector;
float3 XtWy[2*DENOISE_FEATURES+1];
- math_matrix_zero_lower(XtWX, matrix_size);
+ math_trimatrix_zero(XtWX, matrix_size);
math_vec3_zero(XtWy, matrix_size);
FOR_PIXEL_WINDOW {
float weig
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list