[Bf-blender-cvs] [aa605da] soc-2016-cycles_denoising: Cycles: Experimental collaborative denoising
Lukas Stockner
noreply at git.blender.org
Tue Nov 22 04:25:20 CET 2016
Commit: aa605da25af6d8e2fdd66433b603451e3926683a
Author: Lukas Stockner
Date: Mon Nov 14 18:00:58 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBaa605da25af6d8e2fdd66433b603451e3926683a
Cycles: Experimental collaborative denoising
Right now tile borders are very visible, but that'll be fixed!
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/render/integrator.cpp
M intern/cycles/util/util_math_matrix.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 00fef00..be9bb1d 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -433,6 +433,7 @@ public:
bool old_filter = getenv("OLD_FILTER");
bool only_nlm_filter = getenv("ONLY_NLM_FILTER");
bool nlm_filter = getenv("NLM_FILTER");
+ bool use_collaborative_filtering = kg->__data.integrator.use_collaborative_filtering;
FilterStorage *storage = new FilterStorage[filter_area.z*filter_area.w];
int hw = kg->__data.integrator.half_window;
@@ -440,6 +441,17 @@ public:
int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
int pass_stride = w*h;
+ if(use_collaborative_filtering) {
+ for(int y = 0; y < filter_area.w; y++) {
+ int py = y + filter_area.y;
+ for(int x = 0; x < filter_area.z; x++) {
+ int px = x + filter_area.x;
+ float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
+ p_buffers[0] = p_buffers[1] = p_buffers[2] = p_buffers[3] = 0.0f;
+ }
+ }
+ }
+
if(old_filter) {
for(int y = 0; y < filter_area.w; y++) {
for(int x = 0; x < filter_area.z; x++) {
@@ -495,10 +507,6 @@ public:
for(int y = 0; y < filter_area.w; y++) {
for(int x = 0; x < filter_area.z; x++) {
filter_construct_transform_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, &rect.x);
- }
- }
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
filter_final_pass_nlm_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storage + y*filter_area.z + x, &filter_area.x, &rect.x);
}
}
@@ -534,6 +542,22 @@ public:
#undef WRITE_DEBUG
#endif
}
+
+ if(use_collaborative_filtering) {
+ for(int y = 0; y < filter_area.w; y++) {
+ int py = y + filter_area.y;
+ for(int x = 0; x < filter_area.z; x++) {
+ int px = x + filter_area.x;
+ float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
+ float fac = sample / p_buffers[3];
+ p_buffers[0] *= fac;
+ p_buffers[1] *= fac;
+ p_buffers[2] *= fac;
+ p_buffers[3] *= fac;
+ }
+ }
+ }
+
delete[] storage;
}
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 990b419..ea065f9 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -383,15 +383,26 @@ ccl_device void kernel_filter_final_pass_wlr(KernelGlobals *kg, int sample, floa
math_cholesky(XtX, matrix_size);
math_inverse_lower_tri_inplace(XtX, matrix_size);
- float r_feature_weight[DENOISE_FEATURES+1];
- math_vector_zero(r_feature_weight, matrix_size);
- for(int col = 0; col < matrix_size; col++)
- for(int row = col; row < matrix_size; row++)
- r_feature_weight[col] += XtX[row]*XtX[col*matrix_size+row];
+ for(int row = 0; row < matrix_size; row++)
+ for(int col = 0; col < row; col++)
+ XtX[row*matrix_size+col] = 0.0f;
+
+ /* When using collaborative filtering, we need to solve for the intercept and gradients.
+ * Otherwise, just the intercept is enough. */
+ int solution_size = kernel_data.integrator.use_collaborative_filtering? matrix_size : 1;
+ float XtWXinv[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)];
+ if(kernel_data.integrator.use_collaborative_filtering)
+ math_matrix_zero(XtWXinv, matrix_size);
+ else
+ math_vector_zero(XtWXinv, matrix_size);
+ for(int i = 0; i < solution_size; i++)
+ for(int col = 0; col < matrix_size; col++)
+ for(int row = 0; row < matrix_size; row++)
+ XtWXinv[i*matrix_size+col] += XtX[i*matrix_size+row]*XtX[col*matrix_size+row];
+ //math_lower_tri_to_full(XtWXinv, matrix_size);
- float3 final_color = make_float3(0.0f, 0.0f, 0.0f);
- float3 final_pos_color = make_float3(0.0f, 0.0f, 0.0f);
- float pos_weight = 0.0f;
+ float3 solution[DENOISE_FEATURES+1];
+ math_vec3_zero(solution, solution_size);
FOR_PIXEL_WINDOW {
float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
@@ -402,34 +413,47 @@ ccl_device void kernel_filter_final_pass_wlr(KernelGlobals *kg, int sample, floa
if(weight == 0.0f) continue;
weight /= max(1.0f, variance);
- weight *= math_dot(design_row, r_feature_weight, matrix_size);
-
- final_color += weight * color;
- if(weight >= 0.0f) {
- final_pos_color += weight * color;
- pos_weight += weight;
+ for(int i = 0; i < solution_size; i++) {
+ float XtWXinvXt = math_dot(XtWXinv + i*matrix_size, design_row, matrix_size);
+ solution[i] += XtWXinvXt*weight*color;
}
} END_FOR_PIXEL_WINDOW
- if(final_color.x < 0.0f || final_color.y < 0.0f || final_color.z < 0.0f) {
- final_color = final_pos_color / max(pos_weight, 1e-5f);
- }
- final_color *= sample;
-
- float *combined_buffer = buffers + (offset + y*stride + x)*kernel_data.film.pass_stride;
- if(kernel_data.film.pass_no_denoising)
- final_color += make_float3(combined_buffer[kernel_data.film.pass_no_denoising],
- combined_buffer[kernel_data.film.pass_no_denoising+1],
- combined_buffer[kernel_data.film.pass_no_denoising+2]);
+ if(kernel_data.integrator.use_collaborative_filtering) {
+ FOR_PIXEL_WINDOW {
+ float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
+ float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
+ if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) continue;
- combined_buffer[0] = final_color.x;
- combined_buffer[1] = final_color.y;
- combined_buffer[2] = final_color.z;
+ filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
+ float weight = filter_fill_design_row_cuda(features, rank, design_row, feature_transform, bandwidth_factor);
+ if(weight == 0.0f) continue;
+ weight /= max(1.0f, variance);
-#ifdef WITH_CYCLES_DEBUG_FILTER
- storage->log_rmse_per_sample -= 2.0f * logf(linear_rgb_to_gray(final_color) + 0.001f);
-#endif
+ float3 reconstruction = math_dot_vec3(design_row, solution, matrix_size);
+ if(py >= filter_area.y && py < filter_area.y+filter_area.w && px >= filter_area.x && px < filter_area.x+filter_area.z) {
+ float *combined_buffer = buffers + (offset + py*stride + px)*kernel_data.film.pass_stride;
+ combined_buffer[0] += weight*reconstruction.x;
+ combined_buffer[1] += weight*reconstruction.y;
+ combined_buffer[2] += weight*reconstruction.z;
+ combined_buffer[3] += weight;
+ }
+ } END_FOR_PIXEL_WINDOW
+ }
+ else {
+ float3 final_color = sample*solution[0];
+
+ float *combined_buffer = buffers + (offset + y*stride + x)*kernel_data.film.pass_stride;
+ if(kernel_data.film.pass_no_denoising)
+ final_color += make_float3(combined_buffer[kernel_data.film.pass_no_denoising],
+ combined_buffer[kernel_data.film.pass_no_denoising+1],
+ combined_buffer[kernel_data.film.pass_no_denoising+2]);
+
+ combined_buffer[0] = final_color.x;
+ combined_buffer[1] = final_color.y;
+ combined_buffer[2] = final_color.z;
+ }
}
ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float *buffers, float ccl_readonly_ptr transform, CUDAFilterStorage *storage, int4 filter_area, int4 rect, int transform_stride, int localIdx)
@@ -498,15 +522,27 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
math_cholesky(XtX, matrix_size);
math_inverse_lower_tri_inplace(XtX, matrix_size);
- float r_feature_weight[DENOISE_FEATURES+1];
- math_vector_zero(r_feature_weight, matrix_size);
- for(int col = 0; col < matrix_size; col++)
- for(int row = col; row < matrix_size; row++)
- r_feature_weight[col] += XtX[row]*XtX[col*matrix_size+row];
- float3 final_color = make_float3(0.0f, 0.0f, 0.0f);
- float3 final_pos_color = make_float3(0.0f, 0.0f, 0.0f);
- float pos_weight = 0.0f;
+ for(int row = 0; row < matrix_size; row++)
+ for(int col = 0; col < row; col++)
+ XtX[row*matrix_size+col] = 0.0f;
+
+ /* When using collaborative filtering, we need to solve for the intercept and gradients.
+ * Otherwise, just the intercept is enough. */
+ int solution_size = kernel_data.integrator.use_collaborative_filtering? matrix_size : 1;
+ float XtWXinv[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)];
+ if(kernel_data.integrator.use_collaborative_filtering)
+ math_matrix_zero(XtWXinv, matrix_size);
+ else
+ math_vector_zero(XtWXinv, matrix_size);
+ for(int i = 0; i < solution_size; i++)
+ for(int col = 0; col < matrix_size; col++)
+ for(int row = 0; row < matrix_size; row++)
+ XtWXinv[i*matrix_size+col] += XtX[i*matrix_size+row]*XtX[col*matrix_size+row];
+ //math_lower_tri_to_full(XtWXinv, matrix_size);
+
+ float3 solution[DENOISE_FEATURES+1];
+ math_vec3_zero(solution, solution_size);
FOR_PIXEL_WINDOW {
float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
@@ -518,34 +554,49 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
float weight = nlm_weight(x, y, px, py, center_buffer, pixel_buffer, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
if(weight == 0.0f) continue;
weight /= max(1.0f, variance);
- weight *= math_dot(design_row, r_feature_weight, matrix_size);
- final_color += weight * color;
-
- if(weight >= 0.0f) {
- final_pos_color += weight * color;
- pos_weight += weight;
+ for(int i = 0; i < solution_size; i++) {
+ float XtWXinvXt = math_dot(XtWXinv + i*matrix_size, design_row, matrix_size);
+ solution[i] += XtWXinvXt*weight*color;
}
} END_FOR_PIXEL_WINDOW
- if(final_color.x < 0.0f || final_color.y < 0.0f || final_color.z < 0.0f) {
- final_color = final_pos_color / max(pos_weight, 1e-5f);
- }
- final_color *= sample;
+ if(kernel_data.integrator.use_collaborative_filtering) {
+ FOR_PIXEL_WINDOW {
+ float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
+ float variance = filter_get
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list