[Bf-blender-cvs] [f487a15] soc-2016-cycles_denoising: Cycles: Use the prefiltered shadow feature for denoising
Lukas Stockner
noreply at git.blender.org
Sun Jul 24 03:46:11 CEST 2016
Commit: f487a153359ef2d37aa993f5d294fd6c4e86abdb
Author: Lukas Stockner
Date: Sun Jul 24 02:18:18 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBf487a153359ef2d37aa993f5d294fd6c4e86abdb
Cycles: Use the prefiltered shadow feature for denoising
This commit finally adds the prefiltered shadow feature to the main denoising algorithm.
Doing so improves detail preservation a lot: although the main focus is sharp shadow edges, it also helps with ambient-occlusion-like and geometric details.
The only issue is that some geometric edges might be a bit noisier after denoising, but that will be fixed in the future by downweighting the shadow feature
wherever the geometric features (normals and depth) change strongly.
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 4a8acfd..47e977c 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -361,8 +361,8 @@ public:
RenderTile tile;
void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
- void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, int4);
- void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, int4);
+ void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
+ void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
@@ -442,21 +442,26 @@ public:
int offsets[9] = {0, 0, 0, 0, tile.offset, 0, 0, 0, 0};
int strides[9] = {0, 0, 0, 0, tile.stride, 0, 0, 0, 0};
float *buffers[9] = {NULL, NULL, NULL, NULL, (float*) tile.buffer, NULL, NULL, NULL, NULL};
- FilterStorage *storages = new FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
int overscan = tile.buffers->params.overscan;
int4 filter_rect = make_int4(tile.x + overscan, tile.y + overscan, tile.x + tile.w - overscan, tile.y + tile.h - overscan);
+ int4 prefilter_rect = make_int4(tile.x, tile.y, tile.x + tile.w, tile.y + tile.h);
+
+ float2* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, end_sample, buffers, tile_x, tile_y, offsets, strides);
+ FilterStorage *storages = new FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
+
for(int y = filter_rect.y; y < filter_rect.w; y++) {
for(int x = filter_rect.x; x < filter_rect.z; x++) {
- filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+ filter_estimate_params_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
for(int y = filter_rect.y; y < filter_rect.w; y++) {
for(int x = filter_rect.x; x < filter_rect.z; x++) {
- filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+ filter_final_pass_kernel(&kg, end_sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
+ delete[] prefiltered;
#ifdef WITH_CYCLES_DEBUG_FILTER
#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, name).c_str(), &storages[0].var, tile.buffers->params.final_width, tile.buffers->params.final_height, sizeof(FilterStorage)/sizeof(float), tile.buffers->params.final_width);
for(int i = 0; i < DENOISE_FEATURES; i++) {
@@ -494,16 +499,25 @@ public:
FilterStorage *storages = new FilterStorage[tile.w*tile.h];
int4 filter_rect = make_int4(tile.x, tile.y, tile.x + tile.w, tile.y + tile.h);
+ int hw = kg.__data.integrator.half_window;
+ int4 prefilter_rect = make_int4(max(tile.x - hw, tile_x[0]), max(tile.y - hw, tile_y[0]), min(tile.x + tile.w + hw+1, tile_x[3]), min(tile.y + tile.h + hw+1, tile_y[3]));
+
+ float2* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, sample, buffers, tile_x, tile_y, offsets, strides);
+
for(int y = filter_rect.y; y < filter_rect.w; y++) {
for(int x = filter_rect.x; x < filter_rect.z; x++) {
- filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+ filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
for(int y = filter_rect.y; y < filter_rect.w; y++) {
for(int x = filter_rect.x; x < filter_rect.z; x++) {
- filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
+ filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, prefiltered, filter_rect, prefilter_rect);
}
}
+ delete[] prefiltered;
+
+
+
#ifdef WITH_CYCLES_DEBUG_FILTER
#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, name).c_str(), &storages[0].var, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
for(int i = 0; i < DENOISE_FEATURES; i++) {
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 87b360b..dc649bb 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -18,18 +18,21 @@
CCL_NAMESPACE_BEGIN
-#define FOR_PIXEL_WINDOW for(int py = low.y; py < high.y; py++) { \
+#define FOR_PIXEL_WINDOW pre_buffer = prefiltered + (low.y - prefilter_rect.y)*prefilter_w + (low.x - prefilter_rect.x); \
+ for(int py = low.y; py < high.y; py++) { \
int ytile = (py < tile_y[1])? 0: ((py < tile_y[2])? 1: 2); \
- for(int px = low.x; px < high.x; px++) { \
- int xtile = (px < tile_x[1])? 0: ((px < tile_x[2])? 1: 2); \
- int tile = ytile*3+xtile; \
- buffer = buffers[tile] + (offset[tile] + py*stride[tile] + px)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
+ for(int px = low.x; px < high.x; px++, pre_buffer++) { \
+ int xtile = (px < tile_x[1])? 0: ((px < tile_x[2])? 1: 2); \
+ int tile = ytile*3+xtile; \
+ buffer = buffers[tile] + (offset[tile] + py*stride[tile] + px)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
-#define END_FOR_PIXEL_WINDOW }}
+#define END_FOR_PIXEL_WINDOW } \
+ pre_buffer += prefilter_w - (high.x - low.x); \
+ }
-#define FEATURE_PASSES 7 /* Normals, Albedo, Depth */
+#define FEATURE_PASSES 8 /* Normals, Albedo, Depth */
-ccl_device_inline void filter_get_features(int x, int y, float *buffer, float sample, float *features, float *mean)
+ccl_device_inline void filter_get_features(int x, int y, float *buffer, float2 *pre_buffer, float sample, float *features, float *mean)
{
float sample_scale = 1.0f/sample;
features[0] = x;
@@ -38,21 +41,22 @@ ccl_device_inline void filter_get_features(int x, int y, float *buffer, float sa
features[3] = buffer[0] * sample_scale;
features[4] = buffer[1] * sample_scale;
features[5] = buffer[2] * sample_scale;
- features[6] = buffer[6] * sample_scale;
- features[7] = buffer[7] * sample_scale;
- features[8] = buffer[8] * sample_scale;
+ features[6] = pre_buffer->x;
+ features[7] = buffer[6] * sample_scale;
+ features[8] = buffer[7] * sample_scale;
+ features[9] = buffer[8] * sample_scale;
if(mean) {
for(int i = 0; i < DENOISE_FEATURES; i++)
features[i] -= mean[i];
}
#ifdef DENOISE_SECOND_ORDER_SCREEN
- features[9] = features[0]*features[0];
- features[10] = features[1]*features[1];
- features[11] = features[0]*features[1];
+ features[10] = features[0]*features[0];
+ features[11] = features[1]*features[1];
+ features[12] = features[0]*features[1];
#endif
}
-ccl_device_inline void filter_get_feature_variance(int x, int y, float *buffer, float sample, float *features, float *scale)
+ccl_device_inline void filter_get_feature_variance(int x, int y, float *buffer, float2 *pre_buffer, float sample, float *features, float *scale)
{
float sample_scale = 1.0f/sample;
float sample_scale_var = 1.0f/(sample - 1.0f);
@@ -62,13 +66,14 @@ ccl_device_inline void filter_get_feature_variance(int x, int y, float *buffer,
features[3] = saturate(buffer[3] * sample_scale_var) * sample_scale;
features[4] = saturate(buffer[4] * sample_scale_var) * sample_scale;
features[5] = saturate(buffer[5] * sample_scale_var) * sample_scale;
- features[6] = saturate(buffer[9] * sample_scale_var) * sample_scale;
- features[7] = saturate(buffer[10] * sample_scale_var) * sample_scale;
- features[8] = saturate(buffer[11] * sample_scale_var) * sample_scale;
+ features[6] = saturate(pre_buffer->y);
+ features[7] = saturate(buffer[9] * sample_scale_var) * sample_scale;
+ features[8] = saturate(buffer[10] * sample_scale_var) * sample_scale;
+ features[9] = saturate(buffer[11] * sample_scale_var) * sample_scale;
#ifdef DENOISE_SECOND_ORDER_SCREEN
- features[9] = 0.0f;
features[10] = 0.0f;
features[11] = 0.0f;
+ features[12] = 0.0f;
#endif
for(int i = 0; i < DENOISE_FEATURES; i++)
features[i] *= scale[i]*scale[i];
@@ -224,16 +229,19 @@ ccl_device void kernel_filter_combine_halves(int x, int y, float *mean, float *v
* - Start of the next upper/right neighbor (not accessed)
* buffers contains the nine buffer pointers (y-major ordering, starting with the lower left tile), offset and stride the respective parameters of the tile.
*/
-ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage, int4 filter_rect)
+ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage, float2 *prefiltered, int4 filter_rect, int4 prefilter_rect)
{
storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + (x-filter_rect.x);
+ int prefilter_w = (prefilter_rect.z - prefilter_rect.x);
/* Temporary storage, used in different steps of the algorithm. */
float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], tempvector[4*DENOISE_FEATURES+1];
float *buffer, features[DENOISE_FEATURES];
+ float2 *pre_buffer;
/* === Get center pixel color and variance. === */
float *center_buffer = buffers[4] + (offset[4] + y*stride[4] + x)*kernel
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list