[Bf-blender-cvs] [2be1f63] soc-2016-cycles_denoising: Cycles: Prefilter all feature passes
Lukas Stockner
noreply at git.blender.org
Sat Aug 6 05:41:02 CEST 2016
Commit: 2be1f631ee169c7b394ecfb6c3b0a698809c1ef0
Author: Lukas Stockner
Date: Thu Jul 28 04:14:00 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB2be1f631ee169c7b394ecfb6c3b0a698809c1ef0
Cycles: Prefilter all feature passes
This commit adds prefiltering to all feature passes, instead of just the shadow pass.
Feature passes are supposed to be noise-free, but effects like Depth of Field, Motion Blur or slightly glossy shaders could still produce noticeable amounts of noise.
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 67b4df8..adc0414 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -208,15 +208,17 @@ public:
}
};
- float2* denoise_prefilter(int4 prefilter_rect, RenderTile &tile, KernelGlobals *kg, int sample, float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides)
+ float* denoise_prefilter(int4 prefilter_rect, RenderTile &tile, KernelGlobals *kg, int sample, float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides)
{
void(*filter_divide_shadow)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, float*, float*, float*, float*, int4);
+ void(*filter_get_feature)(KernelGlobals*, int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int4);
void(*filter_non_local_means)(int, int, float*, float*, float*, float*, int4, int, int, float, float);
- void(*filter_combine_halves)(int, int, float*, float*, float*, float*, int, int4);
+ void(*filter_combine_halves)(int, int, float*, float*, float*, float*, int4);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
filter_divide_shadow = kernel_cpu_avx2_filter_divide_shadow;
+ filter_get_feature = kernel_cpu_avx2_filter_get_feature;
filter_non_local_means = kernel_cpu_avx2_filter_non_local_means;
filter_combine_halves = kernel_cpu_avx2_filter_combine_halves;
}
@@ -225,6 +227,7 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
filter_divide_shadow = kernel_cpu_avx_filter_divide_shadow;
+ filter_get_feature = kernel_cpu_avx_filter_get_feature;
filter_non_local_means = kernel_cpu_avx_filter_non_local_means;
filter_combine_halves = kernel_cpu_avx_filter_combine_halves;
}
@@ -233,6 +236,7 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
filter_divide_shadow = kernel_cpu_sse41_filter_divide_shadow;
+ filter_get_feature = kernel_cpu_sse41_filter_get_feature;
filter_non_local_means = kernel_cpu_sse41_filter_non_local_means;
filter_combine_halves = kernel_cpu_sse41_filter_combine_halves;
}
@@ -241,6 +245,7 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
filter_divide_shadow = kernel_cpu_sse3_filter_divide_shadow;
+ filter_get_feature = kernel_cpu_sse3_filter_get_feature;
filter_non_local_means = kernel_cpu_sse3_filter_non_local_means;
filter_combine_halves = kernel_cpu_sse3_filter_combine_halves;
}
@@ -249,6 +254,7 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
filter_divide_shadow = kernel_cpu_sse2_filter_divide_shadow;
+ filter_get_feature = kernel_cpu_sse2_filter_get_feature;
filter_non_local_means = kernel_cpu_sse2_filter_non_local_means;
filter_combine_halves = kernel_cpu_sse2_filter_combine_halves;
}
@@ -256,16 +262,52 @@ public:
#endif
{
filter_divide_shadow = kernel_cpu_filter_divide_shadow;
+ filter_get_feature = kernel_cpu_filter_get_feature;
filter_non_local_means = kernel_cpu_filter_non_local_means;
filter_combine_halves = kernel_cpu_filter_combine_halves;
}
int w = (prefilter_rect.z - prefilter_rect.x), h = (prefilter_rect.w - prefilter_rect.y);
- float2 *prefiltered = new float2[w*h];
- float *unfiltered = new float[2*w*h], *sampleV = ((float*) prefiltered), *sampleVV = new float[w*h], *bufferV = ((float*) prefiltered) + w*h, *cleanV = new float[w*h];
+ float *prefiltered = new float[16*w*h];
+ float *unfiltered = new float[2*w*h];
+ /* Prefilter general features. */
+ int m_offsets[] = {0, 1, 2, 6, 7, 8, 12};
+ int variances[] = {3, 4, 5, 9, 10, 11, 13};
+ for(int i = 0; i < 7; i++) {
+ for(int y = prefilter_rect.y; y < prefilter_rect.w; y++) {
+ for(int x = prefilter_rect.x; x < prefilter_rect.z; x++) {
+ filter_get_feature(kg, sample, buffers, m_offsets[i], variances[i], x, y, tile_x, tile_y, offsets, strides, unfiltered, prefiltered + (2*i+1)*w*h, prefilter_rect);
+ }
+ }
+ for(int y = prefilter_rect.y; y < prefilter_rect.w; y++) {
+ for(int x = prefilter_rect.x; x < prefilter_rect.z; x++) {
+ filter_non_local_means(x, y, unfiltered, unfiltered, prefiltered + (2*i+1)*w*h, prefiltered + 2*i*w*h, prefilter_rect, 2, 2, 1, 0.25f);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_feature%d_%s.pfm", tile.x, tile.y, i, name).c_str(), var, w, h, 1, w)
+ WRITE_DEBUG("unfiltered", unfiltered);
+ WRITE_DEBUG("sampleV", prefiltered + (2*i+1)*w*h);
+ WRITE_DEBUG("filtered", prefiltered + 2*i*w*h);
+#undef WRITE_DEBUG
+#endif
+ }
+
+
+
+
+
+
+
+
+
+
+
+ float *sampleV = prefiltered + 14*w*h, *sampleVV = new float[w*h], *bufferV = prefiltered + 15*w*h, *cleanV = new float[w*h];
+
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
for(int y = prefilter_rect.y; y < prefilter_rect.w; y++) {
for(int x = prefilter_rect.x; x < prefilter_rect.z; x++) {
@@ -273,12 +315,12 @@ public:
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var, stride) debug_write_pfm(string_printf("debug_%dx%d_shadow_%s.pfm", tile.x, tile.y, name).c_str(), var, w, h, stride, w)
- WRITE_DEBUG("unfilteredA", unfiltered, 1);
- WRITE_DEBUG("unfilteredB", unfiltered + w*h, 1);
- WRITE_DEBUG("bufferV", bufferV, 1);
- WRITE_DEBUG("sampleV", sampleV, 1);
- WRITE_DEBUG("sampleVV", sampleVV, 1);
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_shadow_%s.pfm", tile.x, tile.y, name).c_str(), var, w, h, 1, w)
+ WRITE_DEBUG("unfilteredA", unfiltered);
+ WRITE_DEBUG("unfilteredB", unfiltered + w*h);
+ WRITE_DEBUG("bufferV", bufferV);
+ WRITE_DEBUG("sampleV", sampleV);
+ WRITE_DEBUG("sampleVV", sampleVV);
#endif
@@ -291,7 +333,7 @@ public:
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("cleanV", cleanV, 1);
+ WRITE_DEBUG("cleanV", cleanV);
#endif
@@ -305,8 +347,8 @@ public:
}
delete[] cleanV;
#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("filteredA", sampleV, 1);
- WRITE_DEBUG("filteredB", bufferV, 1);
+ WRITE_DEBUG("filteredA", sampleV);
+ WRITE_DEBUG("filteredB", bufferV);
#endif
@@ -314,11 +356,11 @@ public:
/* Estimate the residual variance between the two filtered halves. */
for(int y = prefilter_rect.y; y < prefilter_rect.w; y++) {
for(int x = prefilter_rect.x; x < prefilter_rect.z; x++) {
- filter_combine_halves(x, y, NULL, sampleVV, sampleV, bufferV, 1, prefilter_rect);
+ filter_combine_halves(x, y, NULL, sampleVV, sampleV, bufferV, prefilter_rect);
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("residualV", sampleVV, 1);
+ WRITE_DEBUG("residualV", sampleVV);
#endif
/* Use the residual variance for a second filter pass. */
@@ -330,20 +372,20 @@ public:
}
delete[] sampleVV;
#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("finalA", unfiltered, 1);
- WRITE_DEBUG("finalB", unfiltered + w*h, 1);
+ WRITE_DEBUG("finalA", unfiltered);
+ WRITE_DEBUG("finalB", unfiltered + w*h);
#endif
/* Combine the two double-filtered halves to a final shadow feature image and associated variance. */
for(int y = prefilter_rect.y; y < prefilter_rect.w; y++) {
for(int x = prefilter_rect.x; x < prefilter_rect.z; x++) {
- filter_combine_halves(x, y, (float*) prefiltered, ((float*) prefiltered)+1, unfiltered, unfiltered + w*h, 2, prefilter_rect);
+ filter_combine_halves(x, y, prefiltered + 14*w*h, prefiltered + 15*w*h, unfiltered, unfiltered + w*h, prefilter_rect);
}
}
delete[] unfiltered;
#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("final", (float*) prefiltered, 2);
- WRITE_DEBUG("finalV", ((float*) prefiltered) + 1, 2);
+ WRITE_DEBUG("final", prefiltered + 14*w*h);
+ WRITE_DEBUG("finalV", prefiltered + 15*w*h);
#undef WRITE_DEBUG
#endif
@@ -361,8 +403,8 @@ public:
RenderTile tile;
void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
- void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
- void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float2*, int4, int4);
+ void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float*, int4, int4);
+ void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, float*, int4, int4);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
@@ -450,7 +492,7 @@ public:
int4 filter_rect = make_int4(tile.x + overscan, tile.y + overscan, tile.x + tile.w - overscan, tile.y + tile.h - overscan);
int4 prefilter_rect = make_int4(tile.x, tile.y, tile.x + tile.w, tile.y + tile.h);
- float2* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, end_sample, buffers, tile_x, tile_y, offsets, strides);
+ float* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, end_sample, buffers, tile_x, tile_y, offsets, strides);
FilterStorage *storages = new FilterStorage[tile.buffers->params.final_width*tile.buffers->params.final_height];
for(int y = filter_rect.y; y < filter_rect.w; y++) {
@@ -505,7 +547,7 @@ public:
int hw = kg.__data.integrator.half_window;
int4 prefilter_rect = make_int4(max(tile.x - hw, tile_x[0]), max(tile.y - hw, tile_y[0]), min(tile.x + tile.w + hw+1, tile_x[3]), min(tile.y + tile.h + hw+1, tile_y[3]));
- float2* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, sample, buffers, tile_x, tile_y, offsets, strides);
+ float* prefiltered = denoise_prefilter(prefilter_rect, tile, &kg, sample, buffers, tile_x, tile_y, offsets, strides);
for(int y = filter_rect.y; y < filter_rect.w; y++) {
for(int x = filter_rect.x; x < filter_rect.z; x++) {
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 5ae298d..b6e48c7 100644
--- a/intern/cycles/device/device_cuda.cpp
+
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list