[Bf-blender-cvs] [8771846] soc-2016-cycles_denoising: Cycles Denoising: Tweak shadow filtering
Lukas Stockner
noreply at git.blender.org
Sun Aug 21 06:18:13 CEST 2016
Commit: 8771846b2bd7d558d7d0aa1843c5a42fa9cf3f09
Author: Lukas Stockner
Date: Thu Aug 18 12:32:23 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB8771846b2bd7d558d7d0aa1843c5a42fa9cf3f09
Cycles Denoising: Tweak shadow filtering
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_filter_pre.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 8709030..ad52c90 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -213,7 +213,7 @@ public:
void(*filter_divide_shadow)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, float*, float*, float*, float*, int4);
void(*filter_get_feature)(KernelGlobals*, int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int4);
void(*filter_non_local_means)(int, int, float*, float*, float*, float*, int4, int, int, float, float);
- void(*filter_combine_halves)(int, int, float*, float*, float*, float*, int4);
+ void(*filter_combine_halves)(int, int, float*, float*, float*, float*, int4, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
@@ -341,7 +341,7 @@ public:
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_non_local_means(x, y, bufferV, sampleV, sampleVV, cleanV, rect, 3, 1, 4, 1.0f);
+ filter_non_local_means(x, y, bufferV, sampleV, sampleVV, cleanV, rect, 6, 3, 4, 1.0f);
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
@@ -363,7 +363,7 @@ public:
/* Estimate the residual variance between the two filtered halves. */
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_combine_halves(x, y, NULL, sampleVV, sampleV, bufferV, rect);
+ filter_combine_halves(x, y, NULL, sampleVV, sampleV, bufferV, rect, 2);
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
@@ -373,8 +373,8 @@ public:
/* Use the residual variance for a second filter pass. */
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_non_local_means(x, y, sampleV, bufferV, sampleVV, unfiltered , rect, 4, 2, 1, 0.25f);
- filter_non_local_means(x, y, bufferV, sampleV, sampleVV, unfiltered + pass_stride, rect, 4, 2, 1, 0.25f);
+ filter_non_local_means(x, y, sampleV, bufferV, sampleVV, unfiltered , rect, 4, 2, 1, 0.5f);
+ filter_non_local_means(x, y, bufferV, sampleV, sampleVV, unfiltered + pass_stride, rect, 4, 2, 1, 0.5f);
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
@@ -385,7 +385,7 @@ public:
/* Combine the two double-filtered halves to a final shadow feature image and associated variance. */
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_combine_halves(x, y, filter_buffer + 8*pass_stride, filter_buffer + 9*pass_stride, unfiltered, unfiltered + pass_stride, rect);
+ filter_combine_halves(x, y, filter_buffer + 8*pass_stride, filter_buffer + 9*pass_stride, unfiltered, unfiltered + pass_stride, rect, 0);
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 60fe45b..3d165ff 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -928,8 +928,8 @@ public:
0, 0, divide_args, 0));
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
- float a = 4.0f, k_2 = 1.0f;
- int r = 3, f = 1;
+ float a = 2.0f, k_2 = 2.0f;
+ int r = 6, f = 3;
void *filter_variance_args[] = {&d_bufferV, &d_sampleV, &d_sampleVV, &d_cleanV,
&rect,
&r, &f, &a, &k_2};
@@ -959,8 +959,9 @@ public:
cuda_assert(cuCtxSynchronize());
/* Estimate the residual variance between the two filtered halves. */
+ int var_r = 2;
void *residual_variance_args[] = {&d_null, &d_sampleVV, &d_sampleV, &d_bufferV,
- &rect};
+ &rect, &var_r};
cuda_assert(cuLaunchKernel(cuFilterCombineHalves,
xblocks , yblocks, 1, /* blocks */
xthreads, ythreads, 1, /* threads */
@@ -968,6 +969,7 @@ public:
/* Use the residual variance for a second filter pass. */
r = 4; f = 2;
+ k_2 = 1.0f;
void *filter_filteredA_args[] = {&d_sampleV, &d_bufferV, &d_sampleVV, &d_unfilteredA,
&rect,
&r, &f, &a, &k_2};
@@ -986,9 +988,10 @@ public:
cuda_assert(cuCtxSynchronize());
/* Combine the two double-filtered halves to a final shadow feature image and associated variance. */
+ var_r = 0;
void *final_prefiltered_args[] = {&d_mean, &d_variance,
&d_unfilteredA, &d_unfilteredB,
- &rect};
+ &rect, &var_r};
cuda_assert(cuLaunchKernel(cuFilterCombineHalves,
xblocks , yblocks, 1, /* blocks */
xthreads, ythreads, 1, /* threads */
diff --git a/intern/cycles/kernel/kernel_filter_pre.h b/intern/cycles/kernel/kernel_filter_pre.h
index 7f0cd30..b46753d 100644
--- a/intern/cycles/kernel/kernel_filter_pre.h
+++ b/intern/cycles/kernel/kernel_filter_pre.h
@@ -69,13 +69,35 @@ ccl_device void kernel_filter_get_feature(KernelGlobals *kg, int sample, float *
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
-ccl_device void kernel_filter_combine_halves(int x, int y, float *mean, float *variance, float *a, float *b, int4 rect)
+ccl_device void kernel_filter_combine_halves(int x, int y, float *mean, float *variance, float *a, float *b, int4 rect, int r)
{
int buffer_w = align_up(rect.z - rect.x, 4);
int idx = (y-rect.y)*buffer_w + (x - rect.x);
if(mean) mean[idx] = 0.5f * (a[idx]+b[idx]);
- if(variance) variance[idx] = 0.5f * (a[idx]-b[idx])*(a[idx]-b[idx]);
+ if(variance) {
+ if(r == 0) variance[idx] = 0.5f * (a[idx]-b[idx])*(a[idx]-b[idx]);
+ else {
+ variance[idx] = 0.0f;
+ float values[25];
+ int numValues = 0;
+ for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) {
+ for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) {
+ int pidx = (py-rect.y)*buffer_w + (px-rect.x);
+ values[numValues++] = 0.5f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]);
+ }
+ }
+ /* Insertion-sort the variances (fast enough for 25 elements). */
+ for(int i = 1; i < numValues; i++) {
+ float v = values[i];
+ int j;
+ for(j = i-1; j >= 0 && values[j] > v; j--)
+ values[j+1] = values[j];
+ values[j+1] = v;
+ }
+ variance[idx] = values[(7*numValues)/8];
+ }
+ }
}
/* General Non-Local Means filter implementation.
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 56897dc..cf632a7 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -93,7 +93,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *variance,
float *a,
float *b,
- int4 prefilter_rect);
+ int4 prefilter_rect,
+ int r);
void KERNEL_FUNCTION_FULL_NAME(filter_estimate_params)(KernelGlobals *kg,
int sample,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index fdbda08..ce2e656 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -183,9 +183,10 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *variance,
float *a,
float *b,
- int4 prefilter_rect)
+ int4 prefilter_rect,
+ int r)
{
- kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect);
+ kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
}
void KERNEL_FUNCTION_FULL_NAME(filter_estimate_params)(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 70f6c2c..2e5a396 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -251,12 +251,12 @@ kernel_cuda_filter_non_local_means(float *noisyImage, float *weightImage, float
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect)
+kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect);
+ kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
}
}
More information about the Bf-blender-cvs mailing list