[Bf-blender-cvs] [af5a75bae0] soc-2016-cycles_denoising: Cycles Denoising: Remove WLR and experimental NFOR modes
Lukas Stockner
noreply at git.blender.org
Thu Jan 12 05:14:09 CET 2017
Commit: af5a75bae0a2e877ec22f0bb1df302535421802f
Author: Lukas Stockner
Date: Wed Jan 11 07:01:52 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBaf5a75bae0a2e877ec22f0bb1df302535421802f
Cycles Denoising: Remove WLR and experimental NFOR modes
The denoising code started out as an implementation of WLR, but the NLM mode is working so much better that I decided to remove the WLR mode completely.
This makes it possible to get rid of a significant amount of complexity and code.
Also, the NFOR mode is removed - the name is misleading, since most of the ideas behind the NFOR paper are actually what powers the NLM mode. NFOR mode was just an experiment with removing the T-SVD feature space reduction, and it turned out that the experiment failed.
===================================================================
M intern/cycles/blender/addon/ui.py
M intern/cycles/blender/blender_session.cpp
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/filter/filter.h
M intern/cycles/kernel/filter/filter_final_pass_impl.h
M intern/cycles/kernel/filter/filter_wlr.h
M intern/cycles/kernel/filter/filter_wlr_cuda.h
M intern/cycles/kernel/filter/filter_wlr_sse.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
M intern/cycles/render/denoising.cpp
M intern/cycles/render/integrator.cpp
M intern/cycles/render/integrator.h
M intern/cycles/render/session.cpp
M intern/cycles/render/session.h
M source/blender/makesdna/DNA_scene_types.h
M source/blender/makesrna/intern/rna_scene.c
===================================================================
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 659d068321..b1c23609f6 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -579,7 +579,6 @@ class CyclesRender_PT_denoising(CyclesButtonsPanel, Panel):
sub.prop(rl, "filter_strength", slider=True)
sub.prop(rl, "filter_weighting_adjust", slider=True)
sub.prop(rl, "filter_gradients")
- sub.prop(rl, "filter_use_nlm_weights")
sub.prop(rl, "filter_cross")
sub = col.column(align=True)
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index d1ec0910d3..2982c7b286 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -468,9 +468,7 @@ void BlenderSession::render()
buffer_params.cross_denoising = scene->film->cross_denoising;
scene->integrator->half_window = b_layer_iter->half_window();
scene->integrator->filter_strength = powf(2.0f, b_layer_iter->filter_strength());
- scene->integrator->weighting_adjust = powf(2.0f, b_layer_iter->filter_weighting_adjust());
- scene->integrator->use_nlm_weights = b_layer_iter->filter_use_nlm_weights();
- if(scene->integrator->use_nlm_weights) scene->integrator->weighting_adjust /= 2.0f;
+ scene->integrator->weighting_adjust = powf(2.0f, b_layer_iter->filter_weighting_adjust() - 1.0f);
scene->integrator->use_gradients = b_layer_iter->filter_gradients();
scene->film->pass_alpha_threshold = b_layer_iter->pass_alpha_threshold();
@@ -1389,13 +1387,12 @@ void BlenderSession::denoise(BL::RenderResult& b_rr)
int half_window = -1;
float filter_strength = 0.0f;
float weight_adjust = 0.0f;
- bool filter_nlm, filter_gradient;
+ bool filter_gradient;
for(r.layers.begin(b_s_layer_iter); b_s_layer_iter != r.layers.end(); ++b_s_layer_iter) {
if(b_s_layer_iter->name() == b_layer_iter->name()) {
half_window = b_s_layer_iter->half_window();
filter_strength = b_s_layer_iter->filter_strength();
weight_adjust = b_s_layer_iter->filter_weighting_adjust();
- filter_nlm = b_s_layer_iter->filter_use_nlm_weights();
filter_gradient = b_s_layer_iter->filter_gradients();
break;
}
@@ -1405,10 +1402,8 @@ void BlenderSession::denoise(BL::RenderResult& b_rr)
session->params.half_window = half_window;
session->params.samples = get_int(cscene, "samples");
session->params.filter_strength = powf(2.0f, filter_strength);
- session->params.filter_weight_adjust = powf(2.0f, weight_adjust);
+ session->params.filter_weight_adjust = powf(2.0f, weight_adjust - 1.0f);
session->params.filter_gradient = filter_gradient;
- session->params.filter_nlm = filter_nlm;
- if(filter_nlm) session->params.filter_weight_adjust /= 2.0f;
session->buffers = BlenderSync::get_render_buffer(session->device, *b_layer_iter, b_rr, session->params.samples);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c8203fe802..31191f3c84 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -138,9 +138,7 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, float, float)> filter_non_local_means_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, void*, int*)> filter_construct_transform_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, void*, int*)> filter_estimate_wlr_params_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)> filter_final_pass_wlr_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)> filter_final_pass_nlm_kernel;
+ KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)> filter_reconstruct_kernel;
KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)> filter_divide_combined_kernel;
#define KERNEL_FUNCTIONS(name) \
@@ -162,9 +160,7 @@ public:
filter_non_local_means_kernel(KERNEL_FUNCTIONS(filter_non_local_means)),
filter_combine_halves_kernel(KERNEL_FUNCTIONS(filter_combine_halves)),
filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
- filter_estimate_wlr_params_kernel(KERNEL_FUNCTIONS(filter_estimate_wlr_params)),
- filter_final_pass_wlr_kernel(KERNEL_FUNCTIONS(filter_final_pass_wlr)),
- filter_final_pass_nlm_kernel(KERNEL_FUNCTIONS(filter_final_pass_nlm)),
+ filter_reconstruct_kernel(KERNEL_FUNCTIONS(filter_reconstruct)),
filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined))
{
#ifdef WITH_OSL
@@ -454,7 +450,6 @@ public:
void denoise_run(KernelGlobals *kg, int sample, float *filter_buffer, int4 filter_area, int4 rect, int offset, int stride, float *buffers)
{
bool use_gradients = kg->__data.integrator.use_gradients;
- bool nlm_weights = kg->__data.integrator.use_nlm_weights;
int hw = kg->__data.integrator.half_window;
FilterStorage *storage = new FilterStorage[filter_area.z*filter_area.w];
@@ -463,47 +458,11 @@ public:
int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
int pass_stride = w*h;
- if(nlm_weights) {
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_construct_transform_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, &rect.x);
- filter_final_pass_nlm_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storage + y*filter_area.z + x, weight_cache, &filter_area.x, &rect.x);
- }
- }
- }
- else {
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_construct_transform_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, &rect.x);
- filter_estimate_wlr_params_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, &rect.x);
- }
+ for(int y = 0; y < filter_area.w; y++) {
+ for(int x = 0; x < filter_area.z; x++) {
+ filter_construct_transform_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, &rect.x);
+ filter_reconstruct_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storage + y*filter_area.z + x, weight_cache, &filter_area.x, &rect.x);
}
-#ifdef WITH_CYCLES_DEBUG_FILTER
- DenoiseDebug debug(filter_area.z, filter_area.w, 4 * DENOISE_FEATURES + 6);
-
-#define WRITE_DEBUG(name, var) debug.add_pass(name, &storage[0].var, sizeof(FilterStorage)/sizeof(float), filter_area.z);
- for(int i = 0; i < DENOISE_FEATURES; i++) {
- WRITE_DEBUG(string_printf("mean_%d", i), means[i]);
- WRITE_DEBUG(string_printf("scale_%d", i), scales[i]);
- WRITE_DEBUG(string_printf("singular_%d", i), singular[i]);
- WRITE_DEBUG(string_printf("bandwidth_%d", i), bandwidth[i]);
- }
- WRITE_DEBUG("singular_threshold", singular_threshold);
- WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
- WRITE_DEBUG("global_bandwidth", global_bandwidth);
-#endif
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_final_pass_wlr_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storage + y*filter_area.z + x, weight_cache, &filter_area.x, &rect.x);
- }
- }
-#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
- WRITE_DEBUG("sum_weight", sum_weight);
- WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
- debug.write(string_printf("debug_%dx%d.exr", filter_area.x, filter_area.y));
-#undef WRITE_DEBUG
-#endif
}
if(use_gradients) {
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 083db4980c..27e5b7af5b 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -838,8 +838,7 @@ public:
cuda_push_context();
CUfunction cuFilterDivideShadow, cuFilterGetFeature, cuFilterNonLocalMeans, cuFilterCombineHalves;
- CUfunction cuFilterConstructTransform, cuFilterEstimateBandwidths, cuFilterEstimateBiasVariance, cuFilterCalculateBandwidth;
- CUfunction cuFilterFinalPassWLR, cuFilterFinalPassNLM, cuFilterDivideCombined;
+ CUfunction cuFilterConstructTransform, cuFilterReconstruct, cuFilterDivideCombined;
CUdeviceptr d_buffers = cuda_device_ptr(rtile.buffer);
cuda_assert(cuModuleGetFunction(&cuFilterDivideShadow, cuModule, "kernel_cuda_filter_divide_shadow"));
@@ -848,11 +847,7 @@ public:
cuda_assert(cuModuleGetFunction(&cuFilterCombineHalves, cuModule, "kernel_cuda_filter_combine_halves"));
cuda_assert(cuModuleGetFunction(&cuFilterConstructTransform, cuModule, "kernel_cuda_filter_construct_transform"));
- cuda_assert(cuModuleGetFunction(&cuFilterEstimateBandwidths, cuModule, "kernel_cuda_filter_estimate_bandwidths"));
- cuda_assert(cuModuleGetFunction(&cuFilterEstimateBiasVariance, cuModule, "kernel_cuda_filter_estimate_bias_variance"));
- cuda_assert(cuModuleGetFunction(&cuFilterCalculateBandwidth, cuModule, "kernel_cuda_filter_calculate_bandwidth"));
- cuda_assert(cuModuleGetFunction(&cuFilterFinalPassWLR, cuModule, "kernel_cuda_filter_final_pass_wlr"));
- cuda_assert(cuModuleGetFunction(&cuFilterFinalPassNLM, cuModule, "kernel_cuda_filter_final_pass_nlm"));
+ cuda_assert(cuModuleGetFunction(&cuFilterReconstruct, cuModule, "kernel_cuda_filter_reconstruct"));
cuda_assert(cuModuleGetFunction(&cuFilterDivideCombined, cuModule, "kernel_cuda_filter_divide_combined"));
cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FU
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list