[Bf-blender-cvs] [4bc56ca9b1] soc-2016-cycles_denoising: Cycles Denoising: Move denoising kernels to a separate compilation unit
Lukas Stockner
noreply at git.blender.org
Sat Feb 4 04:09:03 CET 2017
Commit: 4bc56ca9b1a414cb326b759ff65979e2ce40f966
Author: Lukas Stockner
Date: Thu Feb 2 05:45:09 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB4bc56ca9b1a414cb326b759ff65979e2ce40f966
Cycles Denoising: Move denoising kernels to a separate compilation unit
Previously, the denoising kernels were just included with the other kernels.
However, that is not ideal, since the kernels already take a very long time to compile. Bundling them together also isn't necessary, since the rendering and denoising kernels share basically no code.
So, this commit adds intern/cycles/filter/, which contains the filtering kernels.
===================================================================
M build_files/cmake/macros.cmake
M intern/cycles/CMakeLists.txt
M intern/cycles/app/CMakeLists.txt
M intern/cycles/device/CMakeLists.txt
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
A intern/cycles/filter/CMakeLists.txt
A intern/cycles/filter/filter.h
A intern/cycles/filter/filter_compat_cpu.h
A intern/cycles/filter/filter_compat_cuda.h
A intern/cycles/filter/filter_compat_opencl.h
A intern/cycles/filter/filter_defines.h
R093 intern/cycles/kernel/filter/filter_features.h intern/cycles/filter/filter_features.h
R098 intern/cycles/kernel/filter/filter_features_sse.h intern/cycles/filter/filter_features_sse.h
A intern/cycles/filter/filter_kernel.h
R079 intern/cycles/kernel/filter/filter_nlm_cpu.h intern/cycles/filter/filter_nlm_cpu.h
R091 intern/cycles/kernel/filter/filter_nlm_gpu.h intern/cycles/filter/filter_nlm_gpu.h
R071 intern/cycles/kernel/filter/filter_prefilter.h intern/cycles/filter/filter_prefilter.h
R063 intern/cycles/kernel/filter/filter_final_pass_impl.h intern/cycles/filter/filter_reconstruction.h
R065 intern/cycles/kernel/filter/filter_wlr.h intern/cycles/filter/filter_transform.h
R066 intern/cycles/kernel/filter/filter_wlr_cuda.h intern/cycles/filter/filter_transform_cuda.h
R064 intern/cycles/kernel/filter/filter_wlr_sse.h intern/cycles/filter/filter_transform_sse.h
A intern/cycles/filter/kernels/cpu/filter.cpp
A intern/cycles/filter/kernels/cpu/filter_avx.cpp
A intern/cycles/filter/kernels/cpu/filter_avx2.cpp
A intern/cycles/filter/kernels/cpu/filter_cpu.h
A intern/cycles/filter/kernels/cpu/filter_cpu_impl.h
A intern/cycles/filter/kernels/cpu/filter_sse2.cpp
A intern/cycles/filter/kernels/cpu/filter_sse3.cpp
A intern/cycles/filter/kernels/cpu/filter_sse41.cpp
A intern/cycles/filter/kernels/cuda/filter.cu
A intern/cycles/filter/kernels/opencl/filter.cl
M intern/cycles/kernel/CMakeLists.txt
D intern/cycles/kernel/filter/filter.h
M intern/cycles/kernel/kernel_compat_cpu.h
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
A intern/cycles/util/util_cuda.h
A intern/cycles/util/util_cuda_capabilities.h
===================================================================
diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake
index 09428953a4..4ebeebbf1f 100644
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -536,6 +536,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
cycles_bvh
cycles_device
cycles_kernel
+ cycles_filter
cycles_util
cycles_subd)
if(WITH_CYCLES_OSL)
@@ -651,6 +652,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
cycles_bvh
cycles_device
cycles_kernel
+ cycles_filter
cycles_util
cycles_subd
bf_intern_opencolorio
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 29285c6ad6..f0505795dc 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -251,6 +251,7 @@ endif()
add_subdirectory(bvh)
add_subdirectory(device)
add_subdirectory(doc)
+add_subdirectory(filter)
add_subdirectory(graph)
add_subdirectory(kernel)
add_subdirectory(render)
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index ff72bd772a..9b78e168d4 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -18,6 +18,7 @@ set(INC_SYS
set(LIBRARIES
cycles_device
cycles_kernel
+ cycles_filter
cycles_render
cycles_bvh
cycles_subd
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 966ff5e52b..2d20e7138f 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -3,6 +3,7 @@ set(INC
.
../graph
../kernel
+ ../filter
../kernel/svm
../kernel/osl
../util
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index e449d97c86..a1c77894fe 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -32,6 +32,8 @@
#include "kernel_types.h"
#include "kernel_globals.h"
+#include "filter.h"
+
#include "osl_shader.h"
#include "osl_globals.h"
@@ -133,11 +135,11 @@ public:
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, float*, float*, float*, float*, int*)> filter_divide_shadow_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int*)> filter_get_feature_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, void*, int*)> filter_construct_transform_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)> filter_divide_combined_kernel;
+
+ KernelFunctions<void(*)(int, float**, int, int, int*, int*, int*, int*, float*, float*, float*, float*, int*, int, int, int, bool)> filter_divide_shadow_kernel;
+ KernelFunctions<void(*)(int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
+ KernelFunctions<void(*)(int, int, int, float*, int, int, int, int)> filter_divide_combined_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
@@ -145,8 +147,9 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, int, int, void*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
- KernelFunctions<void(*)(int, int, int, int, int, float*, void*, float*, float3*, int*, int)> filter_finalize_kernel;
+ KernelFunctions<void(*)(int, float*, int, int, int, float*, int*, int*, int, float, int, int)> filter_construct_transform_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, int, int, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), \
@@ -165,13 +168,13 @@ public:
filter_divide_shadow_kernel(KERNEL_FUNCTIONS(filter_divide_shadow)),
filter_get_feature_kernel(KERNEL_FUNCTIONS(filter_get_feature)),
filter_combine_halves_kernel(KERNEL_FUNCTIONS(filter_combine_halves)),
- filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
filter_nlm_calc_difference_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_difference)),
filter_nlm_blur_kernel(KERNEL_FUNCTIONS(filter_nlm_blur)),
filter_nlm_calc_weight_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_weight)),
filter_nlm_update_output_kernel(KERNEL_FUNCTIONS(filter_nlm_update_output)),
filter_nlm_normalize_kernel(KERNEL_FUNCTIONS(filter_nlm_normalize)),
+ filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
filter_nlm_construct_gramian_kernel(KERNEL_FUNCTIONS(filter_nlm_construct_gramian)),
filter_finalize_kernel(KERNEL_FUNCTIONS(filter_finalize))
{
@@ -318,10 +321,15 @@ public:
float* denoise_fill_buffer(KernelGlobals *kg, int sample, int4 rect, float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides, int frames, int *frame_strides)
{
- bool cross_denoise = kg->__data.film.denoise_cross;
+ bool use_cross_denoising = kg->__data.film.denoise_cross;
+ bool use_gradients = kg->__data.integrator.use_gradients;
+ int buffer_pass_stride = kg->__data.film.pass_stride;
+ int buffer_denoising_offset = kg->__data.film.pass_denoising;
+ int num_frames = 1;
+
int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
int pass_stride = w*h*frames;
- int passes = cross_denoise? 20 : 14;
+ int passes = use_cross_denoising? 20 : 14;
float *filter_buffers = new float[passes*pass_stride];
memset(filter_buffers, 0, sizeof(float)*passes*pass_stride);
@@ -362,7 +370,7 @@ public:
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_divide_shadow_kernel()(kg, sample, buffer, x, y, tile_x, tile_y, offsets, strides, unfilteredA, sampleV, sampleVV, bufferV, &rect.x);
+ filter_divide_shadow_kernel()(sample, buffer, x, y, tile_x, tile_y, offsets, strides, unfilteredA, sampleV, sampleVV, bufferV, &rect.x, buffer_pass_stride, buffer_denoising_offset, num_frames, use_gradients);
}
}
debug.add_pass("shadowUnfilteredA", unfilteredA);
@@ -421,7 +429,7 @@ public:
for(int i = 0; i < 7; i++) {
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, unfiltered, variance, &rect.x);
+ filter_get_feature_kernel()(sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, unfiltered, variance, &rect.x, buffer_pass_stride, buffer_denoising_offset, use_cross_denoising);
}
}
non_local_means(rect, unfiltered, unfiltered, PASSPTR(mean_to[i]), variance, nlm_temp1, nlm_temp2, nlm_temp3, 2, 2, 1, 0.25f);
@@ -439,10 +447,10 @@ public:
int variance_from[] = {23, 24, 25, 29, 30, 31};
int mean_to[] = { 8, 9, 10, 14, 15, 16};
int variance_to[] = {11, 12, 13, 17, 18, 19};
- for(int i = 0; i < (cross_denoise? 6 : 3); i++) {
+ for(int i = 0; i < (use_cross_denoising? 6 : 3); i++) {
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(mean_to[i]), PASSPTR(variance_to[i]), &rect.x);
+ filter_get_feature_kernel()(sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(mean_to[i]), PASSPTR(variance_to[i]), &rect.x, buffer_pass_stride, buffer_denoising_offset, use_cross_denoising);
}
}
}
@@ -480,25 +488,28 @@ public:
scoped_fpe fpe(FPE_ENABLED);
#endif
- bool cross_denoise = kg->__data.film.denoise_cross;
-
- int hw = kg->__data.integrator.half_window;
- int storage_num = filter_area.z*filter_area.w;
- FilterStorage *storage = new FilterStorage[storage_num];
+ bool use_cross_denoising = kg->__data.film.denoise_cross;
+ int half_window = kg->__data.integrator.half_window;
+ float pca_threshold = kg->__data.integrator.filter_strength;
+ int num_frames = 1; /* TODO(lukas) */
+ int prev_frames = 0;
int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
int pass_stride = w*h;
- float *XtWX = new float[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)*storage_num];
- float3 *XtWY = new float3[(DENOISE_FEATURES+1)*storage_num];
+ int storage_num = filter_area.z*filter_area.w;
+ float *XtWX = new float[XTWX_SIZE*storage_num];
+ float3 *XtWY = new float3[XTWY_SIZE*storage_num];
+ float *transform = new float[TRANSFORM_SIZE*storage_num];
+ int *rank = new int[storage_num];
for(int y = 0; y < fil
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list