[Bf-blender-cvs] [2b7e4be] soc-2016-cycles_denoising: Cycles: Move code to new and nicer file layout
Lukas Stockner
noreply at git.blender.org
Tue Nov 22 04:25:42 CET 2016
Commit: 2b7e4beaa822b71c8cee6459c0272eb9387cfd8b
Author: Lukas Stockner
Date: Tue Nov 22 03:34:19 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB2b7e4beaa822b71c8cee6459c0272eb9387cfd8b
Cycles: Move code to new and nicer file layout
===================================================================
M intern/cycles/kernel/CMakeLists.txt
A intern/cycles/kernel/filter/filter.h
A intern/cycles/kernel/filter/filter_features.h
R057 intern/cycles/kernel/kernel_filter_util.h intern/cycles/kernel/filter/filter_features_sse.h
M intern/cycles/kernel/filter/filter_final_pass_impl.h
A intern/cycles/kernel/filter/filter_nlm.h
R063 intern/cycles/kernel/kernel_filter_pre.h intern/cycles/kernel/filter/filter_prefilter.h
A intern/cycles/kernel/filter/filter_wlr.h
A intern/cycles/kernel/filter/filter_wlr_cuda.h
A intern/cycles/kernel/filter/filter_wlr_sse.h
D intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
===================================================================
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 84a0636..5f263fa 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -61,9 +61,6 @@ set(SRC_HEADERS
kernel_differential.h
kernel_emission.h
kernel_film.h
- kernel_filter.h
- kernel_filter_pre.h
- kernel_filter_util.h
kernel_globals.h
kernel_image_opencl.h
kernel_jitter.h
@@ -210,7 +207,15 @@ set(SRC_SPLIT_HEADERS
)
set(SRC_FILTER_HEADERS
+ filter/filter.h
+ filter/filter_features.h
+ filter/filter_features_sse.h
filter/filter_final_pass_impl.h
+ filter/filter_nlm.h
+ filter/filter_prefilter.h
+ filter/filter_wlr.h
+ filter/filter_wlr_cuda.h
+ filter/filter_wlr_sse.h
)
# CUDA module
diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h
new file mode 100644
index 0000000..1f5d682
--- /dev/null
+++ b/intern/cycles/kernel/filter/filter.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util_atomic.h"
+#include "util_math_matrix.h"
+
+#include "filter_features.h"
+#ifdef __KERNEL_SSE3__
+# include "filter_features_sse.h"
+#endif // __KERNEL_SSE3__
+
+#include "filter_nlm.h"
+#include "filter_prefilter.h"
+
+/* Not all features are included in the matrix norm. */
+#define NORM_FEATURE_OFFSET 3
+#define NORM_FEATURE_NUM 8
+
+/* Large enough for a half-window of 10: (2*10 + 1)^2 = 441 window pixels. */
+#define CUDA_WEIGHT_CACHE_SIZE 441
+
+#ifdef __KERNEL_CUDA__
+
+# include "filter_wlr_cuda.h"
+
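+/* Define the final pass functions: filter_final_pass_impl.h is included once per
+ * weighting scheme, and the macros set before each inclusion (FUNCTION_NAME,
+ * WEIGHTING_*, WEIGHT_CACHING_*, OUTPUT_RENDERBUFFER) presumably select the
+ * variant that gets generated. The NFOR variant is disabled with #if 0. */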
+# define FUNCTION_NAME kernel_filter_final_pass_wlr
+# define WEIGHTING_WLR
+# undef WEIGHTING_NLM
+# undef WEIGHTING_NFOR
+# undef WEIGHT_CACHING_CPU
+# define WEIGHT_CACHING_CUDA
+# define OUTPUT_RENDERBUFFER
+# include "filter_final_pass_impl.h"
+
+# define FUNCTION_NAME kernel_filter_final_pass_nlm
+# undef WEIGHTING_WLR
+# define WEIGHTING_NLM
+# undef WEIGHTING_NFOR
+# undef WEIGHT_CACHING_CPU
+# define WEIGHT_CACHING_CUDA
+# define OUTPUT_RENDERBUFFER
+# include "filter_final_pass_impl.h"
+
+# if 0
+# define FUNCTION_NAME kernel_filter_final_pass_nfor
+# undef WEIGHTING_WLR
+# undef WEIGHTING_NLM
+# define WEIGHTING_NFOR
+# undef WEIGHT_CACHING_CPU
+# define WEIGHT_CACHING_CUDA
+# define OUTPUT_RENDERBUFFER
+# include "filter_final_pass_impl.h"
+# endif
+
+#else
+
+# ifdef __KERNEL_SSE3__
+# include "filter_wlr_sse.h"
+# else
+# include "filter_wlr.h"
+# endif // __KERNEL_SSE3__
+
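+/* Define the final pass functions: filter_final_pass_impl.h is included once per
+ * weighting scheme, and the macros set before each inclusion (FUNCTION_NAME,
+ * WEIGHTING_*, WEIGHT_CACHING_*, OUTPUT_RENDERBUFFER) presumably select the
+ * variant that gets generated. The NFOR variant is disabled with #if 0. */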
+# define FUNCTION_NAME kernel_filter_final_pass_wlr
+# define WEIGHTING_WLR
+# undef WEIGHTING_NLM
+# undef WEIGHTING_NFOR
+# define WEIGHT_CACHING_CPU
+# undef WEIGHT_CACHING_CUDA
+# define OUTPUT_RENDERBUFFER
+# include "filter_final_pass_impl.h"
+
+# define FUNCTION_NAME kernel_filter_final_pass_nlm
+# undef WEIGHTING_WLR
+# define WEIGHTING_NLM
+# undef WEIGHTING_NFOR
+# define WEIGHT_CACHING_CPU
+# undef WEIGHT_CACHING_CUDA
+# define OUTPUT_RENDERBUFFER
+# include "filter_final_pass_impl.h"
+
+# if 0
+# define FUNCTION_NAME kernel_filter_final_pass_nfor
+# undef WEIGHTING_WLR
+# undef WEIGHTING_NLM
+# define WEIGHTING_NFOR
+# define WEIGHT_CACHING_CPU
+# undef WEIGHT_CACHING_CUDA
+# define OUTPUT_RENDERBUFFER
+# include "filter_final_pass_impl.h"
+# endif
+
+#endif // __KERNEL_CUDA__
+
+CCL_NAMESPACE_BEGIN
+/* Rescale the float4 at the start of pixel (x, y)'s pass data by sample / w,
+ * where w is that float4's fourth component; after the multiply, w is
+ * (approximately) equal to sample whenever the original w was finite and non-zero.
+ *
+ * The pixel's data starts at buffers + (offset + y*stride + x)*pass_stride,
+ * with pass_stride taken from kernel_data.film. Judging by the function name
+ * this float4 is the combined pass -- TODO confirm against the buffer layout.
+ *
+ * NOTE(review): there is no guard for w == 0; the division then yields inf/NaN.
+ * Confirm that callers never pass such pixels. */
+ccl_device void kernel_filter_divide_combined(KernelGlobals *kg, int x, int y, int sample, float *buffers, int offset, int stride)
+{
+ float4 *combined_buffer = (float4*) (buffers + (offset + y*stride + x)*kernel_data.film.pass_stride);
+ float fac = sample / combined_buffer->w;
+ *combined_buffer = *combined_buffer * fac;
+}
+
+CCL_NAMESPACE_END
\ No newline at end of file
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
new file mode 100644
index 0000000..8e35539
--- /dev/null
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ CCL_NAMESPACE_BEGIN
+/* Read the value of the given pass relative to the pixel that `buffer` points at.
+ * Expands to buffer[(pass)*pass_stride], so both `buffer` and `pass_stride` must
+ * be in scope wherever the macro is used. */
+#define ccl_get_feature(pass) buffer[(pass)*pass_stride]
+
+/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y)
+ * (and additionally over num_frames frames when DENOISE_TEMPORAL is defined).
+ * pixel_buffer always points to the current pixel in the first pass.
+ * cache_idx always points to the same pixel, but only if rect stays the same.
+ *
+ * The macros expand in the caller's scope and expect these names to exist there:
+ * buffer, pixel_buffer, pixel, low, high, rect and buffer_w, plus num_frames,
+ * prev_frames and buffer_h in the temporal variant. cache_idx is declared by the
+ * temporal variant itself but only assigned by the non-temporal one, so in the
+ * non-temporal case the caller has to declare it.
+ *
+ * In the temporal variant pixel.z is set per frame: 0 for t == 0, the negative
+ * value (t - prev_frames - 1) for 1 <= t <= prev_frames, and the positive value
+ * (t - prev_frames) afterwards. At the end of each frame pixel_buffer has moved
+ * by buffer_w*buffer_h elements in total, i.e. consecutive frames are assumed to
+ * be stored that far apart -- TODO confirm against the buffer allocation.
+ *
+ * FOR_PIXEL_WINDOW opens the loop bodies; END_FOR_PIXEL_WINDOW closes them and
+ * has to follow in the same scope. */
+#ifdef DENOISE_TEMPORAL
+#define FOR_PIXEL_WINDOW pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x); \
+ for(int t = 0, cache_idx = 0; t < num_frames; t++) { \
+ pixel.z = (t == 0)? 0: ((t <= prev_frames)? (t-prev_frames-1): (t - prev_frames)); \
+ for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
+ for(pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++, cache_idx++) {
+
+#define END_FOR_PIXEL_WINDOW } \
+ pixel_buffer += buffer_w - (high.x - low.x); \
+ } \
+ pixel_buffer += buffer_w * (buffer_h - (high.y - low.y)); \
+ }
+#else
+#define FOR_PIXEL_WINDOW pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x); \
+ for(pixel.y = low.y, cache_idx = 0; pixel.y < high.y; pixel.y++) { \
+ for(pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++, cache_idx++) {
+
+#define END_FOR_PIXEL_WINDOW } \
+ pixel_buffer += buffer_w - (high.x - low.x); \
+ }
+#endif
+/* Fill features[] for one pixel, in this order: pixel.x, pixel.y, pixel.z (only
+ * with DENOISE_TEMPORAL), then the values of passes 6, 0, 2, 4, 8, 10, 12 and 14
+ * read through ccl_get_feature(), i.e. buffer[pass*pass_stride]. Going by the
+ * slot labels in filter_get_feature_scales() these are depth, normal XYZ,
+ * shadow and albedo RGB.
+ *
+ * If mean is non-NULL, mean[i] is subtracted from the first DENOISE_FEATURES
+ * entries. With DENOISE_SECOND_ORDER_SCREEN, features[10..12] are then set to
+ * x*x, y*y and x*y of the (possibly mean-centered) features[0] and features[1].
+ *
+ * NOTE(review): the indices 10..12 are hard-coded; with DENOISE_TEMPORAL also
+ * defined, index 10 already holds the last pass value written above. Confirm
+ * that the two options are never enabled together. */
+ccl_device_inline void filter_get_features(int3 pixel, float ccl_readonly_ptr buffer, float *features, float *mean, int pass_stride)
+{
+ float *feature = features;
+ *(feature++) = pixel.x;
+ *(feature++) = pixel.y;
+#ifdef DENOISE_TEMPORAL
+ *(feature++) = pixel.z;
+#endif
+ *(feature++) = ccl_get_feature(6);
+ *(feature++) = ccl_get_feature(0);
+ *(feature++) = ccl_get_feature(2);
+ *(feature++) = ccl_get_feature(4);
+ *(feature++) = ccl_get_feature(8);
+ *(feature++) = ccl_get_feature(10);
+ *(feature++) = ccl_get_feature(12);
+ *(feature++) = ccl_get_feature(14);
+ if(mean) {
+ for(int i = 0; i < DENOISE_FEATURES; i++)
+ features[i] -= mean[i];
+ }
+#ifdef DENOISE_SECOND_ORDER_SCREEN
+ features[10] = features[0]*features[0];
+ features[11] = features[1]*features[1];
+ features[12] = features[0]*features[1];
+#endif
+}
+/* Fill features[] using the same slot order as filter_get_features(), but with
+ * different sources: the screen position slots (and the time slot with
+ * DENOISE_TEMPORAL) are set to zero, followed by passes 7, 1, 3, 5, a constant
+ * zero (the read of pass 9 is commented out), 11, 13 and 15. With
+ * DENOISE_SECOND_ORDER_SCREEN, slots 10..12 are set to zero as well.
+ *
+ * Finally each of the first DENOISE_FEATURES entries is multiplied by
+ * scale[i]*scale[i].
+ *
+ * NOTE(review): presumably every odd pass holds the variance of the even pass
+ * before it -- verify against the code that writes the buffer. */
+ccl_device_inline void filter_get_feature_variance(float ccl_readonly_ptr buffer, float *features, float *scale, int pass_stride)
+{
+ float *feature = features;
+ *(feature++) = 0.0f;
+ *(feature++) = 0.0f;
+#ifdef DENOISE_TEMPORAL
+ *(feature++) = 0.0f;
+#endif
+ *(feature++) = ccl_get_feature(7);
+ *(feature++) = ccl_get_feature(1);
+ *(feature++) = ccl_get_feature(3);
+ *(feature++) = ccl_get_feature(5);
+ *(feature++) = 0.0f;//ccl_get_feature(9);
+ *(feature++) = ccl_get_feature(11);
+ *(feature++) = ccl_get_feature(13);
+ *(feature++) = ccl_get_feature(15);
+#ifdef DENOISE_SECOND_ORDER_SCREEN
+ features[10] = 0.0f;
+ features[11] = 0.0f;
+ features[12] = 0.0f;
+#endif
+ for(int i = 0; i < DENOISE_FEATURES; i++)
+ features[i] *= scale[i]*scale[i];
+}
+/* Write into scales[] how far this pixel's features are from mean[], one entry
+ * per feature slot (X, Y, T only with DENOISE_TEMPORAL, depth, normal XYZ,
+ * shadow, albedo RGB):
+ * - X, Y, T, depth and shadow store the absolute difference to the mean;
+ * - the three normal slots all store the same value, the squared length of the
+ *   3D difference between passes 0, 2, 4 and their means, and the three albedo
+ *   slots do the same for passes 10, 12 and 14.
+ * The normal and albedo entries are therefore squared distances;
+ * filter_calculate_scale() applies sqrtf() to exactly those slots.
+ *
+ * scales and mean are advanced locally while writing; the caller's pointers
+ * are not changed. */
+ccl_device_inline void filter_get_feature_scales(int3 pixel, float ccl_readonly_ptr buffer, float *scales, float *mean, int pass_stride)
+{
+ *(scales++) = fabsf(pixel.x - *(mean++)); //X
+ *(scales++) = fabsf(pixel.y - *(mean++)); //Y
+#ifdef DENOISE_TEMPORAL
+ *(scales++) = fabsf(pixel.z - *(mean++)); //T
+#endif
+
+ *(scales++) = fabsf(ccl_get_feature(6) - *(mean++)); //Depth
+
+ float normalS = len_squared(make_float3(ccl_get_feature(0) - mean[0], ccl_get_feature(2) - mean[1], ccl_get_feature(4) - mean[2]));
+ mean += 3;
+ *(scales++) = normalS; //NormalX
+ *(scales++) = normalS; //NormalY
+ *(scales++) = normalS; //NormalZ
+
+ *(scales++) = fabsf(ccl_get_feature(8) - *(mean++)); //Shadow
+
+ float normalT = len_squared(make_float3(ccl_get_feature(10) - mean[0], ccl_get_feature(12) - mean[1], ccl_get_feature(14) - mean[2]));
+ mean += 3;
+ *(scales++) = normalT; //AlbedoR
+ *(scales++) = normalT; //AlbedoG
+ *(scales++) = normalT; //AlbedoB
+}
+/* Convert the values in scale[] into reciprocal scale factors, in place.
+ * Slot layout: X, Y, T (only with DENOISE_TEMPORAL), depth, normal XYZ, shadow,
+ * albedo RGB. Each entry becomes 1/max(v, 0.01f), where v is the stored value
+ * for X, Y, T, depth and shadow, and sqrtf() of the stored value for the normal
+ * and albedo slots. The 0.01f floor avoids a division by zero and caps every
+ * resulting factor at 100. */
+ccl_device_inline void filter_calculate_scale(float *scale)
+{
+ scale[0] = 1.0f/max(scale[0], 0.01f); //X
+ scale[1] = 1.0f/max(scale[1], 0.01f); //Y
+ scale += 2;
+#ifdef DENOISE_TEMPORAL
+ scale[0] = 1.0f/max(scale[0], 0.01f); //T
+ scale++;
+#endif
+
+ scale[0] = 1.0f/max(scale[0], 0.01f); //Depth
+
+ scale[1] = 1.0f/max(sqrtf(scale[1]), 0.01f); //NormalX
+ scale[2] = 1.0f/max(sqrtf(scale[2]), 0.01f); //NormalY
+ scale[3] = 1.0f/max(sqrtf(scale[3]), 0.01f); //NormalZ
+
+ scale[4] = 1.0f/max(scale[4], 0.01f); //Shadow
+
+ scale[5] = 1.0f/max(sqrtf(scale[5]), 0.01f); //AlbedoR
+ scale[6] = 1.0f/max(sqrtf(scale[6]), 0.01f); //AlbedoG
+ scale[7] = 1.0f/max(sqrtf(scale[7]), 0.01f); //AlbedoB
+}
+
+ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int pass_stride)
+{
+ return m
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list