[Bf-blender-cvs] [2f6db0e227] soc-2016-cycles_denoising: Cycles Denoising: Use device-independent denoising in the CPUDevice
Lukas Stockner
noreply at git.blender.org
Thu Feb 9 14:39:38 CET 2017
Commit: 2f6db0e227d8835bc4b2ec5d0e181c5cf29da7dc
Author: Lukas Stockner
Date: Wed Feb 8 16:59:38 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB2f6db0e227d8835bc4b2ec5d0e181c5cf29da7dc
Cycles Denoising: Use device-independent denoising in the CPUDevice
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/filter/filter_features.h
M intern/cycles/filter/filter_nlm_cpu.h
M intern/cycles/filter/filter_nlm_gpu.h
M intern/cycles/filter/filter_prefilter.h
M intern/cycles/filter/filter_reconstruction.h
M intern/cycles/filter/kernels/cpu/filter_cpu.h
M intern/cycles/filter/kernels/cpu/filter_cpu_impl.h
M intern/cycles/filter/kernels/cuda/filter.cu
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index ebd4acb1e5..bd5630ae95 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -26,6 +26,7 @@
#include "device.h"
#include "device_intern.h"
+#include "device_denoising.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
@@ -136,10 +137,10 @@ public:
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
- KernelFunctions<void(*)(int, float**, int, int, int*, int*, int*, int*, float*, float*, float*, float*, int*, int, int, int, bool)> filter_divide_shadow_kernel;
- KernelFunctions<void(*)(int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
- KernelFunctions<void(*)(int, int, int, float*, int, int, int, int)> filter_divide_combined_kernel;
+ KernelFunctions<void(*)(int, float**, int, int, int*, int*, int*, int*, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
+ KernelFunctions<void(*)(int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
+ KernelFunctions<void(*)(int, int, int, float*, int, int, int, int)> filter_divide_combined_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
@@ -147,9 +148,9 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
- KernelFunctions<void(*)(int, float*, int, int, int, float*, int*, int*, int, float, int, int)> filter_construct_transform_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, int, int, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
- KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
+ KernelFunctions<void(*)(int, float*, int, int, int, float*, int*, int*, int, float, int, int)> filter_construct_transform_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), \
@@ -221,12 +222,20 @@ public:
void mem_free(device_memory& mem)
{
if(mem.device_pointer) {
+ if(!mem.data_pointer) {
+ delete[] (char*) mem.device_pointer;
+ }
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
}
}
+ virtual device_ptr mem_get_offset_ptr(device_memory& mem, int offset)
+ {
+ return (device_ptr) (((char*) mem.device_pointer) + mem.memory_offset(offset));
+ }
+
void const_copy_to(const char *name, void *host, size_t size)
{
kernel_const_copy(&kernel_globals, name, host, size);
@@ -290,368 +299,216 @@ public:
}
};
- void non_local_means(int4 rect, float *image, float *weight, float *out, float *variance, float *difference, float *blurDifference, float *weightAccum, int r, int f, float a, float k_2, int channel_ofs_in = 0, int channel_ofs_out = 0)
+ bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
+ DenoisingTask *task)
{
+ int4 rect = task->rect;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
int w = align_up(rect.z-rect.x, 4);
int h = rect.w-rect.y;
- int channels = channel_ofs_in? 3: 1;
- memset(weightAccum, 0, sizeof(float)*w*h*channels);
- memset(out, 0, sizeof(float)*w*h*channels);
+ float *blurDifference = (float*) task->nlm_state.temporary_1_ptr;
+ float *difference = (float*) task->nlm_state.temporary_2_ptr;
+ float *weightAccum = (float*) task->nlm_state.temporary_3_ptr;
+
+ memset(weightAccum, 0, sizeof(float)*w*h);
+ memset((float*) out_ptr, 0, sizeof(float)*w*h);
for(int i = 0; i < (2*r+1)*(2*r+1); i++) {
int dy = i / (2*r+1) - r;
int dx = i % (2*r+1) - r;
int local_rect[4] = {max(0, -dx), max(0, -dy), rect.z-rect.x - max(0, dx), rect.w-rect.y - max(0, dy)};
- filter_nlm_calc_difference_kernel()(dx, dy, weight, variance, difference, local_rect, w, channel_ofs_in, a, k_2);
- filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
+ filter_nlm_calc_difference_kernel()(dx, dy,
+ (float*) guide_ptr,
+ (float*) variance_ptr,
+ difference,
+ local_rect,
+ 0,
+ w, a, k_2);
+
+ filter_nlm_blur_kernel() (difference, blurDifference, local_rect, w, f);
filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
- filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
- for(int c = 0; c < channels; c++) {
- filter_nlm_update_output_kernel()(dx, dy, blurDifference, image + channel_ofs_in*c, out + channel_ofs_out*c, weightAccum + w*h*c, local_rect, w, f);
- }
+ filter_nlm_blur_kernel() (difference, blurDifference, local_rect, w, f);
+
+ filter_nlm_update_output_kernel()(dx, dy,
+ blurDifference,
+ (float*) image_ptr,
+ (float*) out_ptr,
+ weightAccum,
+ local_rect,
+ w, f);
}
int local_rect[4] = {0, 0, rect.z-rect.x, rect.w-rect.y};
- for(int c = 0; c < channels; c++) {
- filter_nlm_normalize_kernel()(out + channel_ofs_out*c, weightAccum + w*h*c, local_rect, w);
- }
+ filter_nlm_normalize_kernel()((float*) out_ptr, weightAccum, local_rect, w);
+
+ return true;
}
- float* denoise_fill_buffer(KernelGlobals *kg, int sample, int4 rect, float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides, int frames, int *frame_strides)
+ bool denoising_construct_transform(DenoisingTask *task)
{
- bool use_cross_denoising = kg->__data.film.denoise_cross;
- bool use_gradients = kg->__data.integrator.use_gradients;
- int buffer_pass_stride = kg->__data.film.pass_stride;
- int buffer_denoising_offset = kg->__data.film.pass_denoising;
- int num_frames = 1;
-
- int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
- int pass_stride = w*h*frames;
- int passes = use_cross_denoising? 20 : 14;
- float *filter_buffers = new float[passes*pass_stride];
- memset(filter_buffers, 0, sizeof(float)*passes*pass_stride);
-
- /* Denoising Buffer Pass allocation:
- * 0: Normal X
- * 1: Normal Y
- * 2: Normal Z
- * 3: Depth
- * 4: Shadowing
- * 5: Albedo R
- * 6: Albedo G
- * 7: Albedo B
- * 8: Color R
- * 9: Color G
- * 10: Color B
- * 11: Color Variance R
- * 12: Color Variance G
- * 13: Color Variance B
- * With Cross-denoising passes, this list is essentially repeated two times. */
-
- for(int frame = 0; frame < frames; frame++) {
- float *filter_buffer = filter_buffers + w*h*frame;
- float *buffer[9];
- for(int i = 0; i < 9; i++) {
- buffer[i] = buffers[i] + frame_strides[i]*frame;
- }
- DebugPasses debug((rect.z - rect.x), h, 42, 1, w);
-
-#define PASSPTR(i) (filter_buffer + (i)*pass_stride)
-
- /* ==== Step 1: Prefilter shadow feature. ==== */
- {
- /* Reuse some passes of the filter_buffer for temporary storage. */
- float *sampleV = PASSPTR(0), *sampleVV = PASSPTR(1), *bufferV = PASSPTR(2), *cleanV = PASSPTR(3);
- float *unfilteredA = PASSPTR(5), *unfilteredB = PASSPTR(6);
- float *nlm_temp1 = PASSPTR(7), *nlm_temp2 = PASSPTR(8), *nlm_temp3 = PASSPTR(9);
-
- /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- filter_divide_shadow_kernel()(sample, buffer, x, y, tile_x, tile_y, offsets, strides, unfilteredA, sampleV, sampleVV, bufferV, &rect.x, buffer_pass_stride, buffer_denoising_offset, num_frames, use_gradients);
- }
- }
- debug.add_pass("shadowUnfilteredA", unfilteredA);
- debug.add_pass("shadowUnfilteredB", unfilteredB);
- debug.add_pass("shadowBufferV", bufferV);
- debug.add_pass("shadowSampleV", sampleV);
- debug.add_pass("shadowSampleVV", sampleVV);
-
- /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
- non_local_means(rect, bufferV, sampleV, cleanV, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 6, 3, 4.0f, 1.0f);
- debug.add_pass("shadowCleanV", cleanV);
-
- /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
- non_local_means(rect, unfilteredA, unfilteredB, sample
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list