[Bf-blender-cvs] [116dd643abb] temp-cycles-denoising: Merge remote-tracking branch 'origin/master' into temp-cycles-denoising
Lukas Stockner
noreply at git.blender.org
Fri Apr 14 00:57:27 CEST 2017
Commit: 116dd643abbdb67de17da664cfaa87bb05b77119
Author: Lukas Stockner
Date: Fri Mar 31 00:31:31 2017 +0200
Branches: temp-cycles-denoising
https://developer.blender.org/rB116dd643abbdb67de17da664cfaa87bb05b77119
Merge remote-tracking branch 'origin/master' into temp-cycles-denoising
===================================================================
===================================================================
diff --cc intern/cycles/device/device_cpu.cpp
index 8e41d618809,2761d9488ca..8160aee662c
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@@ -24,33 -24,30 +24,33 @@@
# include <OSL/oslexec.h>
#endif
- #include "device.h"
- #include "device_intern.h"
- #include "device_denoising.h"
- #include "device_split_kernel.h"
-
- #include "kernel.h"
- #include "kernel_compat_cpu.h"
- #include "kernel_types.h"
- #include "split/kernel_split_data.h"
- #include "kernel_globals.h"
-
- #include "../filter/filter.h"
-
- #include "osl_shader.h"
- #include "osl_globals.h"
-
- #include "buffers.h"
-
- #include "util_debug.h"
- #include "util_foreach.h"
- #include "util_function.h"
- #include "util_logging.h"
- #include "util_map.h"
- #include "util_opengl.h"
- #include "util_progress.h"
- #include "util_system.h"
- #include "util_thread.h"
+ #include "device/device.h"
++#include "device/device_denoising.h"
+ #include "device/device_intern.h"
+ #include "device/device_split_kernel.h"
+
+ #include "kernel/kernel.h"
+ #include "kernel/kernel_compat_cpu.h"
+ #include "kernel/kernel_types.h"
+ #include "kernel/split/kernel_split_data.h"
+ #include "kernel/kernel_globals.h"
+
++#include "filter/filter.h"
++
+ #include "kernel/osl/osl_shader.h"
+ #include "kernel/osl/osl_globals.h"
+
+ #include "render/buffers.h"
+
+ #include "util/util_debug.h"
+ #include "util/util_foreach.h"
+ #include "util/util_function.h"
+ #include "util/util_logging.h"
+ #include "util/util_map.h"
+ #include "util/util_opengl.h"
+ #include "util/util_progress.h"
+ #include "util/util_system.h"
+ #include "util/util_thread.h"
CCL_NAMESPACE_BEGIN
diff --cc intern/cycles/device/device_cuda.cpp
index 88cb3085a29,ba3ca3c3e1e..aaa8ce88830
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@@ -20,15 -20,12 +20,15 @@@
#include <stdlib.h>
#include <string.h>
- #include "device.h"
- #include "device_intern.h"
- #include "device_denoising.h"
- #include "device_split_kernel.h"
+ #include "device/device.h"
++#include "device/device_denoising.h"
+ #include "device/device_intern.h"
+ #include "device/device_split_kernel.h"
- #include "buffers.h"
+ #include "render/buffers.h"
- #include "filter_defines.h"
++#include "filter/filter_defines.h"
+
#ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
#else
@@@ -396,10 -379,10 +396,10 @@@ public
}
const string common_cflags =
- compile_kernel_get_common_cflags(requested_features, split);
+ compile_kernel_get_common_cflags(requested_features, filter, split);
/* Try to use locally compiled kernel. */
- const string kernel_path = path_get("kernel");
+ const string kernel_path = path_get("source/kernel");
const string kernel_md5 = path_files_md5_hash(kernel_path);
/* We include cflags into md5 so changing cuda toolkit or changing other
diff --cc intern/cycles/device/device_denoising.cpp
index c6df83ab372,00000000000..ae880f08f14
mode 100644,000000..100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@@ -1,206 -1,0 +1,206 @@@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
- #include "device_denoising.h"
++#include "device/device_denoising.h"
+
- #include "filter_defines.h"
++#include "filter/filter_defines.h"
+
+CCL_NAMESPACE_BEGIN
+/* Copy the filter parameters and render buffer pass layout from a DeviceTask and compute the working rect. */
+void DenoisingTask::init_from_devicetask(const DeviceTask &task)
+{
+ radius = task.denoising_radius;
+ nlm_k_2 = task.denoising_k2;
+ relative_pca = task.denoising_relative_pca;
+ /* The pass stride and the offsets of the denoising passes are taken from the task unchanged. */
+ render_buffer.pass_stride = task.pass_stride;
+ render_buffer.denoising_data_offset = task.pass_denoising_data;
+ render_buffer.denoising_clean_offset = task.pass_denoising_clean;
+ /* NOTE(review): tiles and filter_area are read below but not set in this function; tiles is assigned in tiles_from_rendertiles(), so that presumably has to run first - confirm against the callers. */
+ /* Expand filter_area (used here as x, y, extent in X, extent in Y) by radius pixels and clamp the result to the extent of the neighboring tiles. The resulting rect is stored as (lower X, lower Y, upper X, upper Y). */
+ rect = make_int4(max(tiles->x[0], filter_area.x - radius),
+ max(tiles->y[0], filter_area.y - radius),
+ min(tiles->x[3], filter_area.x + filter_area.z + radius),
+ min(tiles->y[3], filter_area.y + filter_area.w + radius));
+}
+/* Fill the TilesInfo block and render_buffer from rtiles (elements 0 to 8 are read), then pass the tile buffer pointers to functions.set_tiles(). */
+void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
+{
+ tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int)); /* The element count is the size of TilesInfo measured in ints; presumably tiles_mem stores ints - confirm. */
+ /* Gather the buffer pointer, offset and stride of each of the nine tiles. */
+ device_ptr buffers[9];
+ for(int i = 0; i < 9; i++) {
+ buffers[i] = rtiles[i].buffer;
+ tiles->offsets[i] = rtiles[i].offset;
+ tiles->strides[i] = rtiles[i].stride;
+ }
+ tiles->x[0] = rtiles[3].x; /* X boundaries are read from tiles 3, 4 and 5; presumably the middle row of a row-major 3x3 neighborhood - confirm. */
+ tiles->x[1] = rtiles[4].x;
+ tiles->x[2] = rtiles[5].x;
+ tiles->x[3] = rtiles[5].x + rtiles[5].w; /* Last X boundary: start of tile 5 plus its width. */
+ tiles->y[0] = rtiles[1].y; /* Y boundaries are read from tiles 1, 4 and 7; presumably the middle column of the same neighborhood - confirm. */
+ tiles->y[1] = rtiles[4].y;
+ tiles->y[2] = rtiles[7].y;
+ tiles->y[3] = rtiles[7].y + rtiles[7].h; /* Last Y boundary: start of tile 7 plus its height. */
+ /* render_buffer refers to the tile at index 4 (presumably the center tile, i.e. the one being denoised - confirm). */
+ render_buffer.offset = rtiles[4].offset;
+ render_buffer.stride = rtiles[4].stride;
+ render_buffer.ptr = rtiles[4].buffer;
+ /* Pass the nine buffer pointers on to functions.set_tiles(). */
+ functions.set_tiles(buffers);
+}
+
+bool DenoisingTask::run_denoising()
+{
+ /* Allocate denoising buffer. */
+ buffer.passes = 14;
+ buffer.w = align_up(rect.z - rect.x, 4);
+ buffer.h = rect.w - rect.y;
+ buffer.pass_stride = align_up(buffer.w * buffer.h, device->mem_get_offset_alignment());
+ buffer.mem.resize(buffer.pass_stride * buffer.passes);
+ device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE);
+
+ device_ptr null_ptr = (device_ptr) 0;
+
+ /* Prefilter shadow feature. */
+ {
+ device_ptr unfiltered_a, unfiltered_b, sample_var, sample_var_var, buffer_var, filtered_var;
+ unfiltered_a = device->mem_get_offset_ptr(buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
+ unfiltered_b = device->mem_get_offset_ptr(buffer.mem, 1*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ sample_var = device->mem_get_offset_ptr(buffer.mem, 2*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ sample_var_var = device->mem_get_offset_ptr(buffer.mem, 3*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ buffer_var = device->mem_get_offset_ptr(buffer.mem, 5*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ filtered_var = device->mem_get_offset_ptr(buffer.mem, 6*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ nlm_state.temporary_1_ptr = device->mem_get_offset_ptr(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ nlm_state.temporary_2_ptr = device->mem_get_offset_ptr(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ nlm_state.temporary_3_ptr = device->mem_get_offset_ptr(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+
+ /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
+ functions.divide_shadow(unfiltered_a, unfiltered_b, sample_var, sample_var_var, buffer_var);
+
+ /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
+ nlm_state.set_parameters(6, 3, 4.0f, 1.0f);
+ functions.non_local_means(buffer_var, sample_var, sample_var_var, filtered_var);
+
+ /* Reuse memory, the previous data isn't needed anymore. */
+ device_ptr filtered_a = buffer_var,
+ filtered_b = sample_var;
+ /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
+ nlm_state.set_parameters(5, 3, 1.0f, 0.25f);
+ functions.non_local_means(unfiltered_a, unfiltered_b, filtered_var, filtered_a);
+ functions.non_local_means(unfiltered_b, unfiltered_a, filtered_var, filtered_b);
+
+ device_ptr residual_var = sample_var_var;
+ /* Estimate the residual variance between the two filtered halves. */
+ functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
+
+ device_ptr final_a = unfiltered_a,
+ final_b = unfiltered_b;
+ /* Use the residual variance for a second filter pass. */
+ nlm_state.set_parameters(4, 2, 1.0f, 0.5f);
+ functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
+ functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
+
+ /* Combine the two double-filtered halves to a final shadow feature. */
+ device_ptr shadow_pass = device->mem_get_offset_ptr(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ functions.combine_halves(final_a, final_b, shadow_pass, null_ptr, 0, rect);
+ }
+
+ /* Prefilter general features. */
+ {
+ device_ptr unfiltered, variance, feature_pass;
+ unfiltered = device->mem_get_offset_ptr(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ variance = device->mem_get_offset_ptr(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ nlm_state.temporary_1_ptr = device->mem_get_offset_ptr(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ nlm_state.temporary_2_ptr = device->mem_get_offset_ptr(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ nlm_state.temporary_3_ptr = device->mem_get_offset_ptr(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ int mean_from[] = { 0, 1, 2, 6, 7, 8, 12 };
+ int variance_from[] = { 3, 4, 5, 9, 10, 11, 13 };
+ int pass_to[] = { 1, 2, 3, 0, 5, 6, 7 };
+ for(int pass = 0; pass < 7; pass++) {
+ feature_pass = device->mem_get_offset_ptr(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+ /* Get the unfiltered pass and its variance from the Rende
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list