[Bf-blender-cvs] [f928cd3] soc-2016-cycles_denoising: Cycles: Implement Cross-Denoising

Lukas Stockner noreply at git.blender.org
Tue Nov 22 04:25:35 CET 2016


Commit: f928cd357fd6b2e0d6c0af409e0de0f3ce502fa2
Author: Lukas Stockner
Date:   Sun Nov 20 05:15:55 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBf928cd357fd6b2e0d6c0af409e0de0f3ce502fa2

Cycles: Implement Cross-Denoising

Right now this is pretty bad, but it can be made much better.

===================================================================

M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/blender_session.cpp
M	intern/cycles/blender/blender_util.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/kernel_filter.h
M	intern/cycles/kernel/kernel_filter_pre.h
M	intern/cycles/kernel/kernel_passes.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/kernel.cu
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/buffers.h
M	intern/cycles/render/film.cpp
M	intern/cycles/render/film.h
M	source/blender/makesdna/DNA_scene_types.h
M	source/blender/makesrna/intern/rna_scene.c
M	source/blender/render/intern/source/render_result.c

===================================================================

diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index a0b387d..ea07ac2 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -580,6 +580,7 @@ class CyclesRender_PT_denoising(CyclesButtonsPanel, Panel):
         sub.prop(rl, "filter_weighting_adjust", slider=True)
         sub.prop(rl, "filter_gradients")
         sub.prop(rl, "filter_use_nlm_weights")
+        sub.prop(rl, "filter_cross")
 
         sub = col.column(align=True)
         row = sub.row(align=True)
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 7867c0e..4dadc63 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -462,7 +462,9 @@ void BlenderSession::render()
 		if(b_layer_iter->denoise_subsurface_direct()) scene->film->denoise_flags |= DENOISE_SUBSURFACE_DIR;
 		if(b_layer_iter->denoise_subsurface_indirect()) scene->film->denoise_flags |= DENOISE_SUBSURFACE_IND;
 		scene->film->selective_denoising = (scene->film->denoise_flags != DENOISE_ALL);
+		scene->film->cross_denoising = b_layer_iter->filter_cross();
 		buffer_params.selective_denoising = scene->film->selective_denoising;
+		buffer_params.cross_denoising = scene->film->cross_denoising;
 		scene->integrator->half_window = b_layer_iter->half_window();
 		scene->integrator->filter_strength = powf(2.0f, b_layer_iter->filter_strength());
 		scene->integrator->weighting_adjust = powf(2.0f, b_layer_iter->filter_weighting_adjust());
@@ -486,6 +488,10 @@ void BlenderSession::render()
 			add_pass(b_engine, SCE_PASS_DENOISE_SHADOW_B, 3, b_rlay_name.c_str(), NULL);
 			add_pass(b_engine, SCE_PASS_DENOISE_NOISY, 3, b_rlay_name.c_str(), NULL);
 			add_pass(b_engine, SCE_PASS_DENOISE_NOISY_VAR, 3, b_rlay_name.c_str(), NULL);
+			if(buffer_params.cross_denoising) {
+				add_pass(b_engine, SCE_PASS_DENOISE_NOISY_B, 3, b_rlay_name.c_str(), NULL);
+				add_pass(b_engine, SCE_PASS_DENOISE_NOISY_B_VAR, 3, b_rlay_name.c_str(), NULL);
+			}
 			if(buffer_params.selective_denoising) {
 				add_pass(b_engine, SCE_PASS_DENOISE_CLEAN, 3, b_rlay_name.c_str(), NULL);
 			}
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index f311f15..de9ab2d 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -52,6 +52,8 @@ typedef enum BlenderDenoisePasses {
 	SCE_PASS_DENOISE_NOISY            = 40, /* The original noisy image (only the components that are denoised). */
 	SCE_PASS_DENOISE_NOISY_VAR        = 41,
 	SCE_PASS_DENOISE_CLEAN            = 42, /* If present, these image components are added to the denoised image. */
+	SCE_PASS_DENOISE_NOISY_B          = 43,
+	SCE_PASS_DENOISE_NOISY_B_VAR      = 44,
 } BlenderDenoisePasses;
 
 void python_thread_state_save(void **python_thread_state);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 7fb97bf..fcce0ff 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -281,10 +281,12 @@ public:
 
 	float* denoise_fill_buffer(KernelGlobals *kg, int sample, int4 rect, float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides, int frames, int *frame_strides)
 	{
+		bool cross_denoise = kg->__data.film.denoise_cross;
 		int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
 		int pass_stride = w*h*frames;
-		float *filter_buffers = new float[22*pass_stride];
-		memset(filter_buffers, 0, sizeof(float)*22*pass_stride);
+		int passes = cross_denoise? 28:22;
+		float *filter_buffers = new float[passes*pass_stride];
+		memset(filter_buffers, 0, sizeof(float)*passes*pass_stride);
 
 
 		for(int frame = 0; frame < frames; frame++) {
@@ -414,13 +416,27 @@ public:
 
 			/* ==== Step 3: Copy combined color pass. ==== */
 			{
-				int mean_from[]      = {20, 21, 22};
-				int variance_from[]  = {23, 24, 25};
-				int offset_to[]      = {16, 18, 20};
-				for(int i = 0; i < 3; i++) {
-					for(int y = rect.y; y < rect.w; y++) {
-						for(int x = rect.x; x < rect.z; x++) {
-							filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, filter_buffer + offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, &rect.x);
+				if(cross_denoise) {
+					int mean_from[]      = {20, 21, 22, 26, 27, 28};
+					int variance_from[]  = {23, 24, 25, 29, 30, 31};
+					int offset_to[]      = {16, 18, 20, 22, 24, 26};
+					for(int i = 0; i < 6; i++) {
+						for(int y = rect.y; y < rect.w; y++) {
+							for(int x = rect.x; x < rect.z; x++) {
+								filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, filter_buffer + offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, &rect.x);
+							}
+						}
+					}
+				}
+				else {
+					int mean_from[]      = {20, 21, 22};
+					int variance_from[]  = {23, 24, 25};
+					int offset_to[]      = {16, 18, 20};
+					for(int i = 0; i < 3; i++) {
+						for(int y = rect.y; y < rect.w; y++) {
+							for(int x = rect.x; x < rect.z; x++) {
+								filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, filter_buffer + offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, &rect.x);
+							}
 						}
 					}
 				}
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index edafafa..594ed77 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -456,7 +456,7 @@ ccl_device void kernel_filter_final_pass_wlr(KernelGlobals *kg, int sample, floa
 	}
 }
 
-ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float *buffers, float ccl_readonly_ptr transform, CUDAFilterStorage *storage, int4 filter_area, int4 rect, int transform_stride, int localIdx)
+ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float *buffers, float ccl_readonly_ptr transform, CUDAFilterStorage *storage, int4 filter_area, int4 rect, int transform_stride, int localIdx, int weight_ofs, int color_ofs)
 {
 	__shared__ float shared_features[DENOISE_FEATURES*CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH];
 	float *features = shared_features + DENOISE_FEATURES*localIdx;
@@ -464,6 +464,8 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 	int buffer_w = align_up(rect.z - rect.x, 4);
 	int buffer_h = (rect.w - rect.y);
 	int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
+	weight_ofs *= pass_stride;
+	color_ofs *= pass_stride;
 	int num_frames = kernel_data.film.num_frames;
 	int prev_frames = kernel_data.film.prev_frames;
 	/* === Calculate denoising window. === */
@@ -474,8 +476,8 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 	float ccl_readonly_ptr pixel_buffer;
 	/* === Get center pixel. === */
 	float ccl_readonly_ptr center_buffer = buffer + (y - rect.y) * buffer_w + (x - rect.x);
-	float3 center_color  = filter_get_pixel_color(center_buffer, pass_stride);
-	float sqrt_center_variance = sqrtf(filter_get_pixel_variance(center_buffer, pass_stride));
+	float3 center_color  = filter_get_pixel_color(center_buffer + color_ofs, pass_stride);
+	float sqrt_center_variance = sqrtf(filter_get_pixel_variance(center_buffer + color_ofs, pass_stride));
 
 	float feature_means[DENOISE_FEATURES];
 	filter_get_features(x, y, 0, center_buffer, feature_means, NULL, pass_stride);
@@ -502,8 +504,8 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 	math_matrix_zero_lower(XtX, matrix_size);
 
 	FOR_PIXEL_WINDOW {
-		float3 color = filter_get_pixel_color(pixel_buffer, pass_stride);
-		float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
+		float3 color = filter_get_pixel_color(pixel_buffer + color_ofs, pass_stride);
+		float variance = filter_get_pixel_variance(pixel_buffer + color_ofs, pass_stride);
 		if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) {
 			if(cache_idx < WEIGHT_CACHE_SIZE) weight_cache[cache_idx] = 0.0f;
 			continue;
@@ -512,7 +514,7 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 		filter_get_features(px, py, pt, pixel_buffer, features, feature_means, pass_stride);
 		filter_fill_design_row_no_weight_cuda(features, rank, design_row, transform, transform_stride);
 
-		float weight = nlm_weight(x, y, px, py, center_buffer, pixel_buffer, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
+		float weight = nlm_weight(x, y, px, py, center_buffer + weight_ofs, pixel_buffer + weight_ofs, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
 		if(weight < 1e-5f) {
 			if(cache_idx < WEIGHT_CACHE_SIZE) weight_cache[cache_idx] = 0.0f;
 			continue;
@@ -559,14 +561,14 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 		if(cache_idx < WEIGHT_CACHE_SIZE) {
 			weight = weight_cache[cache_idx];
 			if(weight == 0.0f) continue;
-			color = filter_get_pixel_color(pixel_buffer, pass_stride);
+			color = filter_get_pixel_color(pixel_buffer + color_ofs, pass_stride);
 		}
 		else {
-			color = filter_get_pixel_color(pixel_buffer, pass_stride);
-			float variance = filter_get_pixel_variance(pixel_buffer, pass_stride);
+			color = filter_get_pixel_color(pixel_buffer + color_ofs, pass_stride);
+			float variance = filter_get_pixel_variance(pixel_buffer + color_ofs, pass_stride);
 			if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) continue;
 
-			weight = nlm_weight(x, y, px, py, center_buffer, pixel_buffer, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
+			weight = nlm_weight(x, y, px, py, center_buffer + weight_ofs, pixel_buffer + weight_ofs, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
 			if(weight < 1e-5f) continue;
 			weight /= max(1.0f, varianc

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list