[Bf-blender-cvs] [20f67b1] soc-2016-cycles_denoising: Cycles: Move collaborative division into the kernel

Lukas Stockner noreply at git.blender.org
Tue Nov 22 04:25:21 CET 2016


Commit: 20f67b18c856cf159a3aa0ccf6df5c40663fda92
Author: Lukas Stockner
Date:   Wed Nov 16 15:57:06 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB20f67b18c856cf159a3aa0ccf6df5c40663fda92

Cycles: Move collaborative division into the kernel

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/kernel_filter.h
M	intern/cycles/kernel/kernel_filter_pre.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index be9bb1d..3465a10 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -144,6 +144,7 @@ public:
 	KernelFunctions<void(*)(int, int, float**, float**, float**, float**, int*, int, int, float, float)>              filter_non_local_means_3_kernel;
 	KernelFunctions<void(*)(KernelGlobals*, float*, int, int, int, int, float, float*, int*)>                         filter_old_1_kernel;
 	KernelFunctions<void(*)(KernelGlobals*, float*, float*, int, int, int, int, int, int, float, float*, int*, int*)> filter_old_2_kernel;
+	KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)>                                          filter_divide_combined_kernel;
 
 #define KERNEL_FUNCTIONS(name) \
 	      KERNEL_NAME_EVAL(cpu, name), \
@@ -167,6 +168,7 @@ public:
 	  filter_estimate_wlr_params_kernel(KERNEL_FUNCTIONS(filter_estimate_wlr_params)),
 	  filter_final_pass_wlr_kernel(KERNEL_FUNCTIONS(filter_final_pass_wlr)),
 	  filter_final_pass_nlm_kernel(KERNEL_FUNCTIONS(filter_final_pass_nlm)),
+	  filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
 	  filter_non_local_means_3_kernel(KERNEL_FUNCTIONS(filter_non_local_means_3)),
 	  filter_old_1_kernel(KERNEL_FUNCTIONS(filter_old_1)),
 	  filter_old_2_kernel(KERNEL_FUNCTIONS(filter_old_2))
@@ -441,17 +443,6 @@ public:
 		int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
 		int pass_stride = w*h;
 
-		if(use_collaborative_filtering) {
-			for(int y = 0; y < filter_area.w; y++) {
-				int py = y + filter_area.y;
-				for(int x = 0; x < filter_area.z; x++) {
-					int px = x + filter_area.x;
-					float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
-					p_buffers[0] = p_buffers[1] = p_buffers[2] = p_buffers[3] = 0.0f;
-				}
-			}
-		}
-
 		if(old_filter) {
 			for(int y = 0; y < filter_area.w; y++) {
 				for(int x = 0; x < filter_area.z; x++) {
@@ -545,18 +536,11 @@ public:
 
 		if(use_collaborative_filtering) {
 			for(int y = 0; y < filter_area.w; y++) {
-				int py = y + filter_area.y;
 				for(int x = 0; x < filter_area.z; x++) {
-					int px = x + filter_area.x;
-					float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
-					float fac = sample / p_buffers[3];
-					p_buffers[0] *= fac;
-					p_buffers[1] *= fac;
-					p_buffers[2] *= fac;
-					p_buffers[3] *= fac;
+					filter_divide_combined_kernel()(kg, x + filter_area.x, y + filter_area.y, sample, buffers, offset, stride);
 				}
 			}
- 		}
+		}
 
 		delete[] storage;
 	}
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index ea065f9..4f6290f 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -1500,4 +1500,11 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
 
 #endif // __KERNEL_CUDA__
 
+ccl_device void kernel_filter_divide_combined(KernelGlobals *kg, int x, int y, int sample, float *buffers, int offset, int stride)
+{
+	float4 *combined_buffer = (float4*) (buffers + (offset + y*stride + x)*kernel_data.film.pass_stride);
+	float fac = sample / combined_buffer->w;
+	*combined_buffer = *combined_buffer * fac;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_filter_pre.h b/intern/cycles/kernel/kernel_filter_pre.h
index 3f8b5b5..93d424f 100644
--- a/intern/cycles/kernel/kernel_filter_pre.h
+++ b/intern/cycles/kernel/kernel_filter_pre.h
@@ -30,7 +30,12 @@ ccl_device void kernel_filter_divide_shadow(KernelGlobals *kg, int sample, float
 	int xtile = (x < tile_x[1])? 0: ((x < tile_x[2])? 1: 2);
 	int ytile = (y < tile_y[1])? 0: ((y < tile_y[2])? 1: 2);
 	int tile = ytile*3+xtile;
-	float ccl_readonly_ptr center_buffer = buffers[tile] + (offset[tile] + y*stride[tile] + x)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
+	float *center_buffer = buffers[tile] + (offset[tile] + y*stride[tile] + x)*kernel_data.film.pass_stride;
+
+	if(kernel_data.integrator.use_collaborative_filtering) {
+		center_buffer[0] = center_buffer[1] = center_buffer[2] = center_buffer[3] = 0.0f;
+	}
+	center_buffer += kernel_data.film.pass_denoising;
 
 	int buffer_w = align_up(rect.z - rect.x, 4);
 	int idx = (y-rect.y)*buffer_w + (x - rect.x);
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 478f2fa..5aaa20e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -145,6 +145,13 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass_nlm)(KernelGlobals *kg,
                                                   int* filter_area,
                                                   int* rect);
 
+void KERNEL_FUNCTION_FULL_NAME(filter_divide_combined)(KernelGlobals *kg,
+                                                       int x, int y,
+                                                       int sample,
+                                                       float *buffers,
+                                                       int offset,
+                                                       int stride);
+
 void KERNEL_FUNCTION_FULL_NAME(filter_old_1)(KernelGlobals *kg,
                                              float *denoise_data,
                                              int x, int y,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index cc95a10..c5b9112 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -326,6 +326,20 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass_nlm)(KernelGlobals *kg,
 #endif
 }
 
+void KERNEL_FUNCTION_FULL_NAME(filter_divide_combined)(KernelGlobals *kg,
+                                                       int x, int y,
+                                                       int sample,
+                                                       float *buffers,
+                                                       int offset,
+                                                       int stride)
+{
+#ifdef KERNEL_STUB
+	STUB_ASSERT(KERNEL_ARCH, filter_divide_combined);
+#else
+	kernel_filter_divide_combined(kg, x, y, sample, buffers, offset, stride);
+#endif
+}
+
 void KERNEL_FUNCTION_FULL_NAME(filter_old_1)(KernelGlobals *kg,
                                              float *denoise_data,
                                              int x, int y,




More information about the Bf-blender-cvs mailing list