[Bf-blender-cvs] [20f67b1] soc-2016-cycles_denoising: Cycles: Move collaborative division into the kernel
Lukas Stockner
noreply at git.blender.org
Tue Nov 22 04:25:21 CET 2016
Commit: 20f67b18c856cf159a3aa0ccf6df5c40663fda92
Author: Lukas Stockner
Date: Wed Nov 16 15:57:06 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB20f67b18c856cf159a3aa0ccf6df5c40663fda92
Cycles: Move collaborative division into the kernel
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_filter_pre.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index be9bb1d..3465a10 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -144,6 +144,7 @@ public:
KernelFunctions<void(*)(int, int, float**, float**, float**, float**, int*, int, int, float, float)> filter_non_local_means_3_kernel;
KernelFunctions<void(*)(KernelGlobals*, float*, int, int, int, int, float, float*, int*)> filter_old_1_kernel;
KernelFunctions<void(*)(KernelGlobals*, float*, float*, int, int, int, int, int, int, float, float*, int*, int*)> filter_old_2_kernel;
+ KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)> filter_divide_combined_kernel;
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), \
@@ -167,6 +168,7 @@ public:
filter_estimate_wlr_params_kernel(KERNEL_FUNCTIONS(filter_estimate_wlr_params)),
filter_final_pass_wlr_kernel(KERNEL_FUNCTIONS(filter_final_pass_wlr)),
filter_final_pass_nlm_kernel(KERNEL_FUNCTIONS(filter_final_pass_nlm)),
+ filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
filter_non_local_means_3_kernel(KERNEL_FUNCTIONS(filter_non_local_means_3)),
filter_old_1_kernel(KERNEL_FUNCTIONS(filter_old_1)),
filter_old_2_kernel(KERNEL_FUNCTIONS(filter_old_2))
@@ -441,17 +443,6 @@ public:
int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
int pass_stride = w*h;
- if(use_collaborative_filtering) {
- for(int y = 0; y < filter_area.w; y++) {
- int py = y + filter_area.y;
- for(int x = 0; x < filter_area.z; x++) {
- int px = x + filter_area.x;
- float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
- p_buffers[0] = p_buffers[1] = p_buffers[2] = p_buffers[3] = 0.0f;
- }
- }
- }
-
if(old_filter) {
for(int y = 0; y < filter_area.w; y++) {
for(int x = 0; x < filter_area.z; x++) {
@@ -545,18 +536,11 @@ public:
if(use_collaborative_filtering) {
for(int y = 0; y < filter_area.w; y++) {
- int py = y + filter_area.y;
for(int x = 0; x < filter_area.z; x++) {
- int px = x + filter_area.x;
- float *p_buffers = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
- float fac = sample / p_buffers[3];
- p_buffers[0] *= fac;
- p_buffers[1] *= fac;
- p_buffers[2] *= fac;
- p_buffers[3] *= fac;
+ filter_divide_combined_kernel()(kg, x + filter_area.x, y + filter_area.y, sample, buffers, offset, stride);
}
}
- }
+ }
delete[] storage;
}
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index ea065f9..4f6290f 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -1500,4 +1500,11 @@ ccl_device void kernel_filter_final_pass_nlm(KernelGlobals *kg, int sample, floa
#endif // __KERNEL_CUDA__
+ccl_device void kernel_filter_divide_combined(KernelGlobals *kg, int x, int y, int sample, float *buffers, int offset, int stride)
+{
+ float4 *combined_buffer = (float4*) (buffers + (offset + y*stride + x)*kernel_data.film.pass_stride);
+ float fac = sample / combined_buffer->w;
+ *combined_buffer = *combined_buffer * fac;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_filter_pre.h b/intern/cycles/kernel/kernel_filter_pre.h
index 3f8b5b5..93d424f 100644
--- a/intern/cycles/kernel/kernel_filter_pre.h
+++ b/intern/cycles/kernel/kernel_filter_pre.h
@@ -30,7 +30,12 @@ ccl_device void kernel_filter_divide_shadow(KernelGlobals *kg, int sample, float
int xtile = (x < tile_x[1])? 0: ((x < tile_x[2])? 1: 2);
int ytile = (y < tile_y[1])? 0: ((y < tile_y[2])? 1: 2);
int tile = ytile*3+xtile;
- float ccl_readonly_ptr center_buffer = buffers[tile] + (offset[tile] + y*stride[tile] + x)*kernel_data.film.pass_stride + kernel_data.film.pass_denoising;
+ float *center_buffer = buffers[tile] + (offset[tile] + y*stride[tile] + x)*kernel_data.film.pass_stride;
+
+ if(kernel_data.integrator.use_collaborative_filtering) {
+ center_buffer[0] = center_buffer[1] = center_buffer[2] = center_buffer[3] = 0.0f;
+ }
+ center_buffer += kernel_data.film.pass_denoising;
int buffer_w = align_up(rect.z - rect.x, 4);
int idx = (y-rect.y)*buffer_w + (x - rect.x);
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 478f2fa..5aaa20e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -145,6 +145,13 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass_nlm)(KernelGlobals *kg,
int* filter_area,
int* rect);
+void KERNEL_FUNCTION_FULL_NAME(filter_divide_combined)(KernelGlobals *kg,
+ int x, int y,
+ int sample,
+ float *buffers,
+ int offset,
+ int stride);
+
void KERNEL_FUNCTION_FULL_NAME(filter_old_1)(KernelGlobals *kg,
float *denoise_data,
int x, int y,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index cc95a10..c5b9112 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -326,6 +326,20 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass_nlm)(KernelGlobals *kg,
#endif
}
+void KERNEL_FUNCTION_FULL_NAME(filter_divide_combined)(KernelGlobals *kg,
+ int x, int y,
+ int sample,
+ float *buffers,
+ int offset,
+ int stride)
+{
+#ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, filter_divide_combined);
+#else
+ kernel_filter_divide_combined(kg, x, y, sample, buffers, offset, stride);
+#endif
+}
+
void KERNEL_FUNCTION_FULL_NAME(filter_old_1)(KernelGlobals *kg,
float *denoise_data,
int x, int y,
More information about the Bf-blender-cvs mailing list