[Bf-blender-cvs] [42bd063] soc-2016-cycles_denoising: Cycles: Temporarily add old kernel for comparison

Lukas Stockner noreply at git.blender.org
Sat Sep 10 00:20:42 CEST 2016


Commit: 42bd06306323df702bf108c1b1a7529086ba28f6
Author: Lukas Stockner
Date:   Sat Sep 3 17:09:26 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB42bd06306323df702bf108c1b1a7529086ba28f6

Cycles: Temporarily add old kernel for comparison

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/kernel_filter.h
A	intern/cycles/kernel/kernel_filter_old.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 7b20bfc..5dab9bb 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -419,11 +419,15 @@ public:
 	{
 		void(*filter_estimate_params_kernel)(KernelGlobals*, int, float*, int, int, void*, int4);
 		void(*filter_final_pass_kernel)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, int4, int4);
+		void(*filter_old_1)(KernelGlobals*, float*, int, int, int, int, float, float*, int4);
+		void(*filter_old_2)(KernelGlobals*, float*, float*, int, int, int, int, int, int, float, float*, int4, int4);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
 		if(system_cpu_support_avx2()) {
 			filter_estimate_params_kernel = kernel_cpu_avx2_filter_estimate_params;
 			filter_final_pass_kernel = kernel_cpu_avx2_filter_final_pass;
+			filter_old_1 = kernel_cpu_avx2_filter_old_1;
+			filter_old_2 = kernel_cpu_avx2_filter_old_2;
 		}
 		else
 #endif
@@ -431,6 +435,8 @@ public:
 		if(system_cpu_support_avx()) {
 			filter_estimate_params_kernel = kernel_cpu_avx_filter_estimate_params;
 			filter_final_pass_kernel = kernel_cpu_avx_filter_final_pass;
+			filter_old_1 = kernel_cpu_avx_filter_old_1;
+			filter_old_2 = kernel_cpu_avx_filter_old_2;
 		}
 		else
 #endif
@@ -438,6 +444,8 @@ public:
 		if(system_cpu_support_sse41()) {
 			filter_estimate_params_kernel = kernel_cpu_sse41_filter_estimate_params;
 			filter_final_pass_kernel = kernel_cpu_sse41_filter_final_pass;
+			filter_old_1 = kernel_cpu_sse41_filter_old_1;
+			filter_old_2 = kernel_cpu_sse41_filter_old_2;
 		}
 		else
 #endif
@@ -445,6 +453,8 @@ public:
 		if(system_cpu_support_sse3()) {
 			filter_estimate_params_kernel = kernel_cpu_sse3_filter_estimate_params;
 			filter_final_pass_kernel = kernel_cpu_sse3_filter_final_pass;
+			filter_old_1 = kernel_cpu_sse3_filter_old_1;
+			filter_old_2 = kernel_cpu_sse3_filter_old_2;
 		}
 		else
 #endif
@@ -452,46 +462,83 @@ public:
 		if(system_cpu_support_sse2()) {
 			filter_estimate_params_kernel = kernel_cpu_sse2_filter_estimate_params;
 			filter_final_pass_kernel = kernel_cpu_sse2_filter_final_pass;
+			filter_old_1 = kernel_cpu_sse2_filter_old_1;
+			filter_old_2 = kernel_cpu_sse2_filter_old_2;
 		}
 		else
 #endif
 		{
 			filter_estimate_params_kernel = kernel_cpu_filter_estimate_params;
 			filter_final_pass_kernel = kernel_cpu_filter_final_pass;
+			filter_old_1 = kernel_cpu_filter_old_1;
+			filter_old_2 = kernel_cpu_filter_old_2;
 		}
 
-		FilterStorage *storages = new FilterStorage[filter_area.z*filter_area.w];
+		bool old_filter = getenv("OLD_FILTER");
 
-		for(int y = 0; y < filter_area.w; y++) {
-			for(int x = 0; x < filter_area.z; x++) {
-				filter_estimate_params_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storages + y*filter_area.z + x, rect);
-			}
-		}
+		FilterStorage *storage = new FilterStorage[filter_area.z*filter_area.w];
+		int hw = kg->__data.integrator.half_window;
 
+		if(old_filter) {
+			for(int y = 0; y < filter_area.w; y++) {
+				for(int x = 0; x < filter_area.z; x++) {
+					filter_old_1(kg, filter_buffer, x + filter_area.x, y + filter_area.y, sample, hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), rect);
+				}
+			}
 #ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storages[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
-		for(int i = 0; i < DENOISE_FEATURES; i++) {
-			WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
-			WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
-			WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
-			WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
-		}
-		WRITE_DEBUG("singular_threshold", singular_threshold);
-		WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
-		WRITE_DEBUG("global_bandwidth", global_bandwidth);
-#endif
-		for(int y = 0; y < filter_area.w; y++) {
-			for(int x = 0; x < filter_area.z; x++) {
-				filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storages + y*filter_area.z + x, filter_area, rect);
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
+			for(int i = 0; i < DENOISE_FEATURES; i++) {
+				WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+				WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+				WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+				WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
+			}
+			WRITE_DEBUG("singular_threshold", singular_threshold);
+			WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+			WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+			for(int y = 0; y < filter_area.w; y++) {
+				for(int x = 0; x < filter_area.z; x++) {
+					filter_old_2(kg, buffers, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, sample, hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), rect, filter_area);
+				}
 			}
-		}
 #ifdef WITH_CYCLES_DEBUG_FILTER
-		WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
-		WRITE_DEBUG("sum_weight", sum_weight);
-		WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+			WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+			WRITE_DEBUG("sum_weight", sum_weight);
+			WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
 #undef WRITE_DEBUG
 #endif
-		delete[] storages;
+		} else {
+			for(int y = 0; y < filter_area.w; y++) {
+				for(int x = 0; x < filter_area.z; x++) {
+					filter_estimate_params_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, rect);
+				}
+			}
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
+			for(int i = 0; i < DENOISE_FEATURES; i++) {
+				WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+				WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+				WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+				WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
+			}
+			WRITE_DEBUG("singular_threshold", singular_threshold);
+			WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+			WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+			for(int y = 0; y < filter_area.w; y++) {
+				for(int x = 0; x < filter_area.z; x++) {
+					filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storage + y*filter_area.z + x, filter_area, rect);
+				}
+			}
+#ifdef WITH_CYCLES_DEBUG_FILTER
+			WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+			WRITE_DEBUG("sum_weight", sum_weight);
+			WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+#undef WRITE_DEBUG
+#endif
+		}
+		free(storage);
 	}
 
 	void thread_render(DeviceTask& task)
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index c32541a..e4031f6 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -17,6 +17,8 @@
 #include "kernel_filter_pre.h"
 #include "kernel_filter_util.h"
 
+#include "kernel_filter_old.h"
+
 CCL_NAMESPACE_BEGIN
 
 /* Not all features are included in the matrix norm. */
diff --git a/intern/cycles/kernel/kernel_filter_old.h b/intern/cycles/kernel/kernel_filter_old.h
new file mode 100644
index 0000000..8eb3370
--- /dev/null
+++ b/intern/cycles/kernel/kernel_filter_old.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#define Buf_F(px, py, o) denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*(o)]//(buffers[((y) * w + (x)) * kernel_data.film.pass_stride + (o)])
+#define Buf_F3(px, py, o) make_float3(denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*(o)], denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*((o)+2)], denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*((o)+4)])//(buffers + ((y) * w + (x)) * kernel_data.film.pass_stride + (o)))
+//#define Buf_F4(x, y, o) *((float4*) (buffers + ((y) * w + (x)) * kernel_data.film.pass_stride + (o)))
+
+/* Component-wise saturate for float3: applies the scalar saturate() overload
+ * to x, y and z separately and packs the three results into a new float3. */
+ccl_device float3 saturate(float3 a)
+{
+	const float sx = saturate(a.x);
+	const float sy = saturate(a.y);
+	const float sz = saturate(a.z);
+	return make_float3(sx, sy, sz);
+}
+
+/* Cholesky factorization, computed row by row.
+ *
+ * A: n*n input matrix, indexed as A[row * n + col]. Only the diagonal and the
+ *    entries A[col * n + row] with col < row are read, so A is presumably
+ *    expected to be symmetric (confirm against callers).
+ * n: matrix dimension.
+ * L: n*n output, indexed the same way. Only entries on or below the diagonal
+ *    are written; entries above the diagonal are left untouched.
+ *
+ * There is no guard against a non-positive pivot: a negative argument to
+ * sqrtf() yields NaN and a zero diagonal entry leads to a division by zero. */
+ccl_device void cholesky(float *A, int n, float *L)
+{
+	for (int row = 0; row < n; ++row) {
+		/* Entries left of the diagonal, in increasing column order. Each one
+		 * depends on the previously finished rows and on the entries of this
+		 * row that were computed before it. */
+		for (int col = 0; col < row; ++col) {
+			float dot = 0.0f;
+			for (int k = 0; k < col; ++k) {
+				dot += L[row * n + k] * L[col * n + k];
+			}
+			L[row * n + col] = 1.0f / L[col * n + col] * (A[col * n + row] - dot);
+		}
+
+		/* Diagonal entry: uses the squared entries of this row computed above. */
+		float sq = 0.0f;
+		for (int k = 0; k < row; ++k) {
+			sq += L[row * n + k] * L[row * n + k];
+		}
+		L[row * n + row] = sqrtf(A[row * n + row] - sq);
+	}
+}
+
+ccl_device int old_svd(float *A, float *V, float *S2, int n)
+{
+	int  i, j, k, EstColRank = n, RotCount = n, SweepCount = 0;
+	int slimit = 8;
+	float eps = 1e-8f;
+	float e2 = 10.f * n * eps * eps;
+	float tol = 0.1f * eps;
+	float vt, p, x0, y0, q, r, c0, s0, d1, d2;
+
+	for(int r = 0; r < n; r++)
+		for(int c = 0; c < n; c++)
+			V[r*n+c] = (c == r)? 1.0f: 0.0f;
+
+	while (RotCount != 0 && SweepCount++ <= slimit) {
+		RotCount = EstColRank * (EstColRank - 1) / 2;
+
+		for (j = 0; j < EstColRank-1; ++j) {
+			fo

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list