[Bf-blender-cvs] [42bd063] soc-2016-cycles_denoising: Cycles: Temporarily add old kernel for comparison
Lukas Stockner
noreply at git.blender.org
Sat Sep 10 00:20:42 CEST 2016
Commit: 42bd06306323df702bf108c1b1a7529086ba28f6
Author: Lukas Stockner
Date: Sat Sep 3 17:09:26 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB42bd06306323df702bf108c1b1a7529086ba28f6
Cycles: Temporarily add old kernel for comparison
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/kernel/kernel_filter.h
A intern/cycles/kernel/kernel_filter_old.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 7b20bfc..5dab9bb 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -419,11 +419,15 @@ public:
{
void(*filter_estimate_params_kernel)(KernelGlobals*, int, float*, int, int, void*, int4);
void(*filter_final_pass_kernel)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, int4, int4);
+ void(*filter_old_1)(KernelGlobals*, float*, int, int, int, int, float, float*, int4);
+ void(*filter_old_2)(KernelGlobals*, float*, float*, int, int, int, int, int, int, float, float*, int4, int4);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
filter_estimate_params_kernel = kernel_cpu_avx2_filter_estimate_params;
filter_final_pass_kernel = kernel_cpu_avx2_filter_final_pass;
+ filter_old_1 = kernel_cpu_avx2_filter_old_1;
+ filter_old_2 = kernel_cpu_avx2_filter_old_2;
}
else
#endif
@@ -431,6 +435,8 @@ public:
if(system_cpu_support_avx()) {
filter_estimate_params_kernel = kernel_cpu_avx_filter_estimate_params;
filter_final_pass_kernel = kernel_cpu_avx_filter_final_pass;
+ filter_old_1 = kernel_cpu_avx_filter_old_1;
+ filter_old_2 = kernel_cpu_avx_filter_old_2;
}
else
#endif
@@ -438,6 +444,8 @@ public:
if(system_cpu_support_sse41()) {
filter_estimate_params_kernel = kernel_cpu_sse41_filter_estimate_params;
filter_final_pass_kernel = kernel_cpu_sse41_filter_final_pass;
+ filter_old_1 = kernel_cpu_sse41_filter_old_1;
+ filter_old_2 = kernel_cpu_sse41_filter_old_2;
}
else
#endif
@@ -445,6 +453,8 @@ public:
if(system_cpu_support_sse3()) {
filter_estimate_params_kernel = kernel_cpu_sse3_filter_estimate_params;
filter_final_pass_kernel = kernel_cpu_sse3_filter_final_pass;
+ filter_old_1 = kernel_cpu_sse3_filter_old_1;
+ filter_old_2 = kernel_cpu_sse3_filter_old_2;
}
else
#endif
@@ -452,46 +462,83 @@ public:
if(system_cpu_support_sse2()) {
filter_estimate_params_kernel = kernel_cpu_sse2_filter_estimate_params;
filter_final_pass_kernel = kernel_cpu_sse2_filter_final_pass;
+ filter_old_1 = kernel_cpu_sse2_filter_old_1;
+ filter_old_2 = kernel_cpu_sse2_filter_old_2;
}
else
#endif
{
filter_estimate_params_kernel = kernel_cpu_filter_estimate_params;
filter_final_pass_kernel = kernel_cpu_filter_final_pass;
+ filter_old_1 = kernel_cpu_filter_old_1;
+ filter_old_2 = kernel_cpu_filter_old_2;
}
- FilterStorage *storages = new FilterStorage[filter_area.z*filter_area.w];
+ bool old_filter = getenv("OLD_FILTER");
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_estimate_params_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storages + y*filter_area.z + x, rect);
- }
- }
+ FilterStorage *storage = new FilterStorage[filter_area.z*filter_area.w];
+ int hw = kg->__data.integrator.half_window;
+ if(old_filter) {
+ for(int y = 0; y < filter_area.w; y++) {
+ for(int x = 0; x < filter_area.z; x++) {
+ filter_old_1(kg, filter_buffer, x + filter_area.x, y + filter_area.y, sample, hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), rect);
+ }
+ }
#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storages[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
- for(int i = 0; i < DENOISE_FEATURES; i++) {
- WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
- WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
- WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
- WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
- }
- WRITE_DEBUG("singular_threshold", singular_threshold);
- WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
- WRITE_DEBUG("global_bandwidth", global_bandwidth);
-#endif
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storages + y*filter_area.z + x, filter_area, rect);
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
+ for(int i = 0; i < DENOISE_FEATURES; i++) {
+ WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+ WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+ WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+ WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
+ }
+ WRITE_DEBUG("singular_threshold", singular_threshold);
+ WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+ WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+ for(int y = 0; y < filter_area.w; y++) {
+ for(int x = 0; x < filter_area.z; x++) {
+ filter_old_2(kg, buffers, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, sample, hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), rect, filter_area);
+ }
}
- }
#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
- WRITE_DEBUG("sum_weight", sum_weight);
- WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+ WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+ WRITE_DEBUG("sum_weight", sum_weight);
+ WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
#undef WRITE_DEBUG
#endif
- delete[] storages;
+ } else {
+ for(int y = 0; y < filter_area.w; y++) {
+ for(int x = 0; x < filter_area.z; x++) {
+ filter_estimate_params_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, rect);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
+ for(int i = 0; i < DENOISE_FEATURES; i++) {
+ WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+ WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+ WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+ WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
+ }
+ WRITE_DEBUG("singular_threshold", singular_threshold);
+ WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+ WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+ for(int y = 0; y < filter_area.w; y++) {
+ for(int x = 0; x < filter_area.z; x++) {
+ filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storage + y*filter_area.z + x, filter_area, rect);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+ WRITE_DEBUG("sum_weight", sum_weight);
+ WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+#undef WRITE_DEBUG
+#endif
+ }
+ /* storage is allocated above with new FilterStorage[...], so it must be released with delete[]; calling free() on it is undefined behaviour. */
+ delete[] storage;
}
void thread_render(DeviceTask& task)
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index c32541a..e4031f6 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -17,6 +17,8 @@
#include "kernel_filter_pre.h"
#include "kernel_filter_util.h"
+#include "kernel_filter_old.h"
+
CCL_NAMESPACE_BEGIN
/* Not all features are included in the matrix norm. */
diff --git a/intern/cycles/kernel/kernel_filter_old.h b/intern/cycles/kernel/kernel_filter_old.h
new file mode 100644
index 0000000..8eb3370
--- /dev/null
+++ b/intern/cycles/kernel/kernel_filter_old.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Buffer access helpers.
+ *
+ * Both macros index into denoise_data and expect denoise_data, rect, denoise_stride
+ * and pass_stride to be visible at the point of expansion. The pixel (px, py) is
+ * taken relative to (rect.x, rect.y); o selects a plane that is pass_stride elements
+ * wide. Macro parameters are parenthesised so that arbitrary expressions can be
+ * passed safely.
+ *
+ * Earlier addressing scheme, kept for reference only:
+ *   Buf_F:  (buffers[((y) * w + (x)) * kernel_data.film.pass_stride + (o)])
+ *   Buf_F3: (buffers + ((y) * w + (x)) * kernel_data.film.pass_stride + (o))
+ */
+#define Buf_F(px, py, o) denoise_data[((py) - rect.y)*denoise_stride + ((px) - rect.x) + pass_stride*(o)]
+/* Three-component read: the components come from planes o, o+2 and o+4. */
+#define Buf_F3(px, py, o) make_float3(Buf_F(px, py, o), Buf_F(px, py, (o)+2), Buf_F(px, py, (o)+4))
+//#define Buf_F4(x, y, o) *((float4*) (buffers + ((y) * w + (x)) * kernel_data.film.pass_stride + (o)))
+
+/* Component-wise saturate for float3: applies the scalar saturate() overload
+ * to x, y and z independently and packs the results into a new float3.
+ * NOTE(review): the scalar overload is defined elsewhere; presumably it clamps to [0, 1] - confirm. */
+ccl_device float3 saturate(float3 a)
+{
+	const float sx = saturate(a.x);
+	const float sy = saturate(a.y);
+	const float sz = saturate(a.z);
+	return make_float3(sx, sy, sz);
+}
+
+/* Cholesky factorisation of an n x n matrix stored row-major in A.
+ *
+ * Fills the diagonal and lower triangle of L (also n x n, row-major) row by row;
+ * entries of L above the diagonal are never written. Only the diagonal and the
+ * upper triangle of A are read (A[col*n + row] with col <= row).
+ *
+ * No validation is done: the caller is presumably expected to pass a symmetric
+ * positive-definite A, otherwise the square root / division can yield NaN or Inf.
+ */
+ccl_device void cholesky(float *A, int n, float *L)
+{
+	for(int row = 0; row < n; ++row) {
+		for(int col = 0; col <= row; ++col) {
+			/* Dot product of the already computed leading parts of rows `row` and `col` of L. */
+			float dot = 0.0f;
+			for(int k = 0; k < col; ++k) {
+				dot += L[row * n + k] * L[col * n + k];
+			}
+
+			if(row == col) {
+				/* Diagonal entry. */
+				L[row * n + col] = sqrtf(A[row * n + row] - dot);
+			}
+			else {
+				/* Off-diagonal entry, scaled by the diagonal of the earlier row. */
+				L[row * n + col] = 1.0f / L[col * n + col] * (A[col * n + row] - dot);
+			}
+		}
+	}
+}
+
+ccl_device int old_svd(float *A, float *V, float *S2, int n)
+{
+ int i, j, k, EstColRank = n, RotCount = n, SweepCount = 0;
+ int slimit = 8;
+ float eps = 1e-8f;
+ float e2 = 10.f * n * eps * eps;
+ float tol = 0.1f * eps;
+ float vt, p, x0, y0, q, r, c0, s0, d1, d2;
+
+ for(int r = 0; r < n; r++)
+ for(int c = 0; c < n; c++)
+ V[r*n+c] = (c == r)? 1.0f: 0.0f;
+
+ while (RotCount != 0 && SweepCount++ <= slimit) {
+ RotCount = EstColRank * (EstColRank - 1) / 2;
+
+ for (j = 0; j < EstColRank-1; ++j) {
+ fo
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list