[Bf-blender-cvs] [aabc71c] soc-2016-cycles_denoising: Cycles: Remove old WLR filter
Lukas Stockner
noreply at git.blender.org
Tue Nov 22 04:25:41 CET 2016
Commit: aabc71c6bbd8ca6e0e3d5fdb831eaa68eecd29de
Author: Lukas Stockner
Date: Tue Nov 22 03:31:03 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBaabc71c6bbd8ca6e0e3d5fdb831eaa68eecd29de
Cycles: Remove old WLR filter
===================================================================
M intern/cycles/device/device_cpu.cpp
D intern/cycles/kernel/kernel_filter_old.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 61c1e75..1d1c201 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -141,10 +141,8 @@ public:
KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, void*, int*)> filter_estimate_wlr_params_kernel;
KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)> filter_final_pass_wlr_kernel;
KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)> filter_final_pass_nlm_kernel;
+ KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)> filter_divide_combined_kernel;
KernelFunctions<void(*)(int, int, float**, float**, float**, float**, int*, int, int, float, float)> filter_non_local_means_3_kernel;
- KernelFunctions<void(*)(KernelGlobals*, float*, int, int, int, int, float, float*, int*)> filter_old_1_kernel;
- KernelFunctions<void(*)(KernelGlobals*, float*, float*, int, int, int, int, int, int, float, float*, int*, int*)> filter_old_2_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)> filter_divide_combined_kernel;
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), \
@@ -169,9 +167,7 @@ public:
filter_final_pass_wlr_kernel(KERNEL_FUNCTIONS(filter_final_pass_wlr)),
filter_final_pass_nlm_kernel(KERNEL_FUNCTIONS(filter_final_pass_nlm)),
filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
- filter_non_local_means_3_kernel(KERNEL_FUNCTIONS(filter_non_local_means_3)),
- filter_old_1_kernel(KERNEL_FUNCTIONS(filter_old_1)),
- filter_old_2_kernel(KERNEL_FUNCTIONS(filter_old_2))
+ filter_non_local_means_3_kernel(KERNEL_FUNCTIONS(filter_non_local_means_3))
{
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
@@ -448,7 +444,6 @@ public:
void denoise_run(KernelGlobals *kg, int sample, float *filter_buffer, int4 filter_area, int4 rect, int offset, int stride, float *buffers)
{
- bool old_filter = getenv("OLD_FILTER");
bool only_nlm_filter = getenv("ONLY_NLM_FILTER");
bool use_gradients = kg->__data.integrator.use_gradients;
bool nlm_weights = kg->__data.integrator.use_nlm_weights;
@@ -460,37 +455,7 @@ public:
int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
int pass_stride = w*h;
- if(old_filter) {
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_old_1_kernel()(kg, filter_buffer, x + filter_area.x, y + filter_area.y, sample, hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), &rect.x);
- }
- }
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
- for(int i = 0; i < DENOISE_FEATURES; i++) {
- WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
- WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
- WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
- WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
- }
- WRITE_DEBUG("singular_threshold", singular_threshold);
- WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
- WRITE_DEBUG("global_bandwidth", global_bandwidth);
-#endif
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_old_2_kernel()(kg, buffers, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, sample, hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), &rect.x, &filter_area.x);
- }
- }
-#ifdef WITH_CYCLES_DEBUG_FILTER
- WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
- WRITE_DEBUG("sum_weight", sum_weight);
- WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
-#undef WRITE_DEBUG
-#endif
- }
- else if(only_nlm_filter) {
+ if(only_nlm_filter) {
float *img[3] = {filter_buffer + 16*pass_stride, filter_buffer + 18*pass_stride, filter_buffer + 20*pass_stride};
float *var[3] = {filter_buffer + 17*pass_stride, filter_buffer + 19*pass_stride, filter_buffer + 21*pass_stride};
float *out[3] = {filter_buffer + 0*pass_stride, filter_buffer + 1*pass_stride, filter_buffer + 2*pass_stride};
diff --git a/intern/cycles/kernel/kernel_filter_old.h b/intern/cycles/kernel/kernel_filter_old.h
deleted file mode 100644
index fee11c9..0000000
--- a/intern/cycles/kernel/kernel_filter_old.h
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-#define Buf_F(px, py, o) denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*(o)]//(buffers[((y) * w + (x)) * kernel_data.film.pass_stride + (o)])
-#define Buf_F3(px, py, o) make_float3(denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*(o)], denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*((o)+2)], denoise_data[(py - rect.y)*denoise_stride + (px - rect.x) + pass_stride*((o)+4)])//(buffers + ((y) * w + (x)) * kernel_data.film.pass_stride + (o)))
-//#define Buf_F4(x, y, o) *((float4*) (buffers + ((y) * w + (x)) * kernel_data.film.pass_stride + (o)))
-
-ccl_device float3 saturate(float3 a)
-{
- return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
-}
-
-ccl_device void cholesky(float *A, int n, float *L)
-{
- for (int i = 0; i < n; ++i) {
- for (int j = 0; j <= i; ++j) {
- float s = 0.0f;
- for (int k = 0; k < j; ++k) {
- s += L[i * n + k] * L[j * n + k];
- }
- L[i * n + j] = (i == j) ? sqrtf(A[i * n + i] - s) : (1.0f / L[j * n + j] * (A[j * n + i] - s));
- }
- }
-}
-
-ccl_device int old_svd(float *A, float *V, float *S2, int n)
-{
- int i, j, k, EstColRank = n, RotCount = n, SweepCount = 0;
- int slimit = 8;
- float eps = 1e-8f;
- float e2 = 10.f * n * eps * eps;
- float tol = 0.1f * eps;
- float vt, p, x0, y0, q, r, c0, s0, d1, d2;
-
- for(int r = 0; r < n; r++)
- for(int c = 0; c < n; c++)
- V[r*n+c] = (c == r)? 1.0f: 0.0f;
-
- while (RotCount != 0 && SweepCount++ <= slimit) {
- RotCount = EstColRank * (EstColRank - 1) / 2;
-
- for (j = 0; j < EstColRank-1; ++j) {
- for (k = j+1; k < EstColRank; ++k) {
- p = q = r = 0.0;
-
- for (i = 0; i < n; ++i) {
- x0 = A[i * n + j];
- y0 = A[i * n + k];
- p += x0 * y0;
- q += x0 * x0;
- r += y0 * y0;
- }
-
- S2[j] = q;
- S2[k] = r;
-
- if (q >= r) {
- if (q <= e2 * S2[0] || fabsf(p) <= tol * q) {
- RotCount--;
- }
- else {
- p /= q;
- r = 1.f - r/q;
- vt = sqrtf(4.0f * p * p + r * r);
- c0 = sqrtf(0.5f * (1.f + r / vt));
- s0 = p / (vt*c0);
-
- // Rotation
- for (i = 0; i < n; ++i) {
- d1 = A[i * n + j];
- d2 = A[i * n + k];
- A[i * n + j] = d1*c0+d2*s0;
- A[i * n + k] = -d1*s0+d2*c0;
- }
- for (i = 0; i < n; ++i) {
- d1 = V[i * n + j];
- d2 = V[i * n + k];
- V[i * n + j] = d1 * c0 + d2 * s0;
- V[i * n + k] = -d1 * s0 + d2 * c0;
- }
- }
- } else {
- p /= r;
- q = q / r - 1.f;
- vt = sqrtf(4.f * p * p + q * q);
- s0 = sqrtf(0.5f * (1.f - q / vt));
- if (p < 0.f)
- s0 = -s0;
- c0 = p / (vt * s0);
-
- // Rotation
- for (i = 0; i < n; ++i) {
- d1 = A[i * n + j];
- d2 = A[i * n + k];
- A[i * n + j] = d1 * c0 + d2 * s0;
- A[i * n + k] = -d1 * s0 + d2 * c0;
- }
- for (i = 0; i < n; ++i) {
- d1 = V[i * n + j];
- d2 = V[i * n + k];
- V[i * n + j] = d1 * c0 + d2 * s0;
- V[i * n + k] = -d1 * s0 + d2 * c0;
- }
- }
- }
- }
- while (EstColRank >= 3 && S2[EstColRank-1] <= S2[0] * tol + tol * tol)
- EstColRank--;
- }
- return EstColRank;
-}
-
-ccl_device void kernel_filter1_pixel(KernelGlobals *kg, float ccl_readonly_ptr denoise_data, int x, int y, int samples, int halfWindow, float bandwidthFactor, float* storage2, int4 rect)
-{
- FilterStorage *storage = (FilterStorage*) storage2;
- int2 lo = make_int2(max(x - halfWindow, rect.x), max(y - halfWindow, rect.y));
- int2 hi = make_int2(min(x + halfWindow, rect.z-1), min(y + halfWindow, rect.w-1));
- int num = (hi.x - lo.x + 1) * (hi.y - lo.y + 1);
- int denoise_stride = align_up(rect.z-rect.x, 4);
- int pass_stride = (rect.w-rect.y)*denoise_stride;
-
- float3 meanT = make_float3(0.0f, 0.0f, 0.0f);
- float3 meanN = make_float3(0.0f, 0.0f, 0.0f);
- float meanD = 0.0f, meanS = 0.0f;
-
- for(int py = lo.y; py <= hi.y; py++) {
- for(int px = lo.x; px <= hi.x; px++) {
- meanD += Buf_F (px, py, 6);
- meanN += Buf_F3(px, py, 0);
- meanT += Buf_F3(px, py, 10);
- meanS += Buf_F (px, py, 8);
- }
- }
- meanT /= num;
- meanN /= num;
- meanD /= num;
- meanS /= num;
- float delta[11], transform[121], norm;
- int rank;
- /* Generate transform */
- {
- float nD = 0.0f, nT = 0.0f, nN = 0.0f, nS = 0.0f;
- for(int py = lo.y; py <= hi.y; py++) {
- for(int px = lo.x; px <= hi.x; px++) {
- nD = max(fabsf(Buf_F(px, py, 6) - meanD), nD);
- nN = max(len_squared(Buf_F3(px, py, 0) - meanN), nN);
- nT = max(len_squared(Buf_F3(px, py, 10) - meanT), nT);
- nS = max(fabsf(Buf_F(px, py, 8) - meanS), nS);
- }
- }
-
- nD = 1.0f / max(nD, 0.01f);
- nN = 1.0f / max(sqrtf(nN), 0.01f);
- nT = 1.0f / max(sqrtf(nT), 0.01f);
- nS = 1.0f / max(nS, 0.01f);
-
- norm = 0.0f;
- for(int i = 0; i
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list