[Bf-blender-cvs] [005d83a] soc-2016-cycles_denoising: Cycles: Add additional debugging output containing the estimated rMSE reduction per sample
Lukas Stockner
noreply at git.blender.org
Fri Jul 8 04:31:37 CEST 2016
Commit: 005d83a0bb076d280837f16f9bc435c4d6b4b1de
Author: Lukas Stockner
Date: Thu Jul 7 20:12:35 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB005d83a0bb076d280837f16f9bc435c4d6b4b1de
Cycles: Add additional debugging output containing the estimated rMSE reduction per sample
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_passes.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/util/util_math_matrix.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c296c17..1f7e750 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -345,17 +345,21 @@ public:
}
}
#ifdef WITH_CYCLES_DEBUG_FILTER
+/* Dumps one float member of the per-pixel FilterStorage array to "debug_<x>x<y>_<name>.pfm".
+ * The macro appends the ".pfm" extension itself, so names must be passed without it. */
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", tile.x, tile.y, name).c_str(), &storages[0].var, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
for(int i = 0; i < DENOISE_FEATURES; i++) {
- debug_write_pfm(string_printf("debug_%dx%d_mean_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].means[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_scale_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].scales[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_singular_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].singular[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_bandwidth_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].bandwidth[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+ WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+ WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+ WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+ WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
}
- debug_write_pfm(string_printf("debug_%dx%d_singular_threshold.pfm", tile.x, tile.y).c_str(), &storages[0].singular_threshold, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_feature_matrix_norm.pfm", tile.x, tile.y).c_str(), &storages[0].feature_matrix_norm, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_filtered_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].filtered_global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
- debug_write_pfm(string_printf("debug_%dx%d_sum_weight.pfm", tile.x, tile.y).c_str(), &storages[0].sum_weight, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+ WRITE_DEBUG("singular_threshold", singular_threshold);
+ WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+ WRITE_DEBUG("global_bandwidth", global_bandwidth);
+ WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+ WRITE_DEBUG("sum_weight", sum_weight);
+ /* Estimated log rMSE reduction per sample (new debug output of this commit). */
+ WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+#undef WRITE_DEBUG
#endif
tile.sample = sample;
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 912465b..c0bf5c1 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -831,13 +831,22 @@ public:
#ifdef WITH_CYCLES_DEBUG_FILTER
FilterStorage *host_storage = new FilterStorage[filter_w*filter_h];
cuda_assert(cuMemcpyDtoH(host_storage, d_storage, sizeof(FilterStorage)*filter_w*filter_h));
- std::string prefix = string_printf("debug_%dx%d_cuda", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan);
- for(int i = 0; i < DENOISE_FEATURES; i++)
- debug_write_pfm(string_printf("%s_bandwidth_%d.pfm", prefix.c_str(), i).c_str(), &host_storage[0].bandwidth[i], filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
- debug_write_pfm(string_printf("%s_global_bandwidth.pfm", prefix.c_str()).c_str(), &host_storage[0].global_bandwidth, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
- debug_write_pfm(string_printf("%s_filtered_global_bandwidth.pfm", prefix.c_str()).c_str(), &host_storage[0].filtered_global_bandwidth, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
- debug_write_pfm(string_printf("%s_sum_weight.pfm", prefix.c_str()).c_str(), &host_storage[0].sum_weight, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
+/* Dumps one float member of the host copy of the FilterStorage array to "debug_<x>x<y>_cuda_<name>.pfm".
+ * The macro appends the ".pfm" extension itself, so names must be passed without it. */
+#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), &host_storage[0].var, filter_w, filter_h, sizeof(FilterStorage)/sizeof(float), filter_w);
+ for(int i = 0; i < DENOISE_FEATURES; i++) {
+ WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+ WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+ WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+ WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
+ }
+ WRITE_DEBUG("singular_threshold", singular_threshold);
+ WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+ WRITE_DEBUG("global_bandwidth", global_bandwidth);
+ WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+ WRITE_DEBUG("sum_weight", sum_weight);
+ /* Estimated log rMSE reduction per sample (new debug output of this commit). */
+ WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
delete[] host_storage;
+#undef WRITE_DEBUG
#endif
cuda_assert(cuMemFree(d_storage));
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 00c6509..ad3d256 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -363,12 +363,21 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
/* === Estimate optimal global bandwidth. === */
- double bias_coef = math_lsq_solve(lsq_bias);
- double variance_coef = math_lsq_solve(lsq_variance);
+ double bias_coef = math_lsq_solve(lsq_bias, NULL);
+ double variance_zeroth;
+ double variance_coef = math_lsq_solve(lsq_variance, &variance_zeroth);
+ if(variance_coef < 0.0) {
+ variance_coef = -variance_coef;
+ variance_zeroth = 0.0;
+ }
float optimal_bw = (float) pow((rank * variance_coef) / (4.0 * bias_coef*bias_coef * sample), 1.0 / (rank + 4));
-
-
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ double h2 = ((double) optimal_bw) * ((double) optimal_bw);
+ double bias = bias_coef*h2;
+ double variance = (variance_zeroth + variance_coef*pow(optimal_bw, -rank)) / sample;
+ storage->log_rmse_per_sample = ( (float) log(max(bias*bias + variance, 1e-20)) - 4.0f*logf(sample)/(rank + 4) );
+#endif
/* === Store the calculated data for the second kernel. === */
storage->rank = rank;
@@ -504,6 +513,10 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float **
center_buffer[0] = final_color.x;
center_buffer[1] = final_color.y;
center_buffer[2] = final_color.z;
+
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ storage->log_rmse_per_sample -= 2.0f * logf(linear_rgb_to_gray(final_color) + 0.001f);
+#endif
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index b80bdbc..18db922 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -161,7 +161,7 @@ ccl_device_inline void kernel_write_denoising_passes(KernelGlobals *kg, ccl_glob
else {
kernel_write_pass_float3_var(buffer, sample, make_float3(0.0f, 0.0f, 0.0f));
kernel_write_pass_float3_var(buffer + 6, sample, world_albedo);
- kernel_write_pass_float_var(buffer + 12, sample, 1e10f);
+ kernel_write_pass_float_var(buffer + 12, sample, 0.0f);
}
state->flag |= PATH_RAY_DENOISING_PASS_DONE;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 15dbcae..7ad03d8 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1286,6 +1286,7 @@ typedef struct FilterStorage {
float sum_weight;
float means[DENOISE_FEATURES], scales[DENOISE_FEATURES], singular[DENOISE_FEATURES];
float singular_threshold, feature_matrix_norm;
+ float log_rmse_per_sample;
#endif
} FilterStorage;
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
index cb426dc..ba81e9f 100644
--- a/intern/cycles/util/util_math_matrix.h
+++ b/intern/cycles/util/util_math_matrix.h
@@ -252,9 +252,11 @@ ccl_device_inline void math_lsq_add(double *lsq, double x, double y)
}
/* Returns the first-order coefficient a of the fitted function. */
+/* If zeroth is non-NULL, the zeroth-order coefficient of the same fit is written to it.
+ * Both values are the Cramer's-rule solution of the symmetric 2x2 system
+ *   | lsq[0] lsq[1] |   | zeroth |   | lsq[3] |
+ *   | lsq[1] lsq[2] | * | a      | = | lsq[4] |
+ * (layout inferred from the expression for a below; confirm against math_lsq_add).
+ * The 1e-4 term keeps the division finite when the determinant is zero. */
-ccl_device_inline double math_lsq_solve(double *lsq)
+ccl_device_inline double math_lsq_solve(double *lsq, double *zeroth)
{
double inv_det = 1.0 / (lsq[0]*lsq[2] - lsq[1]*lsq[1] + 1e-4);
+ if(zeroth)
+ *zeroth = (lsq[2]*lsq[3] - lsq[1]*lsq[4]) * inv_det;
return (lsq[0]*lsq[4] - lsq[1]*lsq[3]) * inv_det;
}
More information about the Bf-blender-cvs
mailing list