[Bf-blender-cvs] [8dcf23b] soc-2016-cycles_denoising: Cycles: Fix various issues with the denoising debug passes
Lukas Stockner
noreply at git.blender.org
Sun Aug 21 06:18:23 CEST 2016
Commit: 8dcf23bb31c4d4aa8cd9405229b2be4d7ec7a08e
Author: Lukas Stockner
Date: Sun Aug 21 05:02:31 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB8dcf23bb31c4d4aa8cd9405229b2be4d7ec7a08e
Cycles: Fix various issues with the denoising debug passes
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_filter.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 3f4bea6..7b20bfc 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -467,15 +467,10 @@ public:
filter_estimate_params_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storages + y*filter_area.z + x, rect);
}
}
- for(int y = 0; y < filter_area.w; y++) {
- for(int x = 0; x < filter_area.z; x++) {
- filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storages + y*filter_area.z + x, filter_area, rect);
- }
- }
#ifdef WITH_CYCLES_DEBUG_FILTER
#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storages[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
- for(int i = 0; i < DENOISE_FEATURES; i++) {
+ for(int i = 0; i < DENOISE_FEATURES; i++) {
WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
@@ -484,6 +479,13 @@ public:
WRITE_DEBUG("singular_threshold", singular_threshold);
WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+ for(int y = 0; y < filter_area.w; y++) {
+ for(int x = 0; x < filter_area.z; x++) {
+ filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storages + y*filter_area.z + x, filter_area, rect);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
WRITE_DEBUG("sum_weight", sum_weight);
WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0dcbad8..078c22b 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1063,7 +1063,7 @@ public:
#define WRITE_DEBUG(name, pass) debug_write_pfm(string_printf("debug_%dx%d_cuda_feature%d_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, i, name).c_str(), host_denoise_buffer+pass*pass_stride, rtile.w, rtile.h, 1, w)
float *host_denoise_buffer = new float[22*pass_stride];
cuda_assert(cuMemcpyDtoH(host_denoise_buffer, d_denoise_buffer, 22*pass_stride*sizeof(float)));
- for(int i = 0; i < 11; i++) {
+ for(int i = 0; i < 8; i++) {
WRITE_DEBUG("filtered", 2*i);
WRITE_DEBUG("variance", 2*i+1);
}
@@ -1140,18 +1140,17 @@ public:
cuda_assert(cuMemcpyDtoH(host_storage, d_storage, sizeof(FilterStorage)*filter_area.z*filter_area.w));
#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), &host_storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
for(int i = 0; i < DENOISE_FEATURES; i++) {
- WRITE_DEBUG(string_printf("mean_%d.pfm", i).c_str(), means[i]);
- WRITE_DEBUG(string_printf("scale_%d.pfm", i).c_str(), scales[i]);
- WRITE_DEBUG(string_printf("singular_%d.pfm", i).c_str(), singular[i]);
- WRITE_DEBUG(string_printf("bandwidth_%d.pfm", i).c_str(), bandwidth[i]);
+ WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+ WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+ WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+ WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
}
WRITE_DEBUG("singular_threshold", singular_threshold);
- WRITE_DEBUG("singular_threshold.pfm", singular_threshold);
- WRITE_DEBUG("feature_matrix_norm.pfm", feature_matrix_norm);
- WRITE_DEBUG("global_bandwidth.pfm", global_bandwidth);
- WRITE_DEBUG("filtered_global_bandwidth.pfm", filtered_global_bandwidth);
- WRITE_DEBUG("sum_weight.pfm", sum_weight);
- WRITE_DEBUG("log_rmse_per_sample.pfm", log_rmse_per_sample);
+ WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+ WRITE_DEBUG("global_bandwidth", global_bandwidth);
+ WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+ WRITE_DEBUG("sum_weight", sum_weight);
+ WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
delete[] host_storage;
#undef WRITE_DEBUG
#endif
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index a321fd3..b2f93b8 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -318,10 +318,7 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float co
filter_get_features(x, y, 0, center_buffer, feature_means, NULL, pass_stride);
-
-
/* === Fetch stored data from the previous kernel. === */
- float *bandwidth_factor = &storage->bandwidth[0];
int rank = storage->rank;
/* Apply a median filter to the 3x3 window aroung the current pixel. */
int sort_idx = 0;
@@ -343,6 +340,12 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float co
}
float global_bandwidth = global_bandwidths[sort_idx/2];
+ float bandwidth_factor[DENOISE_FEATURES];
+ for(int i = 0; i < rank; i++) {
+ /* Same as above, divide by the bandwidth since the bandwidth_factor actually is the inverse of the bandwidth. */
+ bandwidth_factor[i] = storage->bandwidth[i] / global_bandwidth;
+ }
+
@@ -353,9 +356,6 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float co
/* === Calculate the final pixel color. === */
float XtX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
- for(int i = 0; i < rank; i++)
- /* Same as above, divide by the bandwidth since the bandwidth_factor actually is the inverse of the bandwidth. */
- bandwidth_factor[i] /= global_bandwidth;
int matrix_size = rank+1;
math_matrix_zero_lower(XtX, matrix_size);
@@ -508,6 +508,16 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
}
}
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ storage->feature_matrix_norm = _mm_hsum_ss(feature_matrix_norm);
+ storage->singular_threshold = singular_threshold;
+ for(int i = 0; i < DENOISE_FEATURES; i++) {
+ storage->means[i] = _mm_cvtss_f32(feature_means[i]);
+ storage->scales[i] = _mm_cvtss_f32(feature_scale[i]);
+ storage->singular[i] = sqrtf(fabsf(singular[i]));
+ }
+#endif
+
/* From here on, the mean of the features will be shifted to the central pixel's values. */
float feature_means_scalar[DENOISE_FEATURES];
float const* __restrict__ center_buffer = buffer + (y - rect.y) * buffer_w + (x - rect.x);
More information about the Bf-blender-cvs mailing list