[Bf-blender-cvs] [8dcf23b] soc-2016-cycles_denoising: Cycles: Fix various issues with the denoising debug passes

Lukas Stockner noreply at git.blender.org
Sun Aug 21 06:18:23 CEST 2016


Commit: 8dcf23bb31c4d4aa8cd9405229b2be4d7ec7a08e
Author: Lukas Stockner
Date:   Sun Aug 21 05:02:31 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB8dcf23bb31c4d4aa8cd9405229b2be4d7ec7a08e

Cycles: Fix various issues with the denoising debug passes

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/kernel/kernel_filter.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 3f4bea6..7b20bfc 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -467,15 +467,10 @@ public:
 				filter_estimate_params_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storages + y*filter_area.z + x, rect);
 			}
 		}
-		for(int y = 0; y < filter_area.w; y++) {
-			for(int x = 0; x < filter_area.z; x++) {
-				filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storages + y*filter_area.z + x, filter_area, rect);
-			}
-		}
 
 #ifdef WITH_CYCLES_DEBUG_FILTER
 #define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, filter_area.y, name).c_str(), &storages[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
-			for(int i = 0; i < DENOISE_FEATURES; i++) {
+		for(int i = 0; i < DENOISE_FEATURES; i++) {
 			WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
 			WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
 			WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
@@ -484,6 +479,13 @@ public:
 		WRITE_DEBUG("singular_threshold", singular_threshold);
 		WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
 		WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+		for(int y = 0; y < filter_area.w; y++) {
+			for(int x = 0; x < filter_area.z; x++) {
+				filter_final_pass_kernel(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, storages + y*filter_area.z + x, filter_area, rect);
+			}
+		}
+#ifdef WITH_CYCLES_DEBUG_FILTER
 		WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
 		WRITE_DEBUG("sum_weight", sum_weight);
 		WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0dcbad8..078c22b 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1063,7 +1063,7 @@ public:
 #define WRITE_DEBUG(name, pass) debug_write_pfm(string_printf("debug_%dx%d_cuda_feature%d_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, i, name).c_str(), host_denoise_buffer+pass*pass_stride, rtile.w, rtile.h, 1, w)
 		float *host_denoise_buffer = new float[22*pass_stride];
 		cuda_assert(cuMemcpyDtoH(host_denoise_buffer, d_denoise_buffer, 22*pass_stride*sizeof(float)));
-		for(int i = 0; i < 11; i++) {
+		for(int i = 0; i < 8; i++) {
 			WRITE_DEBUG("filtered", 2*i);
 			WRITE_DEBUG("variance", 2*i+1);
 		}
@@ -1140,18 +1140,17 @@ public:
 		cuda_assert(cuMemcpyDtoH(host_storage, d_storage, sizeof(FilterStorage)*filter_area.z*filter_area.w));
 #define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), &host_storage[0].var, filter_area.z, filter_area.w, sizeof(FilterStorage)/sizeof(float), filter_area.z);
 		for(int i = 0; i < DENOISE_FEATURES; i++) {
-			WRITE_DEBUG(string_printf("mean_%d.pfm", i).c_str(), means[i]);
-			WRITE_DEBUG(string_printf("scale_%d.pfm", i).c_str(), scales[i]);
-			WRITE_DEBUG(string_printf("singular_%d.pfm", i).c_str(), singular[i]);
-			WRITE_DEBUG(string_printf("bandwidth_%d.pfm", i).c_str(), bandwidth[i]);
+			WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
+			WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
+			WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
+			WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
 		}
 		WRITE_DEBUG("singular_threshold", singular_threshold);
-		WRITE_DEBUG("singular_threshold.pfm", singular_threshold);
-		WRITE_DEBUG("feature_matrix_norm.pfm", feature_matrix_norm);
-		WRITE_DEBUG("global_bandwidth.pfm", global_bandwidth);
-		WRITE_DEBUG("filtered_global_bandwidth.pfm", filtered_global_bandwidth);
-		WRITE_DEBUG("sum_weight.pfm", sum_weight);
-		WRITE_DEBUG("log_rmse_per_sample.pfm", log_rmse_per_sample);
+		WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+		WRITE_DEBUG("global_bandwidth", global_bandwidth);
+		WRITE_DEBUG("filtered_global_bandwidth", filtered_global_bandwidth);
+		WRITE_DEBUG("sum_weight", sum_weight);
+		WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
 		delete[] host_storage;
 #undef WRITE_DEBUG
 #endif
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index a321fd3..b2f93b8 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -318,10 +318,7 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float co
 	filter_get_features(x, y, 0, center_buffer, feature_means, NULL, pass_stride);
 
 
-
-
 	/* === Fetch stored data from the previous kernel. === */
-	float *bandwidth_factor = &storage->bandwidth[0];
 	int rank = storage->rank;
 	/* Apply a median filter to the 3x3 window aroung the current pixel. */
 	int sort_idx = 0;
@@ -343,6 +340,12 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float co
 	}
 	float global_bandwidth = global_bandwidths[sort_idx/2];
 
+	float bandwidth_factor[DENOISE_FEATURES];
+	for(int i = 0; i < rank; i++) {
+		/* Same as above, divide by the bandwidth since the bandwidth_factor actually is the inverse of the bandwidth. */
+		bandwidth_factor[i] = storage->bandwidth[i] / global_bandwidth;
+	}
+
 
 
 
@@ -353,9 +356,6 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float co
 
 	/* === Calculate the final pixel color. === */
 	float XtX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
-	for(int i = 0; i < rank; i++)
-		/* Same as above, divide by the bandwidth since the bandwidth_factor actually is the inverse of the bandwidth. */
-		bandwidth_factor[i] /= global_bandwidth;
 
 	int matrix_size = rank+1;
 	math_matrix_zero_lower(XtX, matrix_size);
@@ -508,6 +508,16 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 		}
 	}
 
+#ifdef WITH_CYCLES_DEBUG_FILTER
+	storage->feature_matrix_norm = _mm_hsum_ss(feature_matrix_norm);
+	storage->singular_threshold = singular_threshold;
+	for(int i = 0; i < DENOISE_FEATURES; i++) {
+		storage->means[i] = _mm_cvtss_f32(feature_means[i]);
+		storage->scales[i] = _mm_cvtss_f32(feature_scale[i]);
+		storage->singular[i] = sqrtf(fabsf(singular[i]));
+	}
+#endif
+
 	/* From here on, the mean of the features will be shifted to the central pixel's values. */
 	float feature_means_scalar[DENOISE_FEATURES];
 	float const* __restrict__ center_buffer = buffer + (y - rect.y) * buffer_w + (x - rect.x);




More information about the Bf-blender-cvs mailing list