[Bf-blender-cvs] [a0d6ca39dd] soc-2016-cycles_denoising: Cycles Denoising: Redesign debug EXR writing code for a cleaner integration

Lukas Stockner noreply at git.blender.org
Wed Feb 1 05:18:56 CET 2017


Commit: a0d6ca39dd29c69344792cd4a33eb92bcc76235c
Author: Lukas Stockner
Date:   Sat Jan 14 18:57:03 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBa0d6ca39dd29c69344792cd4a33eb92bcc76235c

Cycles Denoising: Redesign debug EXR writing code for a cleaner integration

Also, remove the denoising passes from CUDA - they just add too much clutter, and the results are the same as on the CPU anyway.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/util/util_debug.cpp
M	intern/cycles/util/util_debug.h
M	intern/cycles/util/util_guarded_allocator.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 2e7844ca99..26ec530cf1 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -329,10 +329,7 @@ public:
 			for(int i = 0; i < 9; i++) {
 				buffer[i] = buffers[i] + frame_strides[i]*frame;
 			}
-#ifdef WITH_CYCLES_DEBUG_FILTER
-			DenoiseDebug debug((rect.z - rect.x), h, 34);
-#endif
-
+			DebugPasses debug((rect.z - rect.x), h, 34, 1, w);
 
 #define PASSPTR(i) (filter_buffer + (i)*pass_stride)
 
@@ -349,28 +346,21 @@ public:
 						filter_divide_shadow_kernel()(kg, sample, buffer, x, y, tile_x, tile_y, offsets, strides, unfilteredA, sampleV, sampleVV, bufferV, &rect.x);
 					}
 				}
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("shadow_%s", name), var, 1, w);
-				WRITE_DEBUG("unfilteredA", unfilteredA);
-				WRITE_DEBUG("unfilteredB", unfilteredB);
-				WRITE_DEBUG("bufferV", bufferV);
-				WRITE_DEBUG("sampleV", sampleV);
-				WRITE_DEBUG("sampleVV", sampleVV);
-#endif
+				debug.add_pass("shadowUnfilteredA", unfilteredA);
+				debug.add_pass("shadowUnfilteredB", unfilteredB);
+				debug.add_pass("shadowBufferV", bufferV);
+				debug.add_pass("shadowSampleV", sampleV);
+				debug.add_pass("shadowSampleVV", sampleVV);
 
 				/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
 				non_local_means(rect, bufferV, sampleV, cleanV, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 6, 3, 4.0f, 1.0f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				WRITE_DEBUG("cleanV", cleanV);
-#endif
+				debug.add_pass("shadowCleanV", cleanV);
 
 				/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
 				non_local_means(rect, unfilteredA, unfilteredB, sampleV, cleanV, nlm_temp1, nlm_temp2, nlm_temp3, 5, 3, 1.0f, 0.25f);
 				non_local_means(rect, unfilteredB, unfilteredA, bufferV, cleanV, nlm_temp1, nlm_temp2, nlm_temp3, 5, 3, 1.0f, 0.25f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				WRITE_DEBUG("filteredA", sampleV);
-				WRITE_DEBUG("filteredB", bufferV);
-#endif
+				debug.add_pass("shadowFilteredA", sampleV);
+				debug.add_pass("shadowFilteredB", bufferV);
 
 				/* Estimate the residual variance between the two filtered halves. */
 				for(int y = rect.y; y < rect.w; y++) {
@@ -378,17 +368,13 @@ public:
 						filter_combine_halves_kernel()(x, y, NULL, sampleVV, sampleV, bufferV, &rect.x, 2);
 					}
 				}
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				WRITE_DEBUG("residualV", sampleVV);
-#endif
+				debug.add_pass("shadowResidualV", sampleVV);
 
 				/* Use the residual variance for a second filter pass. */
 				non_local_means(rect, sampleV, bufferV, unfilteredA, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 4, 2, 1.0f, 0.5f);
 				non_local_means(rect, bufferV, sampleV, unfilteredB, sampleVV, nlm_temp1, nlm_temp2, nlm_temp3, 4, 2, 1.0f, 0.5f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				WRITE_DEBUG("finalA", unfilteredA);
-				WRITE_DEBUG("finalB", unfilteredB);
-#endif
+				debug.add_pass("shadowFinalA", unfilteredA);
+				debug.add_pass("shadowFinalB", unfilteredB);
 
 				/* Combine the two double-filtered halves to a final shadow feature image and associated variance. */
 				for(int y = rect.y; y < rect.w; y++) {
@@ -396,12 +382,8 @@ public:
 						filter_combine_halves_kernel()(x, y, PASSPTR(8), PASSPTR(9), unfilteredA, unfilteredB, &rect.x, 0);
 					}
 				}
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				WRITE_DEBUG("final", PASSPTR(8));
-				WRITE_DEBUG("finalV", PASSPTR(9));
-				debug.write(string_printf("debugf_%dx%d.exr", tile_x[1], tile_y[1]));
-#undef WRITE_DEBUG
-#endif
+				debug.add_pass("shadowFinal", PASSPTR(8));
+				debug.add_pass("shadowFinalV", PASSPTR(9));
 			}
 
 			/* ==== Step 2: Prefilter general features. ==== */
@@ -429,13 +411,9 @@ public:
 						}
 					}
 					non_local_means(rect, unfiltered, unfiltered, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), nlm_temp1, nlm_temp2, nlm_temp3, 2, 2, 1, 0.25f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("f%d_%s", i, name), var, 1, w);
-					WRITE_DEBUG("unfiltered", unfiltered);
-					WRITE_DEBUG("sampleV", PASSPTR(offset_to[i]+1));
-					WRITE_DEBUG("filtered", PASSPTR(offset_to[i]));
-#undef WRITE_DEBUG
-#endif
+					debug.add_pass(string_printf("feature%dUnfiltered", i), unfiltered);
+					debug.add_pass(string_printf("feature%dFiltered", i), PASSPTR(offset_to[i]));
+					debug.add_pass(string_printf("feature%dVariance", i), PASSPTR(offset_to[i]+1));
 				}
 			}
 
@@ -468,6 +446,8 @@ public:
 					}
 				}
 			}
+
+			debug.write(string_printf("debug_tile_%d_%d.exr", rect.x, rect.y));
 		}
 
 		return filter_buffers;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 8ec6ca6b91..4659b1dc01 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -993,33 +993,12 @@ public:
 				                           xthreads, ythreads, 1, /* threads */
 				                           0, 0, divide_args, 0));
 
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, ptr) debug_write_pfm(string_printf("debug_%dx%d_cuda_shadow_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), ptr, rtile.w, rtile.h, 1, w)
-				float *temp = new float[pass_stride*6];
-				cuda_assert(cuMemcpyDtoH(temp, d_sampleV, 6*pass_stride*sizeof(float)));
-
-				WRITE_DEBUG("unfilteredA", temp + 4*pass_stride);
-				WRITE_DEBUG("unfilteredB", temp + 5*pass_stride);
-				WRITE_DEBUG("bufferV", temp + 2*pass_stride);
-				WRITE_DEBUG("sampleV", temp + 0*pass_stride);
-				WRITE_DEBUG("sampleVV", temp + 1*pass_stride);
-#endif
-
 				/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
 				non_local_means(rect, d_bufferV, d_sampleV, d_cleanV, d_sampleVV, d_temp1, d_temp2, d_temp3, 6, 3, 2.0f, 2.0f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				cuda_assert(cuMemcpyDtoH(temp, d_cleanV, pass_stride*sizeof(float)));
-				WRITE_DEBUG("cleanV", temp);
-#endif
 
 				/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
 				non_local_means(rect, d_unfilteredA, d_unfilteredB, d_sampleV, d_cleanV, d_temp1, d_temp2, d_temp3, 5, 3, 1.0f, 0.25f);
 				non_local_means(rect, d_unfilteredB, d_unfilteredA, d_bufferV, d_cleanV, d_temp1, d_temp2, d_temp3, 5, 3, 1.0f, 0.25f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				cuda_assert(cuMemcpyDtoH(temp, d_sampleV, 3*pass_stride*sizeof(float)));
-				WRITE_DEBUG("filteredA", temp);
-				WRITE_DEBUG("filteredB", temp + 2*pass_stride);
-#endif
 
 				/* Estimate the residual variance between the two filtered halves. */
 				int var_r = 2;
@@ -1029,19 +1008,10 @@ public:
 				                           xblocks , yblocks, 1, /* blocks */
 				                           xthreads, ythreads, 1, /* threads */
 				                           0, 0, residual_variance_args, 0));
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				cuda_assert(cuMemcpyDtoH(temp, d_cleanV, pass_stride*sizeof(float)));
-				WRITE_DEBUG("residualV", temp);
-#endif
 
 				/* Use the residual variance for a second filter pass. */
 				non_local_means(rect, d_sampleV, d_bufferV, d_unfilteredA, d_cleanV, d_temp1, d_temp2, d_temp3, 4, 2, 1.0f, 1.0f);
 				non_local_means(rect, d_bufferV, d_sampleV, d_unfilteredB, d_cleanV, d_temp1, d_temp2, d_temp3, 4, 2, 1.0f, 1.0f);
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				cuda_assert(cuMemcpyDtoH(temp, d_unfilteredA, 2*pass_stride*sizeof(float)));
-				WRITE_DEBUG("finalA", temp);
-				WRITE_DEBUG("finalB", temp + 1*pass_stride);
-#endif
 
 				/* Combine the two double-filtered halves to a final shadow feature image and associated variance. */
 				var_r = 0;
@@ -1053,13 +1023,6 @@ public:
 				                           xthreads, ythreads, 1, /* threads */
 				                           0, 0, final_prefiltered_args, 0));
 				cuda_assert(cuCtxSynchronize());
-#ifdef WITH_CYCLES_DEBUG_FILTER
-				cuda_assert(cuMemcpyDtoH(temp, d_mean, 2*pass_stride*sizeof(float)));
-				WRITE_DEBUG("final", temp);
-				WRITE_DEBUG("finalV", temp + 1*pass_stride);
-				delete[] temp;
-#undef WRITE_DEBUG
-#endif
 			}
 
 			/* ==== Step 2: Prefilter general features. ==== */
@@ -1113,18 +1076,6 @@ public:
 			}
 		}
 
-#ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, pass) debug_write_pfm(string_printf("debug_%dx%d_cuda_feature%d_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, i, name).c_str(), host_denoise_buffer+pass*pass_stride, rtile.w, rtile.h, 1, w)
-		float *host_denoise_buffer = new float[22*pass_stride];
-		cuda_assert(cuMemcpyDtoH(host_denoise_buffer, d_denoise_buffers, 22*pass_stride*sizeof(float)));
-		for(int i = 0; i < 8; i++) {
-			WRITE_DEBUG("filtered", 2*i);
-			WRITE_DEBUG("variance", 2*i+1);
-		}
-		delete[] host_denoise_buffer;
-#undef WRITE_DEBUG
-#endif
-
 		/* Use the prefiltered feature to denoise the image. */
 		int storage_num = filter_area.z*filter_area.w;
 		CUdeviceptr d_storage, d_transforms;
@@ -1226,26 +1177,6 @@ public:
 		                           0, 0, finalize_args, 0));
 		cuda_assert(cuMemFree(d_XtWX));
 		cuda_assert(cuMemFree(d_XtWY));
-
-#ifdef WITH_CYCLES_DEBUG_FILTER
-		CUDAFilterStorage *host_storage = new CUDAFilterStorage[filter_area.z*filter_area.w];
-		cuda_assert(cuMemcpyDtoH(host_storage, d_storage, sizeof(CUDAFilterStorage)*filter_area.z*filter_area.w));
-#define WRITE_DEBUG(name, var) debug_write_pfm(string_printf("debug_%dx%d_cuda_%s.pfm", rtile.x+rtile.buffers->params.overscan, rtile.y+rtile.buffers->params.overscan, name).c_str(), &host_storage[0].var, filter_area.z, filter_area.w, sizeof(CUDAFilterStorage)/sizeof(float), filter_area.z);
-		for(int i = 0; i < DENOISE_FEATURES; i++) {
-			WRITE_DEBUG(string_printf("mean_%d", i).c_str(), means[i]);
-			WRITE_DEBUG(string_printf("scale_%d", i).c_str(), scales[i]);
-			WRITE_DEBUG(string_printf("singular_%d", i).c_str(), singular[i]);
-			WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), bandwidth[i]);
-		}
-		WRITE_DEBUG("singula

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list