[Bf-blender-cvs] [84b4575] soc-2016-cycles_denoising: Cycles: Add additional debugging info to the denoiser
Lukas Stockner
noreply at git.blender.org
Wed Jul 6 04:28:49 CEST 2016
Commit: 84b4575e9e23be6b1068c1f970a88764afc48123
Author: Lukas Stockner
Date: Mon Jul 4 17:39:18 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB84b4575e9e23be6b1068c1f970a88764afc48123
Cycles: Add additional debugging info to the denoiser
===================================================================
M CMakeLists.txt
M intern/cycles/CMakeLists.txt
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/kernel_filter.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/util/util_debug.cpp
M intern/cycles/util/util_debug.h
===================================================================
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c933d3..ba1bd9f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -399,9 +399,11 @@ mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
unset(PLATFORM_DEFAULT)
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
option(WITH_CYCLES_DEBUG "Build Cycles with extra debug capabilities" OFF)
+option(WITH_CYCLES_DEBUG_FILTER "Build Cycles with extra debug capabilities in the denoising filter" OFF)
option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF)
mark_as_advanced(WITH_CYCLES_LOGGING)
mark_as_advanced(WITH_CYCLES_DEBUG)
+mark_as_advanced(WITH_CYCLES_DEBUG_FILTER)
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 3b410b2..d609511 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -192,6 +192,10 @@ if(WITH_CYCLES_DEBUG)
add_definitions(-DWITH_CYCLES_DEBUG)
endif()
+if(WITH_CYCLES_DEBUG_FILTER)
+ add_definitions(-DWITH_CYCLES_DEBUG_FILTER)
+endif()
+
include_directories(
SYSTEM
${BOOST_INCLUDE_DIR}
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0992029..83ef715 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -344,6 +344,14 @@ public:
filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
}
}
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ for(int i = 0; i < DENOISE_FEATURES; i++)
+ debug_write_pfm(string_printf("debug_%dx%d_bandwidth_%d.pfm", tile.x, tile.y, i).c_str(), &storages[0].bandwidth[i], tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+ debug_write_pfm(string_printf("debug_%dx%d_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+ debug_write_pfm(string_printf("debug_%dx%d_filtered_global_bandwidth.pfm", tile.x, tile.y).c_str(), &storages[0].filtered_global_bandwidth, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+ debug_write_pfm(string_printf("debug_%dx%d_sum_weight.pfm", tile.x, tile.y).c_str(), &storages[0].sum_weight, tile.w, tile.h, sizeof(FilterStorage)/sizeof(float), tile.w);
+#endif
+ tile.sample = sample;
}
task.release_tile(tile);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index d690fae..3d4c9ba 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -828,6 +828,17 @@ public:
cuda_assert(cuCtxSynchronize());
+#ifdef WITH_CYCLES_DEBUG_FILTER
+	/* Debug only: copy the per-pixel FilterStorage array back from the device and
+	 * dump each float member of interest as its own single-channel PFM image. */
+	const size_t num_storage = (size_t)filter_w*filter_h;
+	FilterStorage *host_storage = new FilterStorage[num_storage];
+	/* The byte count must cover the whole array. sizeof(host_storage) would only be
+	 * the size of the pointer and leave almost all of the buffer uninitialized. */
+	cuda_assert(cuMemcpyDtoH(host_storage, d_storage, num_storage*sizeof(FilterStorage)));
+	/* debug_write_pfm() steps through the buffer in units of floats, so the distance
+	 * between the same member of two neighbouring pixels is the struct size in floats. */
+	const int storage_stride = sizeof(FilterStorage)/sizeof(float);
+	for(int i = 0; i < DENOISE_FEATURES; i++) {
+		debug_write_pfm(string_printf("debug_%dx%d_bandwidth_%d.pfm", rtile.x, rtile.y, i).c_str(), &host_storage[0].bandwidth[i], filter_w, filter_h, storage_stride, filter_w);
+	}
+	debug_write_pfm(string_printf("debug_%dx%d_global_bandwidth.pfm", rtile.x, rtile.y).c_str(), &host_storage[0].global_bandwidth, filter_w, filter_h, storage_stride, filter_w);
+	debug_write_pfm(string_printf("debug_%dx%d_filtered_global_bandwidth.pfm", rtile.x, rtile.y).c_str(), &host_storage[0].filtered_global_bandwidth, filter_w, filter_h, storage_stride, filter_w);
+	debug_write_pfm(string_printf("debug_%dx%d_sum_weight.pfm", rtile.x, rtile.y).c_str(), &host_storage[0].sum_weight, filter_w, filter_h, storage_stride, filter_w);
+	delete[] host_storage;
+#endif
+
cuda_assert(cuMemFree(d_storage));
cuda_pop_context();
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index c5433f7..2cdf45f 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -232,6 +232,10 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_debug_flags "")
endif()
+ if(WITH_CYCLES_DEBUG_FILTER)
+ set(cuda_debug_flags "${cuda_debug_flags} -DWITH_CYCLES_DEBUG_FILTER")
+ endif()
+
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
set(cuda_math_flags "--use_fast_math")
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 3dc2ee0..b6984a5 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -254,6 +254,8 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
float *bandwidth_factor = &storage->bandwidth[0];
for(int i = 0; i < rank; i++)
bandwidth_factor[i] = sqrtf(2.0f * average(fabs(XtY[1+rank+i])) + 0.16f);
+ for(int i = rank; i < DENOISE_FEATURES; i++)
+ bandwidth_factor[i] = 0.0f;
@@ -424,6 +426,11 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float **
math_add_gramian(XtX, matrix_size, design_row, weight);
} END_FOR_PIXEL_WINDOW
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ storage->filtered_global_bandwidth = global_bandwidth;
+ storage->sum_weight = XtX[0];
+#endif
+
math_matrix_add_diagonal(XtX, matrix_size, 1e-4f); /* Improve the numerical stability. */
math_cholesky(XtX, matrix_size);
math_inverse_lower_tri_inplace(XtX, matrix_size);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 43007a3..9ed47b8 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1281,6 +1281,10 @@ typedef struct FilterStorage {
float bandwidth[DENOISE_FEATURES];
int rank;
float global_bandwidth;
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ float filtered_global_bandwidth;
+ float sum_weight;
+#endif
} FilterStorage;
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 80d177d..1ff8d8d 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -18,6 +18,10 @@
#include <stdlib.h>
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#include <stdio.h>
+#endif
+
#include "util_logging.h"
#include "util_string.h"
@@ -179,4 +183,18 @@ std::ostream& operator <<(std::ostream &os,
return os;
}
+#ifdef WITH_CYCLES_DEBUG_FILTER
+/* Write one float channel of a strided buffer to a single-channel ("Pf") PFM file.
+ *
+ * name:        path of the file to create (overwritten if it exists).
+ * data:        pointer to the value of the first pixel.
+ * w, h:        image size in pixels.
+ * pixelstride: distance between horizontally adjacent pixels, in floats.
+ * linestride:  distance between rows, in pixels (multiplied by pixelstride internally).
+ *
+ * Returns true only if the file was opened, fully written and closed successfully.
+ *
+ * The header scale of -1 marks the data as little-endian; values are written in
+ * host byte order, so this assumes a little-endian host.
+ * NOTE(review): the PFM format stores scanlines bottom-to-top, while rows are written
+ * here in memory order, so viewers may show the image vertically flipped - confirm
+ * whether that matters for the debug output. */
+bool debug_write_pfm(const char *name, float *data, int w, int h, int pixelstride, int linestride)
+{
+	/* Reject input that cannot produce a valid image instead of writing a broken file. */
+	if(!name || !data || w <= 0 || h <= 0) {
+		return false;
+	}
+
+	FILE *f = fopen(name, "wb");
+	if(!f) {
+		return false;
+	}
+
+	/* Track I/O failures (e.g. full disk) so that a truncated file is reported. */
+	bool ok = (fprintf(f, "Pf\n%d %d\n-1\n", w, h) > 0);
+
+	const float *row = data;
+	for(int y = 0; ok && y < h; y++, row += linestride*pixelstride) {
+		for(int x = 0; ok && x < w; x++) {
+			ok = (fwrite(row + x*pixelstride, sizeof(float), 1, f) == 1);
+		}
+	}
+
+	/* fclose() flushes buffered data, so its result is part of the write status;
+	 * it is always called so the handle is not leaked on failure. */
+	if(fclose(f) != 0) {
+		ok = false;
+	}
+	return ok;
+}
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 1787ff6..bc9d123 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -149,6 +149,10 @@ inline DebugFlags& DebugFlags() {
std::ostream& operator <<(std::ostream &os,
DebugFlagsConstRef debug_flags);
+#ifdef WITH_CYCLES_DEBUG_FILTER
+bool debug_write_pfm(const char *name, float *data, int w, int h, int pixelstride, int linestride);
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_DEBUG_H__ */
More information about the Bf-blender-cvs
mailing list