[Bf-blender-cvs] [04c0e7b] soc-2016-cycles_denoising: Merge remote-tracking branch 'origin/master' into soc-2016-cycles_denoising
Lukas Stockner
noreply at git.blender.org
Tue Dec 20 16:06:50 CET 2016
Commit: 04c0e7b2d71a3925f1019c365342c1ea08904be1
Author: Lukas Stockner
Date: Tue Dec 6 21:13:06 2016 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB04c0e7b2d71a3925f1019c365342c1ea08904be1
Merge remote-tracking branch 'origin/master' into soc-2016-cycles_denoising
Conflicts:
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/render/tile.cpp
intern/cycles/render/tile.h
===================================================================
===================================================================
diff --cc intern/cycles/app/cycles_standalone.cpp
index 95d0120,9816d61..0987d4b
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@@ -63,10 -70,8 +63,8 @@@ static void session_print(const string
fflush(stdout);
}
-static void session_print_status()
+void session_print_status()
{
- int sample, tile;
- double total_time, sample_time, render_time;
string status, substatus;
/* get status */
diff --cc intern/cycles/device/device_cpu.cpp
index 0f5ac8e,c8e001e..4e713e8
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@@ -276,361 -213,74 +281,362 @@@ public
}
};
- void thread_path_trace(DeviceTask& task)
+ float* denoise_fill_buffer(KernelGlobals *kg, int sample, int4 rect, float** buffers, int* tile_x, int* tile_y, int *offsets, int *strides, int frames, int *frame_strides)
{
- if(task_pool.canceled()) {
- if(task.need_finish_queue == false)
- return;
- }
+ bool cross_denoise = kg->__data.film.denoise_cross;
+ int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
+ int pass_stride = w*h*frames;
+ int passes = cross_denoise? 28:22;
+ float *filter_buffers = new float[passes*pass_stride];
+ memset(filter_buffers, 0, sizeof(float)*passes*pass_stride);
+
+
+ for(int frame = 0; frame < frames; frame++) {
+ float *filter_buffer = filter_buffers + w*h*frame;
+ float *buffer[9];
+ for(int i = 0; i < 9; i++) {
+ buffer[i] = buffers[i] + frame_strides[i]*frame;
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ DenoiseDebug debug((rect.z - rect.x), h, 34);
+#endif
+ /* ==== Step 1: Prefilter general features. ==== */
+ {
- KernelGlobals kg = thread_kernel_globals_init();
- RenderTile tile;
+ float *unfiltered = filter_buffer + 16*pass_stride;
+ /* Order in render buffers:
+ * Normal[X, Y, Z] NormalVar[X, Y, Z] Albedo[R, G, B] AlbedoVar[R, G, B ] Depth DepthVar
+ *        0  1  2            3  4  5         6  7  8            9  10 11  12    13
+ *
+ * Order in denoise buffer:
+ * Normal[X, XVar, Y, YVar, Z, ZVar] Depth DepthVar Shadow ShadowVar Albedo[R, RVar, G, GVar, B, BVar] Color[R, RVar, G, GVar, B, BVar]
+ *        0  1     2  3     4  5     6     7        8      9                10 11    12 13    14 15          16 17    18 19    20 21
+ *
+ * Order of processing: |NormalXYZ|Depth|AlbedoXYZ |
+ *                      |         |     |          | */
+ int mean_from[] = { 0, 1, 2, 6, 7, 8, 12 };
+ int variance_from[] = { 3, 4, 5, 9, 10, 11, 13 };
+ int offset_to[] = { 0, 2, 4, 10, 12, 14, 6 };
+ for(int i = 0; i < 7; i++) {
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, unfiltered, filter_buffer + (offset_to[i]+1)*pass_stride, &rect.x);
+ }
+ }
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_non_local_means_kernel()(x, y, unfiltered, unfiltered, filter_buffer + (offset_to[i]+1)*pass_stride, filter_buffer + offset_to[i]*pass_stride, &rect.x, 2, 2, 1, 0.25f);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("f%d_%s", i, name), var, 1, w);
+ WRITE_DEBUG("unfiltered", unfiltered);
+ WRITE_DEBUG("sampleV", filter_buffer + (offset_to[i]+1)*pass_stride);
+ WRITE_DEBUG("filtered", filter_buffer + offset_to[i]*pass_stride);
+#undef WRITE_DEBUG
+#endif
+ }
+ }
- void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2()) {
- path_trace_kernel = kernel_cpu_avx2_path_trace;
- }
- else
+
+ /* ==== Step 2: Prefilter shadow feature. ==== */
+ {
+ /* Reuse some passes of the filter_buffer for temporary storage. */
+ float *sampleV = filter_buffer + 16*pass_stride, *sampleVV = filter_buffer + 17*pass_stride, *bufferV = filter_buffer + 18*pass_stride, *cleanV = filter_buffer + 19*pass_stride;
+ float *unfiltered = filter_buffer + 20*pass_stride;
+
+ /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_divide_shadow_kernel()(kg, sample, buffer, x, y, tile_x, tile_y, offsets, strides, unfiltered, sampleV, sampleVV, bufferV, &rect.x);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#define WRITE_DEBUG(name, var) debug.add_pass(string_printf("shadow_%s", name), var, 1, w);
+ WRITE_DEBUG("unfilteredA", unfiltered);
+ WRITE_DEBUG("unfilteredB", unfiltered + pass_stride);
+ WRITE_DEBUG("bufferV", bufferV);
+ WRITE_DEBUG("sampleV", sampleV);
+ WRITE_DEBUG("sampleVV", sampleVV);
#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx()) {
- path_trace_kernel = kernel_cpu_avx_path_trace;
- }
- else
+
+ /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_non_local_means_kernel()(x, y, bufferV, sampleV, sampleVV, cleanV, &rect.x, 6, 3, 4, 1.0f);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ WRITE_DEBUG("cleanV", cleanV);
#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41()) {
- path_trace_kernel = kernel_cpu_sse41_path_trace;
- }
- else
+
+ /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_non_local_means_kernel()(x, y, unfiltered, unfiltered + pass_stride, cleanV, sampleV, &rect.x, 5, 3, 1, 0.25f);
+ filter_non_local_means_kernel()(x, y, unfiltered + pass_stride, unfiltered, cleanV, bufferV, &rect.x, 5, 3, 1, 0.25f);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ WRITE_DEBUG("filteredA", sampleV);
+ WRITE_DEBUG("filteredB", bufferV);
#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3()) {
- path_trace_kernel = kernel_cpu_sse3_path_trace;
- }
- else
+
+ /* Estimate the residual variance between the two filtered halves. */
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_combine_halves_kernel()(x, y, NULL, sampleVV, sampleV, bufferV, &rect.x, 2);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ WRITE_DEBUG("residualV", sampleVV);
#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
- if(system_cpu_support_sse2()) {
- path_trace_kernel = kernel_cpu_sse2_path_trace;
+
+ /* Use the residual variance for a second filter pass. */
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_non_local_means_kernel()(x, y, sampleV, bufferV, sampleVV, unfiltered , &rect.x, 4, 2, 1, 0.5f);
+ filter_non_local_means_kernel()(x, y, bufferV, sampleV, sampleVV, unfiltered + pass_stride, &rect.x, 4, 2, 1, 0.5f);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ WRITE_DEBUG("finalA", unfiltered);
+ WRITE_DEBUG("finalB", unfiltered + pass_stride);
+#endif
+
+ /* Combine the two double-filtered halves to a final shadow feature image and associated variance. */
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_combine_halves_kernel()(x, y, filter_buffer + 8*pass_stride, filter_buffer + 9*pass_stride, unfiltered, unfiltered + pass_stride, &rect.x, 0);
+ }
+ }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+ WRITE_DEBUG("final", filter_buffer + 8*pass_stride);
+ WRITE_DEBUG("finalV", filter_buffer + 9 * pass_stride);
+ debug.write(string_printf("debugf_%dx%d.exr", tile_x[1], tile_y[1]));
+#undef WRITE_DEBUG
+#endif
+ }
+
+
+
+ /* ==== Step 3: Copy combined color pass. ==== */
+ {
+ if(cross_denoise) {
+ int mean_from[] = {20, 21, 22, 26, 27, 28};
+ int variance_from[] = {23, 24, 25, 29, 30, 31};
+ int offset_to[] = {16, 18, 20, 22, 24, 26};
+ for(int i = 0; i < 6; i++) {
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, filter_buffer + offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, &rect.x);
+ }
+ }
+ }
+ }
+ else {
+ int mean_from[] = {20, 21, 22};
+ int variance_from[] = {23, 24, 25};
+ int offset_to[] = {16, 18, 20};
+ for(int i = 0; i < 3; i++) {
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = rect.x; x < rect.z; x++) {
+ filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, filter_buffer + offset_to[i]*pass_stride, filter_buffer + (offset_to[i]+1)*pass_stride, &rect.x);
+ }
+ }
+ }
+ }
+ }
}
- else
+
+ return filter_buffers;
+ }
+
+ void denoise_run(KernelGlobals *kg, int sample, float *filter_buffer, int4 filter_area, int4 rect, int offset, int stride, float *buffers)
+ {
+ bool only_nlm_filter = getenv("ONLY_NLM_FILTER");
+ bool use_gradients = kg->__data.integrator.use_gradients;
+ bool nlm_weights = kg->__data.integrator.use_nlm_weights;
+
+ int hw = kg->__data.integrator.half_window;
+ FilterStorage *storage = new FilterStorage[filter_area.z*filter_area.w];
+ float *weight_cache = new float[(2*hw+1)*(2*hw+1)];
+
+ int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
+ int pass_stride = w*h;
+
+ if(only_nlm_filter) {
+ float *img[3] = {filter_buffer + 16*pass_stride, filter_buffer + 18*pa
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list