[Bf-blender-cvs] [cc600de6695] master: Cycles Denoising: Get rid of halos around bright edges

Sat Jun 1 00:50:58 CEST 2019

Commit: cc600de6695a241dd9b0de275656b5a7459552dc
Author: Lukas Stockner
Date:   Fri May 31 22:38:50 2019 +0200
Branches: master
https://developer.blender.org/rBcc600de6695a241dd9b0de275656b5a7459552dc

Cycles Denoising: Get rid of halos around bright edges

Previously, bright edges (e.g. caused by rim lighting) would sometimes get
halos around them after denoising.

This change introduces a log(1+x) highlight compression step that is performed
before denoising and reversed afterwards. That way, the denoising algorithm
itself operates in the compressed space and therefore bright edges cause less
numerical issues.

===================================================================

M	intern/cycles/device/device_denoising.cpp
M	intern/cycles/kernel/filter/filter_prefilter.h
M	intern/cycles/kernel/filter/filter_reconstruction.h
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/denoising.cpp
M	intern/cycles/util/util_color.h
M	intern/cycles/util/util_math_float3.h

===================================================================

diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 55548c98b97..ac17c02a427 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -214,12 +214,12 @@ void DenoisingTask::prefilter_color()
   int num_color_passes = 3;
 
   device_only_memory<float> temporary_color(device, "denoising temporary color");
-  temporary_color.alloc_to_device(3 * buffer.pass_stride, false);
+  temporary_color.alloc_to_device(6 * buffer.pass_stride, false);
 
   for (int pass = 0; pass < num_color_passes; pass++) {
     device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
     device_sub_ptr color_var_pass(
-        buffer.mem, variance_to[pass] * buffer.pass_stride, buffer.pass_stride);
+        temporary_color, (pass + 3) * buffer.pass_stride, buffer.pass_stride);
     functions.get_feature(mean_from[pass],
                           variance_from[pass],
                           *color_pass,
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index b48a3f3f68b..97cecba190e 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -145,25 +145,34 @@ ccl_device void kernel_filter_write_feature(int sample,
   combined_buffer[out_offset] = from[idx];
 }
 
+#define GET_COLOR(image) \
+  make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride])
+#define SET_COLOR(image, color) \
+  image[idx] = color.x; \
+  image[idx + pass_stride] = color.y; \
+  image[idx + 2 * pass_stride] = color.z
+
 ccl_device void kernel_filter_detect_outliers(int x,
                                               int y,
-                                              ccl_global float *image,
-                                              ccl_global float *variance,
+                                              ccl_global float *in,
+                                              ccl_global float *variance_out,
                                               ccl_global float *depth,
-                                              ccl_global float *out,
+                                              ccl_global float *image_out,
                                               int4 rect,
                                               int pass_stride)
 {
   int buffer_w = align_up(rect.z - rect.x, 4);
 
+  ccl_global float *image_in = in;
+  ccl_global float *variance_in = in + 3 * pass_stride;
+
   int n = 0;
   float values[25];
   float pixel_variance, max_variance = 0.0f;
   for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
     for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
       int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
-      float3 color = make_float3(
-          image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+      float3 color = GET_COLOR(image_in);
       color = max(color, make_float3(0.0f, 0.0f, 0.0f));
       float L = average(color);
 
@@ -181,8 +190,7 @@ ccl_device void kernel_filter_detect_outliers(int x,
       values[i] = L;
       n++;
 
-      float3 pixel_var = make_float3(
-          variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]);
+      float3 pixel_var = GET_COLOR(variance_in);
       float var = average(pixel_var);
       if ((x1 == x) && (y1 == y)) {
         pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
@@ -197,8 +205,12 @@ ccl_device void kernel_filter_detect_outliers(int x,
   max_variance += 1e-4f;
 
   int idx = (y - rect.y) * buffer_w + (x - rect.x);
-  float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+
+  float3 color = GET_COLOR(image_in);
+  float3 variance = GET_COLOR(variance_in);
   color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+  variance = max(variance, make_float3(0.0f, 0.0f, 0.0f));
+
   float L = average(color);
 
   float ref = 2.0f * values[(int)(n * 0.75f)];
@@ -218,7 +230,7 @@ ccl_device void kernel_filter_detect_outliers(int x,
     if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
       depth[idx] = -depth[idx];
       color *= ref / L;
-      variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance;
+      variance = make_float3(max_variance, max_variance, max_variance);
     }
     else {
       float stddev = sqrtf(pixel_variance);
@@ -229,17 +241,23 @@ ccl_device void kernel_filter_detect_outliers(int x,
         depth[idx] = -depth[idx];
         float fac = ref / L;
         color *= fac;
-        variance[idx] *= fac * fac;
-        variance[idx + pass_stride] *= fac * fac;
-        variance[idx + 2 * pass_stride] *= fac * fac;
+        variance *= sqr(fac);
       }
     }
   }
-  out[idx] = color.x;
-  out[idx + pass_stride] = color.y;
-  out[idx + 2 * pass_stride] = color.z;
+
+  /* Apply log(1+x) transform to compress highlights and avoid halos in the denoised results.
+   * Variance is transformed accordingly - the derivative of the transform is 1/(1+x), so we
+   * scale by the square of that (since we have variance instead of standard deviation). */
+  color = color_highlight_compress(color, &variance);
+
+  SET_COLOR(image_out, color);
+  SET_COLOR(variance_out, variance);
 }
 
+#undef GET_COLOR
+#undef SET_COLOR
+
 /* Combine A/B buffers.
  * Calculates the combined mean and the buffer variance. */
 ccl_device void kernel_filter_combine_halves(int x,
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 850f20584da..a69397bc6a6 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -119,8 +119,8 @@ ccl_device_inline void kernel_filter_finalize(int x,
     final_color = mean_color;
   }
 
-  /* Clamp pixel value to positive values. */
-  final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+  /* Clamp pixel value to positive values and reverse the highlight compression transform. */
+  final_color = color_highlight_uncompress(max(final_color, make_float3(0.0f, 0.0f, 0.0f)));
 
   ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
                                                    buffer_params.z;
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 5405aaefc1d..ad3827df7e3 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -217,9 +217,14 @@ bool RenderBuffers::get_denoising_pass_rect(
     float *in_combined = buffer.data();
 
     for (int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) {
-      pixels[0] = in[0] * scale;
-      pixels[1] = in[1] * scale;
-      pixels[2] = in[2] * scale;
+      float3 val = make_float3(in[0], in[1], in[2]);
+      if (type == DENOISING_PASS_PREFILTERED_COLOR && params.denoising_prefiltered_pass) {
+        /* Remove highlight compression from the image. */
+        val = color_highlight_uncompress(val);
+      }
+      pixels[0] = val.x * scale;
+      pixels[1] = val.y * scale;
+      pixels[2] = val.z * scale;
       pixels[3] = saturate(in_combined[3] * alpha_scale);
     }
   }
diff --git a/intern/cycles/render/denoising.cpp b/intern/cycles/render/denoising.cpp
index 82bbfa5f823..39bd8ce00c2 100644
--- a/intern/cycles/render/denoising.cpp
+++ b/intern/cycles/render/denoising.cpp
@@ -491,6 +491,19 @@ bool DenoiseTask::load_input_pixels(int layer)
       }
     }
 
+    /* Highlight compression */
+    data = buffer_data + 8;
+    for (int y = 0; y < h; y++) {
+      for (int x = 0; x < w; x++) {
+        int idx = INPUT_NUM_CHANNELS * (y * w + x);
+        float3 color = make_float3(data[idx], data[idx + 1], data[idx + 2]);
+        color = color_highlight_compress(color, NULL);
+        data[idx] = color.x;
+        data[idx + 1] = color.y;
+        data[idx + 2] = color.z;
+      }
+    }
+
     buffer_data += frame_stride;
   }
 
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 83410db13c6..6e61190de6b 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -257,6 +257,20 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
 #endif
 }
 
+ccl_device float3 color_highlight_compress(float3 color, float3 *variance)
+{
+  color += make_float3(1.0f, 1.0f, 1.0f);
+  if (variance) {
+    *variance *= sqr3(make_float3(1.0f, 1.0f, 1.0f) / color);
+  }
+  return log3(color);
+}
+
+ccl_device float3 color_highlight_uncompress(float3 color)
+{
+  return exp3(color) - make_float3(1.0f, 1.0f, 1.0f);
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_COLOR_H__ */
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 85e9b8114ff..554b7408148 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -70,6 +70,7 @@ ccl_device_inline float3 safe_normalize(const float3 a);
 ccl_device_inline float3 normalize_len(const float3 a, float *t);
 ccl_device_inline float3 safe_normalize_len(const float3 a, float *t);
 ccl_device_inline float3 interp(float3 a, float3 b, float t);
+ccl_device_inline float3 sqr3(float3 a);
 
 ccl_device_inline bool is_zero(const float3 a);
 ccl_device_inline float reduce_add(const float3 a);
@@ -349,6 +350,11 @@ ccl_device_inline float3 interp(float3 a, float3 b, float t)
   return a + t * (b - a);
 }
 
+ccl_device_inline float3 sqr3(float3 a)
+{
+  return a * a;
+}
+
 ccl_device_inline bool is_zero(const float3 a)
 {
 #ifdef __KERNEL_SSE__