[Bf-blender-cvs] [70aa80b6838] cycles-x: Cycles PMJ adaptive sampling working poorly with many bounces for Cycles-X

William Leeson noreply at git.blender.org
Mon Sep 6 12:55:26 CEST 2021


Commit: 70aa80b683812754b49b8b52820564074c1dd6a7
Author: William Leeson
Date:   Mon Sep 6 12:34:05 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB70aa80b683812754b49b8b52820564074c1dd6a7

Cycles PMJ adaptive sampling working poorly with many bounces for Cycles-X

This fixes T79190
Problem:
There is correlation at the edges of the image tiles due to using the same hash twice with an xor. Also, the PMJ sampling pattern, when used with xor scrambling, does not generate a good sample set, which causes the rays to not sample the higher dimensions correctly.
Solution:
Use a different pixel hash to avoid clashes with the hash used during path tracing. Also, use Cranley-Patterson rotation (which matches the Sobol sampling scheme) with the PMJ sampling scheme instead of xor scrambling, which was resulting in a bad sample set. The number of samples drawable from the PMJ sampler is also increased by using all the PMJ sample sets instead of just one.
Issues:
The new samples produce images with more noise than the original PMJ sets and also the Sobol sets. This unfortunately is not avoidable, as the nature of jitter sampling produces more noise because there is little correlation between sample sets. To alleviate this to some extent, the Cranley-Patterson rotation method from the Sobol sample generator is used, as it has some degree of correlation due to the simple hash used.

Reviewed By: brecht, sergey, LazyDodo

Maniphest Tasks: T79190

Differential Revision: https://developer.blender.org/D12213

===================================================================

M	intern/cycles/kernel/kernel_jitter.h
M	intern/cycles/kernel/kernel_random.h
M	intern/cycles/kernel/kernel_textures.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/integrator.cpp
M	intern/cycles/render/jitter.cpp
M	intern/cycles/render/scene.h
M	intern/cycles/test/util_math_test.cpp
M	intern/cycles/util/util_math.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index bce2866a9a3..3afce9f957b 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -15,89 +15,26 @@
  */
 
 #pragma once
-
 CCL_NAMESPACE_BEGIN
 
-/* "Correlated Multi-Jittered Sampling"
- * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
-
-ccl_device_inline bool cmj_is_pow2(int i)
-{
-  return (i > 1) && ((i & (i - 1)) == 0);
-}
-
-ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
+ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed)
 {
-  return (a & (b - 1));
-}
+  x += seed;
+  x ^= (x * 0x6c50b47cu);
+  x ^= x * 0xb82f1e52u;
+  x ^= x * 0xc7afe638u;
+  x ^= x * 0x8d22f6e6u;
 
-/* b must be > 1 */
-ccl_device_inline int cmj_fast_div_pow2(int a, int b)
-{
-  kernel_assert(b > 1);
-  return a >> count_trailing_zeros(b);
+  return x;
 }
 
-ccl_device_inline uint cmj_w_mask(uint w)
+ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed)
 {
-  kernel_assert(w > 1);
-  return ((1 << (32 - count_leading_zeros(w))) - 1);
-}
+  x = reverse_integer_bits(x);
+  x = laine_karras_permutation(x, seed);
+  x = reverse_integer_bits(x);
 
-ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
-{
-  uint w = l - 1;
-
-  if ((l & w) == 0) {
-    /* l is a power of two (fast) */
-    i ^= p;
-    i *= 0xe170893d;
-    i ^= p >> 16;
-    i ^= (i & w) >> 4;
-    i ^= p >> 8;
-    i *= 0x0929eb3f;
-    i ^= p >> 23;
-    i ^= (i & w) >> 1;
-    i *= 1 | p >> 27;
-    i *= 0x6935fa69;
-    i ^= (i & w) >> 11;
-    i *= 0x74dcb303;
-    i ^= (i & w) >> 2;
-    i *= 0x9e501cc3;
-    i ^= (i & w) >> 2;
-    i *= 0xc860a3df;
-    i &= w;
-    i ^= i >> 5;
-
-    return (i + p) & w;
-  }
-  else {
-    /* l is not a power of two (slow) */
-    w = cmj_w_mask(w);
-
-    do {
-      i ^= p;
-      i *= 0xe170893d;
-      i ^= p >> 16;
-      i ^= (i & w) >> 4;
-      i ^= p >> 8;
-      i *= 0x0929eb3f;
-      i ^= p >> 23;
-      i ^= (i & w) >> 1;
-      i *= 1 | p >> 27;
-      i *= 0x6935fa69;
-      i ^= (i & w) >> 11;
-      i *= 0x74dcb303;
-      i ^= (i & w) >> 2;
-      i *= 0x9e501cc3;
-      i ^= (i & w) >> 2;
-      i *= 0xc860a3df;
-      i &= w;
-      i ^= i >> 5;
-    } while (i >= l);
-
-    return (i + p) % l;
-  }
+  return x;
 }
 
 ccl_device_inline uint cmj_hash(uint i, uint p)
@@ -130,37 +67,100 @@ ccl_device_inline float cmj_randfloat(uint i, uint p)
   return cmj_hash(i, p) * (1.0f / 4294967808.0f);
 }
 
-ccl_device float pmj_sample_1D(const KernelGlobals *kg, int sample, int rng_hash, int dimension)
+ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
+{
+  return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
+}
+
+ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
 {
-  /* Fallback to random */
-  if (sample >= NUM_PMJ_SAMPLES) {
-    const int p = rng_hash + dimension;
-    return cmj_randfloat(sample, p);
-  }
-  else {
-    const uint mask = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
-    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-    return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ mask) - 1.0f;
-  }
+  /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
+   *  the x part is used as the sample (TODO(@leesonw): Add using both x and y parts independently). */
+
+  /* Perform Owen shuffle of the sample number to reorder the samples. */
+#ifdef _SIMPLE_HASH_
+  const uint rv = cmj_hash_simple(dimension, rng_hash);
+#else /* Use a _REGULAR_HASH_. */
+  const uint rv = cmj_hash(dimension, rng_hash);
+#endif
+#ifdef _XOR_SHUFFLE_
+#  warning "Using XOR shuffle."
+  const uint s = sample ^ rv;
+#else /* Use _OWEN_SHUFFLE_ for reordering. */
+  const uint s = nested_uniform_scramble(sample, rv);
+#endif
+
+  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
+  const uint sample_set = s / NUM_PMJ_SAMPLES;
+  const uint d = (dimension + sample_set);
+  const uint dim = d % NUM_PMJ_PATTERNS;
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
+
+  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
+
+#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
+  /* Use Cranley-Patterson rotation to displace the sample pattern. */
+#  ifdef _SIMPLE_HASH_
+  float dx = cmj_randfloat_simple(d, rng_hash);
+#  else
+  /* Only jitter within the grid interval. */
+  float dx = cmj_randfloat(d, rng_hash);
+#  endif
+  fx = fx + dx * (1.0f / NUM_PMJ_SAMPLES);
+  fx = fx - floorf(fx);
+
+#else
+#  warning "Not using Cranley-Patterson Rotation."
+#endif
+
+  return fx;
 }
 
-ccl_device float2 pmj_sample_2D(const KernelGlobals *kg, int sample, int rng_hash, int dimension)
+ccl_device void pmj_sample_2D(
+    const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension, float *x, float *y)
 {
-  if (sample >= NUM_PMJ_SAMPLES) {
-    const int p = rng_hash + dimension;
-    const float fx = cmj_randfloat(sample, p);
-    const float fy = cmj_randfloat(sample, p + 1);
-    return make_float2(fx, fy);
-  }
-  else {
-    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-    const uint maskx = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
-    const uint masky = cmj_hash_simple(dimension + 1, rng_hash) & 0x007fffff;
-    const float fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ maskx) - 1.0f;
-    const float fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ masky) -
-                     1.0f;
-    return make_float2(fx, fy);
-  }
+  /* Perform a shuffle on the sample number to reorder the samples. */
+#ifdef _SIMPLE_HASH_
+  const uint rv = cmj_hash_simple(dimension, rng_hash);
+#else /* Use a _REGULAR_HASH_. */
+  const uint rv = cmj_hash(dimension, rng_hash);
+#endif
+#ifdef _XOR_SHUFFLE_
+#  warning "Using XOR shuffle."
+  const uint s = sample ^ rv;
+#else /* Use _OWEN_SHUFFLE_ for reordering. */
+  const uint s = nested_uniform_scramble(sample, rv);
+#endif
+
+  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
+  const uint sample_set = s / NUM_PMJ_SAMPLES;
+  const uint d = (dimension + sample_set);
+  const uint dim = d % NUM_PMJ_PATTERNS;
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
+
+  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
+  float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1);
+
+#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
+  /* Use Cranley-Patterson rotation to displace the sample pattern. */
+#  ifdef _SIMPLE_HASH_
+  float dx = cmj_randfloat_simple(d, rng_hash);
+  float dy = cmj_randfloat_simple(d + 1, rng_hash);
+#  else
+  float dx = cmj_randfloat(d, rng_hash);
+  float dy = cmj_randfloat(d + 1, rng_hash);
+#  endif
+  /* Only jitter within the grid cells. */
+  fx = fx + dx * (1.0f / NUM_PMJ_DIVISIONS);
+  fy = fy + dy * (1.0f / NUM_PMJ_DIVISIONS);
+  fx = fx - floorf(fx);
+  fy = fy - floorf(fy);
+#else
+#  warning "Not using Cranley Patterson Rotation."
+#endif
+
+  (*x) = fx;
+  (*y) = fy;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 9a62bde6e96..41b7d76230a 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
 #include "kernel/kernel_jitter.h"
@@ -45,7 +44,7 @@ ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimensio
   uint i = index + SOBOL_SKIP;
   for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
     j += x;
-    result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1);
+    result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1));
   }
   return result;
 }
@@ -99,9 +98,8 @@ ccl_device_forceinline void path_rng_2D(
   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
 #endif
   {
-    const float2 f = pmj_sample_2D(kg, sample, rng_hash, dimension);
-    *fx = f.x;
-    *fy = f.y;
+    pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
+
     return;
   }
 
@@ -112,12 +110,39 @@ ccl_device_forceinline void path_rng_2D(
 #endif
 }
 
+/**
+ * 1D hash recomended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020
+ * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh
+ * http://www.jcgt.org/published/0009/03/02/paper.pdf
+ */
+ccl_device_inline uint hash_iqint1(uint n)
+{
+  n = (n << 13U) ^ n;
+  n = n * (n * n * 15731U + 789221U) + 1376312589U;
+
+  return n;
+}
+
+/**
+ * 2D hash recomended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020
+ * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh
+ * http://www.jcgt.org/published/0009/03/02/paper.pdf
+ */
+ccl_device_inline uint hash_iqnt2d(const uint x, const uint y)
+{
+  const uint qx = 1103515245U * ((x >> 1U) ^ (y));
+  const uint qy = 1103515245U * ((y >> 1U) ^ (x));
+  const uint n = 1103515245U * ((qx) ^ (qy >> 3U));
+
+  return n;
+}
+
 ccl_device_inline uint path_rng_hash_init(const KernelGlobals *ccl_restrict kg,
                                           const int sample,
                                           const int x,
                                           const int y)
 {
-  const uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed;
+  const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed;
 
 #ifdef __DEBUG_CORRELATION__
   srand48(rng_hash + sample);
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index c8e01677d09..bf9b94c1753 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -78,7 +78,7 @@ KERNEL_TEX(KernelShader, __shaders)
 KERNEL_TEX(float, __lookup_table)
 
 /* sobol */
-KERNEL_TEX(uint, __sample_pattern_lut)
+KERNEL_TEX(float, __sample_pattern_lut)
 
 /* image textures */
 KERNEL_TEX(TextureInfo, __texture_info)
diff 

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list