[Bf-blender-cvs] [03b5be4e3cd] blender-v3.4-release: Cycles: use more PMJ patterns and make their size adaptive.

Mon Nov 21 19:19:11 CET 2022

Commit: 03b5be4e3cdf6a4967cb438dacd595c23075db79
Author: Nathan Vegdahl
Date:   Mon Nov 21 18:16:27 2022 +0100
Branches: blender-v3.4-release
https://developer.blender.org/rB03b5be4e3cdf6a4967cb438dacd595c23075db79

Cycles: use more PMJ patterns and make their size adaptive.

This resolves some issues with correlation artifacts at higher sample counts.

Fix T101356, correlation issues in new PMJ pattern.

Differential Revision: https://developer.blender.org/D16561

===================================================================

M	intern/cycles/kernel/data_template.h
M	intern/cycles/kernel/sample/jitter.h
M	intern/cycles/kernel/types.h
M	intern/cycles/scene/integrator.cpp

===================================================================

diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h
index 6b89de13797..c7b50b20c70 100644
--- a/intern/cycles/kernel/data_template.h
+++ b/intern/cycles/kernel/data_template.h
@@ -183,6 +183,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
 KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
 /* Sampling pattern. */
 KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
+KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size)
 KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
 /* Volume render. */
 KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@@ -205,6 +206,11 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
 KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
 KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
 KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
+
+/* Padding. */
+KERNEL_STRUCT_MEMBER(integrator, int, pad1)
+KERNEL_STRUCT_MEMBER(integrator, int, pad2)
+KERNEL_STRUCT_MEMBER(integrator, int, pad3)
 KERNEL_STRUCT_END(KernelIntegrator)
 
 /* SVM. For shader specialization. */
diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h
index e748f95fc7d..1cde9f9d3de 100644
--- a/intern/cycles/kernel/sample/jitter.h
+++ b/intern/cycles/kernel/sample/jitter.h
@@ -7,6 +7,25 @@
 #pragma once
 CCL_NAMESPACE_BEGIN
 
+ccl_device uint pmj_shuffled_sample_index(KernelGlobals kg, uint sample, uint dimension, uint seed)
+{
+  const uint sample_count = kernel_data.integrator.pmj_sequence_size;
+
+  /* Map (sample, dimension, seed) to an index below sample_count *
+   * NUM_PMJ_PATTERNS: a hashed pattern choice per dimension, plus the
+   * sample index scrambled to better decorrelate dimensions. The mask
+   * keeps the high bits of `sample` and scrambles only its low bits, so
+   * we shuffle *within* the current pattern and avoid early pattern reuse. */
+  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+  /* Only a valid bit mask if sample_count is a power of two (expected, not checked here). */
+  const uint sample_mask = sample_count - 1;
+  const uint sample_shuffled = nested_uniform_scramble(sample,
+                                                       hash_wang_seeded_uint(dimension, seed));
+  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
+
+  return ((pattern_i * sample_count) + sample) % (sample_count * NUM_PMJ_PATTERNS);
+}
+
 ccl_device float pmj_sample_1D(KernelGlobals kg,
                                uint sample,
                                const uint rng_hash,
@@ -20,22 +39,9 @@ ccl_device float pmj_sample_1D(KernelGlobals kg,
     seed = kernel_data.integrator.seed;
   }
 
-  /* Shuffle the pattern order and sample index to better decorrelate
-   * dimensions and make the most of the finite patterns we have.
-   * The funky sample mask stuff is to ensure that we only shuffle
-   * *within* the current sample pattern, which is necessary to avoid
-   * early repeat pattern use. */
-  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
-  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
-  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
-  const uint sample_shuffled = nested_uniform_scramble(sample,
-                                                       hash_wang_seeded_uint(dimension, seed));
-  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
-
   /* Fetch the sample. */
-  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
-                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
-  float x = kernel_data_fetch(sample_pattern_lut, index * 2);
+  const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
+  float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
 
   /* Do limited Cranley-Patterson rotation when using scrambling distance. */
   if (kernel_data.integrator.scrambling_distance < 1.0f) {
@@ -61,23 +67,10 @@ ccl_device float2 pmj_sample_2D(KernelGlobals kg,
     seed = kernel_data.integrator.seed;
   }
 
-  /* Shuffle the pattern order and sample index to better decorrelate
-   * dimensions and make the most of the finite patterns we have.
-   * The funky sample mask stuff is to ensure that we only shuffle
-   * *within* the current sample pattern, which is necessary to avoid
-   * early repeat pattern use. */
-  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
-  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
-  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
-  const uint sample_shuffled = nested_uniform_scramble(sample,
-                                                       hash_wang_seeded_uint(dimension, seed));
-  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
-
   /* Fetch the sample. */
-  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
-                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
-  float x = kernel_data_fetch(sample_pattern_lut, index * 2);
-  float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);
+  const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
+  float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
+  float y = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS + 1);
 
   /* Do limited Cranley-Patterson rotation when using scrambling distance. */
   if (kernel_data.integrator.scrambling_distance < 1.0f) {
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 24c5a6a4540..6d80fd3425c 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -1382,12 +1382,13 @@ static_assert_align(KernelShaderEvalInput, 16);
 
 /* Pre-computed sample table sizes for PMJ02 sampler.
  *
- * NOTE: divisions *must* be a power of two, and patterns
+ * NOTE: min and max samples *must* be powers of two, and patterns
  * ideally should be as well.
  */
-#define NUM_PMJ_DIVISIONS 32
-#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS))
-#define NUM_PMJ_PATTERNS 64
+#define MIN_PMJ_SAMPLES 256
+#define MAX_PMJ_SAMPLES 8192
+#define NUM_PMJ_DIMENSIONS 2
+#define NUM_PMJ_PATTERNS 256
 
 /* Device kernels.
  *
diff --git a/intern/cycles/scene/integrator.cpp b/intern/cycles/scene/integrator.cpp
index ade4716242b..23f9e8b7aa8 100644
--- a/intern/cycles/scene/integrator.cpp
+++ b/intern/cycles/scene/integrator.cpp
@@ -257,12 +257,18 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
     kintegrator->light_inv_rr_threshold = 0.0f;
   }
 
+  constexpr int num_sequences = NUM_PMJ_PATTERNS;
+  int sequence_size = clamp(next_power_of_two(aa_samples - 1), MIN_PMJ_SAMPLES, MAX_PMJ_SAMPLES);
   if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ &&
-      dscene->sample_pattern_lut.size() == 0) {
-    constexpr int sequence_size = NUM_PMJ_SAMPLES;
-    constexpr int num_sequences = NUM_PMJ_PATTERNS;
+      dscene->sample_pattern_lut.size() !=
+          (sequence_size * NUM_PMJ_DIMENSIONS * NUM_PMJ_PATTERNS)) {
+    kintegrator->pmj_sequence_size = sequence_size;
+
+    if (dscene->sample_pattern_lut.size() != 0) {
+      dscene->sample_pattern_lut.free();
+    }
     float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
-                                                                    2);
+                                                                    NUM_PMJ_DIMENSIONS);
     TaskPool pool;
     for (int j = 0; j < num_sequences; ++j) {
       float2 *sequence = directions + j * sequence_size;