[Bf-blender-cvs] [d9192aaa6d2] master: Cycles: limit the internal sample index of Sobol-Burley for performance

Nathan Vegdahl noreply at git.blender.org
Wed Dec 14 17:51:04 CET 2022


Commit: d9192aaa6d2a0a17bcfe158bade77314f0679cf7
Author: Nathan Vegdahl
Date:   Wed Dec 14 16:43:12 2022 +0100
Branches: master
https://developer.blender.org/rBd9192aaa6d2a0a17bcfe158bade77314f0679cf7

Cycles: limit the internal sample index of Sobol-Burley for performance

The limit is derived from the render sample count, so it doesn't impact
sampling quality. It's similar in spirit to the adaptive table size in D16561,
but in this case for performance rather than memory usage.

Differential Revision: https://developer.blender.org/D16726

===================================================================

M	intern/cycles/kernel/data_template.h
M	intern/cycles/kernel/sample/pattern.h
M	intern/cycles/kernel/sample/sobol_burley.h
M	intern/cycles/scene/integrator.cpp

===================================================================

diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h
index af7a6d2ef41..ddc462e02f6 100644
--- a/intern/cycles/kernel/data_template.h
+++ b/intern/cycles/kernel/data_template.h
@@ -180,6 +180,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
 /* Sampling pattern. */
 KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
 KERNEL_STRUCT_MEMBER(integrator, int, tabulated_sobol_sequence_size)
+KERNEL_STRUCT_MEMBER(integrator, int, sobol_index_mask)
 KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
 /* Volume render. */
 KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@@ -204,7 +205,6 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
 
 /* Padding. */
 KERNEL_STRUCT_MEMBER(integrator, int, pad1)
-KERNEL_STRUCT_MEMBER(integrator, int, pad2)
 KERNEL_STRUCT_END(KernelIntegrator)
 
 /* SVM. For shader specialization. */
diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h
index f6f1de448e0..71018bb1e91 100644
--- a/intern/cycles/kernel/sample/pattern.h
+++ b/intern/cycles/kernel/sample/pattern.h
@@ -3,8 +3,8 @@
 
 #pragma once
 
-#include "kernel/sample/tabulated_sobol.h"
 #include "kernel/sample/sobol_burley.h"
+#include "kernel/sample/tabulated_sobol.h"
 #include "util/hash.h"
 
 CCL_NAMESPACE_BEGIN
@@ -23,7 +23,8 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
 #endif
 
   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
-    return sobol_burley_sample_1D(sample, dimension, rng_hash);
+    const uint index_mask = kernel_data.integrator.sobol_index_mask;
+    return sobol_burley_sample_1D(sample, dimension, rng_hash, index_mask);
   }
   else {
     return tabulated_sobol_sample_1D(kg, sample, rng_hash, dimension);
@@ -40,7 +41,8 @@ ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
 #endif
 
   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
-    return sobol_burley_sample_2D(sample, dimension, rng_hash);
+    const uint index_mask = kernel_data.integrator.sobol_index_mask;
+    return sobol_burley_sample_2D(sample, dimension, rng_hash, index_mask);
   }
   else {
     return tabulated_sobol_sample_2D(kg, sample, rng_hash, dimension);
@@ -57,7 +59,8 @@ ccl_device_forceinline float3 path_rng_3D(KernelGlobals kg,
 #endif
 
   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
-    return sobol_burley_sample_3D(sample, dimension, rng_hash);
+    const uint index_mask = kernel_data.integrator.sobol_index_mask;
+    return sobol_burley_sample_3D(sample, dimension, rng_hash, index_mask);
   }
   else {
     return tabulated_sobol_sample_3D(kg, sample, rng_hash, dimension);
@@ -74,7 +77,8 @@ ccl_device_forceinline float4 path_rng_4D(KernelGlobals kg,
 #endif
 
   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
-    return sobol_burley_sample_4D(sample, dimension, rng_hash);
+    const uint index_mask = kernel_data.integrator.sobol_index_mask;
+    return sobol_burley_sample_4D(sample, dimension, rng_hash, index_mask);
   }
   else {
     return tabulated_sobol_sample_4D(kg, sample, rng_hash, dimension);
diff --git a/intern/cycles/kernel/sample/sobol_burley.h b/intern/cycles/kernel/sample/sobol_burley.h
index 47796ae7998..f3fcde4ff62 100644
--- a/intern/cycles/kernel/sample/sobol_burley.h
+++ b/intern/cycles/kernel/sample/sobol_burley.h
@@ -64,32 +64,76 @@ ccl_device_forceinline float sobol_burley(uint rev_bit_index,
   return uint_to_float_excl(result);
 }
 
+/*
+ * NOTE: the functions below intentionally produce samples that are
+ * uncorrelated between functions.  For example, a 1D sample and 2D
+ * sample produced with the same index, dimension, and seed are
+ * uncorrelated with each other.  This allows more care-free usage
+ * of the functions together, without having to worry about
+ * e.g. 1D and 2D samples being accidentally correlated with each
+ * other.
+ */
+
 /*
  * Computes a 1D Owen-scrambled and shuffled Sobol sample.
+ *
+ * `index` is the index of the sample in the sequence.
+ *
+ * `dimension` is which dimension of the sample you want to fetch.  Note
+ * that different 1D dimensions are uncorrelated.  For samples with > 1D
+ * stratification, use the multi-dimensional sampling methods below.
+ *
+ * `seed`: different seeds produce statistically independent,
+ * uncorrelated sequences.
+ *
+ * `shuffled_index_mask` limits the sample sequence length, improving
+ * performance. It must be a string of binary 1 bits followed by a
+ * string of binary 0 bits (e.g. 0xffff0000) for the sampler to operate
+ * correctly. In general, `reverse_integer_bits(shuffled_index_mask)`
+ * should be >= the maximum number of samples expected to be taken. A safe
+ * default (but least performant) is 0xffffffff, for maximum sequence
+ * length.
  */
-ccl_device float sobol_burley_sample_1D(uint index, uint const dimension, uint seed)
+ccl_device float sobol_burley_sample_1D(uint index,
+                                        uint const dimension,
+                                        uint seed,
+                                        uint shuffled_index_mask)
 {
   /* Include the dimension in the seed, so we get decorrelated
    * sequences for different dimensions via shuffling. */
   seed ^= hash_hp_uint(dimension);
 
-  /* Shuffle. */
+  /* Shuffle and mask.  The masking is just for better
+   * performance at low sample counts. */
   index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe);
+  index &= shuffled_index_mask;
 
   return sobol_burley(index, 0, seed ^ 0x635c77bd);
 }
 
 /*
  * Computes a 2D Owen-scrambled and shuffled Sobol sample.
+ *
+ * `dimension_set` is which two dimensions of the sample you want to
+ * fetch.  For example, 0 is the first two, 1 is the second two, etc.
+ * The dimensions within a single set are stratified, but different sets
+ * are uncorrelated.
+ *
+ * See sobol_burley_sample_1D for further usage details.
  */
-ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, uint seed)
+ccl_device float2 sobol_burley_sample_2D(uint index,
+                                         const uint dimension_set,
+                                         uint seed,
+                                         uint shuffled_index_mask)
 {
   /* Include the dimension set in the seed, so we get decorrelated
    * sequences for different dimension sets via shuffling. */
   seed ^= hash_hp_uint(dimension_set);
 
-  /* Shuffle. */
+  /* Shuffle and mask.  The masking is just for better
+   * performance at low sample counts. */
   index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a);
+  index &= shuffled_index_mask;
 
   return make_float2(sobol_burley(index, 0, seed ^ 0xe0aaaf76),
                      sobol_burley(index, 1, seed ^ 0x94964d4e));
@@ -97,15 +141,27 @@ ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, u
 
 /*
  * Computes a 3D Owen-scrambled and shuffled Sobol sample.
+ *
+ * `dimension_set` is which three dimensions of the sample you want to
+ * fetch.  For example, 0 is the first three, 1 is the second three, etc.
+ * The dimensions within a single set are stratified, but different sets
+ * are uncorrelated.
+ *
+ * See sobol_burley_sample_1D for further usage details.
  */
-ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, uint seed)
+ccl_device float3 sobol_burley_sample_3D(uint index,
+                                         const uint dimension_set,
+                                         uint seed,
+                                         uint shuffled_index_mask)
 {
   /* Include the dimension set in the seed, so we get decorrelated
    * sequences for different dimension sets via shuffling. */
   seed ^= hash_hp_uint(dimension_set);
 
-  /* Shuffle. */
+  /* Shuffle and mask.  The masking is just for better
+   * performance at low sample counts. */
   index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac);
+  index &= shuffled_index_mask;
 
   return make_float3(sobol_burley(index, 0, seed ^ 0x9e78e391),
                      sobol_burley(index, 1, seed ^ 0x67c33241),
@@ -114,15 +170,27 @@ ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, u
 
 /*
  * Computes a 4D Owen-scrambled and shuffled Sobol sample.
+ *
+ * `dimension_set` is which four dimensions of the sample you want to
+ * fetch.  For example, 0 is the first four, 1 is the second four, etc.
+ * The dimensions within a single set are stratified, but different sets
+ * are uncorrelated.
+ *
+ * See sobol_burley_sample_1D for further usage details.
  */
-ccl_device float4 sobol_burley_sample_4D(uint index, const uint dimension_set, uint seed)
+ccl_device float4 sobol_burley_sample_4D(uint index,
+                                         const uint dimension_set,
+                                         uint seed,
+                                         uint shuffled_index_mask)
 {
   /* Include the dimension set in the seed, so we get decorrelated
    * sequences for different dimension sets via shuffling. */
   seed ^= hash_hp_uint(dimension_set);
 
-  /* Shuffle. */
+  /* Shuffle and mask.  The masking is just for better
+   * performance at low sample counts. */
   index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055);
+  index &= shuffled_index_mask;
 
   return make_float4(sobol_burley(index, 0, seed ^ 0x39468210),
                      sobol_burley(index, 1, seed ^ 0xe9d8a845),
diff --git a/intern/cycles/scene/integrator.cpp b/intern/cycles/scene/integrator.cpp
index ba376e8ba99..7e5633733ae 100644
--- a/intern/cycles/scene/integrator.cpp
+++ b/intern/cycles/scene/integrator.cpp
@@ -253,6 +253,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
 
   kintegrator->sampling_pattern = sampling_pattern;
   kintegrator->scrambling_distance = scrambling_distance;
+  kintegrator->sobol_index_mask = reverse_integer_bits(next_power_of_two(aa_samples - 1) - 1);
 
   kintegrator->use_light_tree = scene->integrator->use_light_tree;
   if (light_sampling_threshold > 0.0f) {



More information about the Bf-blender-cvs mailing list