[Bf-blender-cvs] [f71bfe46553] master: Fix anisotropic Beckmann regression test failing on Metal

Thu Jan 19 20:36:05 CET 2023

Commit: f71bfe465538e6add595804f14fa2731fc8f7b64
Author: Weizhen Huang
Date:   Thu Jan 19 20:02:35 2023 +0100
Branches: master
https://developer.blender.org/rBf71bfe465538e6add595804f14fa2731fc8f7b64

Fix anisotropic Beckmann regression test failing on Metal

The lookup table method on CPU and the numerical root finding method on
GPU give quite different results. This commit deletes the Beckmann lookup
table and uses numerical root finding on all devices. For the numerical
root finding, a combined bisection-Newton method with precision control
is used.

Differential Revision: https://developer.blender.org/D17050

===================================================================

M	intern/cycles/kernel/closure/bsdf_microfacet.h
M	intern/cycles/kernel/types.h
M	intern/cycles/scene/shader.cpp
M	intern/cycles/scene/shader.h
M	release/scripts/addons
M	release/scripts/addons_contrib

===================================================================

diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 25238698614..83051f08f40 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -71,7 +71,6 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals kg,
 
   *G1i = G1;
 
-#if defined(__KERNEL_GPU__)
   /* Based on paper from Wenzel Jakob
    * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
    *
@@ -87,38 +86,38 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals kg,
    *   exp(-ierf(x)^2) ~= 1 - x * x
    *   solve y = 1 + b + K * (1 - b * b)
    */
-  float K = tan_theta_i * SQRT_PI_INV;
-  float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
-  float y_exact = randu * (1.0f + erf_a + K * exp_a2);
+  const float K = tan_theta_i * SQRT_PI_INV;
+  const float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
+  const float y_exact = randu * (1.0f + erf_a + K * exp_a2);
   float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
 
-  /* Perform newton step to refine toward the true root. */
   float inv_erf = fast_ierff(b);
-  float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
-  /* Check if we are close enough already,
-   * this also avoids NaNs as we get close to the root.
-   */
-  if (fabsf(value) > 1e-6f) {
-    b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */
-    inv_erf = fast_ierff(b);
-    value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
-    b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */
-    /* Compute the slope from the refined value. */
-    *slope_x = fast_ierff(b);
-  }
-  else {
-    /* We are close enough already. */
-    *slope_x = inv_erf;
+  float2 begin = make_float2(-1.0f, -y_exact);
+  float2 end = make_float2(erf_a, 1.0f + erf_a + K * exp_a2 - y_exact);
+  float2 current = make_float2(b, 1.0f + b + K * expf(-sqr(inv_erf)) - y_exact);
+
+  /* Find root in a monotonic interval using Newton's method, under given precision and maximal
+   * iterations. Falls back to bisection if the Newton step produces results outside of the valid
+   * interval. */
+  const float precision = 1e-6f;
+  const int max_iter = 3;
+  int iter = 0;
+  while (fabsf(current.y) > precision && iter++ < max_iter) {
+    if (signf(begin.y) == signf(current.y)) {
+      begin.x = current.x;
+      begin.y = current.y;
+    }
+    else {
+      end.x = current.x;
+    }
+    const float newton_x = current.x - current.y / (1.0f - inv_erf * tan_theta_i);
+    current.x = (newton_x >= begin.x && newton_x <= end.x) ? newton_x : 0.5f * (begin.x + end.x);
+    inv_erf = fast_ierff(current.x);
+    current.y = 1.0f + current.x + K * expf(-sqr(inv_erf)) - y_exact;
   }
-  *slope_y = fast_ierff(2.0f * randv - 1.0f);
-#else
-  /* Use precomputed table on CPU, it gives better performance. */
-  int beckmann_table_offset = kernel_data.tables.beckmann_offset;
 
-  *slope_x = lookup_table_read_2D(
-      kg, randu, cos_theta_i, beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
+  *slope_x = inv_erf;
   *slope_y = fast_ierff(2.0f * randv - 1.0f);
-#endif
 }
 
 /* GGX microfacet importance sampling from:
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 72a9c65f303..4075980076f 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -34,8 +34,6 @@ CCL_NAMESPACE_BEGIN
 
 #define VOLUME_BOUNDS_MAX 1024
 
-#define BECKMANN_TABLE_SIZE 256
-
 #define SHADER_NONE (~0)
 #define OBJECT_NONE (~0)
 #define PRIM_NONE (~0)
@@ -1187,9 +1185,8 @@ typedef enum KernelBVHLayout {
 #include "kernel/data_template.h"
 
 typedef struct KernelTables {
-  int beckmann_offset;
   int filter_table_offset;
-  int pad1, pad2;
+  int pad1, pad2, pad3;
 } KernelTables;
 static_assert_align(KernelTables, 16);
 
diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp
index e03b77917ef..f0faa91b4be 100644
--- a/intern/cycles/scene/shader.cpp
+++ b/intern/cycles/scene/shader.cpp
@@ -32,114 +32,6 @@ namespace OCIO = OCIO_NAMESPACE;
 CCL_NAMESPACE_BEGIN
 
 thread_mutex ShaderManager::lookup_table_mutex;
-vector<float> ShaderManager::beckmann_table;
-bool ShaderManager::beckmann_table_ready = false;
-
-/* Beckmann sampling precomputed table, see bsdf_microfacet.h */
-
-/* 2D slope distribution (alpha = 1.0) */
-static float beckmann_table_P22(const float slope_x, const float slope_y)
-{
-  return expf(-(slope_x * slope_x + slope_y * slope_y));
-}
-
-/* maximal slope amplitude (range that contains 99.99% of the distribution) */
-static float beckmann_table_slope_max()
-{
-  return 6.0;
-}
-
-/* MSVC 2015 needs this ugly hack to prevent a codegen bug on x86
- * see T50176 for details
- */
-#if defined(_MSC_VER) && (_MSC_VER == 1900)
-#  define MSVC_VOLATILE volatile
-#else
-#  define MSVC_VOLATILE
-#endif
-
-/* Paper used: Importance Sampling Microfacet-Based BSDFs with the
- * Distribution of Visible Normals. Supplemental Material 2/2.
- *
- * http://hal.inria.fr/docs/01/00/66/20/ANNEX/supplemental2.pdf
- */
-static void beckmann_table_rows(float *table, int row_from, int row_to)
-{
-  /* allocate temporary data */
-  const int DATA_TMP_SIZE = 512;
-  vector<double> slope_x(DATA_TMP_SIZE);
-  vector<double> CDF_P22_omega_i(DATA_TMP_SIZE);
-
-  /* loop over incident directions */
-  for (int index_theta = row_from; index_theta < row_to; index_theta++) {
-    /* incident vector */
-    const float cos_theta = index_theta / (BECKMANN_TABLE_SIZE - 1.0f);
-    const float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
-
-    /* for a given incident vector
-     * integrate P22_{omega_i}(x_slope, 1, 1), Eq. (10) */
-    slope_x[0] = (double)-beckmann_table_slope_max();
-    CDF_P22_omega_i[0] = 0;
-
-    for (MSVC_VOLATILE int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
-      /* slope_x */
-      slope_x[index_slope_x] = (double)(-beckmann_table_slope_max() +
-                                        2.0f * beckmann_table_slope_max() * index_slope_x /
-                                            (DATA_TMP_SIZE - 1.0f));
-
-      /* dot product with incident vector */
-      float dot_product = fmaxf(0.0f, -(float)slope_x[index_slope_x] * sin_theta + cos_theta);
-      /* marginalize P22_{omega_i}(x_slope, 1, 1), Eq. (10) */
-      float P22_omega_i = 0.0f;
-
-      for (int j = 0; j < 100; ++j) {
-        float slope_y = -beckmann_table_slope_max() +
-                        2.0f * beckmann_table_slope_max() * j * (1.0f / 99.0f);
-        P22_omega_i += dot_product * beckmann_table_P22((float)slope_x[index_slope_x], slope_y);
-      }
-
-      /* CDF of P22_{omega_i}(x_slope, 1, 1), Eq. (10) */
-      CDF_P22_omega_i[index_slope_x] = CDF_P22_omega_i[index_slope_x - 1] + (double)P22_omega_i;
-    }
-
-    /* renormalize CDF_P22_omega_i */
-    for (int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x)
-      CDF_P22_omega_i[index_slope_x] /= CDF_P22_omega_i[DATA_TMP_SIZE - 1];
-
-    /* loop over random number U1 */
-    int index_slope_x = 0;
-
-    for (int index_U = 0; index_U < BECKMANN_TABLE_SIZE; ++index_U) {
-      const double U = 0.0000001 + 0.9999998 * index_U / (double)(BECKMANN_TABLE_SIZE - 1);
-
-      /* inverse CDF_P22_omega_i, solve Eq.(11) */
-      while (CDF_P22_omega_i[index_slope_x] <= U)
-        ++index_slope_x;
-
-      const double interp = (CDF_P22_omega_i[index_slope_x] - U) /
-                            (CDF_P22_omega_i[index_slope_x] - CDF_P22_omega_i[index_slope_x - 1]);
-
-      /* store value */
-      table[index_U + index_theta * BECKMANN_TABLE_SIZE] =
-          (float)(interp * slope_x[index_slope_x - 1] + (1.0 - interp) * slope_x[index_slope_x]);
-    }
-  }
-}
-
-#undef MSVC_VOLATILE
-
-static void beckmann_table_build(vector<float> &table)
-{
-  table.resize(BECKMANN_TABLE_SIZE * BECKMANN_TABLE_SIZE);
-
-  /* multithreaded build */
-  TaskPool pool;
-
-  for (int i = 0; i < BECKMANN_TABLE_SIZE; i += 8)
-    pool.push(function_bind(&beckmann_table_rows, &table[0], i, i + 8));
-
-  pool.wait_work();
-}
 
 /* Shader */
 
@@ -491,7 +383,6 @@ bool Shader::need_update_geometry() const
 ShaderManager::ShaderManager()
 {
   update_flags = UPDATE_ALL;
-  beckmann_table_offset = TABLE_OFFSET_INVALID;
 
   init_xyz_transforms();
 }
@@ -663,22 +554,6 @@ void ShaderManager::device_update_common(Device * /*device*/,
 
   dscene->shaders.copy_to_device();
 
-  /* lookup tables */
-  KernelTables *ktables = &dscene->data.tables;
-
-  /* beckmann lookup table */
-  if (beckmann_table_offset == TABLE_OFFSET_INVALID) {
-    if (!beckmann_table_ready) {
-      thread_scoped_lock lock(lookup_table_mutex);
-      if (!beckmann_table_ready) {
-        beckmann_table_build(beckmann_table);
-        beckmann_table_ready = true;
-      }
-    }
-    beckmann_table_offset = scene->lookup_tables->add_table(dscene, beckmann_table);
-  }
-  ktables->beckmann_offset = (int)beckmann_table_offset;
-
   /* integrator */
   KernelIntegrator *kintegrator = &dscene->data.integrator;
   kintegrator->use_volumes = has_volumes;
@@ -700,8 +575,6 @@ void ShaderManager::device_update_common(Device * /*device*/,
 
 void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *scene)
 {
-  scene->lookup_tables->remove_table(&beckmann_table_offset);
-
   dscene->shaders.free();
 }
 
@@ -844,7 +717,6 @@ uint ShaderManager::get_kernel_features(Scene *scene)
 
 void ShaderManager::free_memory()
 {
-  beckmann_table.free_memory();
 
 #ifdef WITH_OSL
   OSLShaderManager::free_memory();
diff --git a/intern/cycles/scene/shader.h b/intern/cycles/scene/shader.h
index 8f59eefae05..fab09fcd9d3 100644
--- a/intern/cycles/scene/shader.h
+++ b/intern/cycles/scene/shader.h
@@ -232,10 +232,6 @@ class ShaderManager {
   AttributeIDMap unique_attribute_id;
 
   static thread_mutex lookup_table_mutex;
-  static vector<float> beckmann_table;
-  static bool beckmann_table_ready;
-
-  size_t beckmann_table_offset;
 
   uint get_graph_kernel_features(ShaderGraph *graph);
 
diff --git a/release/scripts/addons b/release/scripts/addons
index b

@@ Diff output truncated at 10240 characters. @@