[Bf-blender-cvs] [e308b891c88] master: Cycles: Use faster and exact GGX VNDF sampling algorithm

Tue Jan 24 18:00:06 CET 2023

Commit: e308b891c8829e08ad023eff7ed43f7acac3ae03
Author: Lukas Stockner
Date:   Tue Jan 24 17:34:24 2023 +0100
Branches: master
https://developer.blender.org/rBe308b891c8829e08ad023eff7ed43f7acac3ae03

Cycles: Use faster and exact GGX VNDF sampling algorithm

Based on "Sampling the GGX Distribution of Visible Normals" by Eric Heitz
(https://jcgt.org/published/0007/04/01/).

Also, this removes the lambdaI computation from the Beckmann sampling code and
just recomputes it below. We already need to recompute for two other cases
(GGX and clearcoat), so this makes the code more consistent.

In terms of performance, I don't expect a notable impact since the earlier
computation was also non-trivial, and while it was probably slightly more
accurate, I'd argue that being consistent between evaluation and sampling is
more important than absolute numerical accuracy anyway.

Differential Revision: https://developer.blender.org/D17100

===================================================================

M	intern/cycles/kernel/closure/bsdf_microfacet.h
M	intern/cycles/util/math_float2.h

===================================================================

diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 75167b9d816..8cf4cfa244d 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -37,189 +37,100 @@ typedef struct MicrofacetBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf is too large!");
 
-/* Beckmann and GGX microfacet importance sampling. */
-
-ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals kg,
-                                                         const float cos_theta_i,
-                                                         const float sin_theta_i,
-                                                         float randu,
-                                                         float randv,
-                                                         ccl_private float *slope_x,
-                                                         ccl_private float *slope_y,
-                                                         ccl_private float *lambda_i)
-{
-  /* Special case (normal incidence). */
-  if (cos_theta_i >= 0.99999f) {
-    const float r = sqrtf(-logf(randu));
-    const float phi = M_2PI_F * randv;
-    *slope_x = r * cosf(phi);
-    *slope_y = r * sinf(phi);
-    *lambda_i = 0.0f;
-
-    return;
-  }
-
-  /* Precomputations. */
-  const float tan_theta_i = sin_theta_i / cos_theta_i;
-  const float inv_a = tan_theta_i;
-  const float cot_theta_i = 1.0f / tan_theta_i;
-  const float erf_a = fast_erff(cot_theta_i);
-  const float exp_a2 = expf(-cot_theta_i * cot_theta_i);
-  const float SQRT_PI_INV = 0.56418958354f;
-  const float Lambda = 0.5f * (erf_a - 1.0f) + (0.5f * SQRT_PI_INV) * (exp_a2 * inv_a);
-
-  *lambda_i = Lambda;
-
-  /* Based on paper from Wenzel Jakob
-   * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
-   *
-   * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
-   *
-   * Reformulation from OpenShadingLanguage which avoids using inverse
-   * trigonometric functions.
-   */
-
-  /* Sample slope X.
-   *
-   * Compute a coarse approximation using the approximation:
-   *   exp(-ierf(x)^2) ~= 1 - x * x
-   *   solve y = 1 + b + K * (1 - b * b)
-   */
-  const float K = tan_theta_i * SQRT_PI_INV;
-  const float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
-  const float y_exact = randu * (1.0f + erf_a + K * exp_a2);
-  float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
-
-  float inv_erf = fast_ierff(b);
-  float2 begin = make_float2(-1.0f, -y_exact);
-  float2 end = make_float2(erf_a, 1.0f + erf_a + K * exp_a2 - y_exact);
-  float2 current = make_float2(b, 1.0f + b + K * expf(-sqr(inv_erf)) - y_exact);
-
-  /* Find root in a monotonic interval using newton method, under given precision and maximal
-   * iterations. Falls back to bisection if newton step produces results outside of the valid
-   * interval.*/
-  const float precision = 1e-6f;
-  const int max_iter = 3;
-  int iter = 0;
-  while (fabsf(current.y) > precision && iter++ < max_iter) {
-    if (signf(begin.y) == signf(current.y)) {
-      begin.x = current.x;
-      begin.y = current.y;
-    }
-    else {
-      end.x = current.x;
-    }
-    const float newton_x = current.x - current.y / (1.0f - inv_erf * tan_theta_i);
-    current.x = (newton_x >= begin.x && newton_x <= end.x) ? newton_x : 0.5f * (begin.x + end.x);
-    inv_erf = fast_ierff(current.x);
-    current.y = 1.0f + current.x + K * expf(-sqr(inv_erf)) - y_exact;
-  }
-
-  *slope_x = inv_erf;
-  *slope_y = fast_ierff(2.0f * randv - 1.0f);
-}
-
-/* GGX microfacet importance sampling from:
- *
+/* Beckmann VNDF importance sampling algorithm from:
  * Importance Sampling Microfacet-Based BSDFs using the Distribution of Visible Normals.
- * E. Heitz and E. d'Eon, EGSR 2014
- */
-
-ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
-                                                    const float sin_theta_i,
-                                                    float randu,
-                                                    float randv,
-                                                    ccl_private float *slope_x,
-                                                    ccl_private float *slope_y,
-                                                    ccl_private float *lambda_i)
-{
-  /* Special case (normal incidence). */
-  if (cos_theta_i >= 0.99999f) {
-    const float r = sqrtf(randu / (1.0f - randu));
-    const float phi = M_2PI_F * randv;
-    *slope_x = r * cosf(phi);
-    *slope_y = r * sinf(phi);
-    *lambda_i = 0.0f;
-
-    return;
-  }
-
-  /* Precomputations. */
-  const float tan_theta_i = sin_theta_i / cos_theta_i;
-  const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i * tan_theta_i));
-
-  *lambda_i = G1_inv - 1.0f;
-
-  /* Sample slope_x. */
-  const float A = 2.0f * randu * G1_inv - 1.0f;
-  const float AA = A * A;
-  const float tmp = 1.0f / (AA - 1.0f);
-  const float B = tan_theta_i;
-  const float BB = B * B;
-  const float D = safe_sqrtf(BB * (tmp * tmp) - (AA - BB) * tmp);
-  const float slope_x_1 = B * tmp - D;
-  const float slope_x_2 = B * tmp + D;
-  *slope_x = (A < 0.0f || slope_x_2 * tan_theta_i > 1.0f) ? slope_x_1 : slope_x_2;
-
-  /* Sample slope_y. */
-  float S;
-
-  if (randv > 0.5f) {
-    S = 1.0f;
-    randv = 2.0f * (randv - 0.5f);
-  }
-  else {
-    S = -1.0f;
-    randv = 2.0f * (0.5f - randv);
-  }
-
-  const float z = (randv * (randv * (randv * 0.27385f - 0.73369f) + 0.46341f)) /
-                  (randv * (randv * (randv * 0.093073f + 0.309420f) - 1.000000f) + 0.597999f);
-  *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x));
-}
-
-template<MicrofacetType m_type>
-ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals kg,
-                                                          const float3 wi,
-                                                          const float alpha_x,
-                                                          const float alpha_y,
-                                                          const float randu,
-                                                          const float randv,
-                                                          ccl_private float *lambda_i)
+ * Eric Heitz and Eugene d'Eon, EGSR 2014.
+ * https://hal.inria.fr/hal-00996995v2/document */
+
+ccl_device_forceinline float3 microfacet_beckmann_sample_vndf(KernelGlobals kg,
+                                                              const float3 wi,
+                                                              const float alpha_x,
+                                                              const float alpha_y,
+                                                              const float randu,
+                                                              const float randv)
 {
   /* 1. stretch wi */
   float3 wi_ = make_float3(alpha_x * wi.x, alpha_y * wi.y, wi.z);
   wi_ = normalize(wi_);
 
-  /* Compute polar coordinates of wi_. */
-  float costheta_ = 1.0f;
-  float sintheta_ = 0.0f;
-  float cosphi_ = 1.0f;
-  float sinphi_ = 0.0f;
-
-  if (wi_.z < 0.99999f) {
-    costheta_ = wi_.z;
-    sintheta_ = sin_from_cos(costheta_);
-
-    float invlen = 1.0f / sintheta_;
-    cosphi_ = wi_.x * invlen;
-    sinphi_ = wi_.y * invlen;
-  }
-
   /* 2. sample P22_{wi}(x_slope, y_slope, 1, 1) */
   float slope_x, slope_y;
+  float cos_phi_i = 1.0f;
+  float sin_phi_i = 0.0f;
 
-  if (m_type == MicrofacetType::BECKMANN) {
-    microfacet_beckmann_sample_slopes(
-        kg, costheta_, sintheta_, randu, randv, &slope_x, &slope_y, lambda_i);
+  if (wi_.z >= 0.99999f) {
+    /* Special case (normal incidence). */
+    const float r = sqrtf(-logf(randu));
+    const float phi = M_2PI_F * randv;
+    slope_x = r * cosf(phi);
+    slope_y = r * sinf(phi);
   }
   else {
-    microfacet_ggx_sample_slopes(costheta_, sintheta_, randu, randv, &slope_x, &slope_y, lambda_i);
+    /* Precomputations. */
+    const float cos_theta_i = wi_.z;
+    const float sin_theta_i = sin_from_cos(cos_theta_i);
+    const float tan_theta_i = sin_theta_i / cos_theta_i;
+    const float cot_theta_i = 1.0f / tan_theta_i;
+    const float erf_a = fast_erff(cot_theta_i);
+    const float exp_a2 = expf(-cot_theta_i * cot_theta_i);
+    const float SQRT_PI_INV = 0.56418958354f;
+
+    float invlen = 1.0f / sin_theta_i;
+    cos_phi_i = wi_.x * invlen;
+    sin_phi_i = wi_.y * invlen;
+
+    /* Based on paper from Wenzel Jakob
+     * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
+     *
+     * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
+     *
+     * Reformulation from OpenShadingLanguage which avoids using inverse
+     * trigonometric functions.
+     */
+
+    /* Sample slope X.
+     *
+     * Compute a coarse approximation using the approximation:
+     *   exp(-ierf(x)^2) ~= 1 - x * x
+     *   solve y = 1 + b + K * (1 - b * b)
+     */
+    const float K = tan_theta_i * SQRT_PI_INV;
+    const float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
+    const float y_exact = randu * (1.0f + erf_a + K * exp_a2);
+    float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
+
+    float inv_erf = fast_ierff(b);
+    float2 begin = make_float2(-1.0f, -y_exact);
+    float2 end = make_float2(erf_a, 1.0f + erf_a + K * exp_a2 - y_exact);
+    float2 current = make_float2(b, 1.0f + b + K * expf(-sqr(inv_erf)) - y_exact);
+
+    /* Find root in a monotonic interval using newton method, under given precision and maximal
+     * iterations. Falls back to bisection if newton step produces results outside of the valid
+     * interval.*/
+    const float precision = 1e-6f;
+    const int max_iter = 3;
+    int iter = 0;
+    while (fabsf(current.y) > precision && iter++ < max_iter) {
+      if (signf(begin.y) == signf(current.y)) {
+        begin.x = current.x;
+        begin.y = current.y;
+      }
+      else {
+        end.x = current.x;
+      }
+      const float newton_x = current.x - current.y / (1.0f - inv_er

@@ Diff output truncated at 10240 characters. @@