[Bf-blender-cvs] [b0dc3aff2c7] master: Metal: GLSL shader compatibility 3rd pass

Thu Apr 14 11:54:04 CEST 2022

Commit: b0dc3aff2c73f2a1b65406dcb7fe73c95b9485ed
Author: Jason Fielder
Date:   Thu Apr 14 11:47:52 2022 +0200
Branches: master
https://developer.blender.org/rBb0dc3aff2c73f2a1b65406dcb7fe73c95b9485ed

Metal: GLSL shader compatibility 3rd pass

Fixes for undefined behaviour in divergent control-flow, replacement of partial vector references, and resolution of a number of calculation precision issues occurring on macOS.

Authored by Apple: Michael Parkin-White

Ref: T96261

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D14437

===================================================================

M	source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
M	source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
M	source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
M	source/blender/draw/engines/overlay/shaders/outline_detect_frag.glsl
M	source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
M	source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
M	source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl
M	source/blender/draw/intern/shaders/common_hair_lib.glsl
M	source/blender/draw/intern/shaders/common_hair_refine_vert.glsl
M	source/blender/draw/intern/shaders/common_smaa_lib.glsl
M	source/blender/gpu/shaders/material/gpu_shader_material_hash.glsl

===================================================================

diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
index d25ef23a706..681e69ae384 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
@@ -15,7 +15,6 @@ uniform float visibilityRange;
 uniform float visibilityBlur;
 
 uniform float sampleCount;
-uniform float;
 
 out vec4 FragColor;
 
diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
index 9f1afc4767c..4ff42892f7d 100644
--- a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
@@ -44,8 +44,13 @@ void main()
   volumeObjectLocalCoord = (volumeObjectToTexture * vec4(volumeObjectLocalCoord, 1.0)).xyz;
 
   if (any(lessThan(volumeObjectLocalCoord, vec3(0.0))) ||
-      any(greaterThan(volumeObjectLocalCoord, vec3(1.0))))
+      any(greaterThan(volumeObjectLocalCoord, vec3(1.0)))) {
+    /* Note: Discard is not an explicit return in Metal prior to versions 2.3.
+     * adding return after discard ensures consistent behaviour and avoids GPU
+     * side-effects where control flow continues with undefined values. */
     discard;
+    return;
+  }
 #endif
 
 #ifdef CLEAR
diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
index 11f57c0a82e..527bbd18896 100644
--- a/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
@@ -70,7 +70,11 @@ void main()
     vec3 Tr = exp(-s_extinction * s_len);
 
     /* integrate along the current step segment */
-    Lscat = (Lscat - Lscat * Tr) / max(vec3(1e-8), s_extinction);
+    /* Note: Original calculation carries precision issues when compiling for AMD GPUs
+     * and running Metal. This version of the equation retains precision well for all
+     * macOS HW configurations. */
+    Lscat = (Lscat * (1.0f - Tr)) / max(vec3(1e-8), s_extinction);
+
     /* accumulate and also take into account the transmittance from previous steps */
     finalScattering += finalTransmittance * Lscat;
 
diff --git a/source/blender/draw/engines/overlay/shaders/outline_detect_frag.glsl b/source/blender/draw/engines/overlay/shaders/outline_detect_frag.glsl
index 19d54a57479..ba0a4c0da81 100644
--- a/source/blender/draw/engines/overlay/shaders/outline_detect_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/outline_detect_frag.glsl
@@ -358,6 +358,12 @@ void main()
       line_end = vec2(0.0, 0.5);
       break;
     default:
+      /* Ensure values are assigned to, avoids undefined behaviour for
+       * divergent control-flow. This can occur if discard is called
+       * as discard is not treated as a return in Metal 2.2. So
+       * side-effects can still cause problems. */
+      line_start = vec2(0.0);
+      line_end = vec2(0.0);
       break;
   }
 
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
index 71cf08b7e8c..cfc94ef7c9a 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
@@ -8,8 +8,10 @@
 /* From http://libnoise.sourceforge.net/noisegen/index.html */
 float integer_noise(int n)
 {
-  n = (n >> 13) ^ n;
-  int nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
+  /* Integer bit-shifts cause precision issues due to overflow
+   * in a number of workbench tests. Use uint instead. */
+  uint nn = (uint(n) >> 13u) ^ uint(n);
+  nn = (nn * (nn * nn * 60493u + 19990303u) + 1376312589u) & 0x7fffffffu;
   return (float(nn) / 1073741824.0);
 }
 
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
index 4ff281ccd29..36059b6076f 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
@@ -218,7 +218,15 @@ void main()
   /* Manual depth test. TODO: remove. */
   float depth = texelFetch(depthBuffer, ivec2(gl_FragCoord.xy), 0).r;
   if (gl_FragCoord.z >= depth) {
+    /* Note: In the Metal API, prior to Metal 2.3, Discard is not an explicit return and can
+     * produce undefined behaviour. This is especially prominent with derivatives if control-flow
+     * divergence is present.
+     *
+     * Adding a return call eliminates undefined behaviour and a later out-of-bounds read causing
+     * a crash on AMD platforms.
+     * This behaviour can also affect OpenGL on certain devices. */
     discard;
+    return;
   }
 
   vec3 Lscat;
@@ -268,6 +276,7 @@ void main()
     /* Start is further away than the end.
      * That means no volume is intersected. */
     discard;
+    return;
   }
 
   fragColor = volume_integration(ls_ray_ori,
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl b/source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl
index 531ed461057..20053b8917c 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl
@@ -64,22 +64,35 @@ vec3 get_world_lighting(vec3 base_color, float roughness, float metallic, vec3 N
   if (world_data.use_specular) {
     /* Prepare Specular computation. Eval 4 lights at once. */
     vec3 R = -reflect(I, N);
+
+#ifdef GPU_METAL
+    /* Split vectors into arrays of floats. Partial vector references are unsupported by MSL. */
+    float spec_angle[4], spec_NL[4], wrap_NL[4];
+#  define AS_VEC4(a) vec4(a[0], a[1], a[2], a[3])
+#else
     vec4 spec_angle, spec_NL, wrap_NL;
-    prep_specular(world_data.lights[0].direction.xyz, I, N, R, spec_NL.x, wrap_NL.x, spec_angle.x);
-    prep_specular(world_data.lights[1].direction.xyz, I, N, R, spec_NL.y, wrap_NL.y, spec_angle.y);
-    prep_specular(world_data.lights[2].direction.xyz, I, N, R, spec_NL.z, wrap_NL.z, spec_angle.z);
-    prep_specular(world_data.lights[3].direction.xyz, I, N, R, spec_NL.w, wrap_NL.w, spec_angle.w);
+#  define AS_VEC4(a) a
+#endif
+    prep_specular(
+        world_data.lights[0].direction.xyz, I, N, R, spec_NL[0], wrap_NL[0], spec_angle[0]);
+    prep_specular(
+        world_data.lights[1].direction.xyz, I, N, R, spec_NL[1], wrap_NL[1], spec_angle[1]);
+    prep_specular(
+        world_data.lights[2].direction.xyz, I, N, R, spec_NL[2], wrap_NL[2], spec_angle[2]);
+    prep_specular(
+        world_data.lights[3].direction.xyz, I, N, R, spec_NL[3], wrap_NL[3], spec_angle[3]);
 
     vec4 gloss = vec4(1.0 - roughness);
     /* Reduce gloss for smooth light. (simulate bigger light) */
     gloss *= 1.0 - wrap;
     vec4 shininess = exp2(10.0 * gloss + 1.0);
 
-    vec4 spec_light = blinn_specular(shininess, spec_angle, spec_NL);
+    vec4 spec_light = blinn_specular(shininess, AS_VEC4(spec_angle), AS_VEC4(spec_NL));
 
     /* Simulate Env. light. */
     vec4 w = mix(wrap, vec4(1.0), roughness);
-    vec4 spec_env = wrapped_lighting(wrap_NL, w);
+    vec4 spec_env = wrapped_lighting(AS_VEC4(wrap_NL), w);
+#undef AS_VEC4
 
     spec_light = mix(spec_light, spec_env, wrap * wrap);
 
diff --git a/source/blender/draw/intern/shaders/common_hair_lib.glsl b/source/blender/draw/intern/shaders/common_hair_lib.glsl
index 6a8f1132e1b..ff52b483d77 100644
--- a/source/blender/draw/intern/shaders/common_hair_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_hair_lib.glsl
@@ -211,6 +211,11 @@ void hair_get_pos_tan_binor_time(bool is_persp,
 
     wpos += wbinor * thick_time * scale;
   }
+  else {
+    /* Note: Ensures 'hairThickTime' is initialised -
+     * avoids undefined behaviour on certain macOS configurations. */
+    thick_time = 0.0;
+  }
 }
 
 float hair_get_customdata_float(const samplerBuffer cd_buf)
diff --git a/source/blender/draw/intern/shaders/common_hair_refine_vert.glsl b/source/blender/draw/intern/shaders/common_hair_refine_vert.glsl
index 371d43827b9..2eccae5bceb 100644
--- a/source/blender/draw/intern/shaders/common_hair_refine_vert.glsl
+++ b/source/blender/draw/intern/shaders/common_hair_refine_vert.glsl
@@ -18,7 +18,7 @@ void main(void)
   vec4 weights = hair_get_weights_cardinal(interp_time);
   finalColor = hair_interp_data(data0, data1, data2, data3, weights);
 
-#ifdef TF_WORKAROUND
+#if defined(TF_WORKAROUND)
   int id = gl_VertexID - idOffset;
   gl_Position.x = ((float(id % targetWidth) + 0.5) / float(targetWidth)) * 2.0 - 1.0;
   gl_Position.y = ((float(id / targetWidth) + 0.5) / float(targetHeight)) * 2.0 - 1.0;
@@ -26,5 +26,10 @@ void main(void)
   gl_Position.w = 1.0;
 
   gl_PointSize = 1.0;
+#else
+#  ifdef GPU_METAL
+  /* Metal still expects an output position for TF shaders. */
+  gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
+#  endif
 #endif
 }
diff --git a/source/blender/draw/intern/shaders/common_smaa_lib.glsl b/source/blender/draw/intern/shaders/common_smaa_lib.glsl
index 73f65fb0799..dbc4c998b34 100644
--- a/source/blender/draw/intern/shaders/common_smaa_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_smaa_lib.glsl
@@ -569,7 +569,7 @@ SamplerState PointSampler
 #    define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
 #  endif
 #endif
-#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4)
+#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4) || defined(GPU_METAL)
 #  define SMAATexture2D(tex) sampler2D tex
 #  define SMAATexturePass2D(tex) tex
 #  define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
@@ -641,14 +641,14 @@ float2 SMAACalculatePredicatedThreshold(float2 texcoord,
  */
 void SMAAMovc(bool2 cond, inout float2 variable, float2 value)
 {
-  SMAA_FLATTEN if (cond.x) variable.x = value.x;
-  SMAA_FLATTEN if (cond.y) variab

@@ Diff output truncated at 10240 characters. @@