[Bf-blender-cvs] [596ee79a9f5] master: Metal: Optimize shader local memory usage.

Jason Fielder noreply at git.blender.org
Mon Jan 30 13:57:55 CET 2023


Commit: 596ee79a9f590c1d66aa1cb5c19f994ddb0f0272
Author: Jason Fielder
Date:   Mon Jan 30 13:44:46 2023 +0100
Branches: master
https://developer.blender.org/rB596ee79a9f590c1d66aa1cb5c19f994ddb0f0272

Metal: Optimize shader local memory usage.

Because shader global scope is emulated via a class interface, global constant arrays in shaders are allocated in per-thread shader local memory. To reduce memory pressure, these constant arrays are now declared inside function scope, which ensures they reside only in device constant memory. This results in a tangible 1.5-2x performance uplift for the specific shaders affected.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D17089

===================================================================

M	source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
M	source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
M	source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
M	source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
M	source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
M	source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
M	source/blender/gpu/shaders/gpu_shader_text_frag.glsl

===================================================================

diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
index 39c19ad6f74..ce71a73ea75 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
@@ -6,16 +6,17 @@
 
 #define M_4PI 12.5663706143591729
 
-const mat3 CUBE_ROTATIONS[6] = mat3[](
-    mat3(vec3(0.0, 0.0, -1.0), vec3(0.0, -1.0, 0.0), vec3(-1.0, 0.0, 0.0)),
-    mat3(vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0), vec3(1.0, 0.0, 0.0)),
-    mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0)),
-    mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, -1.0), vec3(0.0, 1.0, 0.0)),
-    mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, -1.0)),
-    mat3(vec3(-1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0)));
-
 vec3 get_cubemap_vector(vec2 co, int face)
 {
+  /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
+   * memory pressure. */
+  const mat3 CUBE_ROTATIONS[6] = mat3[](
+      mat3(vec3(0.0, 0.0, -1.0), vec3(0.0, -1.0, 0.0), vec3(-1.0, 0.0, 0.0)),
+      mat3(vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0), vec3(1.0, 0.0, 0.0)),
+      mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0)),
+      mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, -1.0), vec3(0.0, 1.0, 0.0)),
+      mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, -1.0)),
+      mat3(vec3(-1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0)));
   return normalize(CUBE_ROTATIONS[face] * vec3(co * 2.0 - 1.0, 1.0));
 }
 
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
index 960986bebd5..e104163c11c 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
@@ -4,13 +4,16 @@
 
 /* 4x4 bayer matrix prepared for 8bit UNORM precision error. */
 #define P(x) (((x + 0.5) * (1.0 / 16.0) - 0.5) * (1.0 / 255.0))
-const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
-                                      vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
-                                      vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
-                                      vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
 
 float dither(void)
 {
+  /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
+   * memory pressure. */
+  const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
+                                        vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
+                                        vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
+                                        vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
+
   ivec2 co = ivec2(gl_FragCoord.xy) % 4;
   return dither_mat4x4[co.x][co.y];
 }
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
index 2a59a623995..99d6c4fcad8 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
@@ -1,10 +1,10 @@
 #pragma BLENDER_REQUIRE(common_view_lib.glsl)
 
-/* TODO: Theme? */
-const vec4 pinned_col = vec4(1.0, 0.0, 0.0, 1.0);
-
 void main()
 {
+  /* TODO: Theme? */
+  const vec4 pinned_col = vec4(1.0, 0.0, 0.0, 1.0);
+
   bool is_selected = (flag & (VERT_UV_SELECT | FACE_UV_SELECT)) != 0u;
   bool is_pinned = (flag & VERT_UV_PINNED) != 0u;
   vec4 deselect_col = (is_pinned) ? pinned_col : vec4(color.rgb, 1.0);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
index 11a04dddd2a..d4a3059e08a 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
@@ -1,15 +1,6 @@
 
 #pragma BLENDER_REQUIRE(common_view_lib.glsl)
 
-/* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
- * overlapping of the outlines. */
-const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
-                                vec3(0.45, 0.45, 0.0),
-                                vec3(0.45, -0.45, 0.0),
-                                vec3(-0.45, -0.45, 0.0));
-
-const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
-
 vec4 flag_to_color(uint flag)
 {
   /* Color mapping for flags */
@@ -88,6 +79,16 @@ void main()
     }
   }
 #endif
+  /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
+   * memory pressure. */
+  const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
+
+  /* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
+   * overlapping of the outlines. */
+  const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
+                                  vec3(0.45, 0.45, 0.0),
+                                  vec3(0.45, -0.45, 0.0),
+                                  vec3(-0.45, -0.45, 0.0));
 
   vec3 pos = domainOriginOffset + cellSize * (vec3(cell_co + adaptiveCellOffset) + cell_offset);
   vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 8]];
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
index a33d27676c3..6cb4dfc903a 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
@@ -1,13 +1,6 @@
 
 #pragma BLENDER_REQUIRE(common_view_lib.glsl)
 
-const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
-                                vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
-                                vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
-                                vec3(0.0, 0.0, 0.5));
-
-const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
-
 /* Straight Port from BKE_defvert_weight_to_rgb()
  * TODO: port this to a color ramp. */
 vec3 weight_to_color(float weight)
@@ -177,6 +170,15 @@ void main()
   mat3 rot_mat = rotation_from_vector(vector);
 
 #  ifdef USE_NEEDLE
+  /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
+   * memory pressure. */
+  const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
+                                  vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
+                                  vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
+                                  vec3(0.0, 0.0, 0.5));
+
+  const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
+
   vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 12]];
   pos += rotated_pos * vector_length * displaySize * cellSize;
 #  else
diff --git a/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
index de1da01ff98..7b64f6b1bfe 100644
--- a/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
@@ -12,42 +12,6 @@
 /* 4bits for corner id */
 #define CORNER_VEC_OFS 2u
 #define CORNER_VEC_RANGE BIT_RANGE(4)
-const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
-                                    vec2(0.02, 0.805),
-                                    vec2(0.067, 0.617),
-                                    vec2(0.169, 0.45),
-                                    vec2(0.293, 0.293),
-                                    vec2(0.45, 0.169),
-                                    vec2(0.617, 0.076),
-                                    vec2(0.805, 0.02),
-                                    vec2(1.0, 0.0),
-                                    vec2(-1.0, 0.0),
-                                    vec2(-0.805, 0.02),
-                                    vec2(-0.617, 0.067),
-                                    vec2(-0.45, 0.169),
-                                    vec2(-0.293, 0.293),
-                                    vec2(-0.169, 0.45),
-                                    vec2(-0.076, 0.617),
-                                    vec2(-0.02, 0.805),
-                                    vec2(0.0, 1.0),
-                                    vec2(0.0, -1.0),
-                                    vec2(-0.02, -0.805),
-                                    vec2(-0.067, -0.617),
-                                    vec2(-0.169, -0.45),
-                                    vec2(-0.293, -0.293),
-                                    vec2(-0.45, -0.169),
-                                    vec2(-0.617, -0.076),
-                                    vec2(-0.805, -0.02),
-                                    vec2(-1.0, 0.0),
-                                    vec2(1.0, 0.0),
-                                    vec2(0.805, -0.02),
-                                    vec2(0.617, -0.067),
-                                    vec2(0.45, -0.169),
-                                    vec2(0.293, -0.293),
-                                    vec2(0.169, -0.45),
-                                    vec2(0.076, -0.617),
-                                    vec2(0.02, -0.805),
-                                    vec2(0.0, -1.0));
 
 #define INNER_FLAG uint(1 << 10) /* is inner vert */
 
@@ -60,6 +24,45 @@ const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
 
 void main()
 {
+  /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
+   * memory pressure.*/
+  const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
+                                      vec2(0.02, 0.805),
+                                      vec2(0.067, 0.617),
+                                      vec2(0.169, 0.45),
+                                      vec2(0.293, 0.293),
+                                      vec2(0.45, 0.169

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list