[Bf-blender-cvs] [3535670ff1a] master: Metal: Optimize local shader memory usage.

Jason Fielder noreply at git.blender.org
Wed Dec 21 14:29:16 CET 2022


Commit: 3535670ff1a1dd735475303c885e01a07a5f4b54
Author: Jason Fielder
Date:   Wed Dec 21 14:11:20 2022 +0100
Branches: master
https://developer.blender.org/rB3535670ff1a1dd735475303c885e01a07a5f4b54

Metal: Optimize local shader memory usage.

Global scope arrays can incur suboptimal per-shader-thread memory allocations, resulting in excessive usage of limited local memory resources. These changes ensure that any arrays are limited to the closest scope in which they are required and thus will get correctly optimized by the compiler.

A number of constants have also been replaced with macros, as these can result in better runtime performance for complex shader code.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D16825

===================================================================

M	source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl
M	source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl
M	source/blender/draw/engines/eevee/shaders/effect_reflection_lib.glsl
M	source/blender/draw/engines/eevee/shaders/effect_reflection_resolve_frag.glsl
M	source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl

===================================================================

diff --git a/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl b/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
index 645f2798937..d2ad6eb3711 100644
--- a/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
@@ -23,8 +23,8 @@ uniform sampler2DArray utilTex;
 #define LTC_DISK_LAYER 3 /* UNUSED */
 
 /* Layers 4 to 20 are for BTDF Lut. */
-const float lut_btdf_layer_first = 4.0;
-const float lut_btdf_layer_count = 16.0;
+#define lut_btdf_layer_first 4.0
+#define lut_btdf_layer_count 16.0
 
 /**
  * Reminder: The 4 noise values are based of 3 uncorrelated blue noises:
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl
index 1b75ca3e62a..c8c99737006 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl
@@ -6,10 +6,10 @@
 
 #pragma BLENDER_REQUIRE(effect_dof_lib.glsl)
 
-const float tile_to_fullres_factor = float(DOF_TILE_DIVISOR);
+#define tile_to_fullres_factor float(DOF_TILE_DIVISOR)
 
 /* Error introduced by the random offset of the gathering kernel's center. */
-const float bluring_radius_error = 1.0 + 1.0 / (gather_ring_count + 0.5);
+#define bluring_radius_error (1.0 + 1.0 / (gather_ring_count + 0.5))
 
 void main()
 {
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl
index a5125a87fe7..14801192372 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl
@@ -10,6 +10,7 @@
 
 void main()
 {
+  DEFINE_DOF_QUAD_OFFSETS
   vec2 halfres_texel_size = 1.0 / vec2(textureSize(colorBuffer, 0).xy);
   /* Center uv around the 4 halfres pixels. */
   vec2 quad_center = (floor(gl_FragCoord.xy) * 2.0 + 1.0) * halfres_texel_size;
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl
index 21736dcc2f1..007de142f92 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl
@@ -9,7 +9,7 @@
 
 #pragma BLENDER_REQUIRE(effect_dof_lib.glsl)
 
-const int halfres_tile_divisor = DOF_TILE_DIVISOR / 2;
+#define halfres_tile_divisor (DOF_TILE_DIVISOR / 2)
 
 void main()
 {
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl
index 7bba9b20224..03837b97cb6 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl
@@ -18,9 +18,9 @@ vec2 outOcclusion;
 #endif
 
 #ifdef DOF_FOREGROUND_PASS
-const bool is_foreground = true;
+#  define is_foreground true
 #else /* DOF_BACKGROUND_PASS */
-const bool is_foreground = false;
+#  define is_foreground false
 #endif
 
 const float unit_ring_radius = 1.0 / float(gather_ring_count);
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl
index a0f885d69ae..dc382e68d22 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl
@@ -12,38 +12,41 @@
 // #define DOF_DEBUG_GATHER_PERF
 // #define DOF_DEBUG_SCATTER_PERF
 
-const bool no_smooth_intersection = false;
-const bool no_gather_occlusion = false;
-const bool no_gather_mipmaps = false;
-const bool no_gather_random = false;
-const bool no_gather_filtering = false;
-const bool no_scatter_occlusion = false;
-const bool no_scatter_pass = false;
-const bool no_foreground_pass = false;
-const bool no_background_pass = false;
-const bool no_slight_focus_pass = false;
-const bool no_focus_pass = false;
-const bool no_holefill_pass = false;
+#define no_smooth_intersection false
+#define no_gather_occlusion false
+#define no_gather_mipmaps false
+#define no_gather_random false
+#define no_gather_filtering false
+#define no_scatter_occlusion false
+#define no_scatter_pass false
+#define no_foreground_pass false
+#define no_background_pass false
+#define no_slight_focus_pass false
+#define no_focus_pass false
+#define no_holefill_pass false
 
 /* -------------- Quality Defines ------------- */
 
 #ifdef DOF_HOLEFILL_PASS
 /* No need for very high density for holefill. */
-const int gather_ring_count = 3;
-const int gather_ring_density = 3;
-const int gather_max_density_change = 0;
-const int gather_density_change_ring = 1;
+#  define gather_ring_count 3
+#  define gather_ring_density 3
+#  define gather_max_density_change 0
+#  define gather_density_change_ring 1
 #else
-const int gather_ring_count = DOF_GATHER_RING_COUNT;
-const int gather_ring_density = 3;
-const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. */
-const int gather_density_change_ring = 1;
+#  define gather_ring_count DOF_GATHER_RING_COUNT
+#  define gather_ring_density 3
+#  define gather_max_density_change 50 /* Dictates the maximum good quality blur. */
+#  define gather_density_change_ring 1
 #endif
 
 /* -------------- Utils ------------- */
-
-const vec2 quad_offsets[4] = vec2[4](
-    vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5));
+/* For performance on macOS, constants declared within function scope utilize constant uniform
+   register space rather than per-thread, reducing spill and incrasing
+   thread execution width - and thus performance */
+#define DEFINE_DOF_QUAD_OFFSETS \
+  const vec2 quad_offsets[4] = vec2[4]( \
+      vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5));
 
 /* Divide by sensor size to get the normalized size. */
 #define calculate_coc_persp(zdepth) (cocMul / zdepth - cocBias)
@@ -128,11 +131,11 @@ float dof_load_gather_coc(sampler2D gather_input_coc_buffer, vec2 uv, float lod)
 }
 
 /* Distribute weights between near/slightfocus/far fields (slide 117). */
-const float layer_threshold = 4.0;
+#define layer_threshold 4.0
 /* Make sure it overlaps. */
-const float layer_offset_fg = 0.5 + 1.0;
+#define layer_offset_fg (0.5 + 1.0)
 /* Extra offset for convolution layers to avoid light leaking from background. */
-const float layer_offset = 0.5 + 0.5;
+#define layer_offset (0.5 + 0.5)
 
 #define DOF_MAX_SLIGHT_FOCUS_RADIUS 16
 
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl
index 9fd2abfc148..eed52815a6e 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl
@@ -11,6 +11,7 @@
 /* NOTE: Do not compare alpha as it is not scattered by the scatter pass. */
 float dof_scatter_neighborhood_rejection(vec3 color)
 {
+  DEFINE_DOF_QUAD_OFFSETS;
   color = min(vec3(scatterColorNeighborMax), color);
 
   float validity = 0.0;
@@ -132,6 +133,7 @@ void main()
 /* Downsample pass done for each mip starting from mip1. */
 void main()
 {
+  DEFINE_DOF_QUAD_OFFSETS
   vec2 input_texel_size = 1.0 / vec2(textureSize(colorBuffer, 0).xy);
   /* Center uv around the 4 pixels of the previous mip. */
   vec2 quad_center = (floor(gl_FragCoord.xy) * 2.0 + 1.0) * input_texel_size;
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
index f73c85cf690..5038dff87d8 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
@@ -29,6 +29,7 @@ float bokeh_shape(vec2 center)
 
 void main(void)
 {
+  DEFINE_DOF_QUAD_OFFSETS
   vec4 shapes;
   for (int i = 0; i < 4; i++) {
     shapes[i] = bokeh_shape(spritepos + quad_offsets[i]);
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl
index 6b1eac50645..6b38fbeb705 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl
@@ -37,6 +37,7 @@ void vertex_discard()
 
 void main()
 {
+  DEFINE_DOF_QUAD_OFFSETS
   ivec2 tex_size = textureSize(cocBuffer, 0);
 
   int t_id = gl_VertexID / 3; /* Triangle Id */
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl
index 46c2beaa72a..3b996280e78 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl
@@ -10,6 +10,7 @@
 
 void main()
 {
+  DEFINE_DOF_QUAD_OFFSETS
   vec2 fullres_texel_size = 1.0 / vec2(textureSize(colorBuffer, 0).xy);
   /* Center uv around the 4 fullres pixels. */
   vec2 quad_center = (floor(gl_FragCoord.xy) * 2.0 + 1.0) * fullres_texel_size;
diff --git a/source/blender/draw/engines/eevee/shaders/effect_reflection_lib.glsl b/source/blender/draw/engines/eevee/shaders/effect_reflection_lib.glsl
index 3b99acb9c31..7097d56fe54 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_reflection_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_reflection_lib.glsl
@@ -47,53 +47,4 @@ HitData decode_hit_data(vec4 hit_data, float hit_depth)
 
 /* Blue noise categorized into 4 sets of samples.
  * See "Stochastic all the things" presentation slide 32-37. */
-const int resolve_samples_count = 9;
-const vec2 resolve_sample_offsets[36] = vec2[36](
-    /* Set 1. */
-    /* First Ring (2x2). */
-    vec2(0, 0),
-    /* Second Ring (6x6). */
-    vec2(-1, 3),
-    vec2(1, 3),
-    vec2(-1, 1),
-    vec2(3, 1),
-    vec2(-2, 0),
-    vec2(3, 0),
-    vec2(2, -1),
-    vec2(1, -2),
-    /* Set 2. 

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list