[Bf-blender-cvs] [596ee79a9f5] master: Metal: Optimize shader local memory usage.
Jason Fielder
noreply at git.blender.org
Mon Jan 30 13:57:55 CET 2023
Commit: 596ee79a9f590c1d66aa1cb5c19f994ddb0f0272
Author: Jason Fielder
Date: Mon Jan 30 13:44:46 2023 +0100
Branches: master
https://developer.blender.org/rB596ee79a9f590c1d66aa1cb5c19f994ddb0f0272
Metal: Optimize shader local memory usage.
Due to shader global scope emulation via class interface, global constant arrays in shaders are allocated in per-thread shader local memory. To reduce memory pressure, these constant arrays are now declared inside function scope, which ensures they reside only within device constant memory. This results in a tangible 1.5-2x performance uplift for the specific shaders affected.
Authored by Apple: Michael Parkin-White
Ref T96261
Reviewed By: fclem
Maniphest Tasks: T96261
Differential Revision: https://developer.blender.org/D17089
===================================================================
M source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
M source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
M source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
M source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
M source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
M source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
M source/blender/gpu/shaders/gpu_shader_text_frag.glsl
===================================================================
diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
index 39c19ad6f74..ce71a73ea75 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
@@ -6,16 +6,17 @@
#define M_4PI 12.5663706143591729
-const mat3 CUBE_ROTATIONS[6] = mat3[](
- mat3(vec3(0.0, 0.0, -1.0), vec3(0.0, -1.0, 0.0), vec3(-1.0, 0.0, 0.0)),
- mat3(vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0), vec3(1.0, 0.0, 0.0)),
- mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0)),
- mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, -1.0), vec3(0.0, 1.0, 0.0)),
- mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, -1.0)),
- mat3(vec3(-1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0)));
-
vec3 get_cubemap_vector(vec2 co, int face)
{
+ /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
+ * memory pressure. */
+ const mat3 CUBE_ROTATIONS[6] = mat3[](
+ mat3(vec3(0.0, 0.0, -1.0), vec3(0.0, -1.0, 0.0), vec3(-1.0, 0.0, 0.0)),
+ mat3(vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0), vec3(1.0, 0.0, 0.0)),
+ mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0)),
+ mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, -1.0), vec3(0.0, 1.0, 0.0)),
+ mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, -1.0)),
+ mat3(vec3(-1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0)));
return normalize(CUBE_ROTATIONS[face] * vec3(co * 2.0 - 1.0, 1.0));
}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
index 960986bebd5..e104163c11c 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl
@@ -4,13 +4,16 @@
/* 4x4 bayer matrix prepared for 8bit UNORM precision error. */
#define P(x) (((x + 0.5) * (1.0 / 16.0) - 0.5) * (1.0 / 255.0))
-const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
- vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
- vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
- vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
float dither(void)
{
+ /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
+ * memory pressure. */
+ const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
+ vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
+ vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
+ vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
+
ivec2 co = ivec2(gl_FragCoord.xy) % 4;
return dither_mat4x4[co.x][co.y];
}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
index 2a59a623995..99d6c4fcad8 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_verts_vert.glsl
@@ -1,10 +1,10 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
-/* TODO: Theme? */
-const vec4 pinned_col = vec4(1.0, 0.0, 0.0, 1.0);
-
void main()
{
+ /* TODO: Theme? */
+ const vec4 pinned_col = vec4(1.0, 0.0, 0.0, 1.0);
+
bool is_selected = (flag & (VERT_UV_SELECT | FACE_UV_SELECT)) != 0u;
bool is_pinned = (flag & VERT_UV_PINNED) != 0u;
vec4 deselect_col = (is_pinned) ? pinned_col : vec4(color.rgb, 1.0);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
index 11a04dddd2a..d4a3059e08a 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl
@@ -1,15 +1,6 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
-/* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
- * overlapping of the outlines. */
-const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
- vec3(0.45, 0.45, 0.0),
- vec3(0.45, -0.45, 0.0),
- vec3(-0.45, -0.45, 0.0));
-
-const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
-
vec4 flag_to_color(uint flag)
{
/* Color mapping for flags */
@@ -88,6 +79,16 @@ void main()
}
}
#endif
+ /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
+ * memory pressure. */
+ const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
+
+ /* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
+ * overlapping of the outlines. */
+ const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
+ vec3(0.45, 0.45, 0.0),
+ vec3(0.45, -0.45, 0.0),
+ vec3(-0.45, -0.45, 0.0));
vec3 pos = domainOriginOffset + cellSize * (vec3(cell_co + adaptiveCellOffset) + cell_offset);
vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 8]];
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
index a33d27676c3..6cb4dfc903a 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl
@@ -1,13 +1,6 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
-const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
- vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
- vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
- vec3(0.0, 0.0, 0.5));
-
-const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
-
/* Straight Port from BKE_defvert_weight_to_rgb()
* TODO: port this to a color ramp. */
vec3 weight_to_color(float weight)
@@ -177,6 +170,15 @@ void main()
mat3 rot_mat = rotation_from_vector(vector);
# ifdef USE_NEEDLE
+ /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
+ * memory pressure. */
+ const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
+ vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
+ vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
+ vec3(0.0, 0.0, 0.5));
+
+ const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
+
vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 12]];
pos += rotated_pos * vector_length * displaySize * cellSize;
# else
diff --git a/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
index de1da01ff98..7b64f6b1bfe 100644
--- a/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl
@@ -12,42 +12,6 @@
/* 4bits for corner id */
#define CORNER_VEC_OFS 2u
#define CORNER_VEC_RANGE BIT_RANGE(4)
-const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
- vec2(0.02, 0.805),
- vec2(0.067, 0.617),
- vec2(0.169, 0.45),
- vec2(0.293, 0.293),
- vec2(0.45, 0.169),
- vec2(0.617, 0.076),
- vec2(0.805, 0.02),
- vec2(1.0, 0.0),
- vec2(-1.0, 0.0),
- vec2(-0.805, 0.02),
- vec2(-0.617, 0.067),
- vec2(-0.45, 0.169),
- vec2(-0.293, 0.293),
- vec2(-0.169, 0.45),
- vec2(-0.076, 0.617),
- vec2(-0.02, 0.805),
- vec2(0.0, 1.0),
- vec2(0.0, -1.0),
- vec2(-0.02, -0.805),
- vec2(-0.067, -0.617),
- vec2(-0.169, -0.45),
- vec2(-0.293, -0.293),
- vec2(-0.45, -0.169),
- vec2(-0.617, -0.076),
- vec2(-0.805, -0.02),
- vec2(-1.0, 0.0),
- vec2(1.0, 0.0),
- vec2(0.805, -0.02),
- vec2(0.617, -0.067),
- vec2(0.45, -0.169),
- vec2(0.293, -0.293),
- vec2(0.169, -0.45),
- vec2(0.076, -0.617),
- vec2(0.02, -0.805),
- vec2(0.0, -1.0));
#define INNER_FLAG uint(1 << 10) /* is inner vert */
@@ -60,6 +24,45 @@ const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
void main()
{
+ /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
+ * memory pressure.*/
+ const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
+ vec2(0.02, 0.805),
+ vec2(0.067, 0.617),
+ vec2(0.169, 0.45),
+ vec2(0.293, 0.293),
+ vec2(0.45, 0.169
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list