[Bf-blender-cvs] [aed301704aa] master: Metal: MTLBatch and MTLDrawList implementation.

Tue Oct 18 12:53:08 CEST 2022

Commit: aed301704aa1cd02b8868396d36ce78bc292850e
Author: Jason Fielder
Date:   Tue Oct 18 12:18:25 2022 +0200
Branches: master
https://developer.blender.org/rBaed301704aa1cd02b8868396d36ce78bc292850e

Metal: MTLBatch and MTLDrawList implementation.

The MTLBatch and MTLDrawList implementations enable use of the Metal viewport for the UI and Workbench. This includes vertex descriptor caching and draw call submission in SSBO Vertex Fetch mode.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D16101

===================================================================

M	source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert_no_geom.glsl
M	source/blender/draw/intern/draw_manager_data.cc
M	source/blender/gpu/CMakeLists.txt
M	source/blender/gpu/intern/gpu_texture_private.hh
M	source/blender/gpu/intern/gpu_viewport.c
M	source/blender/gpu/metal/mtl_backend.mm
M	source/blender/gpu/metal/mtl_batch.hh
A	source/blender/gpu/metal/mtl_batch.mm
M	source/blender/gpu/metal/mtl_context.mm
M	source/blender/gpu/metal/mtl_drawlist.hh
A	source/blender/gpu/metal/mtl_drawlist.mm
M	source/blender/gpu/metal/mtl_immediate.mm
M	source/blender/gpu/metal/mtl_pso_descriptor_state.hh
M	source/blender/gpu/metal/mtl_shader_interface.mm
M	source/blender/gpu/metal/mtl_texture.hh
M	source/blender/gpu/metal/mtl_texture.mm

===================================================================

diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert_no_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert_no_geom.glsl
index e3ddeb5c6a4..abaa814a4dc 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert_no_geom.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert_no_geom.glsl
@@ -108,8 +108,8 @@ void main()
   vec3 in_pos0 = vertex_fetch_attribute(base_vertex_id, pos, vec3);
   vec3 in_pos1 = vertex_fetch_attribute(base_vertex_id + 1, pos, vec3);
 
-  vec4 out_pos0 = ProjectionMatrix * (ViewMatrix * vec4(in_pos0, 1.0));
-  vec4 out_pos1 = ProjectionMatrix * (ViewMatrix * vec4(in_pos1, 1.0));
+  vec4 out_pos0 = drw_view.winmat * (drw_view.viewmat * vec4(in_pos0, 1.0));
+  vec4 out_pos1 = drw_view.winmat * (drw_view.viewmat * vec4(in_pos1, 1.0));
 
   /* Final calculations required for Geometry Shader alternative.
    * We need to calculate values for each vertex position to correctly determine the final output
@@ -130,28 +130,28 @@ void main()
   float line_size = float(lineThickness) * sizePixel;
 
   if (quad_vertex_id == 0) {
-    view_clipping_distances(out_pos0);
+    view_clipping_distances(out_pos0.xyz);
 
     interp.color = finalColor_geom[0];
     t = edge_dir * (line_size * (is_persp ? out_pos0.w : 1.0));
     gl_Position = out_pos0 + vec4(t, 0.0, 0.0);
   }
   else if (quad_vertex_id == 1 || quad_vertex_id == 3) {
-    view_clipping_distances(out_pos0);
+    view_clipping_distances(out_pos0.xyz);
 
     interp.color = finalColor_geom[0];
     t = edge_dir * (line_size * (is_persp ? out_pos0.w : 1.0));
     gl_Position = out_pos0 - vec4(t, 0.0, 0.0);
   }
   else if (quad_vertex_id == 2 || quad_vertex_id == 5) {
-    view_clipping_distances(out_pos1);
+    view_clipping_distances(out_pos1.xyz);
 
     interp.color = finalColor_geom[1];
     t = edge_dir * (line_size * (is_persp ? out_pos1.w : 1.0));
     gl_Position = out_pos1 + vec4(t, 0.0, 0.0);
   }
   else if (quad_vertex_id == 4) {
-    view_clipping_distances(out_pos1);
+    view_clipping_distances(out_pos1.xyz);
 
     interp.color = finalColor_geom[1];
     t = edge_dir * (line_size * (is_persp ? out_pos1.w : 1.0));
diff --git a/source/blender/draw/intern/draw_manager_data.cc b/source/blender/draw/intern/draw_manager_data.cc
index 9768f1ce9e7..981206e56fe 100644
--- a/source/blender/draw/intern/draw_manager_data.cc
+++ b/source/blender/draw/intern/draw_manager_data.cc
@@ -1714,23 +1714,32 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader)
   }
 
 #ifdef DEBUG
-  int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT);
-  if (debug_print_location != -1) {
-    GPUStorageBuf *buf = drw_debug_gpu_print_buf_get();
-    drw_shgroup_uniform_create_ex(
-        shgroup, debug_print_location, DRW_UNIFORM_STORAGE_BLOCK, buf, GPU_SAMPLER_DEFAULT, 0, 1);
+  /* TODO(Metal): Support Shader debug print.
+   * This is not currently supported by Metal Backend. */
+  if (GPU_backend_get_type() != GPU_BACKEND_METAL) {
+    int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT);
+    if (debug_print_location != -1) {
+      GPUStorageBuf *buf = drw_debug_gpu_print_buf_get();
+      drw_shgroup_uniform_create_ex(shgroup,
+                                    debug_print_location,
+                                    DRW_UNIFORM_STORAGE_BLOCK,
+                                    buf,
+                                    GPU_SAMPLER_DEFAULT,
+                                    0,
+                                    1);
 #  ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER
-    /* Add a barrier to allow multiple shader writing to the same buffer. */
-    DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE);
+      /* Add a barrier to allow multiple shader writing to the same buffer. */
+      DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE);
 #  endif
-  }
+    }
 
-  int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS);
-  if (debug_draw_location != -1) {
-    GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get();
-    drw_shgroup_uniform_create_ex(
-        shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, GPU_SAMPLER_DEFAULT, 0, 1);
-    /* NOTE(fclem): No barrier as ordering is not important. */
+    int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS);
+    if (debug_draw_location != -1) {
+      GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get();
+      drw_shgroup_uniform_create_ex(
+          shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, GPU_SAMPLER_DEFAULT, 0, 1);
+      /* NOTE(fclem): No barrier as ordering is not important. */
+    }
   }
 #endif
 
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index f387a4588b6..5a1e0cde1d8 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -186,9 +186,11 @@ set(OPENGL_SRC
 
 set(METAL_SRC
   metal/mtl_backend.mm
+  metal/mtl_batch.mm
   metal/mtl_command_buffer.mm
   metal/mtl_context.mm
   metal/mtl_debug.mm
+  metal/mtl_drawlist.mm
   metal/mtl_framebuffer.mm
   metal/mtl_immediate.mm
   metal/mtl_index_buffer.mm
diff --git a/source/blender/gpu/intern/gpu_texture_private.hh b/source/blender/gpu/intern/gpu_texture_private.hh
index b96a9b870e5..2ad31183206 100644
--- a/source/blender/gpu/intern/gpu_texture_private.hh
+++ b/source/blender/gpu/intern/gpu_texture_private.hh
@@ -431,15 +431,16 @@ inline bool validate_data_format(eGPUTextureFormat tex_format, eGPUDataFormat da
     case GPU_DEPTH_COMPONENT24:
     case GPU_DEPTH_COMPONENT16:
     case GPU_DEPTH_COMPONENT32F:
-      return data_format == GPU_DATA_FLOAT;
+      return ELEM(data_format, GPU_DATA_FLOAT, GPU_DATA_UINT);
     case GPU_DEPTH24_STENCIL8:
     case GPU_DEPTH32F_STENCIL8:
-      return data_format == GPU_DATA_UINT_24_8;
+      return ELEM(data_format, GPU_DATA_UINT_24_8, GPU_DATA_UINT);
     case GPU_R8UI:
     case GPU_R16UI:
     case GPU_RG16UI:
     case GPU_R32UI:
       return data_format == GPU_DATA_UINT;
+    case GPU_R32I:
     case GPU_RG16I:
     case GPU_R16I:
       return data_format == GPU_DATA_INT;
@@ -453,6 +454,8 @@ inline bool validate_data_format(eGPUTextureFormat tex_format, eGPUDataFormat da
       return ELEM(data_format, GPU_DATA_2_10_10_10_REV, GPU_DATA_FLOAT);
     case GPU_R11F_G11F_B10F:
       return ELEM(data_format, GPU_DATA_10_11_11_REV, GPU_DATA_FLOAT);
+    case GPU_RGBA16F:
+      return ELEM(data_format, GPU_DATA_HALF_FLOAT, GPU_DATA_FLOAT);
     default:
       return data_format == GPU_DATA_FLOAT;
   }
diff --git a/source/blender/gpu/intern/gpu_viewport.c b/source/blender/gpu/intern/gpu_viewport.c
index 71bdf9e336b..e267d5a2f12 100644
--- a/source/blender/gpu/intern/gpu_viewport.c
+++ b/source/blender/gpu/intern/gpu_viewport.c
@@ -147,6 +147,10 @@ static void gpu_viewport_textures_create(GPUViewport *viewport)
   if (viewport->depth_tx == NULL) {
     viewport->depth_tx = GPU_texture_create_2d(
         "dtxl_depth", UNPACK2(size), 1, GPU_DEPTH24_STENCIL8, NULL);
+    if (GPU_clear_viewport_workaround()) {
+      static int depth_clear = 0;
+      GPU_texture_clear(viewport->depth_tx, GPU_DATA_UINT_24_8, &depth_clear);
+    }
   }
 
   if (!viewport->depth_tx || !viewport->color_render_tx[0] || !viewport->color_overlay_tx[0]) {
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index 2ca1fd3f3d0..240951c1ebd 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -47,13 +47,11 @@ Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
 
 Batch *MTLBackend::batch_alloc()
 {
-  /* TODO(Metal): Full MTLBatch implementation. */
   return new MTLBatch();
 };
 
 DrawList *MTLBackend::drawlist_alloc(int list_length)
 {
-  /* TODO(Metal): Full MTLDrawList implementation. */
   return new MTLDrawList(list_length);
 };
 
@@ -420,6 +418,7 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
   GCaps.depth_blitting_workaround = false;
   GCaps.use_main_context_workaround = false;
   GCaps.broken_amd_driver = false;
+  GCaps.clear_viewport_workaround = true;
 
   /* Metal related workarounds. */
   /* Minimum per-vertex stride is 4 bytes in Metal.
diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh
index 236367bf5a4..9e179e662b5 100644
--- a/source/blender/gpu/metal/mtl_batch.hh
+++ b/source/blender/gpu/metal/mtl_batch.hh
@@ -10,31 +10,126 @@
 #pragma once
 
 #include "MEM_guardedalloc.h"
-
 #include "gpu_batch_private.hh"
+#include "mtl_index_buffer.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+#include "mtl_vertex_buffer.hh"
+
+namespace blender::gpu {
+
+class MTLContext;
+class MTLShaderInterface;
+
+#define GPU_VAO_STATIC_LEN 64
 
-namespace blender {
-namespace gpu {
+struct VertexBufferID {
+  uint32_t id : 16;
+  uint32_t is_instance : 15;
+  uint32_t used : 1;
+};
 
-/* Pass-through MTLBatch. TODO(Metal): Implement. */
 class MTLBatch : public Batch {
+
+  /* Vertex Bind-state Caching for a given shader interface used with the Batch. */
+  struct VertexDescriptorShaderInterfacePair {
+    MTLVertexDescriptor vertex_descriptor{};
+    const ShaderInterface *interface = nullptr;
+    uint16_t attr_mask{};
+    int num_buffers{};
+    VertexBufferID bufferIds[GPU_BATCH_VBO_MAX_LEN] = {};
+    /* Cache life index compares a cache entry with the active MTLBatch state.
+     * This is initially set to the cache life index of MTLBatch. If the batch has been modified,
+     * this index is incremented to cheaply invalidate existing cache entries.  */
+    uint32_t cache_life_index = 0;
+  };
+
+  class MTLVertexDescriptorCache {
+
+   private:
+    MTLBatch *batch_;
+
+    VertexDescriptorShaderInterfacePair cache_[GPU_VAO_STATIC_LEN] = {};
+    MTLContext *cache_context_ = nullptr;
+    uint32_t cache_life_index_ = 0;
+
+   public:
+    MTLVertexDescriptorCache(MTLBatch *batch) : batch_(batch){};
+    VertexDescriptorShaderInterfacePair *find(const ShaderInterface *interface);

@@ Diff output truncated at 10240 characters. @@