[Bf-blender-cvs] [5f4409b02ef] master: Metal: MTLIndexBuf class implementation.

Jason Fielder noreply at git.blender.org
Thu Sep 1 22:03:46 CEST 2022


Commit: 5f4409b02ef7c54089ff1b491e008d4b86c030f4
Author: Jason Fielder
Date:   Thu Sep 1 21:42:47 2022 +0200
Branches: master
https://developer.blender.org/rB5f4409b02ef7c54089ff1b491e008d4b86c030f4

Metal: MTLIndexBuf class implementation.

Implementation also contains a number of optimisations and feature enablements specific to the Metal API and Apple Silicon GPUs.

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D15369

===================================================================

M	source/blender/gpu/CMakeLists.txt
M	source/blender/gpu/GPU_index_buffer.h
M	source/blender/gpu/GPU_primitive.h
M	source/blender/gpu/intern/gpu_index_buffer.cc
M	source/blender/gpu/intern/gpu_index_buffer_private.hh
M	source/blender/gpu/metal/mtl_backend.hh
M	source/blender/gpu/metal/mtl_backend.mm
M	source/blender/gpu/metal/mtl_context.hh
A	source/blender/gpu/metal/mtl_index_buffer.hh
A	source/blender/gpu/metal/mtl_index_buffer.mm
M	source/blender/gpu/metal/mtl_query.hh
M	source/blender/gpu/metal/mtl_query.mm
M	source/blender/gpu/opengl/gl_index_buffer.hh

===================================================================

diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index c289a21421a..6758b4b8794 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -191,6 +191,7 @@ set(METAL_SRC
   metal/mtl_context.mm
   metal/mtl_debug.mm
   metal/mtl_framebuffer.mm
+  metal/mtl_index_buffer.mm
   metal/mtl_memory.mm
   metal/mtl_query.mm
   metal/mtl_state.mm
@@ -204,6 +205,7 @@ set(METAL_SRC
   metal/mtl_context.hh
   metal/mtl_debug.hh
   metal/mtl_framebuffer.hh
+  metal/mtl_index_buffer.hh
   metal/mtl_memory.hh
   metal/mtl_query.hh
   metal/mtl_state.hh
diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h
index bbb431cbc15..e6345b1e43b 100644
--- a/source/blender/gpu/GPU_index_buffer.h
+++ b/source/blender/gpu/GPU_index_buffer.h
@@ -26,6 +26,9 @@ typedef struct GPUIndexBufBuilder {
   uint index_len;
   uint index_min;
   uint index_max;
+  uint restart_index_value;
+  bool uses_restart_indices;
+
   GPUPrimType prim_type;
   uint32_t *data;
 } GPUIndexBufBuilder;
diff --git a/source/blender/gpu/GPU_primitive.h b/source/blender/gpu/GPU_primitive.h
index 4860b037bfb..de2feac2607 100644
--- a/source/blender/gpu/GPU_primitive.h
+++ b/source/blender/gpu/GPU_primitive.h
@@ -9,6 +9,7 @@
 
 #pragma once
 
+#include "BLI_assert.h"
 #include "GPU_common.h"
 
 #ifdef __cplusplus
@@ -42,6 +43,79 @@ typedef enum {
   GPU_PRIM_CLASS_ANY = GPU_PRIM_CLASS_POINT | GPU_PRIM_CLASS_LINE | GPU_PRIM_CLASS_SURFACE,
 } GPUPrimClass;
 
+inline int gpu_get_prim_count_from_type(uint vertex_len, GPUPrimType prim_type)
+{
+  /* does vertex_len make sense for this primitive type? */
+  if (vertex_len == 0) {
+    return 0;
+  }
+
+  switch (prim_type) {
+    case GPU_PRIM_POINTS:
+      return vertex_len;
+
+    case GPU_PRIM_LINES:
+      BLI_assert(vertex_len % 2 == 0);
+      return vertex_len / 2;
+
+    case GPU_PRIM_LINE_STRIP:
+      return vertex_len - 1;
+
+    case GPU_PRIM_LINE_LOOP:
+      return vertex_len;
+
+    case GPU_PRIM_LINES_ADJ:
+      BLI_assert(vertex_len % 4 == 0);
+      return vertex_len / 4;
+
+    case GPU_PRIM_LINE_STRIP_ADJ:
+      return vertex_len - 2;
+
+    case GPU_PRIM_TRIS:
+      BLI_assert(vertex_len % 3 == 0);
+      return vertex_len / 3;
+
+    case GPU_PRIM_TRI_STRIP:
+      BLI_assert(vertex_len >= 3);
+      return vertex_len - 2;
+
+    case GPU_PRIM_TRI_FAN:
+      BLI_assert(vertex_len >= 3);
+      return vertex_len - 2;
+
+    case GPU_PRIM_TRIS_ADJ:
+      BLI_assert(vertex_len % 6 == 0);
+      return vertex_len / 6;
+
+    default:
+      BLI_assert_unreachable();
+      return 0;
+  }
+}
+
+inline bool is_restart_compatible(GPUPrimType type)
+{
+  switch (type) {
+    case GPU_PRIM_POINTS:
+    case GPU_PRIM_LINES:
+    case GPU_PRIM_TRIS:
+    case GPU_PRIM_LINES_ADJ:
+    case GPU_PRIM_TRIS_ADJ:
+    case GPU_PRIM_NONE:
+    default: {
+      return false;
+    }
+    case GPU_PRIM_LINE_STRIP:
+    case GPU_PRIM_LINE_LOOP:
+    case GPU_PRIM_TRI_STRIP:
+    case GPU_PRIM_TRI_FAN:
+    case GPU_PRIM_LINE_STRIP_ADJ: {
+      return true;
+    }
+  }
+  return false;
+}
+
 /**
  * TODO: Improve error checking by validating that the shader is suited for this primitive type.
  * GPUPrimClass GPU_primtype_class(GPUPrimType);
diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc
index 146461d1dfb..08c31d0d589 100644
--- a/source/blender/gpu/intern/gpu_index_buffer.cc
+++ b/source/blender/gpu/intern/gpu_index_buffer.cc
@@ -16,6 +16,8 @@
 
 #include "gpu_index_buffer_private.hh"
 
+#include "GPU_platform.h"
+
 #include <cstring>
 
 #define KEEP_SINGLE_COPY 1
@@ -40,6 +42,28 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *builder,
   builder->index_min = UINT32_MAX;
   builder->index_max = 0;
   builder->prim_type = prim_type;
+
+#ifdef __APPLE__
+  /* Only encode restart indices for restart-compatible primitive types.
+   * Resolves out-of-bounds read error on macOS. Using 0-index will ensure
+   * degenerative primitives when skipping primitives is required and will
+   * incur no additional performance cost for rendering. */
+  if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) {
+    /* We will still use restart-indices for point primtives and then
+     * patch these during IndexBuf::init, as we cannot benefit from degenerative
+     * primitives to eliminate these. */
+    builder->restart_index_value = (is_restart_compatible(prim_type) ||
+                                    prim_type == GPU_PRIM_POINTS) ?
+                                       RESTART_INDEX :
+                                       0;
+  }
+  else {
+    builder->restart_index_value = RESTART_INDEX;
+  }
+#else
+  builder->restart_index_value = RESTART_INDEX;
+#endif
+  builder->uses_restart_indices = false;
   builder->data = (uint *)MEM_callocN(builder->max_index_len * sizeof(uint), "GPUIndexBuf data");
 }
 
@@ -94,7 +118,8 @@ void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *builder)
   assert(builder->data != nullptr);
   assert(builder->index_len < builder->max_index_len);
 #endif
-  builder->data[builder->index_len++] = RESTART_INDEX;
+  builder->data[builder->index_len++] = builder->restart_index_value;
+  builder->uses_restart_indices = true;
 }
 
 void GPU_indexbuf_add_point_vert(GPUIndexBufBuilder *builder, uint v)
@@ -186,8 +211,9 @@ void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem)
 {
   BLI_assert(builder->prim_type == GPU_PRIM_POINTS);
   BLI_assert(elem < builder->max_index_len);
-  builder->data[elem++] = RESTART_INDEX;
+  builder->data[elem++] = builder->restart_index_value;
   builder->index_len = MAX2(builder->index_len, elem);
+  builder->uses_restart_indices = true;
 }
 
 void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
@@ -195,9 +221,10 @@ void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
   BLI_assert(builder->prim_type == GPU_PRIM_LINES);
   BLI_assert((elem + 1) * 2 <= builder->max_index_len);
   uint idx = elem * 2;
-  builder->data[idx++] = RESTART_INDEX;
-  builder->data[idx++] = RESTART_INDEX;
+  builder->data[idx++] = builder->restart_index_value;
+  builder->data[idx++] = builder->restart_index_value;
   builder->index_len = MAX2(builder->index_len, idx);
+  builder->uses_restart_indices = true;
 }
 
 void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
@@ -205,10 +232,11 @@ void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
   BLI_assert(builder->prim_type == GPU_PRIM_TRIS);
   BLI_assert((elem + 1) * 3 <= builder->max_index_len);
   uint idx = elem * 3;
-  builder->data[idx++] = RESTART_INDEX;
-  builder->data[idx++] = RESTART_INDEX;
-  builder->data[idx++] = RESTART_INDEX;
+  builder->data[idx++] = builder->restart_index_value;
+  builder->data[idx++] = builder->restart_index_value;
+  builder->data[idx++] = builder->restart_index_value;
   builder->index_len = MAX2(builder->index_len, idx);
+  builder->uses_restart_indices = true;
 }
 
 /** \} */
@@ -226,7 +254,12 @@ IndexBuf::~IndexBuf()
   }
 }
 
-void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint max_index)
+void IndexBuf::init(uint indices_len,
+                    uint32_t *indices,
+                    uint min_index,
+                    uint max_index,
+                    GPUPrimType prim_type,
+                    bool uses_restart_indices)
 {
   is_init_ = true;
   data_ = indices;
@@ -234,6 +267,21 @@ void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint ma
   index_len_ = indices_len;
   is_empty_ = min_index > max_index;
 
+  /* Patch index buffer to remove restart indices from
+   * non-restart-compatible primitive types. Restart indices
+   * are situationally added to selectively hide vertices.
+   * Metal does not support restart-indices for non-restart-compatible
+   * types, as such we should remove these indices.
+   *
+   * We only need to perform this for point primitives, as
+   * line primitives/triangle primitives can use index 0 for all
+   * vertices to create a degenerative primitive, where all
+   * vertices share the same index and skip rendering via HW
+   * culling. */
+  if (prim_type == GPU_PRIM_POINTS && uses_restart_indices) {
+    this->strip_restart_indices();
+  }
+
 #if GPU_TRACK_INDEX_RANGE
   /* Everything remains 32 bit while building to keep things simple.
    * Find min/max after, then convert to smallest index type possible. */
@@ -243,7 +291,18 @@ void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint ma
 
   if (range <= 0xFFFF) {
     index_type_ = GPU_INDEX_U16;
-    this->squeeze_indices_short(min_index, max_index);
+    bool do_clamp_indices = false;
+#  ifdef __APPLE__
+    /* NOTE: For the Metal Backend, we use degenerative primitives to hide vertices
+     * which are not restart compatible. When this is done, we need to ensure
+     * that compressed index ranges clamp all index values within the valid
+     * range, rather than maximally clamping against the USHORT restart index
+     * value of 0xFFFFu, as this will cause an out-of-bounds read during
+     * vertex assembly. */
+    do_clamp_indices = GPU_type_matches_ex(
+        GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL);
+#  endif
+    this->squeeze_indices_short(min_index, max_index, prim_type, do_clamp_indices);
   }
 #endif
 }
@@ -302,7 +361,10 @@ uint IndexBuf::index_range(uint *r_min, uint *r_max)
   return max_value - min_value;
 }
 
-void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx)
+void IndexBuf::squeeze_indices_short(uint min_idx,
+                                     uint max_idx,
+                                     GPUPrimType prim_type,
+                                     bool clamp_indices_in_range)
 {
   /* data will never be *larger* than builder->data...
    * converting in place to avoid extra allocation */
@@ -311,8 +373,22 @@ void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx)
 
   if (max_idx >= 0xFFFF) {
     index_base_ = min_idx;
+    /* NOTE: When using restart_index=0 for degenerative primitives indices

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list