[Bf-blender-cvs] [5f4409b02ef] master: Metal: MTLIndexBuf class implementation.
Jason Fielder
noreply at git.blender.org
Thu Sep 1 22:03:46 CEST 2022
Commit: 5f4409b02ef7c54089ff1b491e008d4b86c030f4
Author: Jason Fielder
Date: Thu Sep 1 21:42:47 2022 +0200
Branches: master
https://developer.blender.org/rB5f4409b02ef7c54089ff1b491e008d4b86c030f4
Metal: MTLIndexBuf class implementation.
Implementation also contains a number of optimisations and feature enablements specific to the Metal API and Apple Silicon GPUs.
Ref T96261
Reviewed By: fclem
Maniphest Tasks: T96261
Differential Revision: https://developer.blender.org/D15369
===================================================================
M source/blender/gpu/CMakeLists.txt
M source/blender/gpu/GPU_index_buffer.h
M source/blender/gpu/GPU_primitive.h
M source/blender/gpu/intern/gpu_index_buffer.cc
M source/blender/gpu/intern/gpu_index_buffer_private.hh
M source/blender/gpu/metal/mtl_backend.hh
M source/blender/gpu/metal/mtl_backend.mm
M source/blender/gpu/metal/mtl_context.hh
A source/blender/gpu/metal/mtl_index_buffer.hh
A source/blender/gpu/metal/mtl_index_buffer.mm
M source/blender/gpu/metal/mtl_query.hh
M source/blender/gpu/metal/mtl_query.mm
M source/blender/gpu/opengl/gl_index_buffer.hh
===================================================================
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index c289a21421a..6758b4b8794 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -191,6 +191,7 @@ set(METAL_SRC
metal/mtl_context.mm
metal/mtl_debug.mm
metal/mtl_framebuffer.mm
+ metal/mtl_index_buffer.mm
metal/mtl_memory.mm
metal/mtl_query.mm
metal/mtl_state.mm
@@ -204,6 +205,7 @@ set(METAL_SRC
metal/mtl_context.hh
metal/mtl_debug.hh
metal/mtl_framebuffer.hh
+ metal/mtl_index_buffer.hh
metal/mtl_memory.hh
metal/mtl_query.hh
metal/mtl_state.hh
diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h
index bbb431cbc15..e6345b1e43b 100644
--- a/source/blender/gpu/GPU_index_buffer.h
+++ b/source/blender/gpu/GPU_index_buffer.h
@@ -26,6 +26,9 @@ typedef struct GPUIndexBufBuilder {
uint index_len;
uint index_min;
uint index_max;
+ uint restart_index_value;
+ bool uses_restart_indices;
+
GPUPrimType prim_type;
uint32_t *data;
} GPUIndexBufBuilder;
diff --git a/source/blender/gpu/GPU_primitive.h b/source/blender/gpu/GPU_primitive.h
index 4860b037bfb..de2feac2607 100644
--- a/source/blender/gpu/GPU_primitive.h
+++ b/source/blender/gpu/GPU_primitive.h
@@ -9,6 +9,7 @@
#pragma once
+#include "BLI_assert.h"
#include "GPU_common.h"
#ifdef __cplusplus
@@ -42,6 +43,79 @@ typedef enum {
GPU_PRIM_CLASS_ANY = GPU_PRIM_CLASS_POINT | GPU_PRIM_CLASS_LINE | GPU_PRIM_CLASS_SURFACE,
} GPUPrimClass;
+inline int gpu_get_prim_count_from_type(uint vertex_len, GPUPrimType prim_type)
+{
+ /* does vertex_len make sense for this primitive type? */
+ if (vertex_len == 0) {
+ return 0;
+ }
+
+ switch (prim_type) {
+ case GPU_PRIM_POINTS:
+ return vertex_len;
+
+ case GPU_PRIM_LINES:
+ BLI_assert(vertex_len % 2 == 0);
+ return vertex_len / 2;
+
+ case GPU_PRIM_LINE_STRIP:
+ return vertex_len - 1;
+
+ case GPU_PRIM_LINE_LOOP:
+ return vertex_len;
+
+ case GPU_PRIM_LINES_ADJ:
+ BLI_assert(vertex_len % 4 == 0);
+ return vertex_len / 4;
+
+ case GPU_PRIM_LINE_STRIP_ADJ:
+ return vertex_len - 2;
+
+ case GPU_PRIM_TRIS:
+ BLI_assert(vertex_len % 3 == 0);
+ return vertex_len / 3;
+
+ case GPU_PRIM_TRI_STRIP:
+ BLI_assert(vertex_len >= 3);
+ return vertex_len - 2;
+
+ case GPU_PRIM_TRI_FAN:
+ BLI_assert(vertex_len >= 3);
+ return vertex_len - 2;
+
+ case GPU_PRIM_TRIS_ADJ:
+ BLI_assert(vertex_len % 6 == 0);
+ return vertex_len / 6;
+
+ default:
+ BLI_assert_unreachable();
+ return 0;
+ }
+}
+
+inline bool is_restart_compatible(GPUPrimType type)
+{
+ switch (type) {
+ case GPU_PRIM_POINTS:
+ case GPU_PRIM_LINES:
+ case GPU_PRIM_TRIS:
+ case GPU_PRIM_LINES_ADJ:
+ case GPU_PRIM_TRIS_ADJ:
+ case GPU_PRIM_NONE:
+ default: {
+ return false;
+ }
+ case GPU_PRIM_LINE_STRIP:
+ case GPU_PRIM_LINE_LOOP:
+ case GPU_PRIM_TRI_STRIP:
+ case GPU_PRIM_TRI_FAN:
+ case GPU_PRIM_LINE_STRIP_ADJ: {
+ return true;
+ }
+ }
+ return false;
+}
+
/**
* TODO: Improve error checking by validating that the shader is suited for this primitive type.
* GPUPrimClass GPU_primtype_class(GPUPrimType);
diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc
index 146461d1dfb..08c31d0d589 100644
--- a/source/blender/gpu/intern/gpu_index_buffer.cc
+++ b/source/blender/gpu/intern/gpu_index_buffer.cc
@@ -16,6 +16,8 @@
#include "gpu_index_buffer_private.hh"
+#include "GPU_platform.h"
+
#include <cstring>
#define KEEP_SINGLE_COPY 1
@@ -40,6 +42,28 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *builder,
builder->index_min = UINT32_MAX;
builder->index_max = 0;
builder->prim_type = prim_type;
+
+#ifdef __APPLE__
+ /* Only encode restart indices for restart-compatible primitive types.
+ * Resolves out-of-bounds read error on macOS. Using 0-index will ensure
+ * degenerative primitives when skipping primitives is required and will
+ * incur no additional performance cost for rendering. */
+ if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) {
+ /* We will still use restart-indices for point primtives and then
+ * patch these during IndexBuf::init, as we cannot benefit from degenerative
+ * primitives to eliminate these. */
+ builder->restart_index_value = (is_restart_compatible(prim_type) ||
+ prim_type == GPU_PRIM_POINTS) ?
+ RESTART_INDEX :
+ 0;
+ }
+ else {
+ builder->restart_index_value = RESTART_INDEX;
+ }
+#else
+ builder->restart_index_value = RESTART_INDEX;
+#endif
+ builder->uses_restart_indices = false;
builder->data = (uint *)MEM_callocN(builder->max_index_len * sizeof(uint), "GPUIndexBuf data");
}
@@ -94,7 +118,8 @@ void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *builder)
assert(builder->data != nullptr);
assert(builder->index_len < builder->max_index_len);
#endif
- builder->data[builder->index_len++] = RESTART_INDEX;
+ builder->data[builder->index_len++] = builder->restart_index_value;
+ builder->uses_restart_indices = true;
}
void GPU_indexbuf_add_point_vert(GPUIndexBufBuilder *builder, uint v)
@@ -186,8 +211,9 @@ void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem)
{
BLI_assert(builder->prim_type == GPU_PRIM_POINTS);
BLI_assert(elem < builder->max_index_len);
- builder->data[elem++] = RESTART_INDEX;
+ builder->data[elem++] = builder->restart_index_value;
builder->index_len = MAX2(builder->index_len, elem);
+ builder->uses_restart_indices = true;
}
void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
@@ -195,9 +221,10 @@ void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
BLI_assert(builder->prim_type == GPU_PRIM_LINES);
BLI_assert((elem + 1) * 2 <= builder->max_index_len);
uint idx = elem * 2;
- builder->data[idx++] = RESTART_INDEX;
- builder->data[idx++] = RESTART_INDEX;
+ builder->data[idx++] = builder->restart_index_value;
+ builder->data[idx++] = builder->restart_index_value;
builder->index_len = MAX2(builder->index_len, idx);
+ builder->uses_restart_indices = true;
}
void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
@@ -205,10 +232,11 @@ void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
BLI_assert(builder->prim_type == GPU_PRIM_TRIS);
BLI_assert((elem + 1) * 3 <= builder->max_index_len);
uint idx = elem * 3;
- builder->data[idx++] = RESTART_INDEX;
- builder->data[idx++] = RESTART_INDEX;
- builder->data[idx++] = RESTART_INDEX;
+ builder->data[idx++] = builder->restart_index_value;
+ builder->data[idx++] = builder->restart_index_value;
+ builder->data[idx++] = builder->restart_index_value;
builder->index_len = MAX2(builder->index_len, idx);
+ builder->uses_restart_indices = true;
}
/** \} */
@@ -226,7 +254,12 @@ IndexBuf::~IndexBuf()
}
}
-void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint max_index)
+void IndexBuf::init(uint indices_len,
+ uint32_t *indices,
+ uint min_index,
+ uint max_index,
+ GPUPrimType prim_type,
+ bool uses_restart_indices)
{
is_init_ = true;
data_ = indices;
@@ -234,6 +267,21 @@ void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint ma
index_len_ = indices_len;
is_empty_ = min_index > max_index;
+ /* Patch index buffer to remove restart indices from
+ * non-restart-compatible primitive types. Restart indices
+ * are situationally added to selectively hide vertices.
+ * Metal does not support restart-indices for non-restart-compatible
+ * types, as such we should remove these indices.
+ *
+ * We only need to perform this for point primitives, as
+ * line primitives/triangle primitives can use index 0 for all
+ * vertices to create a degenerative primitive, where all
+ * vertices share the same index and skip rendering via HW
+ * culling. */
+ if (prim_type == GPU_PRIM_POINTS && uses_restart_indices) {
+ this->strip_restart_indices();
+ }
+
#if GPU_TRACK_INDEX_RANGE
/* Everything remains 32 bit while building to keep things simple.
* Find min/max after, then convert to smallest index type possible. */
@@ -243,7 +291,18 @@ void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint ma
if (range <= 0xFFFF) {
index_type_ = GPU_INDEX_U16;
- this->squeeze_indices_short(min_index, max_index);
+ bool do_clamp_indices = false;
+# ifdef __APPLE__
+ /* NOTE: For the Metal Backend, we use degenerative primitives to hide vertices
+ * which are not restart compatible. When this is done, we need to ensure
+ * that compressed index ranges clamp all index values within the valid
+ * range, rather than maximally clamping against the USHORT restart index
+ * value of 0xFFFFu, as this will cause an out-of-bounds read during
+ * vertex assembly. */
+ do_clamp_indices = GPU_type_matches_ex(
+ GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL);
+# endif
+ this->squeeze_indices_short(min_index, max_index, prim_type, do_clamp_indices);
}
#endif
}
@@ -302,7 +361,10 @@ uint IndexBuf::index_range(uint *r_min, uint *r_max)
return max_value - min_value;
}
-void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx)
+void IndexBuf::squeeze_indices_short(uint min_idx,
+ uint max_idx,
+ GPUPrimType prim_type,
+ bool clamp_indices_in_range)
{
/* data will never be *larger* than builder->data...
* converting in place to avoid extra allocation */
@@ -311,8 +373,22 @@ void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx)
if (max_idx >= 0xFFFF) {
index_base_ = min_idx;
+ /* NOTE: When using restart_index=0 for degenerative primitives indices
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list