[Bf-blender-cvs] [df1fe18ed75] master: Metal: Fix GPencil texture buffer attribute packing issue and cutting tool rendering.

Tue Dec 20 14:09:15 CET 2022

Commit: df1fe18ed75812265cf2af186f6b082d8d27d9fe
Author: Jason Fielder
Date:   Tue Dec 20 14:08:37 2022 +0100
Branches: master
https://developer.blender.org/rBdf1fe18ed75812265cf2af186f6b082d8d27d9fe

Metal: Fix GPencil texture buffer attribute packing issue and cutting tool rendering.

Add Line Loop topology support for the cutting tool, and add support for packing several vertex attributes across individual pixels within a texture buffer.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D16783

===================================================================

M	source/blender/gpu/metal/mtl_immediate.mm
M	source/blender/gpu/metal/mtl_primitive.hh
M	source/blender/gpu/metal/mtl_texture.mm

===================================================================

diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
index 7af5ca30578..f0809e6e9d3 100644
--- a/source/blender/gpu/metal/mtl_immediate.mm
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -39,8 +39,16 @@ uchar *MTLImmediate::begin()
   metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
   has_begun_ = true;
 
+  /* If prim type is line loop, add an extra vertex at the end for placing the closing line,
+   * as Metal does not support this primitive type. We treat this as a line strip with one
+   * extra vertex. */
+  int vertex_alloc_length = vertex_len;
+  if (prim_type == GPU_PRIM_LINE_LOOP) {
+    vertex_alloc_length++;
+  }
+
   /* Allocate a range of data and return host-accessible pointer. */
-  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_alloc_length);
   current_allocation_ = context_->get_scratchbuffer_manager()
                             .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
   [current_allocation_.metal_buffer retain];
@@ -266,71 +274,88 @@ void MTLImmediate::end()
        * For immediate mode, generating these is currently very cheap, as we use
        * fast scratch buffer allocations. Though we may benefit from caching of
        * frequently used buffer sizes. */
+      bool rendered = false;
       if (mtl_needs_topology_emulation(this->prim_type)) {
 
-        /* Debug safety check for SSBO FETCH MODE. */
-        if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
-          BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
-        }
-
         /* Emulate Tri-fan. */
-        if (this->prim_type == GPU_PRIM_TRI_FAN) {
-          /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
-           * vertices. */
-          uint32_t base_vert_count = this->vertex_idx;
-          uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
-          uint32_t fan_index_count = num_triangles * 3;
-          BLI_assert(num_triangles > 0);
-
-          uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
-          uint32_t *index_buffer = nullptr;
-
-          MTLTemporaryBuffer allocation =
-              context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
-                  alloc_size, 128);
-          index_buffer = (uint32_t *)allocation.data;
-
-          int a = 0;
-          for (int i = 0; i < num_triangles; i++) {
-            index_buffer[a++] = 0;
-            index_buffer[a++] = i + 1;
-            index_buffer[a++] = i + 2;
-          }
+        switch (this->prim_type) {
+          case GPU_PRIM_TRI_FAN: {
+            /* Debug safety check for SSBO FETCH MODE. */
+            if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+              BLI_assert(
+                  false &&
+                  "Topology emulation for TriangleFan not supported with SSBO Vertex Fetch mode");
+            }
 
-          @autoreleasepool {
+            /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+             * vertices. */
+            uint32_t base_vert_count = this->vertex_idx;
+            uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+            uint32_t fan_index_count = num_triangles * 3;
+            BLI_assert(num_triangles > 0);
+
+            uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+            uint32_t *index_buffer = nullptr;
+
+            MTLTemporaryBuffer allocation =
+                context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+                    alloc_size, 128);
+            index_buffer = (uint32_t *)allocation.data;
+
+            int a = 0;
+            for (int i = 0; i < num_triangles; i++) {
+              index_buffer[a++] = 0;
+              index_buffer[a++] = i + 1;
+              index_buffer[a++] = i + 2;
+            }
 
-            id<MTLBuffer> index_buffer_mtl = nil;
-            uint32_t index_buffer_offset = 0;
+            @autoreleasepool {
 
-            /* Region of scratch buffer used for topology emulation element data.
-             * NOTE(Metal): We do not need to manually flush as the entire scratch
-             * buffer for current command buffer is flushed upon submission. */
-            index_buffer_mtl = allocation.metal_buffer;
-            index_buffer_offset = allocation.buffer_offset;
+              id<MTLBuffer> index_buffer_mtl = nil;
+              uint32_t index_buffer_offset = 0;
 
-            /* Set depth stencil state (requires knowledge of primitive type). */
-            context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+              /* Region of scratch buffer used for topology emulation element data.
+               * NOTE(Metal): We do not need to manually flush as the entire scratch
+               * buffer for current command buffer is flushed upon submission. */
+              index_buffer_mtl = allocation.metal_buffer;
+              index_buffer_offset = allocation.buffer_offset;
 
-            /* Bind Vertex Buffer. */
-            rps.bind_vertex_buffer(
-                current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+              /* Set depth stencil state (requires knowledge of primitive type). */
+              context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
 
-            /* Draw. */
-            [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
-                            indexCount:fan_index_count
-                             indexType:MTLIndexTypeUInt32
-                           indexBuffer:index_buffer_mtl
-                     indexBufferOffset:index_buffer_offset];
-          }
-        }
-        else {
-          /* TODO(Metal): Topology emulation for line loop.
-           * NOTE(Metal): This is currently not used anywhere and modified at the high
-           * level for efficiency in such cases. */
-          BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
+              /* Bind Vertex Buffer. */
+              rps.bind_vertex_buffer(
+                  current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+              /* Draw. */
+              [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+                              indexCount:fan_index_count
+                               indexType:MTLIndexTypeUInt32
+                             indexBuffer:index_buffer_mtl
+                       indexBufferOffset:index_buffer_offset];
+              context_->main_command_buffer.register_draw_counters(fan_index_count);
+            }
+            rendered = true;
+          } break;
+          case GPU_PRIM_LINE_LOOP: {
+            /* Patch final vertex of line loop to close. Rendered using LineStrip.
+             * Note: vertex_len represents original length, however, allocated Metal
+             * buffer contains space for one extra vertex when LineLoop is used. */
+            uchar *buffer_data = reinterpret_cast<uchar *>(current_allocation_.data);
+            memcpy(buffer_data + (vertex_len)*vertex_format.stride,
+                   buffer_data,
+                   vertex_format.stride);
+            this->vertex_idx++;
+          } break;
+          default: {
+            BLI_assert_unreachable();
+          } break;
         }
       }
-      else {
+
+      /* If not yet rendered, run through main render path. LineLoop primitive topology emulation
+       * will simply amend original data passed into default rendering path. */
+      if (!rendered) {
         MTLPrimitiveType primitive_type = metal_primitive_type_;
         int vertex_count = this->vertex_idx;
 
diff --git a/source/blender/gpu/metal/mtl_primitive.hh b/source/blender/gpu/metal/mtl_primitive.hh
index b32854a04bf..0b66a51d630 100644
--- a/source/blender/gpu/metal/mtl_primitive.hh
+++ b/source/blender/gpu/metal/mtl_primitive.hh
@@ -39,10 +39,10 @@ static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type)
       return MTLPrimitiveTypePoint;
     case GPU_PRIM_LINES:
     case GPU_PRIM_LINES_ADJ:
-    case GPU_PRIM_LINE_LOOP:
       return MTLPrimitiveTypeLine;
     case GPU_PRIM_LINE_STRIP:
     case GPU_PRIM_LINE_STRIP_ADJ:
+    case GPU_PRIM_LINE_LOOP:
       return MTLPrimitiveTypeLineStrip;
     case GPU_PRIM_TRIS:
     case GPU_PRIM_TRI_FAN:
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index 411d1187610..d2d466bffe1 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -1621,6 +1621,7 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   }
 
   /* Verify Texture and vertex buffer alignment. */
+  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
   int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
   int bytes_per_row = bytes_per_pixel * w_;
 
@@ -1628,12 +1629,40 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   uint32_t align_requirement = static_cast<uint32_t>(
       [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
 
-  /* Verify per-vertex size aligns with texture size. */
-  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
-  BLI_assert(bytes_per_pixel == format->stride &&
-             "Pixel format stride MUST match the texture format stride -- These being different "
-             "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
-  UNUSED_VARS_NDEBUG(format);
+  /* If stride is larger than bytes per pixel, but format has multiple attributes,
+   * split attributes across several pixels. */
+  if (format->stride > bytes_per_pixel && format->attr_len > 1) {
+
+    /* We need to increase the number of pixels available to store additional attributes.
+     * First ensure that the total stride of the vertex format fits uniformly into
+     * multiple pixels. If these sizes are different, then attributes are of differing
+     * sizes and this operation is unsupported. */
+    if (bytes_per_pixel * format->attr_len != format->stride) {
+      B

@@ Diff output truncated at 10240 characters. @@