[Bf-blender-cvs] [84c25fdcaa4] master: Metal: Improve command buffer handling and workload scheduling.

Mon Jan 23 17:52:18 CET 2023

Commit: 84c25fdcaa42ae1ac7a937d4021095273f08603d
Author: Jason Fielder
Date:   Mon Jan 23 17:47:11 2023 +0100
Branches: master
https://developer.blender.org/rB84c25fdcaa42ae1ac7a937d4021095273f08603d

Metal: Improve command buffer handling and workload scheduling.

Improve handling for cases where maximum in-flight command buffer count is exceeded. This can occur during light-baking operations. Ensures the application handles this gracefully and also improves workload pipelining by situationally stalling until GPU work has completed, if too much work is queued up.

This may have a tangible benefit for T103742 by ensuring Blender does not queue up too much GPU work.

Authored by Apple: Michael Parkin-White

Ref T96261
Ref T103742
Depends on D17018

Reviewed By: fclem

Maniphest Tasks: T103742, T96261

Differential Revision: https://developer.blender.org/D17019

===================================================================

M	intern/ghost/intern/GHOST_ContextCGL.h
M	intern/ghost/intern/GHOST_ContextCGL.mm
M	source/blender/editors/screen/glutil.c
M	source/blender/gpu/metal/mtl_command_buffer.mm
M	source/blender/gpu/metal/mtl_common.hh

===================================================================

diff --git a/intern/ghost/intern/GHOST_ContextCGL.h b/intern/ghost/intern/GHOST_ContextCGL.h
index d19fffffb43..60098c99fe3 100644
--- a/intern/ghost/intern/GHOST_ContextCGL.h
+++ b/intern/ghost/intern/GHOST_ContextCGL.h
@@ -23,6 +23,22 @@
 @class NSView;
 
 class GHOST_ContextCGL : public GHOST_Context {
+
+ public:
+  /* Defines the number of simultaneous command buffers which can be in flight.
+   * The default limit of `64` is considered to be optimal for Blender. Too many command buffers
+   * will result in workload fragmentation and additional system-level overhead. This limit should
+   * also only be increased if the application is consistently exceeding the limit, and there are
+   * no command buffer leaks.
+   *
+   * If this limit is reached, starting a new command buffer will fail. The Metal backend will
+   * therefore stall until completion and log a warning when this limit is reached in order to
+   * ensure correct function of the app.
+   *
+   * It is generally preferable to reduce the prevalence of GPU_flush or GPU Context switches
+   * (which will both break command submissions), rather than increasing this limit. */
+  static const int max_command_buffer_count = 64;
+
  public:
   /**
    * Constructor.
diff --git a/intern/ghost/intern/GHOST_ContextCGL.mm b/intern/ghost/intern/GHOST_ContextCGL.mm
index 9dad337a5d6..1aa0cb9def4 100644
--- a/intern/ghost/intern/GHOST_ContextCGL.mm
+++ b/intern/ghost/intern/GHOST_ContextCGL.mm
@@ -529,7 +529,8 @@ void GHOST_ContextCGL::metalInit()
     id<MTLDevice> device = m_metalLayer.device;
 
     /* Create a command queue for blit/present operation. */
-    m_metalCmdQueue = (MTLCommandQueue *)[device newCommandQueue];
+    m_metalCmdQueue = (MTLCommandQueue *)[device
+        newCommandQueueWithMaxCommandBufferCount:GHOST_ContextCGL::max_command_buffer_count];
     [m_metalCmdQueue retain];
 
     /* Create shaders for blit operation. */
diff --git a/source/blender/editors/screen/glutil.c b/source/blender/editors/screen/glutil.c
index 4382fd3d1c2..dc5a9885e16 100644
--- a/source/blender/editors/screen/glutil.c
+++ b/source/blender/editors/screen/glutil.c
@@ -26,6 +26,7 @@
 #include "GPU_texture.h"
 
 #ifdef __APPLE__
+#  include "GPU_context.h"
 #  include "GPU_state.h"
 #endif
 
@@ -281,7 +282,9 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
        * This doesn't seem to be too slow,
        * but still would be nice to have fast and nice solution. */
 #ifdef __APPLE__
-      GPU_flush();
+      if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+        GPU_flush();
+      }
 #endif
     }
   }
diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm
index b78540241ad..d9dd14392c9 100644
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -8,6 +8,8 @@
 #include "mtl_debug.hh"
 #include "mtl_framebuffer.hh"
 
+#include "intern/GHOST_ContextCGL.h"
+
 #include <fstream>
 
 using namespace blender;
@@ -45,9 +47,15 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
   if (active_command_buffer_ == nil) {
 
     /* Verify number of active command buffers is below limit.
-     * Exceeding this limit will mean we either have a leak/GPU hang
-     * or we should increase the command buffer limit during MTLQueue creation */
-    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs < MTL_MAX_COMMAND_BUFFERS);
+     * Exceeding this limit will mean we either have a command buffer leak/GPU hang
+     * or we should increase the command buffer limit during MTLQueue creation.
+     * Excessive command buffers can also be caused by frequent GPUContext switches, which cause
+     * the GPU pipeline to flush. This is common during indirect light baking operations.
+     *
+     * NOTE: We currently stall until completion of GPU work upon ::submit if we have reached the
+     * in-flight command buffer limit. */
+    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs <
+               GHOST_ContextCGL::max_command_buffer_count);
 
     if (G.debug & G_DEBUG_GPU) {
       /* Debug: Enable Advanced Errors for GPU work execution. */
@@ -137,6 +145,19 @@ bool MTLCommandBufferManager::submit(bool wait)
   /* Submit command buffer to GPU. */
   [active_command_buffer_ commit];
 
+  /* If we have too many active command buffers in flight, wait until completed to avoid running
+   * out. The limit can be raised via GHOST_ContextCGL::max_command_buffer_count if needed. */
+  if (MTLCommandBufferManager::num_active_cmd_bufs >=
+      (GHOST_ContextCGL::max_command_buffer_count - 1)) {
+    wait = true;
+    MTL_LOG_WARNING(
+        "Maximum number of command buffers in flight. Host will wait until GPU work has "
+        "completed. Consider increasing GHOST_ContextCGL::max_command_buffer_count or reducing "
+        "work fragmentation to better utilise system hardware. Command buffers are flushed upon "
+        "GPUContext switches, this is the most common cause of excessive command buffer "
+        "generation.\n");
+  }
+
   if (wait || (G.debug & G_DEBUG_GPU)) {
     /* Wait until current GPU work has finished executing. */
     [active_command_buffer_ waitUntilCompleted];
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index 5c322efa3f9..8fa3be16556 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -9,7 +9,6 @@
 #define MTL_MAX_DRAWABLES 3
 #define MTL_MAX_SET_BYTES_SIZE 4096
 #define MTL_FORCE_WAIT_IDLE 0
-#define MTL_MAX_COMMAND_BUFFERS 64
 
 /* Number of frames for which we retain in-flight resources such as scratch buffers.
  * Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */