[Bf-blender-cvs] [19e0b60f3e1] master: Cycles: MetalDeviceQueue - capture of multiple dispatches, and some tidying

Michael Jones noreply at git.blender.org
Mon Jun 13 14:42:08 CEST 2022


Commit: 19e0b60f3e1270a34b52d7829169ab8af6c816cb
Author: Michael Jones
Date:   Mon Jun 13 12:33:43 2022 +0100
Branches: master
https://developer.blender.org/rB19e0b60f3e1270a34b52d7829169ab8af6c816cb

Cycles: MetalDeviceQueue - capture of multiple dispatches, and some tidying

This patch adds a new mode of gpu capture (env var `CYCLES_DEBUG_METAL_CAPTURE_SAMPLES`) to capture a block of dispatches between "reset" calls. It also fixes member data naming inconsistencies and adds some missing OS version checks.

Screenshot showing .gputrace capture in Xcode 14.0 beta (using `CYCLES_DEBUG_METAL_CAPTURE_SAMPLES="1"` and `CYCLES_DEBUG_METAL_CAPTURE_LIMIT="10"`):

{F13155703}

Reviewed By: sergey, brecht

Differential Revision: https://developer.blender.org/D15179

===================================================================

M	intern/cycles/device/metal/queue.h
M	intern/cycles/device/metal/queue.mm

===================================================================

diff --git a/intern/cycles/device/metal/queue.h b/intern/cycles/device/metal/queue.h
index de20514de0b..b0bd487c86d 100644
--- a/intern/cycles/device/metal/queue.h
+++ b/intern/cycles/device/metal/queue.h
@@ -38,45 +38,50 @@ class MetalDeviceQueue : public DeviceQueue {
   virtual void copy_from_device(device_memory &mem) override;
 
  protected:
+  void setup_capture();
+  void update_capture(DeviceKernel kernel);
+  void begin_capture();
+  void end_capture();
   void prepare_resources(DeviceKernel kernel);
 
   id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
   id<MTLBlitCommandEncoder> get_blit_encoder();
 
-  MetalDevice *metal_device;
-  MetalBufferPool temp_buffer_pool;
+  MetalDevice *metal_device_;
+  MetalBufferPool temp_buffer_pool_;
 
   API_AVAILABLE(macos(11.0), ios(14.0))
-  MTLCommandBufferDescriptor *command_buffer_desc = nullptr;
-  id<MTLDevice> mtlDevice = nil;
-  id<MTLCommandQueue> mtlCommandQueue = nil;
-  id<MTLCommandBuffer> mtlCommandBuffer = nil;
-  id<MTLComputeCommandEncoder> mtlComputeEncoder = nil;
-  id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
+  MTLCommandBufferDescriptor *command_buffer_desc_ = nullptr;
+  id<MTLDevice> mtlDevice_ = nil;
+  id<MTLCommandQueue> mtlCommandQueue_ = nil;
+  id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
+  id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
+  id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
   API_AVAILABLE(macos(10.14), ios(14.0))
-  id<MTLSharedEvent> shared_event = nil;
+  id<MTLSharedEvent> shared_event_ = nil;
   API_AVAILABLE(macos(10.14), ios(14.0))
-  MTLSharedEventListener *shared_event_listener = nil;
+  MTLSharedEventListener *shared_event_listener_ = nil;
 
-  dispatch_queue_t event_queue;
-  dispatch_semaphore_t wait_semaphore;
+  dispatch_queue_t event_queue_;
+  dispatch_semaphore_t wait_semaphore_;
 
   struct CopyBack {
     void *host_pointer;
     void *gpu_mem;
     uint64_t size;
   };
-  std::vector<CopyBack> copy_back_mem;
+  std::vector<CopyBack> copy_back_mem_;
 
-  uint64_t shared_event_id;
-  uint64_t command_buffers_submitted = 0;
-  uint64_t command_buffers_completed = 0;
-  Stats &stats;
+  uint64_t shared_event_id_;
+  uint64_t command_buffers_submitted_ = 0;
+  uint64_t command_buffers_completed_ = 0;
+  Stats &stats_;
 
   void close_compute_encoder();
   void close_blit_encoder();
 
-  bool verbose_tracing = false;
+  bool verbose_tracing_ = false;
+  bool label_command_encoders_ = false;
 
   /* Per-kernel profiling (see CYCLES_METAL_PROFILING). */
 
@@ -85,28 +90,30 @@ class MetalDeviceQueue : public DeviceQueue {
     int work_size;
     uint64_t timing_id;
   };
-  std::vector<TimingData> command_encoder_labels;
-  id<MTLSharedEvent> timing_shared_event = nil;
-  uint64_t timing_shared_event_id;
-  uint64_t command_buffer_start_timing_id;
+  std::vector<TimingData> command_encoder_labels_;
+  API_AVAILABLE(macos(10.14), ios(14.0))
+  id<MTLSharedEvent> timing_shared_event_ = nil;
+  uint64_t timing_shared_event_id_;
+  uint64_t command_buffer_start_timing_id_;
 
   struct TimingStats {
     double total_time = 0.0;
     uint64_t total_work_size = 0;
     uint64_t num_dispatches = 0;
   };
-  TimingStats timing_stats[DEVICE_KERNEL_NUM];
-  double last_completion_time = 0.0;
+  TimingStats timing_stats_[DEVICE_KERNEL_NUM];
+  double last_completion_time_ = 0.0;
 
   /* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */
 
-  id<MTLCaptureScope> mtlCaptureScope = nil;
-  DeviceKernel capture_kernel;
-  int capture_dispatch = 0;
-  int capture_dispatch_counter = 0;
-  bool is_capturing = false;
-  bool is_capturing_to_disk = false;
-  bool has_captured_to_disk = false;
+  id<MTLCaptureScope> mtlCaptureScope_ = nil;
+  DeviceKernel capture_kernel_;
+  int capture_dispatch_counter_ = 0;
+  bool capture_samples_ = false;
+  int capture_reset_counter_ = 0;
+  bool is_capturing_ = false;
+  bool is_capturing_to_disk_ = false;
+  bool has_captured_to_disk_ = false;
 };
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 9d8625e1455..0e260886abb 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -17,79 +17,180 @@ CCL_NAMESPACE_BEGIN
 /* MetalDeviceQueue */
 
 MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
-    : DeviceQueue(device), metal_device(device), stats(device->stats)
+    : DeviceQueue(device), metal_device_(device), stats_(device->stats)
 {
   if (@available(macos 11.0, *)) {
-    command_buffer_desc = [[MTLCommandBufferDescriptor alloc] init];
-    command_buffer_desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
+    command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc] init];
+    command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
   }
 
-  mtlDevice = device->mtlDevice;
-  mtlCommandQueue = [mtlDevice newCommandQueue];
+  mtlDevice_ = device->mtlDevice;
+  mtlCommandQueue_ = [mtlDevice_ newCommandQueue];
 
   if (@available(macos 10.14, *)) {
-    shared_event = [mtlDevice newSharedEvent];
-    shared_event_id = 1;
+    shared_event_ = [mtlDevice_ newSharedEvent];
+    shared_event_id_ = 1;
 
     /* Shareable event listener */
-    event_queue = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
-    shared_event_listener = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue];
+    event_queue_ = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
+    shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
   }
 
-  wait_semaphore = dispatch_semaphore_create(0);
+  wait_semaphore_ = dispatch_semaphore_create(0);
 
   if (@available(macos 10.14, *)) {
     if (getenv("CYCLES_METAL_PROFILING")) {
       /* Enable per-kernel timing breakdown (shown at end of render). */
-      timing_shared_event = [mtlDevice newSharedEvent];
+      timing_shared_event_ = [mtlDevice_ newSharedEvent];
+      label_command_encoders_ = true;
     }
     if (getenv("CYCLES_METAL_DEBUG")) {
       /* Enable very verbose tracing (shows every dispatch). */
-      verbose_tracing = true;
+      verbose_tracing_ = true;
+      label_command_encoders_ = true;
     }
-    timing_shared_event_id = 1;
+    timing_shared_event_id_ = 1;
   }
 
-  capture_kernel = DeviceKernel(-1);
+  setup_capture();
+}
+
+void MetalDeviceQueue::setup_capture()
+{
+  capture_kernel_ = DeviceKernel(-1);
+
   if (auto capture_kernel_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
-    /* Enable .gputrace capture for the specified DeviceKernel. */
-    MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
-    mtlCaptureScope = [captureManager newCaptureScopeWithDevice:mtlDevice];
-    mtlCaptureScope.label = [NSString stringWithFormat:@"Cycles kernel dispatch"];
-    [captureManager setDefaultCaptureScope:mtlCaptureScope];
+    /* CYCLES_DEBUG_METAL_CAPTURE_KERNEL captures a single dispatch of the specified kernel. */
+    capture_kernel_ = DeviceKernel(atoi(capture_kernel_str));
+    printf("Capture kernel: %d = %s\n", capture_kernel_, device_kernel_as_string(capture_kernel_));
 
-    capture_dispatch = -1;
+    capture_dispatch_counter_ = 0;
     if (auto capture_dispatch_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_DISPATCH")) {
-      capture_dispatch = atoi(capture_dispatch_str);
-      capture_dispatch_counter = 0;
+      capture_dispatch_counter_ = atoi(capture_dispatch_str);
+
+      printf("Capture dispatch number %d\n", capture_dispatch_counter_);
+    }
+  }
+  else if (auto capture_samples_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_SAMPLES")) {
+    /* CYCLES_DEBUG_METAL_CAPTURE_SAMPLES captures a block of dispatches from reset#(N) to
+     * reset#(N+1). */
+    capture_samples_ = true;
+    capture_reset_counter_ = atoi(capture_samples_str);
+
+    capture_dispatch_counter_ = INT_MAX;
+    if (auto capture_limit_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_LIMIT")) {
+      /* CYCLES_DEBUG_METAL_CAPTURE_LIMIT sets the maximum number of dispatches to capture. */
+      capture_dispatch_counter_ = atoi(capture_limit_str);
     }
 
-    capture_kernel = DeviceKernel(atoi(capture_kernel_str));
-    printf("Capture kernel: %d = %s\n", capture_kernel, device_kernel_as_string(capture_kernel));
+    printf("Capturing sample block %d (dispatch limit: %d)\n",
+           capture_reset_counter_,
+           capture_dispatch_counter_);
+  }
+  else {
+    /* No capturing requested. */
+    return;
+  }
 
-    if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
-      if (@available(macos 10.15, *)) {
-        if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
+  /* Enable .gputrace capture for the specified DeviceKernel. */
+  MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
+  mtlCaptureScope_ = [captureManager newCaptureScopeWithDevice:mtlDevice_];
+  mtlCaptureScope_.label = [NSString stringWithFormat:@"Cycles kernel dispatch"];
+  [captureManager setDefaultCaptureScope:mtlCaptureScope_];
 
-          MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
-          captureDescriptor.captureObject = mtlCaptureScope;
-          captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
-          captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
+  label_command_encoders_ = true;
 
-          NSError *error;
-          if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
-            NSString *err = [error localizedDescription];
-            printf("Start capture failed: %s\n", [err UTF8String]);
-          }
-          else {
-            printf("Capture started (URL: %s)\n", capture_url);
-            is_capturing_to_disk = true;
-          }
+  if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
+    if (@available(macos 10.15, *)) {
+      if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
+
+        MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
+        captureDescriptor.captureObject = mtlCaptureScope_;
+        captureDescriptor.desti

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list