[Bf-blender-cvs] [19e0b60f3e1] master: Cycles: MetalDeviceQueue - capture of multiple dispatches, and some tidying
Michael Jones
noreply at git.blender.org
Mon Jun 13 14:42:08 CEST 2022
Commit: 19e0b60f3e1270a34b52d7829169ab8af6c816cb
Author: Michael Jones
Date: Mon Jun 13 12:33:43 2022 +0100
Branches: master
https://developer.blender.org/rB19e0b60f3e1270a34b52d7829169ab8af6c816cb
Cycles: MetalDeviceQueue - capture of multiple dispatches, and some tidying
This patch adds a new mode of gpu capture (env var `CYCLES_DEBUG_METAL_CAPTURE_SAMPLES`) to capture a block of dispatches between "reset" calls. It also fixes member data naming inconsistencies and adds some missing OS version checks.
Screenshot showing .gputrace capture in Xcode 14.0 beta (using `CYCLES_DEBUG_METAL_CAPTURE_SAMPLES="1"` and `CYCLES_DEBUG_METAL_CAPTURE_LIMIT="10"`):
{F13155703}
Reviewed By: sergey, brecht
Differential Revision: https://developer.blender.org/D15179
===================================================================
M intern/cycles/device/metal/queue.h
M intern/cycles/device/metal/queue.mm
===================================================================
diff --git a/intern/cycles/device/metal/queue.h b/intern/cycles/device/metal/queue.h
index de20514de0b..b0bd487c86d 100644
--- a/intern/cycles/device/metal/queue.h
+++ b/intern/cycles/device/metal/queue.h
@@ -38,45 +38,50 @@ class MetalDeviceQueue : public DeviceQueue {
virtual void copy_from_device(device_memory &mem) override;
protected:
+ void setup_capture();
+ void update_capture(DeviceKernel kernel);
+ void begin_capture();
+ void end_capture();
void prepare_resources(DeviceKernel kernel);
id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
id<MTLBlitCommandEncoder> get_blit_encoder();
- MetalDevice *metal_device;
- MetalBufferPool temp_buffer_pool;
+ MetalDevice *metal_device_;
+ MetalBufferPool temp_buffer_pool_;
API_AVAILABLE(macos(11.0), ios(14.0))
- MTLCommandBufferDescriptor *command_buffer_desc = nullptr;
- id<MTLDevice> mtlDevice = nil;
- id<MTLCommandQueue> mtlCommandQueue = nil;
- id<MTLCommandBuffer> mtlCommandBuffer = nil;
- id<MTLComputeCommandEncoder> mtlComputeEncoder = nil;
- id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
+ MTLCommandBufferDescriptor *command_buffer_desc_ = nullptr;
+ id<MTLDevice> mtlDevice_ = nil;
+ id<MTLCommandQueue> mtlCommandQueue_ = nil;
+ id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
+ id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
+ id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
API_AVAILABLE(macos(10.14), ios(14.0))
- id<MTLSharedEvent> shared_event = nil;
+ id<MTLSharedEvent> shared_event_ = nil;
API_AVAILABLE(macos(10.14), ios(14.0))
- MTLSharedEventListener *shared_event_listener = nil;
+ MTLSharedEventListener *shared_event_listener_ = nil;
- dispatch_queue_t event_queue;
- dispatch_semaphore_t wait_semaphore;
+ dispatch_queue_t event_queue_;
+ dispatch_semaphore_t wait_semaphore_;
struct CopyBack {
void *host_pointer;
void *gpu_mem;
uint64_t size;
};
- std::vector<CopyBack> copy_back_mem;
+ std::vector<CopyBack> copy_back_mem_;
- uint64_t shared_event_id;
- uint64_t command_buffers_submitted = 0;
- uint64_t command_buffers_completed = 0;
- Stats &stats;
+ uint64_t shared_event_id_;
+ uint64_t command_buffers_submitted_ = 0;
+ uint64_t command_buffers_completed_ = 0;
+ Stats &stats_;
void close_compute_encoder();
void close_blit_encoder();
- bool verbose_tracing = false;
+ bool verbose_tracing_ = false;
+ bool label_command_encoders_ = false;
/* Per-kernel profiling (see CYCLES_METAL_PROFILING). */
@@ -85,28 +90,30 @@ class MetalDeviceQueue : public DeviceQueue {
int work_size;
uint64_t timing_id;
};
- std::vector<TimingData> command_encoder_labels;
- id<MTLSharedEvent> timing_shared_event = nil;
- uint64_t timing_shared_event_id;
- uint64_t command_buffer_start_timing_id;
+ std::vector<TimingData> command_encoder_labels_;
+ API_AVAILABLE(macos(10.14), ios(14.0))
+ id<MTLSharedEvent> timing_shared_event_ = nil;
+ uint64_t timing_shared_event_id_;
+ uint64_t command_buffer_start_timing_id_;
struct TimingStats {
double total_time = 0.0;
uint64_t total_work_size = 0;
uint64_t num_dispatches = 0;
};
- TimingStats timing_stats[DEVICE_KERNEL_NUM];
- double last_completion_time = 0.0;
+ TimingStats timing_stats_[DEVICE_KERNEL_NUM];
+ double last_completion_time_ = 0.0;
/* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */
- id<MTLCaptureScope> mtlCaptureScope = nil;
- DeviceKernel capture_kernel;
- int capture_dispatch = 0;
- int capture_dispatch_counter = 0;
- bool is_capturing = false;
- bool is_capturing_to_disk = false;
- bool has_captured_to_disk = false;
+ id<MTLCaptureScope> mtlCaptureScope_ = nil;
+ DeviceKernel capture_kernel_;
+ int capture_dispatch_counter_ = 0;
+ bool capture_samples_ = false;
+ int capture_reset_counter_ = 0;
+ bool is_capturing_ = false;
+ bool is_capturing_to_disk_ = false;
+ bool has_captured_to_disk_ = false;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 9d8625e1455..0e260886abb 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -17,79 +17,180 @@ CCL_NAMESPACE_BEGIN
/* MetalDeviceQueue */
MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
- : DeviceQueue(device), metal_device(device), stats(device->stats)
+ : DeviceQueue(device), metal_device_(device), stats_(device->stats)
{
if (@available(macos 11.0, *)) {
- command_buffer_desc = [[MTLCommandBufferDescriptor alloc] init];
- command_buffer_desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
+ command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc] init];
+ command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
}
- mtlDevice = device->mtlDevice;
- mtlCommandQueue = [mtlDevice newCommandQueue];
+ mtlDevice_ = device->mtlDevice;
+ mtlCommandQueue_ = [mtlDevice_ newCommandQueue];
if (@available(macos 10.14, *)) {
- shared_event = [mtlDevice newSharedEvent];
- shared_event_id = 1;
+ shared_event_ = [mtlDevice_ newSharedEvent];
+ shared_event_id_ = 1;
/* Shareable event listener */
- event_queue = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
- shared_event_listener = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue];
+ event_queue_ = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
+ shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
}
- wait_semaphore = dispatch_semaphore_create(0);
+ wait_semaphore_ = dispatch_semaphore_create(0);
if (@available(macos 10.14, *)) {
if (getenv("CYCLES_METAL_PROFILING")) {
/* Enable per-kernel timing breakdown (shown at end of render). */
- timing_shared_event = [mtlDevice newSharedEvent];
+ timing_shared_event_ = [mtlDevice_ newSharedEvent];
+ label_command_encoders_ = true;
}
if (getenv("CYCLES_METAL_DEBUG")) {
/* Enable very verbose tracing (shows every dispatch). */
- verbose_tracing = true;
+ verbose_tracing_ = true;
+ label_command_encoders_ = true;
}
- timing_shared_event_id = 1;
+ timing_shared_event_id_ = 1;
}
- capture_kernel = DeviceKernel(-1);
+ setup_capture();
+}
+
+void MetalDeviceQueue::setup_capture()
+{
+ capture_kernel_ = DeviceKernel(-1);
+
if (auto capture_kernel_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
- /* Enable .gputrace capture for the specified DeviceKernel. */
- MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
- mtlCaptureScope = [captureManager newCaptureScopeWithDevice:mtlDevice];
- mtlCaptureScope.label = [NSString stringWithFormat:@"Cycles kernel dispatch"];
- [captureManager setDefaultCaptureScope:mtlCaptureScope];
+ /* CYCLES_DEBUG_METAL_CAPTURE_KERNEL captures a single dispatch of the specified kernel. */
+ capture_kernel_ = DeviceKernel(atoi(capture_kernel_str));
+ printf("Capture kernel: %d = %s\n", capture_kernel_, device_kernel_as_string(capture_kernel_));
- capture_dispatch = -1;
+ capture_dispatch_counter_ = 0;
if (auto capture_dispatch_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_DISPATCH")) {
- capture_dispatch = atoi(capture_dispatch_str);
- capture_dispatch_counter = 0;
+ capture_dispatch_counter_ = atoi(capture_dispatch_str);
+
+ printf("Capture dispatch number %d\n", capture_dispatch_counter_);
+ }
+ }
+ else if (auto capture_samples_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_SAMPLES")) {
+ /* CYCLES_DEBUG_METAL_CAPTURE_SAMPLES captures a block of dispatches from reset#(N) to
+ * reset#(N+1). */
+ capture_samples_ = true;
+ capture_reset_counter_ = atoi(capture_samples_str);
+
+ capture_dispatch_counter_ = INT_MAX;
+ if (auto capture_limit_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_LIMIT")) {
+ /* CYCLES_DEBUG_METAL_CAPTURE_LIMIT sets the maximum number of dispatches to capture. */
+ capture_dispatch_counter_ = atoi(capture_limit_str);
}
- capture_kernel = DeviceKernel(atoi(capture_kernel_str));
- printf("Capture kernel: %d = %s\n", capture_kernel, device_kernel_as_string(capture_kernel));
+ printf("Capturing sample block %d (dispatch limit: %d)\n",
+ capture_reset_counter_,
+ capture_dispatch_counter_);
+ }
+ else {
+ /* No capturing requested. */
+ return;
+ }
- if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
- if (@available(macos 10.15, *)) {
- if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
+ /* Enable .gputrace capture for the specified DeviceKernel. */
+ MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
+ mtlCaptureScope_ = [captureManager newCaptureScopeWithDevice:mtlDevice_];
+ mtlCaptureScope_.label = [NSString stringWithFormat:@"Cycles kernel dispatch"];
+ [captureManager setDefaultCaptureScope:mtlCaptureScope_];
- MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
- captureDescriptor.captureObject = mtlCaptureScope;
- captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
- captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
+ label_command_encoders_ = true;
- NSError *error;
- if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
- NSString *err = [error localizedDescription];
- printf("Start capture failed: %s\n", [err UTF8String]);
- }
- else {
- printf("Capture started (URL: %s)\n", capture_url);
- is_capturing_to_disk = true;
- }
+ if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
+ if (@available(macos 10.15, *)) {
+ if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
+
+ MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
+ captureDescriptor.captureObject = mtlCaptureScope_;
+ captureDescriptor.desti
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list