[Bf-blender-cvs] [2dc51fccb83] master: Fix T101787, T102786. Cycles: Improved out-of-memory messaging on Metal

Michael Jones noreply at git.blender.org
Wed Dec 7 14:56:24 CET 2022


Commit: 2dc51fccb8387467e8a012b07ab148078e7c9e50
Author: Michael Jones
Date:   Wed Dec 7 13:28:59 2022 +0000
Branches: master
https://developer.blender.org/rB2dc51fccb8387467e8a012b07ab148078e7c9e50

Fix T101787, T102786. Cycles: Improved out-of-memory messaging on Metal

This patch adds a new `max_working_set_exceeded()` check on Metal so that we can display a "System is out of GPU memory" message to the user. Without this check, rendering fails with opaque "CommandBuffer failed" errors when the resident resources exceed the working set size limit.

Likely fix for T101787 & T102786.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D16713

===================================================================

M	intern/cycles/device/metal/device_impl.h
M	intern/cycles/device/metal/device_impl.mm

===================================================================

diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 99e60d3a788..e57b8628023 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -117,6 +117,8 @@ class MetalDevice : public Device {
   /* ------------------------------------------------------------------ */
   /* low-level memory management */
 
+  bool max_working_set_exceeded(size_t safety_margin = 8 * 1024 * 1024) const;
+
   MetalMem *generic_alloc(device_memory &mem);
 
   void generic_copy_to(device_memory &mem);
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 604abc2be1a..24836e88755 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -446,6 +446,14 @@ void MetalDevice::erase_allocation(device_memory &mem)
   }
 }
 
+bool MetalDevice::max_working_set_exceeded(size_t safety_margin) const
+{
+  /* True once stats.mem_used exceeds the recommended working set size minus `safety_margin`.
+   * Over-allocating is allowed, but command buffers then fail once all resources are resident. */
+  size_t available = [mtlDevice recommendedMaxWorkingSetSize] - safety_margin;
+  return (stats.mem_used > available);
+}
+
 MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
 {
   size_t size = mem.memory_size();
@@ -523,6 +531,11 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
     mmem->use_UMA = false;
   }
 
+  if (max_working_set_exceeded()) {
+    set_error("System is out of GPU memory");
+    return nullptr;
+  }
+
   return mmem;
 }
 
@@ -921,9 +934,8 @@ void MetalDevice::tex_alloc(device_texture &mem)
               << string_human_readable_size(mem.memory_size()) << ")";
 
     mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
-    assert(mtlTexture);
-
     if (!mtlTexture) {
+      set_error("System is out of GPU memory");
       return;
     }
 
@@ -955,7 +967,10 @@ void MetalDevice::tex_alloc(device_texture &mem)
               << string_human_readable_size(mem.memory_size()) << ")";
 
     mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
-    assert(mtlTexture);
+    if (!mtlTexture) {
+      set_error("System is out of GPU memory");
+      return;
+    }
 
     [mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
                   mipmapLevel:0
@@ -1017,6 +1032,10 @@ void MetalDevice::tex_alloc(device_texture &mem)
   need_texture_info = true;
 
   texture_info[slot].data = uint64_t(slot) | (sampler_index << 32);
+
+  if (max_working_set_exceeded()) {
+    set_error("System is out of GPU memory");
+  }
 }
 
 void MetalDevice::tex_free(device_texture &mem)
@@ -1077,6 +1096,10 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
       }
     }
   }
+
+  if (max_working_set_exceeded()) {
+    set_error("System is out of GPU memory");
+  }
 }
 
 CCL_NAMESPACE_END



More information about the Bf-blender-cvs mailing list