[Bf-blender-cvs] [c4fa17c67a7] master: Cycles: More accurate volume stack size calculation

Sergey Sharybin noreply at git.blender.org
Mon Oct 25 09:48:33 CEST 2021


Commit: c4fa17c67a7c28d34abe0db2da8783f4c5ab2a8f
Author: Sergey Sharybin
Date:   Fri Oct 22 14:20:22 2021 +0200
Branches: master
https://developer.blender.org/rBc4fa17c67a7c28d34abe0db2da8783f4c5ab2a8f

Cycles: More accurate volume stack size calculation

The idea is to allow having a lot of non-intersecting volumes without
allocating the volume stack to its full size.

With the F11285472 file the memory usage goes from 1400 MiB to 1000 MiB
on the RTX6000 card.

The fix makes it so the integrator work memory is allocated after the
scene update. This has the downside of a possibly less efficient update
when some textures don't fit into GPU memory, but has the advantage of
making the proper decision and having a clear and consistent internal API.

Fixes memory part of T92014.

Differential Revision: https://developer.blender.org/D12966

===================================================================

M	intern/cycles/render/object.cpp
M	intern/cycles/render/object.h
M	intern/cycles/render/scene.cpp
M	intern/cycles/render/scene.h
M	intern/cycles/render/session.cpp

===================================================================

diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 6d5c537e33d..330ae5ec0fc 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -114,6 +114,7 @@ Object::Object() : Node(get_node_type())
   particle_index = 0;
   attr_map_offset = 0;
   bounds = BoundBox::empty;
+  intersects_volume = false;
 }
 
 Object::~Object()
@@ -367,22 +368,6 @@ float Object::compute_volume_step_size() const
   return step_size;
 }
 
-bool Object::check_is_volume() const
-{
-  if (geometry->geometry_type == Geometry::VOLUME) {
-    return true;
-  }
-
-  for (Node *node : get_geometry()->get_used_shaders()) {
-    const Shader *shader = static_cast<const Shader *>(node);
-    if (shader->has_volume) {
-      return true;
-    }
-  }
-
-  return false;
-}
-
 int Object::get_device_index() const
 {
   return index;
@@ -775,12 +760,14 @@ void ObjectManager::device_update_flags(
     }
 
     if (bounds_valid) {
+      object->intersects_volume = false;
       foreach (Object *volume_object, volume_objects) {
         if (object == volume_object) {
           continue;
         }
         if (object->bounds.intersects(volume_object->bounds)) {
           object_flag[object->index] |= SD_OBJECT_INTERSECTS_VOLUME;
+          object->intersects_volume = true;
           break;
         }
       }
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 6920f2c1f1c..ad312835789 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -75,6 +75,9 @@ class Object : public Node {
 
   NODE_SOCKET_API(float, ao_distance)
 
+  /* Set during device update. */
+  bool intersects_volume;
+
   Object();
   ~Object();
 
@@ -109,13 +112,6 @@ class Object : public Node {
   /* Compute step size from attributes, shaders, transforms. */
   float compute_volume_step_size() const;
 
-  /* Check whether this object requires volume sampling (and hence might require space in the
-   * volume stack).
-   *
-   * Note that this is a naive iteration over shaders, which allows to access information prior
-   * to `scene_update()`. */
-  bool check_is_volume() const;
-
  protected:
   /* Specifies the position of the object in scene->objects and
    * in the device vectors. Gets set in device_update. */
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index da6666babe6..fd19ab2efda 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -360,6 +360,8 @@ void Scene::device_update(Device *device_, Progress &progress)
     return;
 
   if (device->have_error() == false) {
+    dscene.data.volume_stack_size = get_volume_stack_size();
+
     progress.set_status("Updating Device", "Writing constant memory");
     device->const_copy_to("__data", &dscene.data, sizeof(dscene.data));
   }
@@ -527,8 +529,6 @@ void Scene::update_kernel_features()
   const uint max_closures = (params.background) ? get_max_closure_count() : MAX_CLOSURE;
   dscene.data.max_closures = max_closures;
   dscene.data.max_shaders = shaders.size();
-
-  dscene.data.volume_stack_size = get_volume_stack_size();
 }
 
 bool Scene::update(Progress &progress)
@@ -586,6 +586,8 @@ bool Scene::load_kernels(Progress &progress, bool lock_scene)
     scene_lock = thread_scoped_lock(mutex);
   }
 
+  update_kernel_features();
+
   const uint kernel_features = dscene.data.kernel_features;
 
   if (!kernels_loaded || loaded_kernel_features != kernel_features) {
@@ -656,11 +658,26 @@ int Scene::get_volume_stack_size() const
 
   /* Quick non-expensive check. Can over-estimate maximum possible nested level, but does not
    * require expensive calculation during pre-processing. */
+  bool has_volume_object = false;
   for (const Object *object : objects) {
-    if (object->check_is_volume()) {
+    if (!object->get_geometry()->has_volume) {
+      continue;
+    }
+
+    if (object->intersects_volume) {
+      /* Object intersects another volume, assume it's possible to go deeper in the stack. */
+      /* TODO(sergey): This might count nesting twice (A intersects B and B intersects A), but
+       * can't think of a computationally cheap algorithm. Dividing by 2 doesn't work because of
+       * Venn diagram example with 3 circles. */
+      ++volume_stack_size;
+    }
+    else if (!has_volume_object) {
+      /* Allocate space for at least one volume object. */
       ++volume_stack_size;
     }
 
+    has_volume_object = true;
+
     if (volume_stack_size == MAX_VOLUME_STACK_SIZE) {
       break;
     }
@@ -668,6 +685,8 @@ int Scene::get_volume_stack_size() const
 
   volume_stack_size = min(volume_stack_size, MAX_VOLUME_STACK_SIZE);
 
+  VLOG(3) << "Detected required volume stack size " << volume_stack_size;
+
   return volume_stack_size;
 }
 
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 001da31e893..de7e3c8a99f 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -270,7 +270,6 @@ class Scene : public NodeOwner {
 
   void enable_update_stats();
 
-  void update_kernel_features();
   bool update(Progress &progress);
 
   bool has_shadow_catcher();
@@ -333,6 +332,7 @@ class Scene : public NodeOwner {
   bool kernels_loaded;
   uint loaded_kernel_features;
 
+  void update_kernel_features();
   bool load_kernels(Progress &progress, bool lock_scene = true);
 
   bool has_shadow_catcher_ = false;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index a18c61599c2..7fba7ce7552 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -539,19 +539,12 @@ bool Session::update_scene(int width, int height)
   Camera *cam = scene->camera;
   cam->set_screen_size(width, height);
 
-  /* First detect which kernel features are used and allocate working memory.
-   * This helps estimate how may device memory is available for the scene and
-   * how much we need to allocate on the host instead. */
-  scene->update_kernel_features();
+  const bool scene_update_result = scene->update(progress);
 
   path_trace_->load_kernels();
   path_trace_->alloc_work_memory();
 
-  if (scene->update(progress)) {
-    return true;
-  }
-
-  return false;
+  return scene_update_result;
 }
 
 static string status_append(const string &status, const string &suffix)



More information about the Bf-blender-cvs mailing list