[Bf-blender-cvs] [73a2c79c074] master: Functions: free memory of unused sockets earlier

Jacques Lucke noreply at git.blender.org
Sun Jan 8 21:09:50 CET 2023


Commit: 73a2c79c074ae0e3f5cbc970df0d502672aa2986
Author: Jacques Lucke
Date:   Sun Jan 8 21:09:33 2023 +0100
Branches: master
https://developer.blender.org/rB73a2c79c074ae0e3f5cbc970df0d502672aa2986

Functions: free memory of unused sockets earlier

During geometry nodes evaluation, some sockets can be determined
to be unused, for example based on the condition input of a switch node.
Once a socket is determined to be unused, that information has to be
propagated backwards through the tree to free any memory that may
already have been reserved for those sockets. This happened before
this commit as well, but in a less efficient way.
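
To illustrate the idea, here is a minimal standalone C++ sketch (all
names hypothetical, not Blender code): when the last user of a node's
output goes away, the buffer reserved for that output is freed
immediately, and the unused state is propagated backwards to the
node's inputs.

    #include <memory>
    #include <vector>

    struct SketchNode {
      std::vector<SketchNode *> inputs;  /* Nodes feeding this node. */
      std::unique_ptr<int[]> buffer;     /* Memory reserved for an output. */
      int users = 0;                     /* Downstream sockets still needing it. */
    };

    static void mark_output_unused(SketchNode &node)
    {
      if (--node.users > 0) {
        return; /* Another downstream socket still needs this output. */
      }
      node.buffer.reset(); /* No user left: free the memory right away... */
      for (SketchNode *input : node.inputs) {
        mark_output_unused(*input); /* ...and propagate backwards. */
      }
    }

    int main()
    {
      SketchNode a, b;
      a.buffer = std::make_unique<int[]>(1024);
      a.users = 1; /* Only b uses a's output. */
      b.buffer = std::make_unique<int[]>(1024);
      b.users = 1;
      b.inputs.push_back(&a);
      /* E.g. a switch node's condition made b's output unused: */
      mark_output_unused(b); /* Frees b's buffer, then a's. */
    }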

Determining early that sockets are unused is beneficial because it
improves memory reuse and avoids copy-on-write copies caused by shared
data. With this commit, nodes that are scheduled because an output became
unused take priority over nodes scheduled for other reasons.
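
The priority behavior is implemented by the new ScheduledNodes struct
in the diff below. The following standalone sketch (std::vector in
place of blender::Vector, hypothetical node names) shows the resulting
pop order:

    #include <cstdio>
    #include <vector>

    struct TwoStackQueue {
      std::vector<const char *> priority_;
      std::vector<const char *> normal_;

      void schedule(const char *node, const bool is_priority)
      {
        (is_priority ? priority_ : normal_).push_back(node);
      }

      const char *pop_next_node()
      {
        /* Drain the priority stack before the normal one. */
        std::vector<const char *> &stack = priority_.empty() ? normal_ : priority_;
        if (stack.empty()) {
          return nullptr;
        }
        const char *node = stack.back();
        stack.pop_back();
        return node;
      }
    };

    int main()
    {
      TwoStackQueue queue;
      queue.schedule("regular_node", false);
      queue.schedule("unused_output_node", true);
      /* Prints unused_output_node first, although it was scheduled last. */
      while (const char *node = queue.pop_next_node()) {
        std::printf("%s\n", node);
      }
    }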

===================================================================

M	source/blender/functions/intern/lazy_function_graph_executor.cc

===================================================================

diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc
index c7688f61460..4270885fc98 100644
--- a/source/blender/functions/intern/lazy_function_graph_executor.cc
+++ b/source/blender/functions/intern/lazy_function_graph_executor.cc
@@ -206,6 +206,44 @@ struct LockedNode {
 class Executor;
 class GraphExecutorLFParams;
 
+/**
+ * Keeps track of nodes that are currently scheduled on a thread. A node can only be scheduled by
+ * one thread at the same time.
+ */
+struct ScheduledNodes {
+ private:
+  /** Use two stacks of scheduled nodes for different priorities. */
+  Vector<const FunctionNode *> priority_;
+  Vector<const FunctionNode *> normal_;
+
+ public:
+  void schedule(const FunctionNode &node, const bool is_priority)
+  {
+    if (is_priority) {
+      this->priority_.append(&node);
+    }
+    else {
+      this->normal_.append(&node);
+    }
+  }
+
+  const FunctionNode *pop_next_node()
+  {
+    if (!this->priority_.is_empty()) {
+      return this->priority_.pop_last();
+    }
+    if (!this->normal_.is_empty()) {
+      return this->normal_.pop_last();
+    }
+    return nullptr;
+  }
+
+  bool is_empty() const
+  {
+    return this->priority_.is_empty() && this->normal_.is_empty();
+  }
+};
+
 struct CurrentTask {
   /**
    * Mutex used to protect #scheduled_nodes when the executor uses multi-threading.
@@ -214,7 +252,7 @@ struct CurrentTask {
   /**
    * Nodes that have been scheduled to execute next.
    */
-  Vector<const FunctionNode *> scheduled_nodes;
+  ScheduledNodes scheduled_nodes;
   /**
    * Makes it cheaper to check if there are any scheduled nodes because it avoids locking the
    * mutex.
@@ -330,7 +368,7 @@ class Executor {
       this->schedule_side_effect_nodes(side_effect_nodes, current_task);
     }
 
-    this->schedule_newly_requested_outputs(current_task);
+    this->schedule_for_new_output_usages(current_task);
     this->forward_newly_provided_inputs(current_task);
 
     this->run_task(current_task);
@@ -394,20 +432,29 @@ class Executor {
     std::destroy_at(&node_state);
   }
 
-  void schedule_newly_requested_outputs(CurrentTask &current_task)
+  /**
+   * When the usage of output values changed, propagate that information backwards.
+   */
+  void schedule_for_new_output_usages(CurrentTask &current_task)
   {
     for (const int graph_output_index : self_.graph_outputs_.index_range()) {
-      if (params_->get_output_usage(graph_output_index) != ValueUsage::Used) {
+      if (params_->output_was_set(graph_output_index)) {
         continue;
       }
-      if (params_->output_was_set(graph_output_index)) {
+      const ValueUsage output_usage = params_->get_output_usage(graph_output_index);
+      if (output_usage == ValueUsage::Maybe) {
         continue;
       }
       const InputSocket &socket = *self_.graph_outputs_[graph_output_index];
       const Node &node = socket.node();
       NodeState &node_state = *node_states_[node.index_in_graph()];
       this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
-        this->set_input_required(locked_node, socket);
+        if (output_usage == ValueUsage::Used) {
+          this->set_input_required(locked_node, socket);
+        }
+        else {
+          this->set_input_unused(locked_node, socket);
+        }
       });
     }
   }
@@ -532,7 +579,7 @@ class Executor {
     for (const FunctionNode *node : side_effect_nodes) {
       NodeState &node_state = *node_states_[node->index_in_graph()];
       this->with_locked_node(*node, node_state, current_task, [&](LockedNode &locked_node) {
-        this->schedule_node(locked_node, current_task);
+        this->schedule_node(locked_node, current_task, false);
       });
     }
   }
@@ -603,7 +650,7 @@ class Executor {
         return;
       }
       output_state.usage = ValueUsage::Used;
-      this->schedule_node(locked_node, current_task);
+      this->schedule_node(locked_node, current_task, false);
     });
   }
 
@@ -625,14 +672,16 @@ class Executor {
             params_->set_input_unused(graph_input_index);
           }
           else {
-            this->schedule_node(locked_node, current_task);
+            /* Schedule as priority node. This allows freeing up memory earlier which results in
+             * better memory reuse and less copy-on-write copies caused by shared data. */
+            this->schedule_node(locked_node, current_task, true);
           }
         }
       }
     });
   }
 
-  void schedule_node(LockedNode &locked_node, CurrentTask &current_task)
+  void schedule_node(LockedNode &locked_node, CurrentTask &current_task, const bool is_priority)
   {
     BLI_assert(locked_node.node.is_function());
     switch (locked_node.node_state.schedule_state) {
@@ -641,10 +690,10 @@ class Executor {
         const FunctionNode &node = static_cast<const FunctionNode &>(locked_node.node);
         if (this->use_multi_threading()) {
           std::lock_guard lock{current_task.mutex};
-          current_task.scheduled_nodes.append(&node);
+          current_task.scheduled_nodes.schedule(node, is_priority);
         }
         else {
-          current_task.scheduled_nodes.append(&node);
+          current_task.scheduled_nodes.schedule(node, is_priority);
         }
         current_task.has_scheduled_nodes.store(true, std::memory_order_relaxed);
         break;
@@ -700,12 +749,11 @@ class Executor {
 
   void run_task(CurrentTask &current_task)
   {
-    while (!current_task.scheduled_nodes.is_empty()) {
-      const FunctionNode &node = *current_task.scheduled_nodes.pop_last();
+    while (const FunctionNode *node = current_task.scheduled_nodes.pop_next_node()) {
       if (current_task.scheduled_nodes.is_empty()) {
         current_task.has_scheduled_nodes.store(false, std::memory_order_relaxed);
       }
-      this->run_node_task(node, current_task);
+      this->run_node_task(*node, current_task);
     }
   }
 
@@ -814,7 +862,7 @@ class Executor {
                                         NodeScheduleState::RunningAndRescheduled;
       node_state.schedule_state = NodeScheduleState::NotScheduled;
       if (reschedule_requested && !node_state.node_has_finished) {
-        this->schedule_node(locked_node, current_task);
+        this->schedule_node(locked_node, current_task, false);
       }
     });
   }
@@ -1061,7 +1109,7 @@ class Executor {
           (locked_node.node.is_function() && static_cast<const FunctionNode &>(locked_node.node)
                                                  .function()
                                                  .allow_missing_requested_inputs())) {
-        this->schedule_node(locked_node, current_task);
+        this->schedule_node(locked_node, current_task, false);
       }
     }
   }
@@ -1120,14 +1168,13 @@ class Executor {
   void move_scheduled_nodes_to_task_pool(CurrentTask &current_task)
   {
     BLI_assert(this->use_multi_threading());
-    using FunctionNodeVector = Vector<const FunctionNode *>;
-    FunctionNodeVector *nodes = MEM_new<FunctionNodeVector>(__func__);
+    ScheduledNodes *scheduled_nodes = MEM_new<ScheduledNodes>(__func__);
     {
       std::lock_guard lock{current_task.mutex};
       if (current_task.scheduled_nodes.is_empty()) {
         return;
       }
-      *nodes = std::move(current_task.scheduled_nodes);
+      *scheduled_nodes = std::move(current_task.scheduled_nodes);
       current_task.has_scheduled_nodes.store(false, std::memory_order_relaxed);
     }
     /* All nodes are pushed as a single task in the pool. This avoids unnecessary threading
@@ -1136,17 +1183,15 @@ class Executor {
         task_pool_.load(),
         [](TaskPool *pool, void *data) {
           Executor &executor = *static_cast<Executor *>(BLI_task_pool_user_data(pool));
-          FunctionNodeVector &nodes = *static_cast<FunctionNodeVector *>(data);
+          ScheduledNodes &scheduled_nodes = *static_cast<ScheduledNodes *>(data);
           CurrentTask new_current_task;
-          new_current_task.scheduled_nodes = std::move(nodes);
+          new_current_task.scheduled_nodes = std::move(scheduled_nodes);
           new_current_task.has_scheduled_nodes.store(true, std::memory_order_relaxed);
           executor.run_task(new_current_task);
         },
-        nodes,
+        scheduled_nodes,
         true,
-        [](TaskPool * /*pool*/, void *data) {
-          MEM_delete(static_cast<FunctionNodeVector *>(data));
-        });
+        [](TaskPool * /*pool*/, void *data) { MEM_delete(static_cast<ScheduledNodes *>(data)); });
   }
 
   LinearAllocator<> &get_main_or_local_allocator()
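
For context on the last hunk: move_scheduled_nodes_to_task_pool moves
the locally collected ScheduledNodes onto the heap and pushes them to
the task pool as a single task, whose free function deletes them
afterwards. A standalone sketch of that handoff pattern, with
std::thread standing in for BLI_task_pool and plain new/delete for
MEM_new/MEM_delete (all names hypothetical):

    #include <cstdio>
    #include <thread>
    #include <utility>
    #include <vector>

    struct Batch {
      std::vector<const char *> nodes;
    };

    static void run_task(Batch &batch)
    {
      for (const char *node : batch.nodes) {
        std::printf("running %s\n", node);
      }
    }

    int main()
    {
      Batch local;
      local.nodes = {"node_a", "node_b"};
      /* Move the batch onto the heap so it outlives the current scope. */
      Batch *heap_batch = new Batch(std::move(local));
      /* All nodes are pushed as a single task to avoid unnecessary
       * threading overhead; the task frees the batch when done. */
      std::thread worker([heap_batch]() {
        run_task(*heap_batch);
        delete heap_batch;
      });
      worker.join();
    }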


