[Bf-blender-cvs] [67f51bff7cf] temp-angavrilov: Eevee: implement conditional evaluation of Mix node branches.

Sat Feb 4 21:41:01 CET 2023

Commit: 67f51bff7cfcc31c3c974b5a81e5a23d27410841
Author: Alexander Gavrilov
Date:   Sun Oct 9 21:15:48 2022 +0300
Branches: temp-angavrilov
https://developer.blender.org/rB67f51bff7cfcc31c3c974b5a81e5a23d27410841

Eevee: implement conditional evaluation of Mix node branches.

If the material effectively combines multiple distinct materials
using some kind of mask texture, it is wasteful to evaluate all
of them when the mask fully excludes some. Cycles already supports
this optimization for Mix Shader nodes.

This implements a similar feature for the Mix Shader and Mix Color Blend
nodes in Eevee: the Mix Shader behavior matches Cycles, and color mixing
can be used for a similar purpose in NPR shaders.

To achieve that, a Conditional node type directly supported by code
generation is added. Shader nodes can add these conditionals as needed,
and the code generator partitions the node graph into a branch tree
and appropriately generates conditionals. Empty conditionals are
automatically eliminated to avoid any performance impact. This
processing is done separately for every sub-graph to minimize
dependency cross-contamination.

Differential Revision: https://developer.blender.org/D16218

===================================================================

M	source/blender/gpu/GPU_material.h
M	source/blender/gpu/intern/gpu_codegen.cc
M	source/blender/gpu/intern/gpu_node_graph.cc
M	source/blender/gpu/intern/gpu_node_graph.h
M	source/blender/nodes/shader/nodes/node_shader_mix.cc
M	source/blender/nodes/shader/nodes/node_shader_mix_shader.cc

===================================================================

diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index f9bae39b016..c6e75a78091 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -195,6 +195,51 @@ bool GPU_stack_link(GPUMaterial *mat,
                     GPUNodeStack *out,
                     ...);
 
+/** Comparison operator for conditionals. */
+typedef enum {
+  GPU_CMP_NE = 0,
+  GPU_CMP_LT,
+  GPU_CMP_LE,
+  GPU_CMP_EQ,
+  GPU_CMP_GE,
+  GPU_CMP_GT,
+} GPUComparisonOp;
+
+/**
+ * Create a runtime ternary conditional, choosing between two inputs based on
+ * comparing a scalar float input with a constant threshold.
+ *
+ * \param cmp_input: Input to compare with the threshold.
+ * \param result_type: Type of value to produce.
+ * \param if_true: Input to use when the condition is true.
+ * \param if_false: Input to use when the condition is false. If null, this signifies that
+ * the conditional is an optimization hint: the input is unused if the condition is false.
+ * Depending on context, a valid default value is used, or the conditional may be discarded
+ * if it produces no performance benefit.
+ */
+GPUNodeLink *GPU_link_conditional(GPUMaterial *mat,
+                                  GPUNodeLink *cmp_input,
+                                  GPUComparisonOp cmp,
+                                  float threshold,
+                                  eGPUType result_type,
+                                  GPUNodeLink *if_true,
+                                  GPUNodeLink *if_false);
+
+/**
+ * Introduces a predicate for evaluating a stack input only when necessary.
+ * The conditional is only added if both inputs are non-constant.
+ *
+ * \param cmp_input: Input to compare with the threshold.
+ * \param inout_if_true: Stack entry to wrap in the conditional, suppressing evaluation when false.
+ * The link field within the entry is updated in place.
+ * \return true if the conditional was actually added.
+ */
+bool GPU_stack_link_conditional(GPUMaterial *mat,
+                                GPUNodeStack *cmp_input,
+                                GPUComparisonOp cmp,
+                                float threshold,
+                                GPUNodeStack *inout_if_true);
+
 void GPU_material_output_surface(GPUMaterial *material, GPUNodeLink *link);
 void GPU_material_output_volume(GPUMaterial *material, GPUNodeLink *link);
 void GPU_material_output_displacement(GPUMaterial *material, GPUNodeLink *link);
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 38f80760d61..db63f8f19a5 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -40,6 +40,7 @@
 
 #include <cstdarg>
 #include <cstring>
+#include <map>
 
 #include <sstream>
 #include <string>
@@ -205,9 +206,25 @@ static std::ostream &operator<<(std::ostream &stream, const GPUInput *input)
   }
 }
 
-static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
+static std::ostream &operator<<(std::ostream &stream, GPUComparisonOp cmp)
 {
-  return stream << SRC_NAME("out", output, outputs, "tmp") << output->id;
+  switch (cmp) {
+    case GPU_CMP_NE:
+      return stream << "!=";
+    case GPU_CMP_LT:
+      return stream << "<";
+    case GPU_CMP_LE:
+      return stream << "<=";
+    case GPU_CMP_EQ:
+      return stream << "==";
+    case GPU_CMP_GE:
+      return stream << ">=";
+    case GPU_CMP_GT:
+      return stream << ">";
+    default:
+      BLI_assert(0);
+      return stream;
+  }
 }
 
 /* Trick type to change overload and keep a somewhat nice syntax. */
@@ -295,7 +312,6 @@ class GPUCodegen {
  private:
   void set_unique_ids();
 
-  void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
   char *graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link);
   char *graph_serialize(eGPUNodeTag tree_tag);
 
@@ -461,66 +477,255 @@ void GPUCodegen::generate_library()
   }
 }
 
-void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
+class GPUCodegenSerializer {
+  GPUNodeGraph &graph;
+  eGPUNodeTag tree_tag;
+  GPUNodeLink *output_link;
+
+  struct NodeInfo {
+    int branch;
+  };
+
+  std::vector<NodeInfo> node_info;
+
+  struct BranchInfo {
+    int parent;
+    int num_nodes;
+    std::vector<GPUNode *> nodes;
+  };
+
+  std::vector<BranchInfo> branch_info;
+
+  std::map<int, int> output_map;
+
+ private:
+  int find_remapped_output(int id);
+
+  bool remap_input_to_output(const GPUOutput *to, const GPUInput *from);
+  bool remap_output_to_input(const GPUInput *to, const GPUOutput *from);
+
+  bool is_empty_conditional(const GPUNodeConditional *cond);
+  bool is_empty_conditional(const GPUNode *node);
+
+  static eGPUType get_true_input_type(const GPUInput *input);
+  static const char *get_default_value(eGPUType type);
+
+  void node_serialize_input_decl(std::stringstream &eval_ss,
+                                 const std::string &indent,
+                                 const GPUInput *input);
+  void node_serialize_input(std::stringstream &eval_ss, const GPUInput *input);
+
+  void node_serialize_output_decl(std::stringstream &eval_ss,
+                                  const std::string &indent,
+                                  const GPUOutput *output);
+  void node_serialize_output(std::stringstream &eval_ss, const GPUOutput *output);
+
+  void node_serialize_call(std::stringstream &eval_ss,
+                           const std::string &indent,
+                           const GPUNode *node);
+  void node_serialize_cond(std::stringstream &eval_ss,
+                           const std::string &indent,
+                           const GPUNodeConditional *cond);
+
+  void branch_serialize(std::stringstream &eval_ss, const std::string &indent, BranchInfo &branch);
+
+  void build_branches();
+  void assign_node_branch(NodeInfo &info, int branch);
+  void propagate_input_branch(const GPUNode *from, const GPUNodeLink *link, int branch);
+
+ public:
+  GPUCodegenSerializer(GPUNodeGraph &graph,
+                       eGPUNodeTag tree_tag,
+                       GPUNodeLink *output_link = nullptr)
+      : graph(graph),
+        tree_tag(tree_tag),
+        output_link(output_link),
+        node_info(graph.num_nodes, NodeInfo{-1}),
+        branch_info(graph.num_branches, BranchInfo{-1, 0})
+  {
+  }
+
+  void serialize(std::stringstream &eval_ss);
+};
+
+int GPUCodegenSerializer::find_remapped_output(int id)
+{
+  std::map<int, int>::iterator it = output_map.find(id);
+
+  return (it != output_map.end()) ? it->second : id;
+}
+
+bool GPUCodegenSerializer::remap_input_to_output(const GPUOutput *to, const GPUInput *from)
+{
+  /* Alias the backing storage of the input with the output. */
+  if (from->source == GPU_SOURCE_OUTPUT && from->link->output->type == to->type) {
+    BLI_assert(output_map.count(from->link->output->id) == 0);
+
+    output_map[from->link->output->id] = find_remapped_output(to->id);
+
+    /* Pass through blank conditionals. */
+    const GPUNode *node = from->link->output->node;
+
+    if (is_empty_conditional(node)) {
+      GPUInput *in_true = static_cast<GPUInput *>(node->inputs.first)->next;
+      remap_input_to_output(to, in_true);
+    }
+
+    return true;
+  }
+
+  return false;
+}
+
+bool GPUCodegenSerializer::remap_output_to_input(const GPUInput *to, const GPUOutput *from)
+{
+  /* Alias the backing storage of the output with the input. */
+  if (to->source == GPU_SOURCE_OUTPUT && to->link->output->type == from->type) {
+    int new_id = find_remapped_output(to->link->output->id);
+    int cur_id = find_remapped_output(from->id);
+
+    /* Already remapped to the same ID somehow. */
+    if (cur_id == new_id) {
+      return true;
+    }
+
+    /* Only remap if not already remapped. */
+    if (cur_id == from->id) {
+      output_map[from->id] = new_id;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void GPUCodegenSerializer::node_serialize_input_decl(std::stringstream &eval_ss,
+                                                     const std::string &indent,
+                                                     const GPUInput *input)
+{
+  switch (input->source) {
+    case GPU_SOURCE_FUNCTION_CALL:
+      eval_ss << indent << input->type << " " << input << "; " << input->function_call << input
+              << ");\n";
+      break;
+    case GPU_SOURCE_STRUCT:
+      eval_ss << indent << input->type << " " << input << " = CLOSURE_DEFAULT;\n";
+      break;
+    case GPU_SOURCE_CONSTANT:
+      eval_ss << indent << input->type << " " << input << " = " << (GPUConstant *)input << ";\n";
+      break;
+    default:
+      break;
+  }
+}
+
+eGPUType GPUCodegenSerializer::get_true_input_type(const GPUInput *input)
+{
+  switch (input->source) {
+    case GPU_SOURCE_ATTR:
+      return input->attr->gputype;
+    case GPU_SOURCE_OUTPUT:
+      return input->link->output->type;
+    default:
+      return input->type;
+  }
+}
+
+const char *GPUCodegenSerializer::get_default_value(eGPUType type)
+{
+  switch (type) {
+    case GPU_FLOAT:
+      return "0.0";
+    case GPU_VEC2:
+      return "vec2(0.0)";
+    case GPU_VEC3:
+      return "vec3(0.0)";
+    case GPU_VEC4:
+      return "vec4(0.0)";
+    case GPU_MAT3:
+      return "mat3(1.0)";
+    case GPU_MAT4:
+      return "mat4(1.0)";
+    case GPU_CLOSURE:
+      return "CLOSURE_DEFAULT";
+    default:
+      return nullptr;
+  }
+}
+
+void GPUCodegenSerializer::node_serialize_input(std::stringstream &eval_ss, const GPUInput *input)
+{
+  switch (input->source) {
+    case GPU_SOURCE_OUTPUT:
+    case GPU_SOURCE_ATTR: {
+      /* These inputs can have non matching types. Do conversion. */
+      eGPUType to = input->type;
+      eGPUType from = get_true_input_type(input);
+
+      if (from != to) {
+        /* Use defines declared inside codegen_lib (i.e: vec4_from_float). */
+        eval_ss << to << "_from_" << from << "(";
+      }
+
+      if (input->source == GPU_SOURCE_ATTR) {
+        eval_ss << input;
+      }
+      else {
+        node_serialize_output(eval_ss, input->link->output);
+      }
+
+      if (from != to) {
+        eval_ss << ")";
+      }
+      break;
+    }


@@ Diff output truncated at 10240 characters. @@