[Bf-blender-cvs] [b8bd304bd45] master: Geometry Nodes: speedup Separate XYZ node
Jacques Lucke
noreply at git.blender.org
Sat Jun 18 13:42:41 CEST 2022
Commit: b8bd304bd45397b8c5a5ce850c4ceb2fdefe4961
Author: Jacques Lucke
Date: Sat Jun 18 13:32:46 2022 +0200
Branches: master
https://developer.blender.org/rBb8bd304bd45397b8c5a5ce850c4ceb2fdefe4961
Geometry Nodes: speedup Separate XYZ node
This speeds up the node ~20% in common cases, e.g. when only the
X axis is used. The main optimization comes from not writing to memory
that's not used afterwards anymore anyway.
The "optimal code" for just extracting the x axis in a separate loop was
not faster for me. That indicates that the node is bottlenecked by
memory bandwidth, which seems reasonable.
===================================================================
M source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
===================================================================
diff --git a/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc b/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
index 94a6febe92e..d4413036555 100644
--- a/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
+++ b/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
@@ -48,16 +48,36 @@ class MF_SeparateXYZ : public fn::MultiFunction {
void call(IndexMask mask, fn::MFParams params, fn::MFContext UNUSED(context)) const override
{
const VArray<float3> &vectors = params.readonly_single_input<float3>(0, "XYZ");
- MutableSpan<float> xs = params.uninitialized_single_output<float>(1, "X");
- MutableSpan<float> ys = params.uninitialized_single_output<float>(2, "Y");
- MutableSpan<float> zs = params.uninitialized_single_output<float>(3, "Z");
-
- for (int64_t i : mask) {
- float3 xyz = vectors[i];
- xs[i] = xyz.x;
- ys[i] = xyz.y;
- zs[i] = xyz.z;
+ MutableSpan<float> xs = params.uninitialized_single_output_if_required<float>(1, "X");
+ MutableSpan<float> ys = params.uninitialized_single_output_if_required<float>(2, "Y");
+ MutableSpan<float> zs = params.uninitialized_single_output_if_required<float>(3, "Z");
+
+ std::array<MutableSpan<float>, 3> outputs = {xs, ys, zs};
+ Vector<int> used_outputs;
+ if (!xs.is_empty()) {
+ used_outputs.append(0);
}
+ if (!ys.is_empty()) {
+ used_outputs.append(1);
+ }
+ if (!zs.is_empty()) {
+ used_outputs.append(2);
+ }
+
+ devirtualize_varray(vectors, [&](auto vectors) {
+ mask.to_best_mask_type([&](auto mask) {
+ const int used_outputs_num = used_outputs.size();
+ const int *used_outputs_data = used_outputs.data();
+
+ for (const int64_t i : mask) {
+ const float3 &vector = vectors[i];
+ for (const int out_i : IndexRange(used_outputs_num)) {
+ const int coordinate = used_outputs_data[out_i];
+ outputs[coordinate][i] = vector[coordinate];
+ }
+ }
+ });
+ });
}
};
More information about the Bf-blender-cvs
mailing list