[Bf-blender-cvs] [ee3f71d747e] master: Functions: allow for better compiler optimization

Jacques Lucke noreply at git.blender.org
Mon Apr 4 12:00:57 CEST 2022


Commit: ee3f71d747e3ffd5091335437d52b3ec518d7b67
Author: Jacques Lucke
Date:   Mon Apr 4 11:57:39 2022 +0200
Branches: master
https://developer.blender.org/rBee3f71d747e3ffd5091335437d52b3ec518d7b67

Functions: allow for better compiler optimization

This extracts the inner loops into separate static functions
(`execute_SI_SO` and `execute_SI_SI_SO`).
There are two main reasons for this:
* Allows using `__restrict` to indicate that no other parameter
  aliases with the output array. This allows for better optimization.
* Makes it easier to find the generated assembly code for the loops,
  especially because the extracted functions are marked `BLI_NOINLINE`.

===================================================================

M	source/blender/functions/FN_multi_function_builder.hh

===================================================================

diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh
index 2eaada5dea0..dfdd152e62a 100644
--- a/source/blender/functions/FN_multi_function_builder.hh
+++ b/source/blender/functions/FN_multi_function_builder.hh
@@ -49,15 +49,23 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
     return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
       /* Devirtualization results in a 2-3x speedup for some simple functions. */
       devirtualize_varray(in1, [&](const auto &in1) {
-        mask.to_best_mask_type([&](const auto &mask) {
-          for (const int64_t i : mask) {
-            new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i]));
-          }
-        });
+        mask.to_best_mask_type(
+            [&](const auto &mask) { execute_SI_SO(element_fn, mask, in1, out1.data()); });
       });
     };
   }
 
+  template<typename ElementFuncT, typename MaskT, typename In1Array>
+  BLI_NOINLINE static void execute_SI_SO(const ElementFuncT &element_fn,
+                                         MaskT mask,
+                                         const In1Array &in1,
+                                         Out1 *__restrict r_out)
+  {
+    for (const int64_t i : mask) {
+      new (r_out + i) Out1(element_fn(in1[i]));
+    }
+  }
+
   void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
   {
     const VArray<In1> &in1 = params.readonly_single_input<In1>(0);
@@ -105,15 +113,24 @@ class CustomMF_SI_SI_SO : public MultiFunction {
                MutableSpan<Out1> out1) {
       /* Devirtualization results in a 2-3x speedup for some simple functions. */
       devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
-        mask.to_best_mask_type([&](const auto &mask) {
-          for (const int64_t i : mask) {
-            new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i]));
-          }
-        });
+        mask.to_best_mask_type(
+            [&](const auto &mask) { execute_SI_SI_SO(element_fn, mask, in1, in2, out1.data()); });
       });
     };
   }
 
+  template<typename ElementFuncT, typename MaskT, typename In1Array, typename In2Array>
+  BLI_NOINLINE static void execute_SI_SI_SO(const ElementFuncT &element_fn,
+                                            MaskT mask,
+                                            const In1Array &in1,
+                                            const In2Array &in2,
+                                            Out1 *__restrict r_out)
+  {
+    for (const int64_t i : mask) {
+      new (r_out + i) Out1(element_fn(in1[i], in2[i]));
+    }
+  }
+
   void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
   {
     const VArray<In1> &in1 = params.readonly_single_input<In1>(0);



More information about the Bf-blender-cvs mailing list