[Bf-blender-cvs] [2920a569b52] temp-parallel-multi-function: progress
Jacques Lucke
noreply at git.blender.org
Fri Sep 10 11:02:28 CEST 2021
Commit: 2920a569b527c3543dd393a96bca2362ee04feef
Author: Jacques Lucke
Date: Thu Sep 9 11:19:09 2021 +0200
Branches: temp-parallel-multi-function
https://developer.blender.org/rB2920a569b527c3543dd393a96bca2362ee04feef
progress
===================================================================
M source/blender/blenlib/BLI_virtual_array.hh
M source/blender/functions/CMakeLists.txt
M source/blender/functions/FN_multi_function_parallel.hh
M source/blender/functions/intern/field.cc
A source/blender/functions/intern/multi_function_parallel.cc
M source/blender/functions/tests/FN_multi_function_test.cc
===================================================================
diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh
index 1c02bce8411..e99036d06a9 100644
--- a/source/blender/blenlib/BLI_virtual_array.hh
+++ b/source/blender/blenlib/BLI_virtual_array.hh
@@ -622,41 +622,50 @@ inline void devirtualize_varray2(const VArray<T1> &varray1,
const Func &func,
bool enable = true)
{
- /* Support disabling the devirtualization to simplify benchmarking. */
- if (enable) {
- const bool is_span1 = varray1.is_span();
- const bool is_span2 = varray2.is_span();
- const bool is_single1 = varray1.is_single();
- const bool is_single2 = varray2.is_single();
- if (is_span1 && is_span2) {
- const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
- const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
- func(varray1_span, varray2_span);
- return;
- }
- if (is_span1 && is_single2) {
- const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
- const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
- func(varray1_span, varray2_single);
- return;
- }
- if (is_single1 && is_span2) {
- const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
- const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
- func(varray1_single, varray2_span);
- return;
- }
- if (is_single1 && is_single2) {
- const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
- const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
- func(varray1_single, varray2_single);
- return;
- }
- }
- /* This fallback is used even when one of the inputs could be optimized. It's probably not worth
- * it to optimize just one of the inputs, because then the compiler still has to call into
- * unknown code, which inhibits many compiler optimizations. */
- func(varray1, varray2);
+ devirtualize_varray(
+ varray1,
+ [&](const auto &varray1) {
+ devirtualize_varray(
+ varray2, [&](const auto &varray2) { func(varray1, varray2); }, enable);
+ },
+ enable);
+
+ // /* Support disabling the devirtualization to simplify benchmarking. */
+ // if (enable) {
+ // const bool is_span1 = varray1.is_span();
+ // const bool is_span2 = varray2.is_span();
+ // const bool is_single1 = varray1.is_single();
+ // const bool is_single2 = varray2.is_single();
+ // if (is_span1 && is_span2) {
+ // const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
+ // const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
+ // func(varray1_span, varray2_span);
+ // return;
+ // }
+ // if (is_span1 && is_single2) {
+ // const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
+ // const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
+ // func(varray1_span, varray2_single);
+ // return;
+ // }
+ // if (is_single1 && is_span2) {
+ // const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
+ // const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
+ // func(varray1_single, varray2_span);
+ // return;
+ // }
+ // if (is_single1 && is_single2) {
+ // const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
+ // const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
+ // func(varray1_single, varray2_single);
+ // return;
+ // }
+ // }
+ // /* This fallback is used even when one of the inputs could be optimized. It's probably not
+ // worth
+ // * it to optimize just one of the inputs, because then the compiler still has to call into
+ // * unknown code, which inhibits many compiler optimizations. */
+ // func(varray1, varray2);
}
} // namespace blender
diff --git a/source/blender/functions/CMakeLists.txt b/source/blender/functions/CMakeLists.txt
index 3c27e9d5e19..856668f01d7 100644
--- a/source/blender/functions/CMakeLists.txt
+++ b/source/blender/functions/CMakeLists.txt
@@ -34,6 +34,7 @@ set(SRC
intern/generic_virtual_vector_array.cc
intern/multi_function.cc
intern/multi_function_builder.cc
+ intern/multi_function_parallel.cc
intern/multi_function_procedure.cc
intern/multi_function_procedure_builder.cc
intern/multi_function_procedure_executor.cc
@@ -54,6 +55,7 @@ set(SRC
FN_multi_function_data_type.hh
FN_multi_function_param_type.hh
FN_multi_function_params.hh
+ FN_multi_function_parallel.hh
FN_multi_function_procedure.hh
FN_multi_function_procedure_builder.hh
FN_multi_function_procedure_executor.hh
@@ -64,6 +66,22 @@ set(LIB
bf_blenlib
)
+if(WITH_TBB) # Everything below only applies to builds with TBB enabled.
+ add_definitions(-DWITH_TBB) # Expose WITH_TBB to the sources as a preprocessor define.
+ if(WIN32)
+ # TBB includes Windows.h which will define min/max macros
+ # that will collide with the stl versions.
+ add_definitions(-DNOMINMAX)
+ endif()
+ list(APPEND INC_SYS # TBB headers are added as system includes.
+ ${TBB_INCLUDE_DIRS}
+ )
+
+ list(APPEND LIB # Link bf_functions against the TBB libraries.
+ ${TBB_LIBRARIES}
+ )
+endif()
+
blender_add_lib(bf_functions "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
if(WITH_GTESTS)
diff --git a/source/blender/functions/FN_multi_function_parallel.hh b/source/blender/functions/FN_multi_function_parallel.hh
index b5b3e2f2f94..84c57efd434 100644
--- a/source/blender/functions/FN_multi_function_parallel.hh
+++ b/source/blender/functions/FN_multi_function_parallel.hh
@@ -20,5 +20,20 @@
* \ingroup fn
*/
+#include "FN_multi_function.hh"
+
namespace blender::fn {
-}
+
+class ParallelMultiFunction : public MultiFunction { /* Wraps another multi-function; `call` uses threading::parallel_for on large masks. */
+ private:
+ const MultiFunction &fn_; /* Wrapped function. Stored by reference, so it has to outlive this object. */
+ const int64_t grain_size_; /* Masks with at most this many indices are passed to `fn_` in one call. */
+ bool threading_supported_; /* Set in the constructor; false if `fn_` has a vector parameter. */
+
+ public:
+ ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size);
+
+ void call(IndexMask mask, MFParams params, MFContext context) const override;
+};
+
+} // namespace blender::fn
diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc
index a27c5e4e3dc..7b35593ad75 100644
--- a/source/blender/functions/intern/field.cc
+++ b/source/blender/functions/intern/field.cc
@@ -18,9 +18,11 @@
#include "BLI_multi_value_map.hh"
#include "BLI_set.hh"
#include "BLI_stack.hh"
+#include "BLI_timeit.hh"
#include "BLI_vector_set.hh"
#include "FN_field.hh"
+#include "FN_multi_function_parallel.hh"
namespace blender::fn {
@@ -271,6 +273,8 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
const FieldContext &context,
Span<GVMutableArray *> dst_hints)
{
+ SCOPED_TIMER(__func__);
+
Vector<const GVArray *> r_varrays(fields_to_evaluate.size(), nullptr);
/* Destination hints are optional. Create a small utility method to access them. */
@@ -334,7 +338,10 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
build_multi_function_procedure_for_fields(
procedure, scope, field_tree_info, varying_fields_to_evaluate);
MFProcedureExecutor procedure_executor{"Procedure", procedure};
- MFParamsBuilder mf_params{procedure_executor, array_size};
+ fn::ParallelMultiFunction parallel_fn{procedure_executor, 20000};
+ const MultiFunction &fn_to_execute = procedure_executor;
+
+ MFParamsBuilder mf_params{fn_to_execute, array_size};
MFContextBuilder mf_context;
/* Provide inputs to the procedure executor. */
@@ -376,7 +383,7 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
mf_params.add_uninitialized_single_output(span);
}
- procedure_executor.call(mask, mf_params, mf_context);
+ fn_to_execute.call(mask, mf_params, mf_context);
}
/* Evaluate constant fields if necessary. */
diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc
new file mode 100644
index 00000000000..6843c4a233b
--- /dev/null
+++ b/source/blender/functions/intern/multi_function_parallel.cc
@@ -0,0 +1,109 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "FN_multi_function_parallel.hh"
+
+#include "BLI_task.hh"
+
+#include <mutex>
+
+namespace blender::fn {
+
+ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size)
+ : fn_(fn), grain_size_(grain_size)
+{
+ this->set_signature(&fn.signature()); /* Reuse the signature of the wrapped function. */
+
+ threading_supported_ = true; /* Reset below as soon as a vector parameter is found. */
+ for (const int param_index : fn.param_indices()) {
+ const MFParamType param_type = fn.param_type(param_index);
+ if (param_type.data_type().category() == MFDataType::Vector) { /* A single vector parameter disables threading. */
+ threading_supported_ = false;
+ break;
+ }
+ }
+}
+
+void ParallelMultiFunction::call(IndexMask mask, MFParams params, MFContext context) const
+{
+ if (mask.size() <= grain_size_ || !threading_supported_) {
+ fn_.call(mask, params, context);
+ return;
+ }
+
+ threading::parallel_for(mask.index_range(), grain_size_, [&](const IndexRange range) {
+ const int size = range.size();
+ IndexMask original_sub_mask{mask.indices().slice(range)};
+ const int64_t offset = original_sub_mask.indices().first();
+ const int64_t slice_size = original_sub_mask.indices().last() - offset + 1;
+ const IndexRange slice_range{offset, slice_size};
+ IndexMask sub_mask;
+ Vec
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list