[Bf-blender-cvs] [a5beca7ba0d] master: BLI: inline fast path of IndexRange::as_span

Thu Apr 7 19:28:54 CEST 2022

Commit: a5beca7ba0daaca81805dbbf8857378a5bde6414
Author: Jacques Lucke
Date:   Thu Apr 7 19:28:41 2022 +0200
Branches: master
https://developer.blender.org/rBa5beca7ba0daaca81805dbbf8857378a5bde6414

BLI: inline fast path of IndexRange::as_span

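`IndexRange::as_span` frequently showed up in profiling but shouldn't.
Its fast path is now defined inline in `BLI_span.hh`, so the common
case no longer needs an out-of-line call into `BLI_index_range.cc`.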

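This also stores the cached array pointer and its size in `std::atomic`
variables (release stores, paired with acquire loads in the inlined
fast path) for more correctness, and fills a newly allocated array
with `threading::parallel_for` for better performance.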

===================================================================

M	source/blender/blenlib/BLI_index_range.hh
M	source/blender/blenlib/BLI_span.hh
M	source/blender/blenlib/intern/BLI_index_range.cc

===================================================================

diff --git a/source/blender/blenlib/BLI_index_range.hh b/source/blender/blenlib/BLI_index_range.hh
index 85f2c83364b..7d5c2400bba 100644
--- a/source/blender/blenlib/BLI_index_range.hh
+++ b/source/blender/blenlib/BLI_index_range.hh
@@ -39,6 +39,7 @@
  */
 
 #include <algorithm>
+#include <atomic>
 #include <cmath>
 #include <iostream>
 
@@ -288,6 +289,12 @@ class IndexRange {
     stream << "[" << range.start() << ", " << range.one_after_last() << ")";
     return stream;
   }
+
+ private:
+  static std::atomic<int64_t> s_current_array_size;
+  static std::atomic<int64_t *> s_current_array;
+
+  Span<int64_t> as_span_internal() const;
 };
 
 }  // namespace blender
diff --git a/source/blender/blenlib/BLI_span.hh b/source/blender/blenlib/BLI_span.hh
index 9ab096094de..0f3fcea1270 100644
--- a/source/blender/blenlib/BLI_span.hh
+++ b/source/blender/blenlib/BLI_span.hh
@@ -722,4 +722,16 @@ template<typename T> class MutableSpan {
   }
 };
 
+/** This is defined here, because in `BLI_index_range.hh` `Span` is not yet defined. */
+inline Span<int64_t> IndexRange::as_span() const
+{
+  const int64_t min_required_size = start_ + size_;
+  const int64_t current_array_size = s_current_array_size.load(std::memory_order_acquire);
+  const int64_t *current_array = s_current_array.load(std::memory_order_acquire);
+  if (min_required_size <= current_array_size) {
+    return Span<int64_t>(current_array + start_, size_);
+  }
+  return this->as_span_internal();
+}
+
 } /* namespace blender */
diff --git a/source/blender/blenlib/intern/BLI_index_range.cc b/source/blender/blenlib/intern/BLI_index_range.cc
index 346f55d9405..398228ab461 100644
--- a/source/blender/blenlib/intern/BLI_index_range.cc
+++ b/source/blender/blenlib/intern/BLI_index_range.cc
@@ -1,46 +1,47 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 
-#include <atomic>
 #include <mutex>
 
 #include "BLI_array.hh"
 #include "BLI_index_range.hh"
 #include "BLI_span.hh"
+#include "BLI_task.hh"
 #include "BLI_vector.hh"
 
 namespace blender {
 
 static RawVector<RawArray<int64_t, 0>> arrays;
-static int64_t current_array_size = 0;
-static int64_t *current_array = nullptr;
 static std::mutex current_array_mutex;
+std::atomic<int64_t> IndexRange::s_current_array_size = 0;
+std::atomic<int64_t *> IndexRange::s_current_array = nullptr;
 
-Span<int64_t> IndexRange::as_span() const
+Span<int64_t> IndexRange::as_span_internal() const
 {
   int64_t min_required_size = start_ + size_;
 
-  if (min_required_size <= current_array_size) {
-    return Span<int64_t>(current_array + start_, size_);
-  }
-
   std::lock_guard<std::mutex> lock(current_array_mutex);
 
-  if (min_required_size <= current_array_size) {
-    return Span<int64_t>(current_array + start_, size_);
+  /* Double checked lock. */
+  if (min_required_size <= s_current_array_size) {
+    return Span<int64_t>(s_current_array + start_, size_);
   }
 
-  int64_t new_size = std::max<int64_t>(1000, power_of_2_max_u(min_required_size));
-  RawArray<int64_t, 0> new_array(new_size);
-  for (int64_t i = 0; i < new_size; i++) {
-    new_array[i] = i;
-  }
-  arrays.append(std::move(new_array));
-
-  current_array = arrays.last().data();
-  std::atomic_thread_fence(std::memory_order_seq_cst);
-  current_array_size = new_size;
-
-  return Span<int64_t>(current_array + start_, size_);
+  /* Isolate, because a mutex is locked. */
+  threading::isolate_task([&]() {
+    int64_t new_size = std::max<int64_t>(1000, power_of_2_max_u(min_required_size));
+    RawArray<int64_t, 0> new_array(new_size);
+    threading::parallel_for(IndexRange(new_size), 4096, [&](const IndexRange range) {
+      for (const int64_t i : range) {
+        new_array[i] = i;
+      }
+    });
+    arrays.append(std::move(new_array));
+
+    s_current_array.store(arrays.last().data(), std::memory_order_release);
+    s_current_array_size.store(new_size, std::memory_order_release);
+  });
+
+  return Span<int64_t>(s_current_array + start_, size_);
 }
 
 }  // namespace blender