[Bf-blender-cvs] [39615cd3b77] master: BLI: add atomic disjoint set data structure
Jacques Lucke
noreply at git.blender.org
Fri Dec 2 10:39:44 CET 2022
Commit: 39615cd3b7792a2fec82510c988a9f619a98f75f
Author: Jacques Lucke
Date: Fri Dec 2 10:39:11 2022 +0100
Branches: master
https://developer.blender.org/rB39615cd3b7792a2fec82510c988a9f619a98f75f
BLI: add atomic disjoint set data structure
The existing `DisjointSet` data structure only supports single
threaded access, which limits performance severely in some cases.
This patch implements `AtomicDisjointSet` based on
"Wait-free Parallel Algorithms for the Union-Find Problem"
by Richard J. Anderson and Heather Woll.
The Mesh Island node also got updated to make use of the new data
structure. In my tests it got 2-5 times faster. More details are in 16653.
Differential Revision: https://developer.blender.org/D16653
===================================================================
A source/blender/blenlib/BLI_atomic_disjoint_set.hh
M source/blender/blenlib/CMakeLists.txt
A source/blender/blenlib/intern/atomic_disjoint_set.cc
M source/blender/nodes/geometry/nodes/node_geo_input_mesh_island.cc
===================================================================
diff --git a/source/blender/blenlib/BLI_atomic_disjoint_set.hh b/source/blender/blenlib/BLI_atomic_disjoint_set.hh
new file mode 100644
index 00000000000..d0826ca1e0b
--- /dev/null
+++ b/source/blender/blenlib/BLI_atomic_disjoint_set.hh
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include <atomic>
+
+#include "BLI_array.hh"
+
+namespace blender {
+
+/**
+ * Same as `DisjointSet` but is thread safe (at slightly higher cost for the single threaded case).
+ *
+ * The implementation is based on the following paper:
+ * "Wait-free Parallel Algorithms for the Union-Find Problem"
+ * by Richard J. Anderson and Heather Woll.
+ *
+ * It's also inspired by this implementation: https://github.com/wjakob/dset.
+ */
+class AtomicDisjointSet {
+ private:
+ /* Can generally used relaxed memory order with this algorithm. */
+ static constexpr auto relaxed = std::memory_order_relaxed;
+
+ struct Item {
+ int parent;
+ int rank;
+ };
+
+ /**
+ * An #Item per element. It's important that the entire item is in a single atomic, so that it
+ * can be updated atomically. */
+ mutable Array<std::atomic<Item>> items_;
+
+ public:
+ /**
+ * Create a new disjoing set with the given set. Initially, every element is in a separate set.
+ */
+ AtomicDisjointSet(const int size);
+
+ /**
+ * Join the sets containing elements x and y. Nothing happens when they were in the same set
+ * before.
+ */
+ void join(int x, int y)
+ {
+ while (true) {
+ x = this->find_root(x);
+ y = this->find_root(y);
+
+ if (x == y) {
+ /* They are in the same set already. */
+ return;
+ }
+
+ Item x_item = items_[x].load(relaxed);
+ Item y_item = items_[y].load(relaxed);
+
+ if (
+ /* Implement union by rank heuristic. */
+ x_item.rank > y_item.rank
+ /* If the rank is the same, make a consistent decision. */
+ || (x_item.rank == y_item.rank && x < y)) {
+ std::swap(x_item, y_item);
+ std::swap(x, y);
+ }
+
+ /* Update parent of item x. */
+ const Item x_item_new{y, x_item.rank};
+ if (!items_[x].compare_exchange_strong(x_item, x_item_new, relaxed)) {
+ /* Another thread has updated item x, start again. */
+ continue;
+ }
+
+ if (x_item.rank == y_item.rank) {
+ /* Increase rank of item y. This may fail when another thread has updated item y in the
+ * meantime. That may lead to worse behavior with the union by rank heurist, but seems to
+ * be ok in practice. */
+ const Item y_item_new{y, y_item.rank + 1};
+ items_[y].compare_exchange_weak(y_item, y_item_new, relaxed);
+ }
+ }
+ }
+
+ /**
+ * Return true when x and y are in the same set.
+ */
+ bool in_same_set(int x, int y) const
+ {
+ while (true) {
+ x = this->find_root(x);
+ y = this->find_root(y);
+ if (x == y) {
+ return true;
+ }
+ if (items_[x].load(relaxed).parent == x) {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * Find the element that represents the set containing x currently.
+ */
+ int find_root(int x) const
+ {
+ while (true) {
+ const Item item = items_[x].load(relaxed);
+ if (x == item.parent) {
+ return x;
+ }
+ const int new_parent = items_[item.parent].load(relaxed).parent;
+ if (item.parent != new_parent) {
+ /* This halves the path for faster future lookups. That fail but that does not change
+ * correctness. */
+ Item expected = item;
+ const Item desired{new_parent, item.rank};
+ items_[x].compare_exchange_weak(expected, desired, relaxed);
+ }
+ x = new_parent;
+ }
+ }
+
+ /**
+ * True when x represents a set.
+ */
+ bool is_root(const int x) const
+ {
+ const Item item = items_[x].load(relaxed);
+ return item.parent == x;
+ }
+
+ /**
+ * Get an identifier for each id. This is deterministic and does not depend on the order of
+ * joins. The ids are ordered by their first occurence. Consequently, `result[0]` is always zero
+ * (unless there are no elements).
+ */
+ void calc_reduced_ids(MutableSpan<int> result) const;
+
+ /**
+ * Count the number of disjoint sets.
+ */
+ int count_sets() const;
+};
+
+} // namespace blender
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 185ca517945..ac501094ec4 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -50,6 +50,7 @@ set(SRC
intern/array_utils.c
intern/array_utils.cc
intern/astar.c
+ intern/atomic_disjoint_set.cc
intern/bitmap.c
intern/bitmap_draw_2d.c
intern/boxpack_2d.c
@@ -172,6 +173,7 @@ set(SRC
BLI_asan.h
BLI_assert.h
BLI_astar.h
+ BLI_atomic_disjoint_set.hh
BLI_bit_vector.hh
BLI_bitmap.h
BLI_bitmap_draw_2d.h
diff --git a/source/blender/blenlib/intern/atomic_disjoint_set.cc b/source/blender/blenlib/intern/atomic_disjoint_set.cc
new file mode 100644
index 00000000000..4ca4203031a
--- /dev/null
+++ b/source/blender/blenlib/intern/atomic_disjoint_set.cc
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_atomic_disjoint_set.hh"
+#include "BLI_enumerable_thread_specific.hh"
+#include "BLI_map.hh"
+#include "BLI_sort.hh"
+#include "BLI_task.hh"
+
+namespace blender {
+
+AtomicDisjointSet::AtomicDisjointSet(const int size) : items_(size)
+{
+ threading::parallel_for(IndexRange(size), 4096, [&](const IndexRange range) {
+ for (const int i : range) {
+ items_[i].store(Item{i, 0}, relaxed);
+ }
+ });
+}
+
+static void update_first_occurence(Map<int, int> &map, const int root, const int index)
+{
+ map.add_or_modify(
+ root,
+ [&](int *first_occurence) { *first_occurence = index; },
+ [&](int *first_occurence) {
+ if (index < *first_occurence) {
+ *first_occurence = index;
+ }
+ });
+}
+
+void AtomicDisjointSet::calc_reduced_ids(MutableSpan<int> result) const
+{
+ BLI_assert(result.size() == items_.size());
+
+ const int size = result.size();
+
+ /* Find the root for element. With multi-threading, this root is not deterministic. So
+ * some postprocessing has to be done to make it deterministic. */
+ threading::EnumerableThreadSpecific<Map<int, int>> first_occurence_by_root_per_thread;
+ threading::parallel_for(IndexRange(size), 1024, [&](const IndexRange range) {
+ Map<int, int> &first_occurence_by_root = first_occurence_by_root_per_thread.local();
+ for (const int i : range) {
+ const int root = this->find_root(i);
+ result[i] = root;
+ update_first_occurence(first_occurence_by_root, root, i);
+ }
+ });
+
+ /* Build a map that contains the first element index that has a certain root. */
+ Map<int, int> &combined_map = first_occurence_by_root_per_thread.local();
+ for (const Map<int, int> &other_map : first_occurence_by_root_per_thread) {
+ if (&combined_map == &other_map) {
+ continue;
+ }
+ for (const auto item : other_map.items()) {
+ update_first_occurence(combined_map, item.key, item.value);
+ }
+ }
+
+ struct RootOccurence {
+ int root;
+ int first_occurence;
+ };
+
+ /* Sort roots by first occurence. This removes the non-determinism above. */
+ Vector<RootOccurence, 16> root_occurences;
+ root_occurences.reserve(combined_map.size());
+ for (const auto item : combined_map.items()) {
+ root_occurences.append({item.key, item.value});
+ }
+ parallel_sort(root_occurences.begin(),
+ root_occurences.end(),
+ [](const RootOccurence &a, const RootOccurence &b) {
+ return a.first_occurence < b.first_occurence;
+ });
+
+ /* Remap original root values with deterministic values. */
+ Map<int, int> id_by_root;
+ id_by_root.reserve(root_occurences.size());
+ for (const int i : root_occurences.index_range()) {
+ id_by_root.add_new(root_occurences[i].root, i);
+ }
+ threading::parallel_for(IndexRange(size), 1024, [&](const IndexRange range) {
+ for (const int i : range) {
+ result[i] = id_by_root.lookup(result[i]);
+ }
+ });
+}
+
+int AtomicDisjointSet::count_sets() const
+{
+ return threading::parallel_reduce<int>(
+ items_.index_range(),
+ 1024,
+ 0,
+ [&](const IndexRange range, int count) {
+ for (const int i : range) {
+ if (this->is_root(i)) {
+ count++;
+ }
+ }
+ return count;
+ },
+ [](const int a, const int b) { return a + b; });
+}
+
+} // namespace blender
diff --git a/source/blender/nodes/geometry/nodes/node_geo_input_mesh_island.cc b/source/blender/nodes/geometry/nodes/node_geo_input_mesh_island.cc
index d658b14092a..d2a66986abd 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_input_mesh_island.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_input_mesh_island.cc
@@ -5,7 +5,8 @@
#include "BKE_mesh.h"
-#include "BLI_disjoint_set.hh"
+#include "BLI_atomic_disjoint_set.hh"
+#include "BLI_task.hh"
#include "node_geometry_util.hh"
@@ -35,17 +36,15 @@ class IslandFieldInput final : public bke::MeshFieldInput {
{
const Span<MEdge> edges = mesh.edges();
- DisjointSet<int> islands(mesh.totvert);
- for (const int i : edges.index_range()) {
- islands.join(edges[i].v1, edges[i].v2);
- }
+ AtomicDisjointSet islands(mesh.totvert);
+ threading::parallel_for(edges.index_range(), 1024, [&](const IndexRange range) {
+ for (const MEdge &edge : edges.slice(range)) {
+ islands.join(edge.v1, edge.v2);
+ }
+ });
Array<int> output(mesh.totvert);
- VectorSet<int> ordered_roots;
- for (const int i : IndexRange(mesh.totvert)) {
- const int root = islands.find_root(i);
- output[i] = ordered_roots.index_of_or_add(root);
- }
+ islands.calc_reduced_ids(output);
return mesh.attributes().adapt_domain<int>(
VArray<int>::ForContainer(std::move(output)), ATTR_DOMAIN_POINT, domain);
@@ -81,18 +80,15 @@ class IslandCountFieldInput final : public bke::MeshFieldInput {
{
const Span<MEdge> edges = mesh.edges();
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list