[Bf-blender-cvs] [1c44d08a69e] master: BLI: new C++ hash table data structures

Jacques Lucke noreply at git.blender.org
Fri Sep 13 10:26:41 CEST 2019


Commit: 1c44d08a69eb3e66c7f942d748f549d6b8ca138f
Author: Jacques Lucke
Date:   Fri Sep 13 10:06:02 2019 +0200
Branches: master
https://developer.blender.org/rB1c44d08a69eb3e66c7f942d748f549d6b8ca138f

BLI: new C++ hash table data structures

This commit adds some new hash-based data structures to blenlib.
Currently, all of them use open addressing with probing.
They also support small object optimization, but it is not
customizable yet. I'll add support for that when necessary.
The following main data structures are included:

**Set**
A collection of values, where every value must exist at most once.
This is similar to a Python `set`.

**SetVector**
A combination of a Set and a Vector. It supports fast search for
elements and maintains insertion order when there are no deletes.
All elements are stored in a contiguous array, so they can be
iterated over using a normal `ArrayRef`.

**Map**
A set of key-value pairs, where every key must exist at most once.
This is similar to a Python `dict`.

**StringMap**
A special map for the case when the keys are strings. This case is
fairly common and allows for some optimizations. Most importantly,
many unnecessary allocations can be avoided by storing strings in
a single buffer. Furthermore, the interface of this class uses
`StringRef` to avoid unnecessary conversions.

This commit is a continuation of rB369d5e8ad2bb7.

===================================================================

M	.clang-format
A	source/blender/blenlib/BLI_hash_cxx.h
A	source/blender/blenlib/BLI_map.h
M	source/blender/blenlib/BLI_math_base.h
A	source/blender/blenlib/BLI_open_addressing.h
A	source/blender/blenlib/BLI_set.h
A	source/blender/blenlib/BLI_set_vector.h
A	source/blender/blenlib/BLI_string_map.h
M	source/blender/blenlib/CMakeLists.txt
M	source/blender/blenlib/intern/math_base_inline.c
A	tests/gtests/blenlib/BLI_map_test.cc
M	tests/gtests/blenlib/BLI_math_base_test.cc
A	tests/gtests/blenlib/BLI_set_test.cc
A	tests/gtests/blenlib/BLI_set_vector_test.cc
A	tests/gtests/blenlib/BLI_string_map_test.cc
M	tests/gtests/blenlib/CMakeLists.txt

===================================================================

diff --git a/.clang-format b/.clang-format
index b81403c46ce..c4561ce960f 100644
--- a/.clang-format
+++ b/.clang-format
@@ -221,6 +221,7 @@ ForEachMacros:
   - ITER_BEGIN
   - ITER_PIXELS
   - ITER_SLOTS
+  - ITER_SLOTS_BEGIN
   - LISTBASE_CIRCULAR_BACKWARD_BEGIN
   - LISTBASE_CIRCULAR_FORWARD_BEGIN
   - LISTBASE_FOREACH
diff --git a/source/blender/blenlib/BLI_hash_cxx.h b/source/blender/blenlib/BLI_hash_cxx.h
new file mode 100644
index 00000000000..b9a53f29a04
--- /dev/null
+++ b/source/blender/blenlib/BLI_hash_cxx.h
@@ -0,0 +1,100 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/** \file
+ * \ingroup bli
+ *
+ * This file provides default hash functions for some primitive types. The hash functions can be
+ * used by containers such as Map and Set.
+ */
+
+#pragma once
+
+#include <functional>
+#include <string>
+#include <utility>
+
+#include "BLI_utildefines.h"
+#include "BLI_math_base.h"
+
+namespace BLI {
+
+template<typename T> struct DefaultHash {
+};
+
+#define TRIVIAL_DEFAULT_INT_HASH(TYPE) \
+  template<> struct DefaultHash<TYPE> { \
+    uint32_t operator()(TYPE value) const \
+    { \
+      return (uint32_t)value; \
+    } \
+  }
+
+/**
+ * Cannot make any assumptions about the distribution of keys, so use a trivial hash function by
+ * default. The hash table implementations are designed to take all bits of the hash into account
+ * to avoid really bad behavior when the lower bits are all zero. Special hash functions can be
+ * implemented when more knowledge about a specific key distribution is available.
+ */
+TRIVIAL_DEFAULT_INT_HASH(int8_t);
+TRIVIAL_DEFAULT_INT_HASH(uint8_t);
+TRIVIAL_DEFAULT_INT_HASH(int16_t);
+TRIVIAL_DEFAULT_INT_HASH(uint16_t);
+TRIVIAL_DEFAULT_INT_HASH(int32_t);
+TRIVIAL_DEFAULT_INT_HASH(uint32_t);
+TRIVIAL_DEFAULT_INT_HASH(int64_t);
+
+template<> struct DefaultHash<float> {
+  uint32_t operator()(float value) const
+  {
+    return *(uint32_t *)&value;
+  }
+};
+
+template<> struct DefaultHash<std::string> {
+  uint32_t operator()(const std::string &value) const
+  {
+    uint32_t hash = 5381;
+    for (char c : value) {
+      hash = hash * 33 + c;
+    }
+    return hash;
+  }
+};
+
+/**
+ * While we cannot guarantee that the lower 3 bits of a pointer are zero, it is safe to assume
+ * this in the general case. MEM_malloc only returns 8 byte aligned addresses on 64-bit systems.
+ */
+template<typename T> struct DefaultHash<T *> {
+  uint32_t operator()(const T *value) const
+  {
+    uintptr_t ptr = POINTER_AS_UINT(value);
+    uint32_t hash = (uint32_t)(ptr >> 3);
+    return hash;
+  }
+};
+
+template<typename T1, typename T2> struct DefaultHash<std::pair<T1, T2>> {
+  uint32_t operator()(const std::pair<T1, T2> &value) const
+  {
+    uint32_t hash1 = DefaultHash<T1>{}(value.first);
+    uint32_t hash2 = DefaultHash<T2>{}(value.second);
+    return hash1 ^ (hash2 * 33);
+  }
+};
+
+}  // namespace BLI
diff --git a/source/blender/blenlib/BLI_map.h b/source/blender/blenlib/BLI_map.h
new file mode 100644
index 00000000000..5328dac1106
--- /dev/null
+++ b/source/blender/blenlib/BLI_map.h
@@ -0,0 +1,596 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/** \file
+ * \ingroup bli
+ *
+ * This file provides a map implementation that uses open addressing with probing.
+ *
+ * The key and value objects are stored directly in the hash table to avoid indirect memory
+ * lookups. Keys and values are stored in groups of four to avoid wasting memory due to padding.
+ */
+
+#pragma once
+
+#include "BLI_hash_cxx.h"
+#include "BLI_array_ref.h"
+#include "BLI_open_addressing.h"
+
+namespace BLI {
+
+// clang-format off
+
+#define ITER_SLOTS_BEGIN(KEY, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \
+  uint32_t hash = DefaultHash<KeyT>{}(KEY); \
+  uint32_t perturb = hash; \
+  while (true) { \
+    uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \
+    uint8_t R_OFFSET = hash & OFFSET_MASK; \
+    uint8_t initial_offset = R_OFFSET; \
+    OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \
+    do {
+
+#define ITER_SLOTS_END(R_OFFSET) \
+      R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \
+    } while (R_OFFSET != initial_offset); \
+    perturb >>= 5; \
+    hash = hash * 5 + 1 + perturb; \
+  } ((void)0)
+
+// clang-format on
+
+template<typename KeyT, typename ValueT, typename Allocator = GuardedAllocator> class Map {
+ private:
+  static constexpr uint OFFSET_MASK = 3;
+  static constexpr uint OFFSET_SHIFT = 2;
+
+  class Item {
+   private:
+    static constexpr uint8_t IS_EMPTY = 0;
+    static constexpr uint8_t IS_SET = 1;
+    static constexpr uint8_t IS_DUMMY = 2;
+
+    uint8_t m_status[4];
+    char m_keys[4 * sizeof(KeyT)];
+    char m_values[4 * sizeof(ValueT)];
+
+   public:
+    static constexpr uint slots_per_item = 4;
+
+    Item()
+    {
+      for (uint offset = 0; offset < 4; offset++) {
+        m_status[offset] = IS_EMPTY;
+      }
+    }
+
+    ~Item()
+    {
+      for (uint offset = 0; offset < 4; offset++) {
+        if (m_status[offset] == IS_SET) {
+          this->key(offset)->~KeyT();
+          this->value(offset)->~ValueT();
+        }
+      }
+    }
+
+    Item(const Item &other)
+    {
+      for (uint offset = 0; offset < 4; offset++) {
+        uint8_t status = other.m_status[offset];
+        m_status[offset] = status;
+        if (status == IS_SET) {
+          new (this->key(offset)) KeyT(*other.key(offset));
+          new (this->value(offset)) ValueT(*other.value(offset));
+        }
+      }
+    }
+
+    Item(Item &&other) noexcept
+    {
+      for (uint offset = 0; offset < 4; offset++) {
+        uint8_t status = other.m_status[offset];
+        m_status[offset] = status;
+        if (status == IS_SET) {
+          new (this->key(offset)) KeyT(std::move(*other.key(offset)));
+          new (this->value(offset)) ValueT(std::move(*other.value(offset)));
+        }
+      }
+    }
+
+    bool has_key(uint offset, const KeyT &key) const
+    {
+      return m_status[offset] == IS_SET && key == *this->key(offset);
+    }
+
+    bool is_set(uint offset) const
+    {
+      return m_status[offset] == IS_SET;
+    }
+
+    bool is_empty(uint offset) const
+    {
+      return m_status[offset] == IS_EMPTY;
+    }
+
+    KeyT *key(uint offset) const
+    {
+      return (KeyT *)(m_keys + offset * sizeof(KeyT));
+    }
+
+    ValueT *value(uint offset) const
+    {
+      return (ValueT *)(m_values + offset * sizeof(ValueT));
+    }
+
+    void copy_in(uint offset, const KeyT &key, const ValueT &value)
+    {
+      BLI_assert(m_status[offset] != IS_SET);
+      m_status[offset] = IS_SET;
+      new (this->key(offset)) KeyT(key);
+      new (this->value(offset)) ValueT(value);
+    }
+
+    void move_in(uint offset, KeyT &key, ValueT &value)
+    {
+      BLI_assert(m_status[offset] != IS_SET);
+      m_status[offset] = IS_SET;
+      new (this->key(offset)) KeyT(std::move(key));
+      new (this->value(offset)) ValueT(std::move(value));
+    }
+
+    void set_dummy(uint offset)
+    {
+      BLI_assert(m_status[offset] == IS_SET);
+      m_status[offset] = IS_DUMMY;
+      destruct(this->key(offset));
+      destruct(this->value(offset));
+    }
+  };
+
+  using ArrayType = OpenAddressingArray<Item, 1, Allocator>;
+  ArrayType m_array;
+
+ public:
+  Map() = default;
+
+  /**
+   * Insert a new key-value pair into the map.
+   * Asserts that the key does not exist in the map yet.
+   */
+  void add_new(const KeyT &key, const ValueT &value)
+  {
+    BLI_assert(!this->contains(key));
+    this->ensure_can_add();
+
+    ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+      if (item.is_empty(offset)) {
+        item.copy_in(offset, key, value);
+        m_array.update__empty_to_set();
+        return;
+      }
+    }
+    ITER_SLOTS_END(offset);
+  }
+
+  /**
+   * Insert a new key-value-pair in the map if the key does not exist yet.
+   * Returns true when the pair was newly inserted, otherwise false.
+   */
+  bool add(const KeyT &key, const ValueT &value)
+  {
+    this->ensure_can_add();
+
+    ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+      if (item.is_empty(offset)) {
+        item.copy_in(offset, key, value);
+        m_array.update__empty_to_set();
+        return true;
+      }
+      else if (item.has_key(offset, key)) {
+        return false;
+      }
+    }
+    ITER_SLOTS_END(offset);
+  }
+
+  /**
+   * Remove the key from the map.
+   * Asserts that the key exists in the map.
+   */
+  void remove(const KeyT &key)
+  {
+    BLI_assert(this->contains(key));
+    ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+      if (item.has_key(offset, key)) {
+        item.set_dummy(offset);
+        m_array.update__set_to_dummy();
+        return;
+      }
+    }
+    ITER_SLOTS_END(offset);
+  }
+
+  /**
+   * Get the value for the given key and remove it from the map.
+   * Asserts when the

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list