[Bf-blender-cvs] [42c58fe8408] functions: support allocating larger buffers with temporary allocator

Jacques Lucke noreply at git.blender.org
Wed Aug 28 18:01:25 CEST 2019


Commit: 42c58fe8408d961271a670b05b4017d2734ac674
Author: Jacques Lucke
Date:   Wed Aug 28 16:29:30 2019 +0200
Branches: functions
https://developer.blender.org/rB42c58fe8408d961271a670b05b4017d2734ac674

support allocating larger buffers with temporary allocator

===================================================================

M	source/blender/blenlib/BLI_temporary_allocator.h
M	source/blender/blenlib/intern/BLI_temporary_allocator.cpp

===================================================================

diff --git a/source/blender/blenlib/BLI_temporary_allocator.h b/source/blender/blenlib/BLI_temporary_allocator.h
index 100304aba3f..b378e5869c0 100644
--- a/source/blender/blenlib/BLI_temporary_allocator.h
+++ b/source/blender/blenlib/BLI_temporary_allocator.h
@@ -17,19 +17,30 @@
 /** \file
  * \ingroup bli
  *
- * This allocation method should be used when a chunk of memory is only used for a short amount
- * of time. This makes it possible to cache potentially large buffers for reuse, without the fear
- * of running out of memory because too many large buffers are allocated.
+ * This allocation method assumes that:
+ *   1. The allocations are short-lived.
+ *   2. The total number of allocations is bounded by a constant per thread.
+ *
+ * These two assumptions make it possible to cache and reuse relatively large buffers. They allow
+ * handing out buffers that are much larger than the requested size, without the fear of running
+ * out of memory.
+ *
+ * The assumptions might feel a bit limiting at first, but they hold true in many cases. For
+ * example, many algorithms need to store temporary data. With this allocator, those allocations
+ * become very cheap in the common case.
  *
  * Many cpu-bound algorithms can benefit from being split up into several stages, whereby the
- * output of one stage is written into an array that is read by the next stage. This improves
- * debugability as well as profilability. Often a reason this is not done is that the memory
+ * output of one stage is written into an array that is read by the next stage. This makes them
+ * easier to debug, profile and optimize. A common reason this is not done is that the memory
  * allocation might be expensive. The goal of this allocator is to make this a non-issue, by
  * reusing the same long buffers over and over again.
  *
- * The number of allocated buffers should stay in O(number of threads * max depth of stack trace).
- * Since these numbers are pretty much constant in Blender, the number of chunks allocated should
- * not increase over time.
+ * All allocated buffers are 64-byte aligned, to make them as reusable as possible.
+ * If the requested size is too large, there is a fallback to normal allocation. The relative
+ * allocation overhead is small in these cases anyway.
+ *
+ * The best way to use this allocator is through one of the prepared containers such as
+ * TemporaryVector and TemporaryArray.
  */
 
 #ifndef __BLI_TEMPORARY_ALLOCATOR_H__
@@ -41,6 +52,8 @@
 extern "C" {
 #endif
 
+#define BLI_TEMPORARY_BUFFER_ALIGNMENT 64
+
 void *BLI_temporary_allocate(uint size);
 void BLI_temporary_deallocate(void *buffer);
 
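For readers of the list, here is a minimal usage sketch of the C API declared above. It is not part of this commit: the function name and the float workload are made up, and the TemporaryVector/TemporaryArray containers mentioned in the header comment are the preferred interface but are not touched by this patch. It assumes Blender's usual `uint` typedef is available.

#include "BLI_temporary_allocator.h"

/* Hypothetical algorithm stage that needs short-lived scratch memory. */
static void compute_stage(uint amount)
{
  /* The returned buffer is 64-byte aligned and may be much larger than requested;
   * small requests are typically served from a reused, thread-locally cached chunk. */
  float *scratch = (float *)BLI_temporary_allocate((uint)(amount * sizeof(float)));

  /* ... write this stage's output into `scratch`, let the next stage read it ... */

  /* Returning the buffer promptly keeps it available for reuse on this thread. */
  BLI_temporary_deallocate(scratch);
}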
diff --git a/source/blender/blenlib/intern/BLI_temporary_allocator.cpp b/source/blender/blenlib/intern/BLI_temporary_allocator.cpp
index 68025e00759..03260b95255 100644
--- a/source/blender/blenlib/intern/BLI_temporary_allocator.cpp
+++ b/source/blender/blenlib/intern/BLI_temporary_allocator.cpp
@@ -22,7 +22,9 @@
 
 using namespace BLI;
 
+constexpr uint ALIGNMENT = BLI_TEMPORARY_BUFFER_ALIGNMENT;
 constexpr uint SMALL_BUFFER_SIZE = 64 * 1024;
+constexpr uintptr_t ALIGNMENT_MASK = ~(uintptr_t)(ALIGNMENT - 1);
 
 struct ThreadLocalBuffers {
   Stack<void *, 32, RawAllocator> buffers;
@@ -37,23 +39,71 @@ struct ThreadLocalBuffers {
 
 thread_local ThreadLocalBuffers local_storage;
 
-void *BLI_temporary_allocate(uint size)
+enum TemporaryBufferType {
+  Small,
+  Large,
+};
+
+struct MemHead {
+  void *raw_ptr;
+  TemporaryBufferType type;
+};
+
+static MemHead &get_memhead(void *aligned_ptr)
+{
+  return *((MemHead *)aligned_ptr - 1);
+}
+
+static void *raw_allocate(uint size)
 {
-  BLI_assert(size <= SMALL_BUFFER_SIZE);
-  UNUSED_VARS_NDEBUG(size);
+  uint total_allocation_size = size + ALIGNMENT + sizeof(MemHead);
 
-  auto &buffers = local_storage.buffers;
-  if (buffers.empty()) {
-    void *ptr = RawAllocator().allocate_aligned(SMALL_BUFFER_SIZE, 64, __func__);
-    return ptr;
+  uintptr_t raw_ptr = (uintptr_t)malloc(total_allocation_size);
+  uintptr_t aligned_ptr = (raw_ptr + ALIGNMENT + sizeof(MemHead)) & ALIGNMENT_MASK;
+
+  MemHead &memhead = get_memhead((void *)aligned_ptr);
+  memhead.raw_ptr = (void *)raw_ptr;
+  return (void *)aligned_ptr;
+}
+
+static void raw_deallocate(void *ptr)
+{
+  BLI_assert(((uintptr_t)ptr & ~ALIGNMENT_MASK) == 0);
+  MemHead &memhead = get_memhead(ptr);
+  void *raw_ptr = memhead.raw_ptr;
+  free(raw_ptr);
+}
+
+void *BLI_temporary_allocate(uint size)
+{
+  if (size <= SMALL_BUFFER_SIZE) {
+    auto &buffers = local_storage.buffers;
+    if (buffers.empty()) {
+      void *ptr = raw_allocate(SMALL_BUFFER_SIZE);
+      MemHead &memhead = get_memhead(ptr);
+      memhead.type = TemporaryBufferType::Small;
+      return ptr;
+    }
+    else {
+      return buffers.pop();
+    }
   }
   else {
-    return buffers.pop();
+    void *ptr = raw_allocate(size);
+    MemHead &memhead = get_memhead(ptr);
+    memhead.type = TemporaryBufferType::Large;
+    return ptr;
   }
 }
 
 void BLI_temporary_deallocate(void *buffer)
 {
-  auto &buffers = local_storage.buffers;
-  buffers.push(buffer);
+  MemHead &memhead = get_memhead(buffer);
+  if (memhead.type == TemporaryBufferType::Small) {
+    auto &buffers = local_storage.buffers;
+    buffers.push(buffer);
+  }
+  else {
+    raw_deallocate(buffer);
+  }
 }
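As a side note, the over-allocation and rounding in raw_allocate always leaves room for the MemHead directly in front of the aligned pointer. Below is a small standalone sketch, not Blender code; the names and the plain `int type` field are stand-ins for the ones in the patch. It mirrors the pointer arithmetic and asserts the invariants it relies on.

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>

constexpr unsigned ALIGNMENT = 64;
constexpr uintptr_t ALIGNMENT_MASK = ~(uintptr_t)(ALIGNMENT - 1);

struct MemHead {
  void *raw_ptr;
  int type; /* stand-in for TemporaryBufferType */
};

static void *sketch_allocate(unsigned size)
{
  /* Over-allocate so that rounding down to a 64-byte boundary still leaves
   * room for a MemHead directly in front of the aligned pointer. */
  unsigned total = (unsigned)(size + ALIGNMENT + sizeof(MemHead));
  uintptr_t raw_ptr = (uintptr_t)malloc(total);
  uintptr_t aligned_ptr = (raw_ptr + ALIGNMENT + sizeof(MemHead)) & ALIGNMENT_MASK;

  /* Rounding down loses at most ALIGNMENT - 1 bytes, so at least
   * sizeof(MemHead) + 1 bytes remain before aligned_ptr ... */
  assert(aligned_ptr - raw_ptr >= sizeof(MemHead));
  /* ... and the full requested size still fits behind it. */
  assert(aligned_ptr + size <= raw_ptr + total);

  MemHead *memhead = (MemHead *)aligned_ptr - 1;
  memhead->raw_ptr = (void *)raw_ptr;
  return (void *)aligned_ptr;
}

static void sketch_deallocate(void *ptr)
{
  /* The MemHead sits directly before the pointer handed out to the caller. */
  MemHead *memhead = (MemHead *)ptr - 1;
  free(memhead->raw_ptr);
}

int main()
{
  void *ptr = sketch_allocate(1000);
  assert(((uintptr_t)ptr % ALIGNMENT) == 0);
  memset(ptr, 0, 1000); /* the entire requested size is writable */
  sketch_deallocate(ptr);
  return 0;
}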



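The practical effect is that only the first small temporary allocation on a thread hits malloc; later allocate/deallocate pairs of up to SMALL_BUFFER_SIZE bytes reuse the cached chunk, while larger requests fall back to a plain allocation and are freed immediately. Roughly (illustrative, not from the patch):

void *a = BLI_temporary_allocate(100);     /* first call on this thread: allocates a full SMALL_BUFFER_SIZE chunk */
BLI_temporary_deallocate(a);               /* Small type: pushed onto the thread-local stack */
void *b = BLI_temporary_allocate(4096);    /* pops and reuses the cached chunk, no malloc */
BLI_temporary_deallocate(b);

void *c = BLI_temporary_allocate(1 << 20); /* > SMALL_BUFFER_SIZE: Large type, separate allocation */
BLI_temporary_deallocate(c);               /* Large type: freed right away */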