[Bf-blender-cvs] [42c58fe8408] functions: support allocating larger buffers with temporary allocator
Jacques Lucke
noreply at git.blender.org
Wed Aug 28 18:01:25 CEST 2019
Commit: 42c58fe8408d961271a670b05b4017d2734ac674
Author: Jacques Lucke
Date: Wed Aug 28 16:29:30 2019 +0200
Branches: functions
https://developer.blender.org/rB42c58fe8408d961271a670b05b4017d2734ac674
support allocating larger buffers with temporary allocator
===================================================================
M source/blender/blenlib/BLI_temporary_allocator.h
M source/blender/blenlib/intern/BLI_temporary_allocator.cpp
===================================================================
diff --git a/source/blender/blenlib/BLI_temporary_allocator.h b/source/blender/blenlib/BLI_temporary_allocator.h
index 100304aba3f..b378e5869c0 100644
--- a/source/blender/blenlib/BLI_temporary_allocator.h
+++ b/source/blender/blenlib/BLI_temporary_allocator.h
@@ -17,19 +17,30 @@
/** \file
* \ingroup bli
*
- * This allocation method should be used when a chunk of memory is only used for a short amount
- * of time. This makes it possible to cache potentially large buffers for reuse, without the fear
- * of running out of memory because too many large buffers are allocated.
+ * This allocation method assumes
+ * 1. The allocations are short-lived.
+ * 2. The total number of allocations is bound by a constant per thread.
+ *
+ * These two assumptions make it possible to cache and reuse relatively large buffers. They allow
+ * to hand out buffers that are much larger than the requested size, without the fear of running
+ * out of memory.
+ *
+ * The assumptions might feel a bit limiting at first, but hold true in many cases. For example,
+ * many algorithms need to store temporary data. With this allocator, the allocation can become
+ * very cheap for common cases.
*
* Many cpu-bound algorithms can benefit from being split up into several stages, whereby the
- * output of one stage is written into an array that is read by the next stage. This improves
- * debugability as well as profilability. Often a reason this is not done is that the memory
+ * output of one stage is written into an array that is read by the next stage. This makes them
+ * easier to debug, profile and optimize. Often a reason this is not done is that the memory
* allocation might be expensive. The goal of this allocator is to make this a non-issue, by
* reusing the same long buffers over and over again.
*
- * The number of allocated buffers should stay in O(number of threads * max depth of stack trace).
- * Since these numbers are pretty much constant in Blender, the number of chunks allocated should
- * not increase over time.
+ * All allocated buffers are 64 byte aligned, to make them as reusable as possible.
+ * If the requested size is too large, there is a fallback to normal allocation. The allocation
+ * overhead is probably very small in these cases anyway.
+ *
+ * The best way to use this allocator is to use one of the prepared containers like TemporaryVector
+ * and TemporaryArray.
*/
#ifndef __BLI_TEMPORARY_ALLOCATOR_H__
@@ -41,6 +52,8 @@
extern "C" {
#endif
+#define BLI_TEMPORARY_BUFFER_ALIGNMENT 64
+
void *BLI_temporary_allocate(uint size);
void BLI_temporary_deallocate(void *buffer);
diff --git a/source/blender/blenlib/intern/BLI_temporary_allocator.cpp b/source/blender/blenlib/intern/BLI_temporary_allocator.cpp
index 68025e00759..03260b95255 100644
--- a/source/blender/blenlib/intern/BLI_temporary_allocator.cpp
+++ b/source/blender/blenlib/intern/BLI_temporary_allocator.cpp
@@ -22,7 +22,9 @@
using namespace BLI;
+constexpr uint ALIGNMENT = BLI_TEMPORARY_BUFFER_ALIGNMENT;
constexpr uint SMALL_BUFFER_SIZE = 64 * 1024;
+constexpr uintptr_t ALIGNMENT_MASK = ~(uintptr_t)(ALIGNMENT - 1);
struct ThreadLocalBuffers {
Stack<void *, 32, RawAllocator> buffers;
@@ -37,23 +39,71 @@ struct ThreadLocalBuffers {
thread_local ThreadLocalBuffers local_storage;
-void *BLI_temporary_allocate(uint size)
+enum TemporaryBufferType {
+ Small,
+ Large,
+};
+
+struct MemHead {
+ void *raw_ptr;
+ TemporaryBufferType type;
+};
+
+static MemHead &get_memhead(void *aligned_ptr)
+{
+ return *((MemHead *)aligned_ptr - 1);
+}
+
+static void *raw_allocate(uint size)
{
- BLI_assert(size <= SMALL_BUFFER_SIZE);
- UNUSED_VARS_NDEBUG(size);
+ uint total_allocation_size = size + ALIGNMENT + sizeof(MemHead);
- auto &buffers = local_storage.buffers;
- if (buffers.empty()) {
- void *ptr = RawAllocator().allocate_aligned(SMALL_BUFFER_SIZE, 64, __func__);
- return ptr;
+ uintptr_t raw_ptr = (uintptr_t)malloc(total_allocation_size);
+ uintptr_t aligned_ptr = (raw_ptr + ALIGNMENT + sizeof(MemHead)) & ALIGNMENT_MASK;
+
+ MemHead &memhead = get_memhead((void *)aligned_ptr);
+ memhead.raw_ptr = (void *)raw_ptr;
+ return (void *)aligned_ptr;
+}
+
+static void raw_deallocate(void *ptr)
+{
+ BLI_assert(((uintptr_t)ptr & ~ALIGNMENT_MASK) == 0);
+ MemHead &memhead = get_memhead(ptr);
+ void *raw_ptr = memhead.raw_ptr;
+ free(raw_ptr);
+}
+
+void *BLI_temporary_allocate(uint size)
+{
+ if (size <= SMALL_BUFFER_SIZE) {
+ auto &buffers = local_storage.buffers;
+ if (buffers.empty()) {
+ void *ptr = raw_allocate(SMALL_BUFFER_SIZE);
+ MemHead &memhead = get_memhead(ptr);
+ memhead.type = TemporaryBufferType::Small;
+ return ptr;
+ }
+ else {
+ return buffers.pop();
+ }
}
else {
- return buffers.pop();
+ void *ptr = raw_allocate(size);
+ MemHead &memhead = get_memhead(ptr);
+ memhead.type = TemporaryBufferType::Large;
+ return ptr;
}
}
void BLI_temporary_deallocate(void *buffer)
{
- auto &buffers = local_storage.buffers;
- buffers.push(buffer);
+ MemHead &memhead = get_memhead(buffer);
+ if (memhead.type == TemporaryBufferType::Small) {
+ auto &buffers = local_storage.buffers;
+ buffers.push(buffer);
+ }
+ else {
+ raw_deallocate(buffer);
+ }
}
More information about the Bf-blender-cvs
mailing list