[Bf-blender-cvs] [53b60ee] master: Add BLI_array_store copy-on-write API

Campbell Barton noreply at git.blender.org
Mon May 30 08:17:21 CEST 2016


Commit: 53b60eed45098cdb5e0a6f4a05e242f5d6524635
Author: Campbell Barton
Date:   Mon May 30 15:25:36 2016 +1000
Branches: master
https://developer.blender.org/rB53b60eed45098cdb5e0a6f4a05e242f5d6524635

Add BLI_array_store copy-on-write API

This supported in-memory de-duplication,
useful to avoid in-efficient memory use when storing multiple, similar arrays.

===================================================================

A	source/blender/blenlib/BLI_array_store.h
M	source/blender/blenlib/CMakeLists.txt
A	source/blender/blenlib/intern/array_store.c

===================================================================

diff --git a/source/blender/blenlib/BLI_array_store.h b/source/blender/blenlib/BLI_array_store.h
new file mode 100644
index 0000000..f4cbc07
--- /dev/null
+++ b/source/blender/blenlib/BLI_array_store.h
@@ -0,0 +1,66 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef __BLI_ARRAY_STORE_H__
+#define __BLI_ARRAY_STORE_H__
+
+/** \file BLI_array_store.h
+ *  \ingroup bli
+ *  \brief Efficient in-memory storage of multiple similar arrays.
+ */
+
+typedef struct BArrayStore BArrayStore;
+typedef struct BArrayState BArrayState;
+
+BArrayStore *BLI_array_store_create(
+        unsigned int stride, unsigned int chunk_count);
+void BLI_array_store_destroy(
+        BArrayStore *bs);
+void BLI_array_store_clear(
+        BArrayStore *bs);
+
+/* find the memory used by all states (expanded & real) */
+size_t BLI_array_store_calc_size_expanded_get(
+        const BArrayStore *bs);
+size_t BLI_array_store_calc_size_compacted_get(
+        const BArrayStore *bs);
+
+BArrayState *BLI_array_store_state_add(
+        BArrayStore *bs,
+        const void *data, const size_t data_len,
+        const BArrayState *state_reference);
+void BLI_array_store_state_remove(
+        BArrayStore *bs,
+        BArrayState *state);
+
+size_t BLI_array_store_state_size_get(
+        BArrayState *state);
+void BLI_array_store_state_data_get(
+        BArrayState *state,
+        void *data);
+void *BLI_array_store_state_data_get_alloc(
+        BArrayState *state,
+        size_t *r_data_len);
+
+/* only for tests */
+bool BLI_array_store_is_valid(
+        BArrayStore *bs);
+
+#endif  /* __BLI_ARRAY_STORE_H__ */
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 944ba60..42d9587 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -52,6 +52,7 @@ set(SRC
 	intern/BLI_memarena.c
 	intern/BLI_mempool.c
 	intern/DLRB_tree.c
+	intern/array_store.c
 	intern/array_utils.c
 	intern/astar.c
 	intern/boxpack2d.c
@@ -120,6 +121,7 @@ set(SRC
 	BLI_alloca.h
 	BLI_args.h
 	BLI_array.h
+	BLI_array_store.h
 	BLI_array_utils.h
 	BLI_astar.h
 	BLI_bitmap.h
diff --git a/source/blender/blenlib/intern/array_store.c b/source/blender/blenlib/intern/array_store.c
new file mode 100644
index 0000000..8cbabdd
--- /dev/null
+++ b/source/blender/blenlib/intern/array_store.c
@@ -0,0 +1,1731 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/blenlib/intern/array_store.c
+ *  \ingroup bli
+ *  \brief Array storage to minimize duplication.
+ *
+ * This is done by splitting arrays into chunks and using copy-on-write (COW),
+ * to de-duplicate chunks,
+ * from the users perspective this is an implementation detail.
+ *
+ *
+ * Overview
+ * ========
+ *
+ *
+ * Data Structure
+ * --------------
+ *
+ * This diagram is an overview of the structure of a single array-store.
+ *
+ * \note The only 2 structues here which are referenced externally are the.
+ *
+ * - BArrayStore: The whole array store.
+ * - BArrayState: Represents a single state (array) of data.
+ *   These can be add using a reference state, while this could be considered the previous or parent state.
+ *   no relationship is kept, so the caller is free to add any state from the same BArrayStore as a reference.
+ *
+ * <pre>
+ * <+> BArrayStore: root data-structure,
+ *  |  can store many 'states', which share memory.
+ *  |
+ *  |  This can store many arrays, however they must share the same 'stride'.
+ *  |  Arrays of different types will need to use a new BArrayStore.
+ *  |
+ *  +- <+> states (Collection of BArrayState's):
+ *  |   |  Each represents an array added by the user of this API.
+ *  |   |  and references a chunk_list (each state is a chunk_list user).
+ *  |   |  Note that the list order has no significance.
+ *  |   |
+ *  |   +- <+> chunk_list (BChunkList):
+ *  |       |  The chunks that make up this state.
+ *  |       |  Each state is a chunk_list user,
+ *  |       |  avoids duplicating lists when there is no change between states.
+ *  |       |
+ *  |       +- chunk_refs (List of BChunkRef): Each chunk_ref links to a a BChunk.
+ *  |          Each reference is a chunk user,
+ *  |          avoids duplicating smaller chunks of memory found in multiple states.
+ *  |
+ *  +- info (BArrayInfo):
+ *  |  Sizes and offsets for this array-store.
+ *  |  Also caches some variables for reuse.
+ *  |
+ *  +- <+> memory (BArrayMemory):
+ *      |  Memory pools for storing BArrayStore data.
+ *      |
+ *      +- chunk_list (Pool of BChunkList):
+ *      |  All chunk_lists, (reference counted, used by BArrayState).
+ *      |
+ *      +- chunk_ref (Pool of BChunkRef):
+ *      |  All chunk_refs (link between BChunkList & BChunk).
+ *      |
+ *      +- chunks (Pool of BChunk):
+ *         All chunks, (reference counted, used by BChunkList).
+ *         These have their headers hashed for reuse so we can quickly check for duplicates.
+ * </pre>
+ *
+ *
+ * De-Duplication
+ * --------------
+ *
+ * When creating a new state, a previous state can be given as a reference,
+ * matching chunks from this state are re-used in the new state.
+ *
+ * First matches at either end of the array are detected.
+ * For identical arrays this is all thats needed.
+ *
+ * De-duplication is performed on any remaining chunks, by hasing the first few bytes of the chunk
+ * (see: BCHUNK_HASH_TABLE_ACCUMULATE_STEPS).
+ *
+ * \note This is cached for reuse since the referenced data never changes.
+ *
+ * An array is created to store hash values at every 'stride',
+ * then stepped over to search for matching chunks.
+ *
+ * Once a match is found, there is a high chance next chunks match too,
+ * so this is checked to avoid performing so many hash-lookups.
+ * Otherwise new chunks are created.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_listbase.h"
+#include "BLI_mempool.h"
+
+#include "BLI_strict_flags.h"
+
+#include "BLI_array_store.h"  /* own include */
+
+/* only for BLI_array_store_is_valid */
+#include "BLI_ghash.h"
+
+/** \name Defines
+ *
+ * Some of the logic for merging is quite involved,
+ * support disabling some parts of this.
+ * \{ */
+
+/* Scan first chunks (happy path when beginning of the array matches).
+ * When the array is a perfect match, we can re-use the entire list.
+ *
+ * Note that disabling makes some tests fail that check for output-size.
+ */
+#define USE_FASTPATH_CHUNKS_FIRST
+
+/* Scan last chunks (happy path when end of the array matches).
+ * When the end of the array matches, we can quickly add these chunks.
+ * note that we will add contiguous matching chunks
+ * so this isn't as useful as USE_FASTPATH_CHUNKS_FIRST,
+ * however it avoids adding matching chunks into the lookup table,
+ * so creating the lookup table won't be as expensive.
+ */
+#ifdef USE_FASTPATH_CHUNKS_FIRST
+#  define USE_FASTPATH_CHUNKS_LAST
+#endif
+
+/* For arrays of matching length, test that *enough* of the chunks are aligned,
+ * and simply step over both arrays, using matching chunks.
+ * This avoids overhead of using a lookup table for cases when we can assume they're mostly aligned.
+ */
+#define USE_ALIGN_CHUNKS_TEST
+
+/* Accumulate hashes from right to left so we can create a hash for the chunk-start.
+ * This serves to increase uniqueness and will help when there is many values which are the same.
+ */
+#define USE_HASH_TABLE_ACCUMULATE
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+/* Number of times to propagate hashes back.
+ * Effectively a 'triangle-number'.
+ * so 4 -> 7, 5 -> 10, 6 -> 15... etc.
+ */
+#  define BCHUNK_HASH_TABLE_ACCUMULATE_STEPS 4
+#else
+/* How many items to hash (multiplied by stride)
+ */
+#  define BCHUNK_HASH_LEN 4
+#endif
+
+/* Calculate the key once and reuse it
+ */
+#define USE_HASH_TABLE_KEY_CACHE
+#ifdef USE_HASH_TABLE_KEY_CACHE
+#  define HASH_TABLE_KEY_UNSET    ((uint64_t)-1)
+#  define HASH_TABLE_KEY_FALLBACK ((uint64_t)-2)
+#endif
+
+/* How much smaller the table is then the total number of steps.
+ */
+#define BCHUNK_HASH_TABLE_DIV 16
+
+/* Merge too small/large chunks:
+ *
+ * Using this means chunks below a threshold will be merged together.
+ * Even though short term this uses more memory,
+ * long term the overhead of maintaining many small chunks is reduced.
+ * This is defined by setting the minimum chunk size (as a fraction of the regular chunk size).
+ *
+ * Chunks may also become too large (when incrementally growing an array),
+ * this also enables chunk splitting.
+ */
+#define USE_MERGE_CHUNKS
+
+#ifdef USE_MERGE_CHUNKS
+/* Merge chunks smaller then: (chunk_size / BCHUNK_MIN_SIZE_DIV)
+ */
+#  define BCHUNK_S

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list