[Bf-blender-cvs] [3b8743d] temp-ghash-experiments: Making hash storage optional.

Bastien Montagne noreply at git.blender.org
Fri Mar 6 14:06:42 CET 2015


Commit: 3b8743d790867b43267a3e527fd27e9a072ef46b
Author: Bastien Montagne
Date:   Fri Mar 6 14:00:29 2015 +0100
Branches: temp-ghash-experiments
https://developer.blender.org/rB3b8743d790867b43267a3e527fd27e9a072ef46b

Making hash storage optional.

This required quite a bit of work, but storing hashes for 'simple' very quick
hashing/comparison functions is stupid (loss of space, and even introduces a
very small slow down), while with e.g. strings (which comparison and hasing
is quite expansive), it gives huge speedup (twice quicker insertion, about
33% quicker on lookup).

This makes lower-level code a bit less clear, since we add one 'child type' of Entry
for each case (GHash/GSet, and storing hash or not), but this also has the benefit
of being stricter when accessing entries' members...

===================================================================

M	source/blender/blenkernel/intern/cdderivedmesh.c
M	source/blender/blenkernel/intern/treehash.c
M	source/blender/blenlib/BLI_ghash.h
M	source/blender/blenlib/intern/BLI_ghash.c
M	source/blender/bmesh/tools/bmesh_region_match.c
M	source/blender/editors/mesh/meshtools.c

===================================================================

diff --git a/source/blender/blenkernel/intern/cdderivedmesh.c b/source/blender/blenkernel/intern/cdderivedmesh.c
index 2554151..3cb728a 100644
--- a/source/blender/blenkernel/intern/cdderivedmesh.c
+++ b/source/blender/blenkernel/intern/cdderivedmesh.c
@@ -2568,7 +2568,7 @@ DerivedMesh *CDDM_merge_verts(DerivedMesh *dm, const int *vtargetmap, const int
 		/* This poly equality check is rather complex.   We use a BLI_ghash to speed it up with a first level check */
 		PolyKey *mpgh;
 		poly_keys = MEM_mallocN(sizeof(PolyKey) * totpoly, __func__);
-		poly_gset = BLI_gset_new_ex(poly_gset_hash_fn, poly_gset_compare_fn, __func__, totpoly);
+		poly_gset = BLI_gset_new_ex(poly_gset_hash_fn, poly_gset_compare_fn, __func__, totpoly, false);
 		/* Duplicates allowed because our compare function is not pure equality */
 		BLI_gset_flag_set(poly_gset, GHASH_FLAG_ALLOW_DUPES);
 
diff --git a/source/blender/blenkernel/intern/treehash.c b/source/blender/blenkernel/intern/treehash.c
index 866502c..7700c2f 100644
--- a/source/blender/blenkernel/intern/treehash.c
+++ b/source/blender/blenkernel/intern/treehash.c
@@ -110,7 +110,7 @@ static void fill_treehash(void *treehash, BLI_mempool *treestore)
 
 void *BKE_treehash_create_from_treestore(BLI_mempool *treestore)
 {
-	GHash *treehash = BLI_ghash_new_ex(tse_hash, tse_cmp, "treehash", BLI_mempool_count(treestore));
+	GHash *treehash = BLI_ghash_new_ex(tse_hash, tse_cmp, "treehash", BLI_mempool_count(treestore), false);
 	fill_treehash(treehash, treestore);
 	return treehash;
 }
diff --git a/source/blender/blenlib/BLI_ghash.h b/source/blender/blenlib/BLI_ghash.h
index 78f8aa3..89dfe98 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -63,7 +63,7 @@ enum {
 /* *** */
 
 GHash *BLI_ghash_new_ex(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info,
-                        const unsigned int nentries_reserve) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
+                        const unsigned int nentries_reserve, const bool store_hash) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
 GHash *BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
 GHash *BLI_ghash_copy(GHash *gh, GHashKeyCopyFP keycopyfp,
                       GHashValCopyFP valcopyfp) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
@@ -121,7 +121,7 @@ BLI_INLINE void  *BLI_ghashIterator_getValue(GHashIterator *ghi) ATTR_WARN_UNUSE
 BLI_INLINE void **BLI_ghashIterator_getValue_p(GHashIterator *ghi) ATTR_WARN_UNUSED_RESULT;
 BLI_INLINE bool   BLI_ghashIterator_done(GHashIterator *ghi) ATTR_WARN_UNUSED_RESULT;
 
-struct _gh_Entry { void *next; unsigned int hash; void *key, *val; };
+struct _gh_Entry { void *next, *key, *val; };
 BLI_INLINE void  *BLI_ghashIterator_getKey(GHashIterator *ghi)     { return  ((struct _gh_Entry *)ghi->curEntry)->key; }
 BLI_INLINE void  *BLI_ghashIterator_getValue(GHashIterator *ghi)   { return  ((struct _gh_Entry *)ghi->curEntry)->val; }
 BLI_INLINE void **BLI_ghashIterator_getValue_p(GHashIterator *ghi) { return &((struct _gh_Entry *)ghi->curEntry)->val; }
@@ -234,7 +234,7 @@ typedef struct GSetIterator {
 } GSetIterator;
 
 GSet  *BLI_gset_new_ex(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info,
-                       const unsigned int nentries_reserve) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
+                       const unsigned int nentries_reserve, const bool store_hash) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
 GSet  *BLI_gset_new(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
 GSet  *BLI_gset_copy(GSet *gs, GSetKeyCopyFP keycopyfp) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
 int    BLI_gset_size(GSet *gs) ATTR_WARN_UNUSED_RESULT;
diff --git a/source/blender/blenlib/intern/BLI_ghash.c b/source/blender/blenlib/intern/BLI_ghash.c
index 4963805..ff5c20f 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -49,6 +49,7 @@
 #include "BLI_strict_flags.h"
 
 #define GHASH_USE_MODULO_BUCKETS
+#define GHASH_STORE_HASH store_hash  /* For testing only (allows forcing true/false), will be removed before master! */
 
 /* Also used by smallhash! */
 const unsigned int hashsizes[] = {
@@ -65,16 +66,6 @@ const unsigned int hashsizes[] = {
 #  define GHASH_BUCKET_BIT_MAX 28  /* About 268M of buckets... */
 #endif
 
-/* internal flag to ensure sets values aren't used */
-#ifndef NDEBUG
-#  define GHASH_FLAG_IS_SET (1 << 8)
-#  define IS_GHASH_ASSERT(gh) BLI_assert((gh->flag & GHASH_FLAG_IS_SET) == 0)
-// #  define IS_GSET_ASSERT(gs) BLI_assert((gs->flag & GHASH_FLAG_IS_SET) != 0)
-#else
-#  define IS_GHASH_ASSERT(gh)
-// #  define IS_GSET_ASSERT(eh)
-#endif
-
 #define GHASH_LIMIT_GROW(_nbkt) ((_nbkt) * 3) / 4
 #define GHASH_LIMIT_SHRINK(_nbkt) ((_nbkt) * 3) / 16
 
@@ -84,24 +75,34 @@ const unsigned int hashsizes[] = {
 typedef struct Entry {
 	struct Entry *next;
 
-	unsigned int hash;
 	void *key;
-	void *val;  /* This pointer ***must*** remain the last one, since it is 'virtually removed' for gset. */
 } Entry;
 
-#define GHASH_ENTRY_SIZE sizeof(Entry)
-#define GSET_ENTRY_SIZE sizeof(Entry) - sizeof(void *)
+typedef struct GHashEntry {
+	Entry e;
 
-BLI_INLINE void entry_copy(
-        Entry *dst, Entry *src, const size_t entry_size, GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp)
-{
-	const size_t start = offsetof(Entry, hash);
-	const size_t size = entry_size - start;
+	void *val;
+} GHashEntry;
 
-	memcpy(((char *)dst) + start, ((char *)src) + start, size);
-	if (keycopyfp) dst->key = keycopyfp(src->key);
-	if (valcopyfp) dst->val = valcopyfp(src->val);
-}
+typedef struct GHashEntryHash {
+	Entry e;
+
+	void *val;
+	unsigned int hash;  /* Order is very important here! */
+} GHashEntryHash;
+
+typedef Entry GSetEntry;
+
+typedef struct GSetEntryHash {
+	Entry e;
+
+	unsigned int hash;
+} GSetEntryHash;
+
+#define GHASH_ENTRY_SIZE(_is_gset, _use_hash) \
+	(_is_gset) ? \
+	((_use_hash) ? sizeof(GSetEntryHash) : sizeof(GSetEntry)) : \
+	((_use_hash) ? sizeof(GHashEntryHash) : sizeof(GHashEntry))
 
 struct GHash {
 	GHashHashFP hashfp;
@@ -118,12 +119,40 @@ struct GHash {
 #endif
 
 	unsigned int nentries;
-	unsigned int flag;
-
-	/* Entries metadata */
-	size_t entry_size;
+	unsigned short flag;
+	bool is_gset;
+	bool use_store_hash;
 };
 
+
+BLI_INLINE void entry_copy(
+        GHash *gh_dst, Entry *dst, GHash *gh_src, Entry *src,
+        const unsigned int hash, GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp)
+{
+	const bool is_gset_dst = gh_dst->is_gset;
+	const bool is_gset_src = gh_src->is_gset;
+
+	dst->key = (keycopyfp) ? keycopyfp(src->key) : src->key;
+
+	if (gh_dst->use_store_hash) {
+		if (is_gset_dst) {
+			((GSetEntryHash *)dst)->hash = hash;
+		}
+		else {
+			((GHashEntryHash *)dst)->hash = hash;
+		}
+	}
+
+	if (!is_gset_dst) {
+		if (is_gset_src) {
+			((GHashEntry *)dst)->val = NULL;
+		}
+		else {
+			((GHashEntry *)dst)->val = (valcopyfp) ? valcopyfp(((GHashEntry *)src)->val) : ((GHashEntry *)src)->val;
+		}
+	}
+}
+
 /* -------------------------------------------------------------------- */
 /* GHash API */
 
@@ -139,6 +168,14 @@ BLI_INLINE unsigned int ghash_keyhash(GHash *gh, const void *key)
 }
 
 /**
+ * Get the full hash for an entry.
+ */
+BLI_INLINE unsigned int ghash_entryhash(GHash *gh, const Entry *e)
+{
+	return gh->use_store_hash ? (gh->is_gset ? ((GSetEntryHash *)e)->hash : ((GHashEntryHash *)e)->hash) : gh->hashfp(e->key);
+}
+
+/**
  * Get the bucket-hash for an already-computed full hash.
  */
 BLI_INLINE unsigned int ghash_bucket_hash(GHash *gh, const unsigned int full_hash)
@@ -177,7 +214,8 @@ BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets)
 			for (i = 0; i < nbuckets_old; i++) {
 				Entry *e_next;
 				for (e = buckets_old[i]; e; e = e_next) {
-					const unsigned bucket_hash = ghash_bucket_hash(gh, e->hash);
+					const unsigned hash = ghash_entryhash(gh, e);
+					const unsigned bucket_hash = ghash_bucket_hash(gh, hash);
 					e_next = e->next;
 					e->next = buckets_new[bucket_hash];
 					buckets_new[bucket_hash] = e;
@@ -189,7 +227,8 @@ BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets)
 #ifdef GHASH_USE_MODULO_BUCKETS
 				Entry *e_next;
 				for (e = buckets_old[i]; e; e = e_next) {
-					const unsigned bucket_hash = ghash_bucket_hash(gh, e->hash);
+					const unsigned hash = ghash_entryhash(gh, e);
+					const unsigned bucket_hash = ghash_bucket_hash(gh, hash);
 					e_next = e->next;
 					e->next = buckets_new[bucket_hash];
 					buckets_new[bucket_hash] = e;
@@ -198,7 +237,7 @@ BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets)
 				/* No need to recompute hashes in this case, since our mask is just smaller, all items in old bucket i
 				 * will go in same new bucket (i & new_mask)! */
 				const unsigned bucket_hash = ghash_bucket_hash(gh, i);
-				BLI_assert(bucket_hash == ghash_bucket_hash(gh, e->hash));
+				BLI_assert(!buckets_old[i] || (bucket_hash == ghash_bucket_hash(gh, ghash_entryhash(gh, buckets_old[i]))));
 				for (e = buckets_old[i]; e && e->next; e = e->next);
 				if (e) {
 					e->next = buckets_new[bucket_hash];
@@ -312,11 +351,79 @@ BLI_INLINE void ghash_buckets_reset(GHash *gh, const unsigned int nentries)
 BLI_INLINE Entry *ghash_lookup_entry_ex(
         GHash *gh, const void *key, const unsigned int hash, const unsigned int bucket_hash)
 {
-	Entry *e;
+	if (gh->use_store_hash) {
+		if (gh->is_gset) {
+			GSetEntryHash *e;
+			for (e = (GSetEntryHash *)gh->buckets[bucket_hash]; e; e = (GSetEntryHash *)e->e.next) {
+				if (UNLIKELY(e->hash == hash)) {
+					if (LIKELY(gh->cmpfp(key, e->e.key) == false)) {
+						return (Entry *)e;
+					}
+				}
+			}
+		}
+		else {
+			GHashEntryHash *e;
+			for (e = (GHashEntryHash *)gh->buckets[bucket_hash]; e; e = (GHashEntryHash *)e->e.next) {
+				if (UNLIKELY(e->hash == hash)) {
+					if (LIKELY(gh->cmpfp(key, e->e.key) == false)) {
+						return (Entry *)e;
+					}
+				}
+			}
+		}
+	}
+	else {
+		Entry *e;
+		/* If we do not store GHash, not worth computing it for each entry here!
+		 * Typically, comparison function will be quicker, and since it's needed in the end anyway.

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list