[Bf-blender-cvs] [2605675] temp-ghash-experiments: Initial GHash-enhancements experiments.

Bastien Montagne noreply at git.blender.org
Sat Dec 13 18:55:40 CET 2014


Commit: 2605675cbdf0992cf55ef9493e3f92f573f0a2fa
Author: Bastien Montagne
Date:   Sat Dec 13 18:49:19 2014 +0100
Branches: temp-ghash-experiments
https://developer.blender.org/rB2605675cbdf0992cf55ef9493e3f92f573f0a2fa

Initial GHash-enhancements experiments.

This commit adds:
*Some Murmur-based hasing helpers for GHash;
*A (very basic) switch between key-to-bucketidx using modulo or binary-AND;
*Some gtests to make some comparisons.

First test results seem to show that:
* murmur is much much quicker for string keys, with approximately same hash quality;
* org ghash is much much quicker for int keys, but with much worse hash quality than murmur-based one;
* key-to-bucketidx method does not seem to make much difference so far.

Nothing definitive here yet, obviously.

===================================================================

M	source/blender/blenlib/BLI_ghash.h
M	source/blender/blenlib/BLI_hash_mm2a.h
M	source/blender/blenlib/intern/BLI_ghash.c
M	source/blender/blenlib/intern/hash_mm2a.c
M	source/blender/makesdna/intern/CMakeLists.txt
A	tests/gtests/blenlib/BLI_ghash_test.cc
M	tests/gtests/blenlib/CMakeLists.txt

===================================================================

diff --git a/source/blender/blenlib/BLI_ghash.h b/source/blender/blenlib/BLI_ghash.h
index 8be19d0..4c984a0 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -128,6 +128,7 @@ unsigned int    BLI_ghashutil_strhash_n(const char *key, size_t n);
                 CHECK_TYPE_INLINE(key, char *), \
                 BLI_ghashutil_strhash_p(key))
 unsigned int    BLI_ghashutil_strhash_p(const void *key);
+unsigned int    BLI_ghashutil_strhash_p_murmur(const void *key);
 bool            BLI_ghashutil_strcmp(const void *a, const void *b);
 
 #define         BLI_ghashutil_inthash(key) ( \
@@ -140,10 +141,14 @@ unsigned int    BLI_ghashutil_uinthash(unsigned int key);
 unsigned int    BLI_ghashutil_uinthash_v4(const unsigned int key[4]);
 #define         BLI_ghashutil_inthash_v4_p \
    ((GSetHashFP)BLI_ghashutil_uinthash_v4)
+#define         BLI_ghashutil_uinthash_v4_p \
+   ((GSetHashFP)BLI_ghashutil_uinthash_v4)
+unsigned int    BLI_ghashutil_uinthash_v4_p_murmur(const void *ptr);
 bool            BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b);
 #define         BLI_ghashutil_inthash_v4_cmp \
                 BLI_ghashutil_uinthash_v4_cmp
 unsigned int    BLI_ghashutil_inthash_p(const void *ptr);
+unsigned int    BLI_ghashutil_inthash_p_murmur(const void *ptr);
 bool            BLI_ghashutil_intcmp(const void *a, const void *b);
 
 /** \} */
@@ -230,10 +235,10 @@ BLI_INLINE bool BLI_gsetIterator_done(GSetIterator *gsi) { return BLI_ghashItera
 	     BLI_gsetIterator_done(&gs_iter_) == false;                           \
 	     BLI_gsetIterator_step(&gs_iter_), i_++)
 
-#ifdef DEBUG
+//~ #ifdef DEBUG
 double BLI_ghash_calc_quality(GHash *gh);
 double BLI_gset_calc_quality(GSet *gs);
-#endif
+//~ #endif
 
 #ifdef __cplusplus
 }
diff --git a/source/blender/blenlib/BLI_hash_mm2a.h b/source/blender/blenlib/BLI_hash_mm2a.h
index 007dec4..6beaf50 100644
--- a/source/blender/blenlib/BLI_hash_mm2a.h
+++ b/source/blender/blenlib/BLI_hash_mm2a.h
@@ -42,4 +42,6 @@ void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data);
 
 uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2);
 
+uint32_t BLI_hash_mm2(const unsigned char *data, size_t len, uint32_t seed);
+
 #endif  /* __BLI_HASH_MM2A_H__ */
diff --git a/source/blender/blenlib/intern/BLI_ghash.c b/source/blender/blenlib/intern/BLI_ghash.c
index 6747e5c..f30d09d 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -41,17 +41,28 @@
 
 #include "BLI_sys_types.h"  /* for intptr_t support */
 #include "BLI_utildefines.h"
+#include "BLI_hash_mm2a.h"
 #include "BLI_mempool.h"
 #include "BLI_ghash.h"
 #include "BLI_strict_flags.h"
 
+//~ #define USE_MODULO_BUCKETS
 
+#ifdef USE_MODULO_BUCKETS
 const unsigned int hashsizes[] = {
 	5, 11, 17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209, 
 	16411, 32771, 65537, 131101, 262147, 524309, 1048583, 2097169, 
 	4194319, 8388617, 16777259, 33554467, 67108879, 134217757, 
 	268435459
 };
+#else
+const unsigned int hashsizes[] = {
+	3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191,
+	16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151,
+	4194303, 8388607, 16777215, 33554431, 67108863, 134217727,
+	268435455
+};
+#endif
 
 /* internal flag to ensure sets values aren't used */
 #ifndef NDEBUG
@@ -94,7 +105,11 @@ struct GHash {
  */
 BLI_INLINE unsigned int ghash_keyhash(GHash *gh, const void *key)
 {
+#ifdef USE_MODULO_BUCKETS
 	return gh->hashfp(key) % gh->nbuckets;
+#else
+	return gh->hashfp(key) & gh->nbuckets;
+#endif
 }
 
 /**
@@ -700,6 +715,12 @@ unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4])
 	hash += key[3];
 	return hash;
 }
+unsigned int BLI_ghashutil_uinthash_v4_p_murmur(const void *ptr)
+{
+	const unsigned int *key = ptr;
+
+	return BLI_hash_mm2((const unsigned char *)key, sizeof(*key) * 4, 0);
+}
 
 bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b)
 {
@@ -732,6 +753,13 @@ unsigned int BLI_ghashutil_inthash_p(const void *ptr)
 	return (unsigned int)(key & 0xffffffff);
 }
 
+unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr)
+{
+	const unsigned int *key = ptr;
+
+	return BLI_hash_mm2((const unsigned char *)key, sizeof(*key), 0);
+}
+
 bool BLI_ghashutil_intcmp(const void *a, const void *b)
 {
 	return (a != b);
@@ -768,6 +796,12 @@ unsigned int BLI_ghashutil_strhash_p(const void *ptr)
 
 	return h;
 }
+unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr)
+{
+	const unsigned char *key = ptr;
+
+	return BLI_hash_mm2(key, strlen((const char *)key), 0);
+}
 bool BLI_ghashutil_strcmp(const void *a, const void *b)
 {
 	return (strcmp(a, b) != 0);
@@ -1008,7 +1042,7 @@ GSet *BLI_gset_pair_new(const char *info)
 
 /** \name Debugging & Introspection
  * \{ */
-#ifdef DEBUG
+//~ #ifdef DEBUG
 
 /**
  * Measure how well the hash function performs
@@ -1040,5 +1074,5 @@ double BLI_gset_calc_quality(GSet *gs)
 	return BLI_ghash_calc_quality((GHash *)gs);
 }
 
-#endif
+//~ #endif
 /** \} */
diff --git a/source/blender/blenlib/intern/hash_mm2a.c b/source/blender/blenlib/intern/hash_mm2a.c
index 8b4242f..80c92e7 100644
--- a/source/blender/blenlib/intern/hash_mm2a.c
+++ b/source/blender/blenlib/intern/hash_mm2a.c
@@ -49,6 +49,13 @@
 	(h) = ((h) * MM2A_M) ^ (k);  \
 } (void)0
 
+#define MM2A_MIX_FINALIZE(h)     \
+{                                \
+	(h) ^= (h) >> 13;            \
+	(h) *= MM2A_M;               \
+	(h) ^= (h) >> 15;            \
+} (void)0
+
 static void mm2a_mix_tail(BLI_HashMurmur2A *mm2, const unsigned char **data, size_t *len)
 {
 	while (*len && ((*len < 4) || mm2->count)) {
@@ -99,9 +106,40 @@ uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2)
 	MM2A_MIX(mm2->hash, mm2->tail);
 	MM2A_MIX(mm2->hash, mm2->size);
 
-	mm2->hash ^= mm2->hash >> 13;
-	mm2->hash *= MM2A_M;
-	mm2->hash ^= mm2->hash >> 15;
+	MM2A_MIX_FINALIZE(mm2->hash);
 
 	return mm2->hash;
 }
+
+/* Non-incremental version, quicker for small keys. */
+uint32_t BLI_hash_mm2(const unsigned char *data, size_t len, uint32_t seed)
+{
+	/* Initialize the hash to a 'random' value */
+	uint32_t h = seed ^ len;
+
+	/* Mix 4 bytes at a time into the hash */
+	for (; len >= 4; data += 4, len -= 4) {
+		uint32_t k = *(uint32_t *)data;
+
+		MM2A_MIX(h, k);
+	}
+
+	/* Handle the last few bytes of the input array */
+	switch (len) {
+		case 3:
+			h ^= data[2] << 16;
+			/* fall through */
+		case 2:
+			h ^= data[1] << 8;
+			/* fall through */
+		case 1:
+			h ^= data[0];
+			h *= MM2A_M;
+	};
+
+	/* Do a few final mixes of the hash to ensure the last few bytes are well-incorporated. */
+	MM2A_MIX_FINALIZE(h);
+
+	return h;
+} 
+
diff --git a/source/blender/makesdna/intern/CMakeLists.txt b/source/blender/makesdna/intern/CMakeLists.txt
index 317141b..08d2f93 100644
--- a/source/blender/makesdna/intern/CMakeLists.txt
+++ b/source/blender/makesdna/intern/CMakeLists.txt
@@ -97,6 +97,7 @@ set(SRC
 	../../blenlib/intern/BLI_mempool.c
 	../../blenlib/intern/listbase.c
 	../../blenlib/intern/BLI_ghash.c
+	../../blenlib/intern/hash_mm2a.c
 )
 
 blender_add_lib(bf_dna_blenlib "${SRC}" "${INC}" "${INC_SYS}")
diff --git a/tests/gtests/blenlib/BLI_ghash_test.cc b/tests/gtests/blenlib/BLI_ghash_test.cc
new file mode 100644
index 0000000..0a27443
--- /dev/null
+++ b/tests/gtests/blenlib/BLI_ghash_test.cc
@@ -0,0 +1,291 @@
+/* Apache License, Version 2.0 */
+
+#include "testing/testing.h"
+
+#define BLI_INLINE
+
+extern "C" {
+#include "MEM_guardedalloc.h"
+#include "BLI_ghash.h"
+#include "BLI_string.h"
+#include "PIL_time_utildefines.h"
+}
+
+const char *words10k = "\
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam auctor ultrices purus tincidunt mollis. Vestibulum tincidunt imperdiet molestie. Vivamus posuere, risus ut mollis rutrum, lacus nulla mollis velit, consectetur auctor erat est in odio. Proin quis lobortis ex. Ut id quam lacus. Morbi ultrices orci quis sem suscipit tincidunt. Nullam ut molestie justo, vulputate placerat diam. Nunc tincidunt auctor venenatis. Phasellus placerat, odio ac dictum pretium, nisi odio tristique sem, [...]
+In non magna orci. Curabitur finibus tempus semper. Aliquam fringilla arcu consectetur blandit vestibulum. Mauris mollis est arcu. Praesent pellentesque lacus bibendum massa commodo commodo. Aenean facilisis lobortis varius. Ut semper ullamcorper dui, at pellentesque felis. Duis accumsan sapien ut malesuada lacinia. Praesent elementum venenatis arcu in mattis.\
+Nunc sagittis mauris risus, quis rutrum nisi egestas quis. Maecenas pharetra posuere auctor. Suspendisse mollis sollicitudin elit, id cursus massa bibendum eu. Integer tincidunt dolor non porttitor tempus. Donec lacinia sapien eu enim feugiat suscipit non malesuada diam. Suspendisse nec convallis elit. Nulla eu augue ultrices, consequat lorem at, malesuada magna. Aliquam sed tempor ipsum. Sed hendrerit nec lectus et pharetra. In felis sem, cursus at nunc in, tristique convallis purus. Pr [...]
+Integer lacinia enim sodales sem mattis, sit amet egestas lectus tincidunt. Ut quis nisl ut ex luctus fermentum quis et diam. Maecenas lectus leo, hendrerit eu facilisis et, mattis ut sem. Duis imperdiet nisl vitae urna consequat suscipit. Suspendisse sed viverra massa, dapibus condimentum sem. Morbi suscipit congue odio. Nullam eleifend fringilla nisl et semper. Sed eu neque ante. Sed eget viverra urna.\
+Duis tempor laoreet interdum. Nunc fringilla aliquet urna sit amet commodo. Curabitur non orci nec libero egestas ullamcorper nec nec velit. Nam vitae ligula lobortis, vehicula nulla id, lacinia urna. Morbi id dignissim eros. Etiam eu risus in sem vestibulum dapibus ut mollis sem. Quisque ultricies pulvinar maximus. Proin risus turpis, auctor eget molestie nec, molestie a ipsum. Donec dapibus dui in lorem rhoncus, non rutrum neque convallis. Donec at tincidunt turpis, nec sce

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list