[Bf-blender-cvs] [1a3b3fa] temp-ghash-experiments: Some minor cleanup, much better statistics...

Bastien Montagne noreply at git.blender.org
Sat Feb 21 15:59:06 CET 2015


Commit: 1a3b3fa61490ea5efc467e7a9d51444f02883d03
Author: Bastien Montagne
Date:   Sat Feb 21 15:58:02 2015 +0100
Branches: temp-ghash-experiments
https://developer.blender.org/rB1a3b3fa61490ea5efc467e7a9d51444f02883d03

Some minor cleanup, much better statistics...

===================================================================

M	source/blender/blenlib/BLI_ghash.h
M	source/blender/blenlib/intern/BLI_ghash.c
M	tests/gtests/blenlib/BLI_ghash_test.cc

===================================================================

diff --git a/source/blender/blenlib/BLI_ghash.h b/source/blender/blenlib/BLI_ghash.h
index 6da92f1..5692213 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -245,10 +245,12 @@ BLI_INLINE bool BLI_gsetIterator_done(GSetIterator *gsi) { return BLI_ghashItera
 	     BLI_gsetIterator_done(&gs_iter_) == false;                           \
 	     BLI_gsetIterator_step(&gs_iter_), i_++)
 
-//~ #ifdef DEBUG
-double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int *r_max_bucket);
-double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int *r_max_bucket);
-//~ #endif
+double BLI_ghash_calc_quality(
+        GHash *gh, double *r_load, double *r_variance,
+        double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket);
+double BLI_gset_calc_quality(
+        GSet *gs, double *r_load, double *r_variance,
+        double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket);
 
 #ifdef __cplusplus
 }
diff --git a/source/blender/blenlib/intern/BLI_ghash.c b/source/blender/blenlib/intern/BLI_ghash.c
index e13526d..4a2a234 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -1160,14 +1160,15 @@ GSet *BLI_gset_pair_new(const char *info)
 #include "BLI_math.h"
 
 /**
- * Measure how well the hash function performs, i.e. variance of the distribution of the entries in the buckets.
- * (0.0 is approx as good as even distribution).
+ * Measure how well the hash function performs (1.0 is approximately as good as a random distribution),
+ * and return a few other statistics, such as the load factor and the variance of the distribution of entries across buckets.
  *
 * Smaller is better! (Values below 1.0 indicate a better-than-random distribution.)
  */
-double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int *r_max_bucket)
+double BLI_ghash_calc_quality(
+        GHash *gh, double *r_load, double *r_variance,
+        double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket)
 {
-	double sum = 0.0;
 	double mean;
 	unsigned int i;
 
@@ -1175,11 +1176,17 @@ double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int
 		if (r_load) {
 			*r_load = 0.0;
 		}
-		if (r_min_bucket) {
-			*r_min_bucket = 0;
+		if (r_variance) {
+			*r_variance = 0.0;
 		}
-		if (r_max_bucket) {
-			*r_max_bucket = 0;
+		if (r_prop_empty_buckets) {
+			*r_prop_empty_buckets = 1.0;
+		}
+		if (r_prop_overloaded_buckets) {
+			*r_prop_overloaded_buckets = 0.0;
+		}
+		if (r_biggest_bucket) {
+			*r_biggest_bucket = 0;
 		}
 
 		return 0.0;
@@ -1189,35 +1196,62 @@ double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int
 	if (r_load) {
 		*r_load = mean;
 	}
-	if (r_min_bucket) {
-		*r_min_bucket = INT_MAX;
-	}
-	if (r_max_bucket) {
-		*r_max_bucket = 0;
+	if (r_biggest_bucket) {
+		*r_biggest_bucket = 0;
 	}
 
-	/* We already know our mean (i.e. load factor), easy to compute variance.
-	 * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
-	 */
-	for (i = 0; i < gh->nbuckets; i++) {
-		int count = 0;
-		Entry *e;
-		for (e = gh->buckets[i]; e; e = e->next) {
-			count++;
-		}
-		if (r_min_bucket) {
-			*r_min_bucket = min_ii(*r_min_bucket, (int)count);
-		}
-		if (r_max_bucket) {
-			*r_max_bucket = max_ii(*r_max_bucket, (int)count);
+	if (r_variance) {
+		/* We already know our mean (i.e. load factor), easy to compute variance.
+		 * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+		 */
+		double sum = 0.0;
+		for (i = 0; i < gh->nbuckets; i++) {
+			int count = 0;
+			Entry *e;
+			for (e = gh->buckets[i]; e; e = e->next) {
+				count++;
+			}
+			sum += ((double)count - mean) * ((double)count - mean);
 		}
-		sum += ((double)count - mean) * ((double)count - mean);
+		*r_variance = sum / (double)(gh->nbuckets - 1);
 	}
-	return sum / (double)(gh->nbuckets - 1);
-}
-double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int *r_max_bucket)
-{
-	return BLI_ghash_calc_quality((GHash *)gs, r_load, r_min_bucket, r_max_bucket);
+
+	{
+	   uint64_t sum = 0;
+	   uint64_t overloaded_buckets_threshold = (uint64_t)max_ii(GHASH_LIMIT_GROW(1), 1);
+	   uint64_t sum_overloaded = 0;
+	   uint64_t sum_empty = 0;
+
+	   for (i = 0; i < gh->nbuckets; i++) {
+		   uint64_t count = 0;
+		   Entry *e;
+		   for (e = gh->buckets[i]; e; e = e->next) {
+			   count++;
+		   }
+		   if (r_biggest_bucket) {
+			   *r_biggest_bucket = max_ii(*r_biggest_bucket, (int)count);
+		   }
+		   if (r_prop_overloaded_buckets && (count > overloaded_buckets_threshold)) {
+			   sum_overloaded++;
+		   }
+		   if (r_prop_empty_buckets && !count) {
+			   sum_empty++;
+		   }
+		   sum += count * (count + 1);
+	   }
+	   if (r_prop_overloaded_buckets) {
+		   *r_prop_overloaded_buckets = (double)sum_overloaded / (double)gh->nbuckets;
+	   }
+	   if (r_prop_empty_buckets) {
+		   *r_prop_empty_buckets = (double)sum_empty / (double)gh->nbuckets;
+	   }
+	   return ((double)sum * (double)gh->nbuckets /
+			   ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1)));
+   }
+}
+double BLI_gset_calc_quality(GSet *gs, double *r_load, double *r_variance, double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket)
+{
+	return BLI_ghash_calc_quality((GHash *)gs, r_load, r_variance, r_prop_empty_buckets, r_prop_overloaded_buckets, r_biggest_bucket);
 }
 
 /** \} */
diff --git a/tests/gtests/blenlib/BLI_ghash_test.cc b/tests/gtests/blenlib/BLI_ghash_test.cc
index 495e9a1..fb78140 100644
--- a/tests/gtests/blenlib/BLI_ghash_test.cc
+++ b/tests/gtests/blenlib/BLI_ghash_test.cc
@@ -137,11 +137,13 @@ Praesent luctus vitae nunc vitae pellentesque. Praesent faucibus sed urna ut lac
 
 #define PRINTF_GHASH_STATS(_gh) \
 { \
-	double q, lf; \
-	int minb, maxb; \
-	q = BLI_ghash_calc_quality((_gh), &lf, &minb, &maxb); \
-	printf("GHash stats:\n\tQuality (the lower the better): %f\n\tLoad: %f\n\tSmallest bucket: %d\n\tBiggest bucket: %d\n", \
-	       q, lf, minb, maxb); \
+	double q, lf, var, pempty, poverloaded; \
+	int bigb; \
+	q = BLI_ghash_calc_quality((_gh), &lf, &var, &pempty, &poverloaded, &bigb); \
+	printf("GHash stats (%d entries):\n\t" \
+	       "Quality (the lower the better): %f\n\tVariance (the lower the better): %f\n\tLoad: %f\n\t" \
+	       "Empty buckets: %.2f%%\n\tOverloaded buckets: %.2f%% (biggest bucket: %d)\n", \
+	       BLI_ghash_size(_gh), q, var, lf, pempty * 100.0, poverloaded * 100.0, bigb); \
 } void (0)
 
 
@@ -267,12 +269,10 @@ TEST(ghash, TextMurmur2a)
 
 /* Int: uniform 50M first integers. */
 
-static void int_ghash_tests(GHash *ghash, const char *id)
+static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
 {
 	printf("\n========== STARTING %s ==========\n", id);
 
-	const unsigned int nbr = 100000000;
-
 	{
 		unsigned int i = nbr;
 
@@ -313,24 +313,31 @@ TEST(ghash, IntGHash)
 {
 	GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
 
-	int_ghash_tests(ghash, "IntGHash - GHash");
+	int_ghash_tests(ghash, "IntGHash - GHash - 12000", 12000);
+
+	ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
+
+	int_ghash_tests(ghash, "IntGHash - GHash - 100000000", 100000000);
 }
 
 TEST(ghash, IntMurmur2a)
 {
 	GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
 
-	int_ghash_tests(ghash, "IntGHash - Murmur");
+	int_ghash_tests(ghash, "IntGHash - Murmur - 12000", 12000);
+
+	ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
+
+	int_ghash_tests(ghash, "IntGHash - Murmur - 100000000", 100000000);
 }
 
 
 /* Int_v4: 10M of randomly-generated integer vectors. */
 
-static void int4_ghash_tests(GHash *ghash, const char *id)
+static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
 {
 	printf("\n========== STARTING %s ==========\n", id);
 
-	const unsigned int nbr = 20000000;
 	unsigned int (*data)[4] = (unsigned int (*)[4])MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
 	unsigned int (*dt)[4];
 	unsigned int i, j;
@@ -382,12 +389,20 @@ TEST(ghash, Int4GHash)
 {
 	GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
 
-	int4_ghash_tests(ghash, "Int4GHash - GHash");
+	int4_ghash_tests(ghash, "Int4GHash - GHash - 2000", 2000);
+
+	ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
+
+	int4_ghash_tests(ghash, "Int4GHash - GHash - 20000000", 20000000);
 }
 
 TEST(ghash, Int4Murmur2a)
 {
 	GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
 
-	int4_ghash_tests(ghash, "Int4GHash - Murmur");
+	int4_ghash_tests(ghash, "Int4GHash - Murmur - 2000", 2000);
+
+	ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
+
+	int4_ghash_tests(ghash, "Int4GHash - Murmur - 20000000", 20000000);
 }




More information about the Bf-blender-cvs mailing list