[Bf-blender-cvs] [1a3b3fa] temp-ghash-experiments: Some minor cleanup, much better statistics...
Bastien Montagne
noreply at git.blender.org
Sat Feb 21 15:59:06 CET 2015
Commit: 1a3b3fa61490ea5efc467e7a9d51444f02883d03
Author: Bastien Montagne
Date: Sat Feb 21 15:58:02 2015 +0100
Branches: temp-ghash-experiments
https://developer.blender.org/rB1a3b3fa61490ea5efc467e7a9d51444f02883d03
Some minor cleanup, much better statistics...
===================================================================
M source/blender/blenlib/BLI_ghash.h
M source/blender/blenlib/intern/BLI_ghash.c
M tests/gtests/blenlib/BLI_ghash_test.cc
===================================================================
diff --git a/source/blender/blenlib/BLI_ghash.h b/source/blender/blenlib/BLI_ghash.h
index 6da92f1..5692213 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -245,10 +245,12 @@ BLI_INLINE bool BLI_gsetIterator_done(GSetIterator *gsi) { return BLI_ghashItera
BLI_gsetIterator_done(&gs_iter_) == false; \
BLI_gsetIterator_step(&gs_iter_), i_++)
-//~ #ifdef DEBUG
-double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int *r_max_bucket);
-double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int *r_max_bucket);
-//~ #endif
+double BLI_ghash_calc_quality(
+ GHash *gh, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket);
+double BLI_gset_calc_quality(
+ GSet *gs, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket);
#ifdef __cplusplus
}
diff --git a/source/blender/blenlib/intern/BLI_ghash.c b/source/blender/blenlib/intern/BLI_ghash.c
index e13526d..4a2a234 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -1160,14 +1160,15 @@ GSet *BLI_gset_pair_new(const char *info)
#include "BLI_math.h"
/**
- * Measure how well the hash function performs, i.e. variance of the distribution of the entries in the buckets.
- * (0.0 is approx as good as even distribution).
+ * Measure how well the hash function performs (1.0 is approx as good as random distribution),
+ * and return a few other stats like load, variance of the distribution of the entries in the buckets, etc.
*
* Smaller is better!
*/
-double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int *r_max_bucket)
+double BLI_ghash_calc_quality(
+ GHash *gh, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket)
{
- double sum = 0.0;
double mean;
unsigned int i;
@@ -1175,11 +1176,17 @@ double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int
if (r_load) {
*r_load = 0.0;
}
- if (r_min_bucket) {
- *r_min_bucket = 0;
+ if (r_variance) {
+ *r_variance = 0.0;
}
- if (r_max_bucket) {
- *r_max_bucket = 0;
+ if (r_prop_empty_buckets) {
+ *r_prop_empty_buckets = 1.0;
+ }
+ if (r_prop_overloaded_buckets) {
+ *r_prop_overloaded_buckets = 0.0;
+ }
+ if (r_biggest_bucket) {
+ *r_biggest_bucket = 0;
}
return 0.0;
@@ -1189,35 +1196,62 @@ double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, int
if (r_load) {
*r_load = mean;
}
- if (r_min_bucket) {
- *r_min_bucket = INT_MAX;
- }
- if (r_max_bucket) {
- *r_max_bucket = 0;
+ if (r_biggest_bucket) {
+ *r_biggest_bucket = 0;
}
- /* We already know our mean (i.e. load factor), easy to compute variance.
- * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
- */
- for (i = 0; i < gh->nbuckets; i++) {
- int count = 0;
- Entry *e;
- for (e = gh->buckets[i]; e; e = e->next) {
- count++;
- }
- if (r_min_bucket) {
- *r_min_bucket = min_ii(*r_min_bucket, (int)count);
- }
- if (r_max_bucket) {
- *r_max_bucket = max_ii(*r_max_bucket, (int)count);
+ if (r_variance) {
+ /* We already know our mean (i.e. load factor), easy to compute variance.
+ * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+ */
+ double sum = 0.0;
+ for (i = 0; i < gh->nbuckets; i++) {
+ int count = 0;
+ Entry *e;
+ for (e = gh->buckets[i]; e; e = e->next) {
+ count++;
+ }
+ sum += ((double)count - mean) * ((double)count - mean);
}
- sum += ((double)count - mean) * ((double)count - mean);
+ *r_variance = sum / (double)(gh->nbuckets - 1);
}
- return sum / (double)(gh->nbuckets - 1);
-}
-double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int *r_max_bucket)
-{
- return BLI_ghash_calc_quality((GHash *)gs, r_load, r_min_bucket, r_max_bucket);
+
+ {
+ uint64_t sum = 0;
+ uint64_t overloaded_buckets_threshold = (uint64_t)max_ii(GHASH_LIMIT_GROW(1), 1);
+ uint64_t sum_overloaded = 0;
+ uint64_t sum_empty = 0;
+
+ for (i = 0; i < gh->nbuckets; i++) {
+ uint64_t count = 0;
+ Entry *e;
+ for (e = gh->buckets[i]; e; e = e->next) {
+ count++;
+ }
+ if (r_biggest_bucket) {
+ *r_biggest_bucket = max_ii(*r_biggest_bucket, (int)count);
+ }
+ if (r_prop_overloaded_buckets && (count > overloaded_buckets_threshold)) {
+ sum_overloaded++;
+ }
+ if (r_prop_empty_buckets && !count) {
+ sum_empty++;
+ }
+ sum += count * (count + 1);
+ }
+ if (r_prop_overloaded_buckets) {
+ *r_prop_overloaded_buckets = (double)sum_overloaded / (double)gh->nbuckets;
+ }
+ if (r_prop_empty_buckets) {
+ *r_prop_empty_buckets = (double)sum_empty / (double)gh->nbuckets;
+ }
+ return ((double)sum * (double)gh->nbuckets /
+ ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1)));
+ }
+}
+double BLI_gset_calc_quality(GSet *gs, double *r_load, double *r_variance, double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket)
+{
+ return BLI_ghash_calc_quality((GHash *)gs, r_load, r_variance, r_prop_empty_buckets, r_prop_overloaded_buckets, r_biggest_bucket);
}
/** \} */
diff --git a/tests/gtests/blenlib/BLI_ghash_test.cc b/tests/gtests/blenlib/BLI_ghash_test.cc
index 495e9a1..fb78140 100644
--- a/tests/gtests/blenlib/BLI_ghash_test.cc
+++ b/tests/gtests/blenlib/BLI_ghash_test.cc
@@ -137,11 +137,13 @@ Praesent luctus vitae nunc vitae pellentesque. Praesent faucibus sed urna ut lac
#define PRINTF_GHASH_STATS(_gh) \
{ \
- double q, lf; \
- int minb, maxb; \
- q = BLI_ghash_calc_quality((_gh), &lf, &minb, &maxb); \
- printf("GHash stats:\n\tQuality (the lower the better): %f\n\tLoad: %f\n\tSmallest bucket: %d\n\tBiggest bucket: %d\n", \
- q, lf, minb, maxb); \
+ double q, lf, var, pempty, poverloaded; \
+ int bigb; \
+ q = BLI_ghash_calc_quality((_gh), &lf, &var, &pempty, &poverloaded, &bigb); \
+ printf("GHash stats (%d entries):\n\t" \
+ "Quality (the lower the better): %f\n\tVariance (the lower the better): %f\n\tLoad: %f\n\t" \
+ "Empty buckets: %.2f%%\n\tOverloaded buckets: %.2f%% (biggest bucket: %d)\n", \
+ BLI_ghash_size(_gh), q, var, lf, pempty * 100.0, poverloaded * 100.0, bigb); \
} void (0)
@@ -267,12 +269,10 @@ TEST(ghash, TextMurmur2a)
/* Int: uniform 50M first integers. */
-static void int_ghash_tests(GHash *ghash, const char *id)
+static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
- const unsigned int nbr = 100000000;
-
{
unsigned int i = nbr;
@@ -313,24 +313,31 @@ TEST(ghash, IntGHash)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
- int_ghash_tests(ghash, "IntGHash - GHash");
+ int_ghash_tests(ghash, "IntGHash - GHash - 12000", 12000);
+
+ ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
+
+ int_ghash_tests(ghash, "IntGHash - GHash - 100000000", 100000000);
}
TEST(ghash, IntMurmur2a)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
- int_ghash_tests(ghash, "IntGHash - Murmur");
+ int_ghash_tests(ghash, "IntGHash - Murmur - 12000", 12000);
+
+ ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
+
+ int_ghash_tests(ghash, "IntGHash - Murmur - 100000000", 100000000);
}
/* Int_v4: 10M of randomly-generated integer vectors. */
-static void int4_ghash_tests(GHash *ghash, const char *id)
+static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
- const unsigned int nbr = 20000000;
unsigned int (*data)[4] = (unsigned int (*)[4])MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
unsigned int (*dt)[4];
unsigned int i, j;
@@ -382,12 +389,20 @@ TEST(ghash, Int4GHash)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
- int4_ghash_tests(ghash, "Int4GHash - GHash");
+ int4_ghash_tests(ghash, "Int4GHash - GHash - 2000", 2000);
+
+ ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
+
+ int4_ghash_tests(ghash, "Int4GHash - GHash - 20000000", 20000000);
}
TEST(ghash, Int4Murmur2a)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
- int4_ghash_tests(ghash, "Int4GHash - Murmur");
+ int4_ghash_tests(ghash, "Int4GHash - Murmur - 2000", 2000);
+
+ ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
+
+ int4_ghash_tests(ghash, "Int4GHash - Murmur - 20000000", 20000000);
}
More information about the Bf-blender-cvs
mailing list