[Bf-blender-cvs] [64c0c13] master: Add Murmur2A hashing feature to BLI

Bastien Montagne noreply at git.blender.org
Fri Nov 14 11:01:41 CET 2014


Commit: 64c0c13e6e08c51e92504631468db864f553d9b5
Author: Bastien Montagne
Date:   Fri Nov 14 11:00:10 2014 +0100
Branches: master
https://developer.blender.org/rB64c0c13e6e08c51e92504631468db864f553d9b5

Add Murmur2A hashing feature to BLI

Murmur2A is a very fast hashing function generating int32 hashes.
It also features a very good distribution of generated hashes.

However, it is not endianness-agnostic, meaning it will usually generate
different hashes for the same key on big- and little-endian architectures.
Consequently, **it shall not be used to generate persistent hashes**
(never store them in .blend file e.g.).

This implementation supports incremental hashing, and is a direct
adaptation of the reference implementation (in C++):
https://smhasher.googlecode.com/svn-history/r130/trunk/MurmurHash2.cpp

That C++ code was also used to generate the reference values in the gtests file.

Reviewers: sergey, campbellbarton

Reviewed By: campbellbarton

Projects: #bf_blender

Differential Revision: https://developer.blender.org/D892

===================================================================

A	source/blender/blenlib/BLI_hash_md5.h
A	source/blender/blenlib/BLI_hash_mm2a.h
D	source/blender/blenlib/BLI_md5.h
M	source/blender/blenlib/CMakeLists.txt
A	source/blender/blenlib/intern/hash_md5.c
A	source/blender/blenlib/intern/hash_mm2a.c
D	source/blender/blenlib/intern/md5.c
M	source/blender/imbuf/intern/thumbs.c
M	source/blender/render/intern/source/render_result.c
A	tests/gtests/blenlib/BLI_hash_mm2a_test.cc
M	tests/gtests/blenlib/CMakeLists.txt

===================================================================

diff --git a/source/blender/blenlib/BLI_md5.h b/source/blender/blenlib/BLI_hash_md5.h
similarity index 95%
copy from source/blender/blenlib/BLI_md5.h
copy to source/blender/blenlib/BLI_hash_md5.h
index 6a760f5..cab3671 100644
--- a/source/blender/blenlib/BLI_md5.h
+++ b/source/blender/blenlib/BLI_hash_md5.h
@@ -19,9 +19,9 @@
  */
 
 #ifndef __BLI_MD5_H__
-#define __BLI_MD5_H__ 
+#define __BLI_MD5_H__
 
-/** \file BLI_md5.h
+/** \file BLI_hash_md5.h
  *  \ingroup bli
  */
 
@@ -43,5 +43,4 @@ int md5_stream(FILE *stream, void *resblock);
 
 char *md5_to_hexdigest(void *resblock, char r_hex_digest[33]);
 
-#endif
-
+#endif  /* __BLI_MD5_H__ */
diff --git a/source/blender/blenlib/BLI_md5.h b/source/blender/blenlib/BLI_hash_mm2a.h
similarity index 55%
rename from source/blender/blenlib/BLI_md5.h
rename to source/blender/blenlib/BLI_hash_mm2a.h
index 6a760f5..503eb5e 100644
--- a/source/blender/blenlib/BLI_md5.h
+++ b/source/blender/blenlib/BLI_hash_mm2a.h
@@ -18,30 +18,28 @@
  * ***** END GPL LICENSE BLOCK *****
  */
 
-#ifndef __BLI_MD5_H__
-#define __BLI_MD5_H__ 
+#ifndef __BLI_MM2A_H__
+#define __BLI_MM2A_H__
 
-/** \file BLI_md5.h
+/** \file BLI_hash_mm2a.h
  *  \ingroup bli
  */
 
-#include <stdio.h>
-#include <stdlib.h>
+#include "BLI_sys_types.h"
 
-/* Compute MD5 message digest for LEN bytes beginning at BUFFER.  The
- * result is always in little endian byte order, so that a byte-wise
- * output yields to the wanted ASCII representation of the message
- * digest.  */
+typedef struct BLI_HashMurmur2A {
+	uint32_t hash;   /* Running hash value; set to the seed by BLI_hash_mm2a_init(). */
+	uint32_t tail;   /* Input bytes gathered so far that do not yet form a full 4-byte word. */
+	uint32_t count;  /* Number of bytes currently held in `tail`. */
+	uint32_t size;   /* Total number of bytes added so far (kept as a 32-bit counter). */
+} BLI_HashMurmur2A;
 
-void *md5_buffer(const char *buffer, size_t len, void *resblock);
+void BLI_hash_mm2a_init(BLI_HashMurmur2A *mm2, uint32_t seed);
 
-/* Compute MD5 message digest for bytes read from STREAM.  The
- * resulting message digest number will be written into the 16 bytes
- * beginning at RESBLOCK.  */
+void BLI_hash_mm2a_add(BLI_HashMurmur2A *mm2, const unsigned char *data, size_t len);
 
-int md5_stream(FILE *stream, void *resblock);
+void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data);
 
-char *md5_to_hexdigest(void *resblock, char r_hex_digest[33]);
-
-#endif
+uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2);
 
+#endif  /* __BLI_MM2A_H__ */
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 9efa20d..ba166b1 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -65,6 +65,8 @@ set(SRC
 	intern/freetypefont.c
 	intern/graph.c
 	intern/gsqueue.c
+	intern/hash_md5.c
+	intern/hash_mm2a.c
 	intern/jitter.c
 	intern/lasso.c
 	intern/listbase.c
@@ -80,7 +82,6 @@ set(SRC
 	intern/math_rotation.c
 	intern/math_vector.c
 	intern/math_vector_inline.c
-	intern/md5.c
 	intern/noise.c
 	intern/path_util.c
 	intern/polyfill2d.c
@@ -134,6 +135,8 @@ set(SRC
 	BLI_ghash.h
 	BLI_graph.h
 	BLI_gsqueue.h
+	BLI_hash_md5.h
+	BLI_hash_mm2a.h
 	BLI_heap.h
 	BLI_jitter.h
 	BLI_kdopbvh.h
@@ -153,7 +156,6 @@ set(SRC
 	BLI_math_matrix.h
 	BLI_math_rotation.h
 	BLI_math_vector.h
-	BLI_md5.h
 	BLI_memarena.h
 	BLI_mempool.h
 	BLI_noise.h
diff --git a/source/blender/blenlib/intern/md5.c b/source/blender/blenlib/intern/hash_md5.c
similarity index 99%
rename from source/blender/blenlib/intern/md5.c
rename to source/blender/blenlib/intern/hash_md5.c
index 3d1a9cd..98a5cc5 100644
--- a/source/blender/blenlib/intern/md5.c
+++ b/source/blender/blenlib/intern/hash_md5.c
@@ -29,7 +29,7 @@
  *  according to the definition of MD5 in RFC 1321 from April 1992.
  */
 
-#include "BLI_md5.h"  /* own include */
+#include "BLI_hash_md5.h"  /* own include */
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/source/blender/blenlib/intern/hash_mm2a.c b/source/blender/blenlib/intern/hash_mm2a.c
new file mode 100644
index 0000000..8b4242f
--- /dev/null
+++ b/source/blender/blenlib/intern/hash_mm2a.c
@@ -0,0 +1,149 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ *
+ * Copyright (C) 2014 Blender Foundation.
+ *
+ */
+
+/** \file blender/blenlib/intern/hash_mm2a.c
+ *  \ingroup bli
+ *
+ *  Functions to compute Murmur2A hash key.
+ *
+ * A very fast hash generating a 32-bit result, with few collisions and a good distribution.
+ *
+ * See also:
+ *     reference implementation: https://smhasher.googlecode.com/svn-history/r130/trunk/MurmurHash2.cpp
+ *     and http://programmers.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed
+ *
+ * \warning Do not store this hash in files or other persistent data: it is not endian-agnostic,
+ *          so it should only be used for temporary (runtime) data.
+ */
+
+#include "BLI_hash_mm2a.h"  /* own include */
+
+#include <string.h>
+
+/* Helpers. */
+
+/* Multiplication constant used by every mixing step. */
+#define MM2A_M 0x5bd1e995
+
+/* Mix the 32-bit word `k` into the hash state `h`.
+ * Both arguments must be modifiable lvalues: `k` is scrambled in place and `h` is updated. */
+#define MM2A_MIX(h, k)           \
+{                                \
+	(k) *= MM2A_M;               \
+	(k) ^= (k) >> 24;            \
+	(k) *= MM2A_M;               \
+	(h) = ((h) * MM2A_M) ^ (k);  \
+} (void)0
+
+/**
+ * Feed input one byte at a time through the `tail` accumulator.
+ *
+ * Bytes are consumed while input remains and either fewer than four bytes are left or a partial
+ * word is already pending in `tail`. Each byte is stored above the previous ones (least
+ * significant byte first); once four bytes are gathered the word is mixed into the hash and the
+ * accumulator is reset.
+ *
+ * \param data: In/out cursor into the input, advanced past the consumed bytes.
+ * \param len: In/out number of remaining input bytes, decreased by the number consumed.
+ */
+static void mm2a_mix_tail(BLI_HashMurmur2A *mm2, const unsigned char **data, size_t *len)
+{
+	while (*len && ((*len < 4) || mm2->count)) {
+		mm2->tail |= (uint32_t)(**data) << (mm2->count * 8);
+
+		mm2->count++;
+		(*len)--;
+		(*data)++;
+
+		if (mm2->count == 4) {
+			MM2A_MIX(mm2->hash, mm2->tail);
+			mm2->tail = 0;
+			mm2->count = 0;
+		}
+	}
+}
+
+/**
+ * Reset \a mm2 to start a new hash computation from \a seed.
+ */
+void BLI_hash_mm2a_init(BLI_HashMurmur2A *mm2, uint32_t seed)
+{
+	mm2->hash  = seed;
+	mm2->tail  = 0;
+	mm2->count = 0;
+	mm2->size  = 0;
+}
+
+/**
+ * Add \a len bytes starting at \a data to the hash; may be called repeatedly to feed chunks.
+ *
+ * \note Whole words are read in host byte order while leftover bytes go through the tail
+ * (packed least significant byte first), so the result depends on host endianness; on big-endian
+ * hosts it may additionally depend on how the input is split across calls.
+ */
+void BLI_hash_mm2a_add(BLI_HashMurmur2A *mm2, const unsigned char *data, size_t len)
+{
+	/* Only the low 32 bits of the total length are tracked. */
+	mm2->size += (uint32_t)len;
+
+	/* Complete a word left pending by a previous call, if any. */
+	mm2a_mix_tail(mm2, &data, &len);
+
+	for (; len >= 4; data += 4, len -= 4) {
+		uint32_t k;
+
+		/* `data` has no alignment guarantee (it may have been advanced by a few bytes above),
+		 * so copy the word instead of dereferencing a casted pointer. */
+		memcpy(&k, data, sizeof(k));
+		MM2A_MIX(mm2->hash, k);
+	}
+
+	/* Stash the remaining (fewer than four) bytes for the next call or for the final step. */
+	mm2a_mix_tail(mm2, &data, &len);
+}
+
+/**
+ * Add the in-memory bytes of the integer \a data to the hash.
+ */
+void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data)
+{
+	BLI_hash_mm2a_add(mm2, (const unsigned char *)&data, sizeof(data));
+}
+
+/**
+ * Finish the computation and return the resulting 32-bit hash.
+ *
+ * The pending tail and the total size are mixed in and \a mm2 is modified in the process,
+ * so call BLI_hash_mm2a_init() again before reusing it for another hash.
+ */
+uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2)
+{
+	MM2A_MIX(mm2->hash, mm2->tail);
+	MM2A_MIX(mm2->hash, mm2->size);
+
+	mm2->hash ^= mm2->hash >> 13;
+	mm2->hash *= MM2A_M;
+	mm2->hash ^= mm2->hash >> 15;
+
+	return mm2->hash;
+}
diff --git a/source/blender/imbuf/intern/thumbs.c b/source/blender/imbuf/intern/thumbs.c
index 9a97a14..5399e54 100644
--- a/source/blender/imbuf/intern/thumbs.c
+++ b/source/blender/imbuf/intern/thumbs.c
@@ -35,7 +35,7 @@
 #include "BLI_string.h"
 #include "BLI_path_util.h"
 #include "BLI_fileops.h"
-#include "BLI_md5.h"
+#include "BLI_hash_md5.h"
 #include "BLI_system.h"
 #include BLI_SYSTEM_PID_H
 
diff --git a/source/blender/render/intern/source/render_result.c b/source/blender/render/intern/source/render_result.c
index 2479752..d7c56dd 100644
--- a/source/blender/render/intern/source/render_result.c
+++ b/source/blender/render/intern/source/render_result.c
@@ -37,7 +37,7 @@
 
 #include "BLI_utildefines.h"
 #include "BLI_listbase.h"
-#include "BLI_md5.h"
+#include "BLI_hash_md5.h"
 #include "BLI_path_util.h"
 #include "BLI_rect.h"
 #include "BLI_string.h"
diff --git a/tests/gtests/blenlib/BLI_hash_mm2a_test.cc b/tests/gtests/blenlib/BLI_hash_mm2a_test.cc
new file mode 100644
index 0000000..b35a1a8
--- /dev/null
+++ b/tests/gtests/blenlib/BLI_hash_mm2a_test.cc
@@ -0,0 +1,75 @@
+/* Apache License, Version 2.0 */
+
+#include "testing/testing.h"
+
+extern "C" {
+#include "BLI_hash_mm2a.h"
+}
+
+/* Note: Reference results are taken from reference implementation (cpp code, CMurmurHash2A variant):
+ *       https://smhasher.googlecode.com/svn-history/r130/trunk/MurmurHash2.cpp
+ */
+
+TEST(hash_mm2a, MM2ABasic)
+{
+	/* Hash a short string in one call; the expected value depends on host byte order. */
+	BLI_HashMurmur2A mm2;
+	const char *data = "Blender";
+
+	BLI_hash_mm2a_init(&mm2, 0);
+	BLI_hash_mm2a_add(&mm2, (const unsigned char *)data, strlen(data));
+#ifdef __LITTLE_ENDIAN__
+	EXPECT_EQ(1633988145u, BLI_hash_mm2a_end(&mm2));
+#else
+	EXPECT_EQ(959283772u, BLI_hash_mm2a_end(&mm2));
+#endif
+}
+
+TEST(hash_mm2a, MM2AConcatenateStrings)
+{
+	/* Hashing a string in three chunks must match hashing the concatenated string in one call. */
+	BLI_HashMurmur2A mm2;
+	uint32_t hash;
+	const char *data1 = "Blender";
+	const char *data2 = " is ";
+	const char *data3 = "FaNtAsTiC";
+	const char *data123 = "Blender is FaNtAsTiC";
+
+	BLI_hash_mm2a_init(&mm2, 0);
+	BLI_hash_mm2a_add(&mm2, (const unsigned char *)data1, strlen(data1));
+	BLI_hash_mm2a_add(&mm2, (const unsigned char *)data2, strlen(data2));
+	BLI_hash_mm2a_add(&mm2, (const unsigned char *)data3, strlen(data3));
+	hash = BLI_hash_mm2a_end(&mm2);
+	BLI_hash_mm2a_init(&mm2, 0);
+	BLI_hash_mm2a_add(&mm2, (const unsigned char *)data123, strlen(data123));
+#ifdef __LITTLE_ENDIAN__
+	EXPECT_EQ(1545105348u, hash);
+#else
+	EXPECT_EQ(2604964730u, hash);
+#endif
+	EXPECT_EQ(hash, BLI_hash_mm2a_end(&mm2));
+}
+
+TEST(hash_mm2a, MM2AIntegers)
+{
+	/* Adding integers one by one must give the same hash as adding the whole array at once. */
+	BLI_HashMurmur2A mm2;
+	uint32_t hash;
+	const int ints[4] = {1, 2, 3, 4};
+
+	BLI_hash_mm2a_init(&mm2, 0);
+	BLI_hash_mm2a_add_int(&mm2, ints[0]);
+	BLI_hash_mm2a_add_int(&mm2, ints[1]);
+	BLI_hash_mm2a_add_int(&mm2, ints[2]);
+	BLI_hash_mm2a_add_int(&mm2, ints[3]);
+	hash = BLI_hash_mm2a_end(&mm2);
+	BLI_hash_mm2a_init(&mm2, 0);
+	BLI_hash_mm2a_add(&mm2, (const unsigned char *)ints, sizeof(ints));
+	/* Yes, same hash here on little and big endian. */
+#ifdef __LITTLE_ENDIAN__
+	EXPECT_EQ(405493096u, hash);
+#else
+	EXPECT_EQ(405493096u, hash);
+#endif
+	EXPECT_EQ(hash, BLI_hash_mm2a_end(&mm2));
+}
diff --git a/tests/gtests/blenlib/CMakeLists.txt b/tests/gtests/blenlib/CMakeLists.txt
index c949c1e..3a86d3f 100644
--- a/tests/gtests/blenlib/CMakeLists.txt
+++ b/tests/

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list