[Bf-blender-cvs] [119846a6bb3] master: Mikktspace: Speed up the merging of identical vertices

Lukas Stockner noreply at git.blender.org
Fri Nov 17 18:35:04 CET 2017


Commit: 119846a6bb366f5ae13db18d965083d7927ff11e
Author: Lukas Stockner
Date:   Sun Jun 4 23:04:47 2017 +0200
Branches: master
https://developer.blender.org/rB119846a6bb366f5ae13db18d965083d7927ff11e

Mikktspace: Speed up the merging of identical vertices

Previously, Mikktspace just bucketed the vertices based on one spatial coordinate and then ran full pairwise comparisons inside each bucket.
However, since models are three-dimensional, the bucketing has a massive false-positive rate, and since pairwise comparison is O(n^2), the merging process is very slow.

But, since we only care about exactly identical vertices, there is a much more efficient approach - we can just hash all values belonging to each vertex and form buckets based on the hash.
Since the hash has 32 bits and considers all values, false-positives are very unlikely - and since both hashing and the radixsort that's used for bucketing are O(n), both asymptotical and
real-world performance (as well as code complexity) are significantly improved.

===================================================================

M	intern/mikktspace/mikktspace.c

===================================================================

diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c
index f832b356ffe..ebf699c2428 100644
--- a/intern/mikktspace/mikktspace.c
+++ b/intern/mikktspace/mikktspace.c
@@ -447,305 +447,132 @@ typedef struct {
 	int index;
 } STmpVert;
 
-static const int g_iCells = 2048;
-static const float g_iCells_fl = 2048.0f;
+static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
 
-#ifdef _MSC_VER
-#  define NOINLINE __declspec(noinline)
-#else
-#  define NOINLINE __attribute__ ((noinline))
-#endif
+typedef unsigned int uint;
 
-// it is IMPORTANT that this function is called to evaluate the hash since
-// inlining could potentially reorder instructions and generate different
-// results for the same effective input value fVal.
-static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal)
+static uint float_as_uint(const float v)
 {
-	const float fIndex = g_iCells_fl * ((fVal-fMin)/(fMax-fMin));
-	const int iIndex = (int)fIndex;
-	return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1);
+	return *((uint*)(&v));
 }
 
-static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in);
-static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries);
-static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
+#define HASH(x, y, z) (((x) * 73856093) ^ ((y) * 19349663) ^ ((z) * 83492791))
+#define HASH_F(x, y, z) HASH(float_as_uint(x), float_as_uint(y), float_as_uint(z))
 
-static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
+/* Sort comp and data based on comp.
+ * comp2 and data2 are used as temporary storage. */
+static void radixsort_pair(uint *comp, int *data, uint *comp2, int *data2, int n)
 {
+	int shift = 0;
+	for(int pass = 0; pass < 4; pass++, shift+=8) {
+		int bins[257] = {0};
+		/* Count number of elements per bin. */
+		for(int i = 0; i < n; i++) {
+			bins[((comp[i] >> shift) & 0xff) + 1]++;
+		}
+		/* Compute prefix sum to find position of each bin in the sorted array. */
+		for(int i = 2; i < 256; i++) {
+			bins[i] += bins[i-1];
+		}
+		/* Insert the elements in their correct location based on their bin. */
+		for(int i = 0; i < n; i++) {
+			int pos = bins[(comp[i] >> shift) & 0xff]++;
+			comp2[pos] = comp[i];
+			data2[pos] = data[i];
+		}
 
-	// Generate bounding box
-	int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL;
-	STmpVert * pTmpVert = NULL;
-	int i=0, iChannel=0, k=0, e=0;
-	int iMaxCount=0;
-	SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim;
-	float fMin, fMax;
-	for (i=1; i<(iNrTrianglesIn*3); i++)
-	{
-		const int index = piTriList_in_and_out[i];
-
-		const SVec3 vP = GetPosition(pContext, index);
-		if (vMin.x > vP.x) vMin.x = vP.x;
-		else if (vMax.x < vP.x) vMax.x = vP.x;
-		if (vMin.y > vP.y) vMin.y = vP.y;
-		else if (vMax.y < vP.y) vMax.y = vP.y;
-		if (vMin.z > vP.z) vMin.z = vP.z;
-		else if (vMax.z < vP.z) vMax.z = vP.z;
+		/* Swap arrays. */
+		int  *tmpdata = data; data = data2; data2 = tmpdata;
+		uint *tmpcomp = comp; comp = comp2; comp2 = tmpcomp;
 	}
+}
 
-	vDim = vsub(vMax,vMin);
-	iChannel = 0;
-	fMin = vMin.x; fMax=vMax.x;
-	if (vDim.y>vDim.x && vDim.y>vDim.z)
-	{
-		iChannel=1;
-		fMin = vMin.y;
-		fMax = vMax.y;
-	}
-	else if (vDim.z>vDim.x)
-	{
-		iChannel=2;
-		fMin = vMin.z;
-		fMax = vMax.z;
-	}
+/* Merge identical vertices.
+ * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9 values.
+ * Then, by sorting based on that hash, identical elements (having identical hashes) will be moved next to each other.
+ * Since there might be hash collisions, the elements of each block are then compared with each other and duplicates
+ * are merged.
+ */
+static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
+{
+	int numVertices = iNrTrianglesIn*3;
 
-	// make allocations
-	piHashTable = (int *) malloc(sizeof(int[3])*iNrTrianglesIn);
-	piHashCount = (int *) malloc(sizeof(int)*g_iCells);
-	piHashOffsets = (int *) malloc(sizeof(int)*g_iCells);
-	piHashCount2 = (int *) malloc(sizeof(int)*g_iCells);
+	uint *hashes = (uint*) malloc(sizeof(uint)*numVertices);
+	int *indices = (int*) malloc(sizeof(int)*numVertices);
+	uint *temp_hashes = (uint*) malloc(sizeof(uint)*numVertices);
+	int *temp_indices = (int*) malloc(sizeof(int)*numVertices);
+
+	if(hashes == NULL || indices == NULL || temp_hashes == NULL || temp_indices == NULL) {
+		free(hashes);
+		free(indices);
+		free(temp_hashes);
+		free(temp_indices);
 
-	if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL)
-	{
-		if (piHashTable!=NULL) free(piHashTable);
-		if (piHashCount!=NULL) free(piHashCount);
-		if (piHashOffsets!=NULL) free(piHashOffsets);
-		if (piHashCount2!=NULL) free(piHashCount2);
 		GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn);
 		return;
 	}
-	memset(piHashCount, 0, sizeof(int)*g_iCells);
-	memset(piHashCount2, 0, sizeof(int)*g_iCells);
 
-	// count amount of elements in each cell unit
-	for (i=0; i<(iNrTrianglesIn*3); i++)
-	{
+	for (int i = 0; i < numVertices; i++) {
 		const int index = piTriList_in_and_out[i];
-		const SVec3 vP = GetPosition(pContext, index);
-		const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
-		const int iCell = FindGridCell(fMin, fMax, fVal);
-		++piHashCount[iCell];
-	}
 
-	// evaluate start index of each cell.
-	piHashOffsets[0]=0;
-	for (k=1; k<g_iCells; k++)
-		piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1];
-
-	// insert vertices
-	for (i=0; i<(iNrTrianglesIn*3); i++)
-	{
-		const int index = piTriList_in_and_out[i];
 		const SVec3 vP = GetPosition(pContext, index);
-		const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
-		const int iCell = FindGridCell(fMin, fMax, fVal);
-		int * pTable = NULL;
-
-		assert(piHashCount2[iCell]<piHashCount[iCell]);
-		pTable = &piHashTable[piHashOffsets[iCell]];
-		pTable[piHashCount2[iCell]] = i;	// vertex i has been inserted.
-		++piHashCount2[iCell];
-	}
-	for (k=0; k<g_iCells; k++)
-		assert(piHashCount2[k] == piHashCount[k]);	// verify the count
-	free(piHashCount2);
-
-	// find maximum amount of entries in any hash entry
-	iMaxCount = piHashCount[0];
-	for (k=1; k<g_iCells; k++)
-		if (iMaxCount<piHashCount[k])
-			iMaxCount=piHashCount[k];
-	pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount);
+		const uint hashP = HASH_F(vP.x, vP.y, vP.z);
 
+		const SVec3 vN = GetNormal(pContext, index);
+		const uint hashN = HASH_F(vN.x, vN.y, vN.z);
 
-	// complete the merge
-	for (k=0; k<g_iCells; k++)
-	{
-		// extract table of cell k and amount of entries in it
-		int * pTable = &piHashTable[piHashOffsets[k]];
-		const int iEntries = piHashCount[k];
-		if (iEntries < 2) continue;
+		const SVec3 vT = GetTexCoord(pContext, index);
+		const uint hashT = HASH_F(vT.x, vT.y, vT.z);
 
-		if (pTmpVert!=NULL)
-		{
-			for (e=0; e<iEntries; e++)
-			{
-				int i = pTable[e];
-				const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]);
-				pTmpVert[e].vert[0] = vP.x; pTmpVert[e].vert[1] = vP.y;
-				pTmpVert[e].vert[2] = vP.z; pTmpVert[e].index = i;
-			}
-			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, 0, iEntries-1);
-		}
-		else
-			MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries);
+		hashes[i] = HASH(hashP, hashN, hashT);
+		indices[i] = i;
 	}
 
-	if (pTmpVert!=NULL) { free(pTmpVert); }
-	free(piHashTable);
-	free(piHashCount);
-	free(piHashOffsets);
-}
-
-static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in)
-{
-	// make bbox
-	int c=0, l=0, channel=0;
-	float fvMin[3], fvMax[3];
-	float dx=0, dy=0, dz=0, fSep=0;
-	for (c=0; c<3; c++)
-	{	fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c];	}
-	for (l=(iL_in+1); l<=iR_in; l++) {
-		for (c=0; c<3; c++) {
-			if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
-			if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
+	radixsort_pair(hashes, indices, temp_hashes, temp_indices, numVertices);
+
+	free(temp_hashes);
+	free(temp_indices);
+
+	/* Process blocks of vertices with the same hash.
+	 * Vertices in the block might still be separate, but we know for sure that
+	 * vertices in different blocks will never be identical. */
+	int blockstart = 0;
+	while (blockstart < numVertices) {
+		/* Find end of this block (exclusive). */
+		uint hash = hashes[blockstart];
+		int blockend = blockstart+1;
+		for(; blockend < numVertices; blockend++) {
+			if(hashes[blockend] != hash) break;
 		}
-	}
-
-	dx = fvMax[0]-fvMin[0];
-	dy = fvMax[1]-fvMin[1];
-	dz = fvMax[2]-fvMin[2];
-
-	channel = 0;
-	if (dy>dx && dy>dz) channel=1;
-	else if (dz>dx) channel=2;
-
-	fSep = 0.5f*(fvMax[channel]+fvMin[channel]);
 
-	// stop if all vertices are NaNs
-	if (!isfinite(fSep))
-		return;
-
-	// terminate recursion when the separation/average value
-	// is no longer strictly between fMin and fMax values.
-	if (fSep>=fvMax[channel] || fSep<=fvMin[channel])
-	{
-		// complete the weld
-		for (l=iL_in; l<=iR_in; l++)
-		{
-			int i = pTmpVert[l].index;
-			const int index = piTriList_in_and_out[i];
-			const SVec3 vP = GetPosition(pContext, index);
-			const SVec3 vN = GetNormal(pContext, index);
-			const SVec3 vT = GetTexCoord(pContext, index);
-
-			tbool bNotFound = TTRUE;
-			int l2=iL_in, i2rec=-1;
-			while (l2<l && bNotFound)
-			{
-				const int i2 = pTmpVert[l2].index;
-				const int index2 = piTriList_in_and_out[i2];
-				const SVec3 vP2 = GetPosition(pContext, index2);
-				const SVec3 vN2 = GetNormal(pContext, index2);
-				const SVec3 vT2 = GetTexCoord(pContext, index2);
-				i2rec=i2;
-
-				//if (vP==vP2 && vN==vN2 && vT==vT2)
-				if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z &&
-

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list