[Bf-blender-cvs] [49717d49715] master: Optimize tangent space calculation by inlining functions

Sergey Sharybin noreply at git.blender.org
Fri Aug 25 15:56:22 CEST 2017


Commit: 49717d49715857ba595336115b7dba8d66b7f2ae
Author: Sergey Sharybin
Date:   Fri Aug 25 14:50:04 2017 +0200
Branches: master
https://developer.blender.org/rB49717d49715857ba595336115b7dba8d66b7f2ae

Optimize tangent space calculation by inlining functions

Brings tangent space calculation time down from 4.6 sec to 3.1 sec for the dragon model in BI.
Cycles is also somewhat faster, but it has other bottlenecks.

Funny thing: using a simple `static inline` already gives a lot of speedup here.
That answers the question of whether it's OK to leave the decision on what to
inline up to the compiler...

===================================================================

M	intern/mikktspace/mikktspace.c

===================================================================

diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c
index 7fbfe57be5c..479443805bf 100644
--- a/intern/mikktspace/mikktspace.c
+++ b/intern/mikktspace/mikktspace.c
@@ -39,17 +39,23 @@
 
 #define INTERNAL_RND_SORT_SEED		39871946
 
+#ifdef _MSC_VER
+#  define MIKK_INLINE static __forceinline
+#else
+#  define MIKK_INLINE static inline __attribute__((always_inline)) __attribute__((unused))
+#endif
+
 // internal structure
 typedef struct {
 	float x, y, z;
 } SVec3;
 
-static tbool			veq( const SVec3 v1, const SVec3 v2 )
+MIKK_INLINE tbool			veq( const SVec3 v1, const SVec3 v2 )
 {
 	return (v1.x == v2.x) && (v1.y == v2.y) && (v1.z == v2.z);
 }
 
-static SVec3		vadd( const SVec3 v1, const SVec3 v2 )
+MIKK_INLINE SVec3		vadd( const SVec3 v1, const SVec3 v2 )
 {
 	SVec3 vRes;
 
@@ -61,7 +67,7 @@ static SVec3		vadd( const SVec3 v1, const SVec3 v2 )
 }
 
 
-static SVec3		vsub( const SVec3 v1, const SVec3 v2 )
+MIKK_INLINE SVec3		vsub( const SVec3 v1, const SVec3 v2 )
 {
 	SVec3 vRes;
 
@@ -72,7 +78,7 @@ static SVec3		vsub( const SVec3 v1, const SVec3 v2 )
 	return vRes;
 }
 
-static SVec3		vscale(const float fS, const SVec3 v)
+MIKK_INLINE SVec3		vscale(const float fS, const SVec3 v)
 {
 	SVec3 vRes;
 
@@ -83,24 +89,24 @@ static SVec3		vscale(const float fS, const SVec3 v)
 	return vRes;
 }
 
-static float			LengthSquared( const SVec3 v )
+MIKK_INLINE float			LengthSquared( const SVec3 v )
 {
 	return v.x*v.x + v.y*v.y + v.z*v.z;
 }
 
-static float			Length( const SVec3 v )
+MIKK_INLINE float			Length( const SVec3 v )
 {
 	return sqrtf(LengthSquared(v));
 }
 
 #if 0  // UNUSED
-static SVec3		Normalize( const SVec3 v )
+MIKK_INLINE SVec3		Normalize( const SVec3 v )
 {
 	return vscale(1.0f / Length(v), v);
 }
 #endif
 
-static SVec3		NormalizeSafe( const SVec3 v )
+MIKK_INLINE SVec3		NormalizeSafe( const SVec3 v )
 {
 	const float len = Length(v);
 	if (len != 0.0f) {
@@ -112,20 +118,20 @@ static SVec3		NormalizeSafe( const SVec3 v )
 	}
 }
 
-static float		vdot( const SVec3 v1, const SVec3 v2)
+MIKK_INLINE float		vdot( const SVec3 v1, const SVec3 v2)
 {
 	return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
 }
 
 
-static tbool NotZero(const float fX)
+MIKK_INLINE tbool NotZero(const float fX)
 {
 	// could possibly use FLT_EPSILON instead
 	return fabsf(fX) > FLT_MIN;
 }
 
 #if 0  // UNUSED
-static tbool VNotZero(const SVec3 v)
+MIKK_INLINE tbool VNotZero(const SVec3 v)
 {
 	// might change this to an epsilon based test
 	return NotZero(v.x) || NotZero(v.y) || NotZero(v.z);
@@ -184,13 +190,13 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
                              const int iNrActiveGroups, const int piTriListIn[], const float fThresCos,
                              const SMikkTSpaceContext * pContext);
 
-static int MakeIndex(const int iFace, const int iVert)
+MIKK_INLINE int MakeIndex(const int iFace, const int iVert)
 {
 	assert(iVert>=0 && iVert<4 && iFace>=0);
 	return (iFace<<2) | (iVert&0x3);
 }
 
-static void IndexToData(int * piFace, int * piVert, const int iIndexIn)
+MIKK_INLINE void IndexToData(int * piFace, int * piVert, const int iIndexIn)
 {
 	piVert[0] = iIndexIn&0x3;
 	piFace[0] = iIndexIn>>2;
@@ -226,9 +232,9 @@ static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1)
 
 
 
-static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index);
-static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index);
-static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index);
+MIKK_INLINE SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index);
+MIKK_INLINE SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index);
+MIKK_INLINE SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index);
 
 
 // degen triangles
@@ -896,7 +902,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_
 	return iTSpacesOffs;
 }
 
-static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index)
+MIKK_INLINE SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index)
 {
 	int iF, iI;
 	SVec3 res; float pos[3];
@@ -906,7 +912,7 @@ static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index)
 	return res;
 }
 
-static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index)
+MIKK_INLINE SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index)
 {
 	int iF, iI;
 	SVec3 res; float norm[3];
@@ -916,7 +922,7 @@ static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index)
 	return res;
 }
 
-static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index)
+MIKK_INLINE SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index)
 {
 	int iF, iI;
 	SVec3 res; float texc[2];
@@ -1080,7 +1086,7 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 
 static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iMyTriIndex, SGroup * pGroup);
-static void AddTriToGroup(SGroup * pGroup, const int iTriIndex);
+MIKK_INLINE void AddTriToGroup(SGroup * pGroup, const int iTriIndex);
 
 static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn)
 {
@@ -1146,7 +1152,7 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT
 	return iNrActiveGroups;
 }
 
-static void AddTriToGroup(SGroup * pGroup, const int iTriIndex)
+MIKK_INLINE void AddTriToGroup(SGroup * pGroup, const int iTriIndex)
 {
 	pGroup->pFaceIndices[pGroup->iNrFaces] = iTriIndex;
 	++pGroup->iNrFaces;



More information about the Bf-blender-cvs mailing list