[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [29474] branches/render25: Render Branch: move irradiance cache computation of 4x4 matrix svd and

Tue Jun 15 20:19:35 CEST 2010

Revision: 29474
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=29474
Author:   blendix
Date:     2010-06-15 20:19:35 +0200 (Tue, 15 Jun 2010)

Log Message:
-----------
Render Branch: move irradiance cache computation of 4x4 matrix svd and
pseudoinverse into blenlib, slightly faster due to avoiding malloc/free
and some floating point divisions.

Modified Paths:
--------------
    branches/render25/intern/iksolver/intern/IK_Solver.cpp
    branches/render25/source/blender/blenlib/BLI_math_matrix.h
    branches/render25/source/blender/blenlib/intern/math_matrix.c
    branches/render25/source/blender/render/intern/source/cache.c

Modified: branches/render25/intern/iksolver/intern/IK_Solver.cpp
===================================================================

--- branches/render25/intern/iksolver/intern/IK_Solver.cpp	2010-06-15 18:14:04 UTC (rev 29473)
+++ branches/render25/intern/iksolver/intern/IK_Solver.cpp	2010-06-15 18:19:35 UTC (rev 29474)
@@ -377,40 +377,3 @@
 	return ((result)? 1: 0);
 }
 
-#include "TNT/cmat.h"
-#include "TNT/svd.h"
-
-extern "C" {
-void TNT_svd(float m[][4], float *w, float u[][4]);
-}
-
-void TNT_svd(float m[][4], float *w, float u[][4])
-{
-	TNT::Matrix<float> M, V, U;
-	TNT::Vector<float> W, W1, W2;
-	int i, j;
-
-	M.newsize(4, 4);
-	V.newsize(4, 4);
-	U.newsize(4, 4);
-	W.newsize(4);
-	W1.newsize(4);
-	W2.newsize(4);
-
-	for(i=0; i<4; i++)
-		for(j=0; j<4; j++)
-			M[i][j]= m[j][i];
-
-	TNT::SVD(M, V, W, U, W1, W2);
-
-	for(i=0; i<4; i++)
-		w[i]= W[i];
-
-	for(i=0; i<4; i++) {
-		for(j=0; j<4; j++) {
-			m[i][j]= V[j][i];
-			u[i][j]= U[j][i];
-		}
-	}
-}
-

Modified: branches/render25/source/blender/blenlib/BLI_math_matrix.h
===================================================================
--- branches/render25/source/blender/blenlib/BLI_math_matrix.h	2010-06-15 18:14:04 UTC (rev 29473)
+++ branches/render25/source/blender/blenlib/BLI_math_matrix.h	2010-06-15 18:19:35 UTC (rev 29474)
@@ -122,6 +122,9 @@
 	float g, float h, float i);
 float determinant_m4(float A[4][4]);
 
+void svd_m4(float U[4][4], float s[4], float V[4][4], float A[4][4]);
+void pseudoinverse_m4_m4(float Ainv[4][4], float A[4][4], float epsilon);
+
 /****************************** Transformations ******************************/
 
 void scale_m3_fl(float R[3][3], float scale);

Modified: branches/render25/source/blender/blenlib/intern/math_matrix.c
===================================================================
--- branches/render25/source/blender/blenlib/intern/math_matrix.c	2010-06-15 18:14:04 UTC (rev 29473)
+++ branches/render25/source/blender/blenlib/intern/math_matrix.c	2010-06-15 18:19:35 UTC (rev 29474)
@@ -1149,3 +1149,458 @@
 	printf("%f %f %f %f\n",m[0][3],m[1][3],m[2][3],m[3][3]);
 	printf("\n");
 }
+
+/*********************************** SVD ************************************
+ * from TNT matrix library
+
+ * Compute the Single Value Decomposition of an arbitrary matrix A
+ * That is compute the 3 matrices U,W,V with U column orthogonal (m,n) 
+ * ,W a diagonal matrix and V an orthogonal square matrix s.t. 
+ * A = U.W.Vt. From this decomposition it is trivial to compute the 
+ * (pseudo-inverse) of A as Ainv = V.Winv.tranpose(U).
+ */
+
+void svd_m4(float U[4][4], float s[4], float V[4][4], float A_[4][4])
+{
+	float A[4][4];
+	float work1[4], work2[4];
+	int m = 4;
+	int n = 4;
+	int maxiter = 200;
+	int nu = minf(m,n);
+
+	float *work = work1;
+	float *e = work2;
+	float eps;
+
+	int i=0, j=0, k=0, p, pp, iter;
+
+	// Reduce A to bidiagonal form, storing the diagonal elements
+	// in s and the super-diagonal elements in e.
+
+	int nct = minf(m-1,n);
+	int nrt = maxf(0,minf(n-2,m));
+
+	copy_m4_m4(A, A_);
+	zero_m4(U);
+	zero_v4(s);
+
+	for (k = 0; k < maxf(nct,nrt); k++) {
+		if (k < nct) {
+
+			// Compute the transformation for the k-th column and
+			// place the k-th diagonal in s[k].
+			// Compute 2-norm of k-th column without under/overflow.
+			s[k] = 0;
+			for (i = k; i < m; i++) {
+				s[k] = hypotf(s[k],A[i][k]);
+			}
+			if (s[k] != 0.0f) {
+				float invsk;
+				if (A[k][k] < 0.0f) {
+					s[k] = -s[k];
+				}
+				invsk = 1.0f/s[k];
+				for (i = k; i < m; i++) {
+					A[i][k] *= invsk;
+				}
+				A[k][k] += 1.0f;
+			}
+			s[k] = -s[k];
+		}
+		for (j = k+1; j < n; j++) {
+			if ((k < nct) && (s[k] != 0.0f))  {
+
+			// Apply the transformation.
+
+				float t = 0;
+				for (i = k; i < m; i++) {
+					t += A[i][k]*A[i][j];
+				}
+				t = -t/A[k][k];
+				for (i = k; i < m; i++) {
+					A[i][j] += t*A[i][k];
+				}
+			}
+
+			// Place the k-th row of A into e for the
+			// subsequent calculation of the row transformation.
+
+			e[j] = A[k][j];
+		}
+		if (k < nct) {
+
+			// Place the transformation in U for subsequent back
+			// multiplication.
+
+			for (i = k; i < m; i++)
+				U[i][k] = A[i][k];
+		}
+		if (k < nrt) {
+
+			// Compute the k-th row transformation and place the
+			// k-th super-diagonal in e[k].
+			// Compute 2-norm without under/overflow.
+			e[k] = 0;
+			for (i = k+1; i < n; i++) {
+				e[k] = hypotf(e[k],e[i]);
+			}
+			if (e[k] != 0.0f) {
+				float invek;
+				if (e[k+1] < 0.0f) {
+					e[k] = -e[k];
+				}
+				invek = 1.0f/e[k];
+				for (i = k+1; i < n; i++) {
+					e[i] *= invek;
+				}
+				e[k+1] += 1.0f;
+			}
+			e[k] = -e[k];
+			if ((k+1 < m) & (e[k] != 0.0f)) {
+				float invek1;
+
+			// Apply the transformation.
+
+				for (i = k+1; i < m; i++) {
+					work[i] = 0.0f;
+				}
+				for (j = k+1; j < n; j++) {
+					for (i = k+1; i < m; i++) {
+						work[i] += e[j]*A[i][j];
+					}
+				}
+				invek1 = 1.0f/e[k+1];
+				for (j = k+1; j < n; j++) {
+					float t = -e[j]*invek1;
+					for (i = k+1; i < m; i++) {
+						A[i][j] += t*work[i];
+					}
+				}
+			}
+
+			// Place the transformation in V for subsequent
+			// back multiplication.
+
+			for (i = k+1; i < n; i++)
+				V[i][k] = e[i];
+		}
+	}
+
+	// Set up the final bidiagonal matrix or order p.
+
+	p = minf(n,m+1);
+	if (nct < n) {
+		s[nct] = A[nct][nct];
+	}
+	if (m < p) {
+		s[p-1] = 0.0f;
+	}
+	if (nrt+1 < p) {
+		e[nrt] = A[nrt][p-1];
+	}
+	e[p-1] = 0.0f;
+
+	// If required, generate U.
+
+	for (j = nct; j < nu; j++) {
+		for (i = 0; i < m; i++) {
+			U[i][j] = 0.0f;
+		}
+		U[j][j] = 1.0f;
+	}
+	for (k = nct-1; k >= 0; k--) {
+		if (s[k] != 0.0f) {
+			for (j = k+1; j < nu; j++) {
+				float t = 0;
+				for (i = k; i < m; i++) {
+					t += U[i][k]*U[i][j];
+				}
+				t = -t/U[k][k];
+				for (i = k; i < m; i++) {
+					U[i][j] += t*U[i][k];
+				}
+			}
+			for (i = k; i < m; i++ ) {
+				U[i][k] = -U[i][k];
+			}
+			U[k][k] = 1.0f + U[k][k];
+			for (i = 0; i < k-1; i++) {
+				U[i][k] = 0.0f;
+			}
+		} else {
+			for (i = 0; i < m; i++) {
+				U[i][k] = 0.0f;
+			}
+			U[k][k] = 1.0f;
+		}
+	}
+
+	// If required, generate V.
+
+	for (k = n-1; k >= 0; k--) {
+		if ((k < nrt) & (e[k] != 0.0f)) {
+			for (j = k+1; j < nu; j++) {
+				float t = 0;
+				for (i = k+1; i < n; i++) {
+					t += V[i][k]*V[i][j];
+				}
+				t = -t/V[k+1][k];
+				for (i = k+1; i < n; i++) {
+					V[i][j] += t*V[i][k];
+				}
+			}
+		}
+		for (i = 0; i < n; i++) {
+			V[i][k] = 0.0f;
+		}
+		V[k][k] = 1.0f;
+	}
+
+	// Main iteration loop for the singular values.
+
+	pp = p-1;
+	iter = 0;
+	eps = powf(2.0f,-52.0f);
+	while (p > 0) {
+		int kase=0;
+		k=0;
+
+		// Test for maximum iterations to avoid infinite loop
+		if(maxiter == 0)
+			break;
+		maxiter--;
+
+		// This section of the program inspects for
+		// negligible elements in the s and e arrays.  On
+		// completion the variables kase and k are set as follows.
+
+		// kase = 1	  if s(p) and e[k-1] are negligible and k<p
+		// kase = 2	  if s(k) is negligible and k<p
+		// kase = 3	  if e[k-1] is negligible, k<p, and
+		//				  s(k), ..., s(p) are not negligible (qr step).
+		// kase = 4	  if e(p-1) is negligible (convergence).
+
+		for (k = p-2; k >= -1; k--) {
+			if (k == -1) {
+				break;
+			}
+			if (fabsf(e[k]) <= eps*(fabsf(s[k]) + fabsf(s[k+1]))) {
+				e[k] = 0.0f;
+				break;
+			}
+		}
+		if (k == p-2) {
+			kase = 4;
+		} else {
+			int ks;
+			for (ks = p-1; ks >= k; ks--) {
+				float t;
+				if (ks == k) {
+					break;
+				}
+				t = (ks != p ? fabsf(e[ks]) : 0.f) + 
+							  (ks != k+1 ? fabsf(e[ks-1]) : 0.0f);
+				if (fabsf(s[ks]) <= eps*t)  {
+					s[ks] = 0.0f;
+					break;
+				}
+			}
+			if (ks == k) {
+				kase = 3;
+			} else if (ks == p-1) {
+				kase = 1;
+			} else {
+				kase = 2;
+				k = ks;
+			}
+		}
+		k++;
+
+		// Perform the task indicated by kase.
+
+		switch (kase) {
+
+			// Deflate negligible s(p).
+
+			case 1: {
+				float f = e[p-2];
+				e[p-2] = 0.0f;
+				for (j = p-2; j >= k; j--) {
+					float t = hypotf(s[j],f);
+					float invt = 1.0f/t;
+					float cs = s[j]*invt;
+					float sn = f*invt;
+					s[j] = t;
+					if (j != k) {
+						f = -sn*e[j-1];
+						e[j-1] = cs*e[j-1];
+					}
+
+					for (i = 0; i < n; i++) {
+						t = cs*V[i][j] + sn*V[i][p-1];
+						V[i][p-1] = -sn*V[i][j] + cs*V[i][p-1];
+						V[i][j] = t;
+					}
+				}
+			}
+			break;
+
+			// Split at negligible s(k).
+
+			case 2: {
+				float f = e[k-1];
+				e[k-1] = 0.0f;
+				for (j = k; j < p; j++) {
+					float t = hypotf(s[j],f);
+					float invt = 1.0f/t;
+					float cs = s[j]*invt;
+					float sn = f*invt;
+					s[j] = t;
+					f = -sn*e[j];
+					e[j] = cs*e[j];
+
+					for (i = 0; i < m; i++) {
+						t = cs*U[i][j] + sn*U[i][k-1];
+						U[i][k-1] = -sn*U[i][j] + cs*U[i][k-1];
+						U[i][j] = t;
+					}
+				}
+			}
+			break;
+
+			// Perform one qr step.
+
+			case 3: {
+
+				// Calculate the shift.
+
+				float scale = maxf(maxf(maxf(maxf(
+						  fabsf(s[p-1]),fabsf(s[p-2])),fabsf(e[p-2])), 
+						  fabsf(s[k])),fabsf(e[k]));
+				float invscale = 1.0f/scale;
+				float sp = s[p-1]*invscale;
+				float spm1 = s[p-2]*invscale;
+				float epm1 = e[p-2]*invscale;
+				float sk = s[k]*invscale;
+				float ek = e[k]*invscale;
+				float b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)*0.5f;
+				float c = (sp*epm1)*(sp*epm1);
+				float shift = 0.0f;
+				float f, g;
+				if ((b != 0.0f) || (c != 0.0f)) {
+					shift = sqrtf(b*b + c);
+					if (b < 0.0f) {
+						shift = -shift;
+					}
+					shift = c/(b + shift);
+				}
+				f = (sk + sp)*(sk - sp) + shift;
+				g = sk*ek;
+
+				// Chase zeros.
+
+				for (j = k; j < p-1; j++) {
+					float t = hypotf(f,g);
+					/* division by zero checks added to avoid NaN (brecht) */
+					float cs = (t == 0.0f)? 0.0f: f/t;
+					float sn = (t == 0.0f)? 0.0f: g/t;
+					if (j != k) {
+						e[j-1] = t;
+					}
+					f = cs*s[j] + sn*e[j];
+					e[j] = cs*e[j] - sn*s[j];
+					g = sn*s[j+1];
+					s[j+1] = cs*s[j+1];
+
+					for (i = 0; i < n; i++) {
+						t = cs*V[i][j] + sn*V[i][j+1];
+						V[i][j+1] = -sn*V[i][j] + cs*V[i][j+1];
+						V[i][j] = t;
+					}
+
+					t = hypotf(f,g);
+					/* division by zero checks added to avoid NaN (brecht) */
+					cs = (t == 0.0f)? 0.0f: f/t;
+					sn = (t == 0.0f)? 0.0f: g/t;
+					s[j] = t;
+					f = cs*e[j] + sn*s[j+1];
+					s[j+1] = -sn*e[j] + cs*s[j+1];
+					g = sn*e[j+1];
+					e[j+1] = cs*e[j+1];
+					if (j < m-1) {
+						for (i = 0; i < m; i++) {
+							t = cs*U[i][j] + sn*U[i][j+1];
+							U[i][j+1] = -sn*U[i][j] + cs*U[i][j+1];
+							U[i][j] = t;
+						}
+					}
+				}
+				e[p-2] = f;
+				iter = iter + 1;

@@ Diff output truncated at 10240 characters. @@