[Bf-blender-cvs] [ac23406] blender-v2.72-release: OpenNL: modify SuperLU to use doubles rather than floats, for better precision.

Fri Oct 3 15:24:07 CEST 2014

Commit: ac234067e1025dbcf0e3e25ae6ab780536ce11f7
Author: Brecht Van Lommel
Date:   Sat Sep 20 19:51:34 2014 +0200
Branches: blender-v2.72-release
https://developer.blender.org/rBac234067e1025dbcf0e3e25ae6ab780536ce11f7

OpenNL: modify SuperLU to use doubles rather than floats, for better precision.

This helps to improve the accuracy of UV unwrapping and laplacian deform for
high poly meshes, which could get warped quite badly. It's not much slower,
doubles are pretty fast on modern CPUs, but it does double memory usage. This
seems acceptable as otherwise high poly meshes would not work correctly anyway.

Fixes T39004.

===================================================================

M	intern/opennl/extern/ONL_opennl.h
M	intern/opennl/intern/opennl.c
M	intern/opennl/superlu/scolumn_bmod.c
M	intern/opennl/superlu/scopy_to_ucol.c
M	intern/opennl/superlu/sgssv.c
M	intern/opennl/superlu/sgstrf.c
M	intern/opennl/superlu/sgstrs.c
M	intern/opennl/superlu/smemory.c
M	intern/opennl/superlu/smyblas2.c
M	intern/opennl/superlu/spanel_bmod.c
M	intern/opennl/superlu/spanel_dfs.c
M	intern/opennl/superlu/spivotL.c
M	intern/opennl/superlu/spruneL.c
M	intern/opennl/superlu/ssnode_bmod.c
M	intern/opennl/superlu/ssp_blas2.c
M	intern/opennl/superlu/ssp_blas3.c
M	intern/opennl/superlu/ssp_defs.h
M	intern/opennl/superlu/strsv.c
M	intern/opennl/superlu/sutil.c
M	intern/opennl/superlu/util.c
M	intern/opennl/superlu/util.h

===================================================================

diff --git a/intern/opennl/extern/ONL_opennl.h b/intern/opennl/extern/ONL_opennl.h
index 721da20..c89a4aa 100644
--- a/intern/opennl/extern/ONL_opennl.h
+++ b/intern/opennl/extern/ONL_opennl.h
@@ -100,11 +100,11 @@ NLContext nlGetCurrent(void);
 
 /* State get/set */
 
-void nlSolverParameterf(NLenum pname, NLfloat param);
+void nlSolverParameterf(NLenum pname, NLdouble param);
 void nlSolverParameteri(NLenum pname, NLint param);
 
 void nlGetBooleanv(NLenum pname, NLboolean* params);
-void nlGetFloatv(NLenum pname, NLfloat* params);
+void nlGetFloatv(NLenum pname, NLdouble* params);
 void nlGetIntergerv(NLenum pname, NLint* params);
 
 void nlEnable(NLenum pname);
@@ -113,8 +113,8 @@ NLboolean nlIsEnabled(NLenum pname);
 
 /* Variables */
 
-void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value);
-NLfloat nlGetVariable(NLuint rhsindex, NLuint index);
+void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value);
+NLdouble nlGetVariable(NLuint rhsindex, NLuint index);
 void nlLockVariable(NLuint index);
 void nlUnlockVariable(NLuint index);
 NLboolean nlVariableIsLocked(NLuint index);
@@ -126,13 +126,9 @@ void nlEnd(NLenum primitive);
 
 /* Setting elements in matrix/vector */
 
-void nlMatrixAdd(NLuint row, NLuint col, NLfloat value);
-void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value);
-void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value);
-
-/* Multiply */
-
-void nlMatrixMultiply(NLfloat *x, NLfloat *y);
+void nlMatrixAdd(NLuint row, NLuint col, NLdouble value);
+void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value);
+void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value);
 
 /* Solve */
 
diff --git a/intern/opennl/intern/opennl.c b/intern/opennl/intern/opennl.c
index f9c63e9..8cf4bf1 100644
--- a/intern/opennl/intern/opennl.c
+++ b/intern/opennl/intern/opennl.c
@@ -69,7 +69,7 @@ static void __nl_assertion_failed(char* cond, char* file, int line) {
 }
 
 static void __nl_range_assertion_failed(
-	float x, float min_val, float max_val, char* file, int line
+	double x, double min_val, double max_val, char* file, int line
 ) {
 	fprintf(
 		stderr, 
@@ -151,7 +151,7 @@ static void __nl_should_not_have_reached(char* file, int line) {
 
 typedef struct {
 	NLuint   index;
-	NLfloat value;
+	NLdouble value;
 } __NLCoeff;
 
 typedef struct {
@@ -183,7 +183,7 @@ static void __nlRowColumnGrow(__NLRowColumn* c) {
 	}
 }
 
-static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
+static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLdouble value) {
 	NLuint i;
 	for(i=0; i<c->size; i++) {
 		if(c->coeff[i].index == (NLuint)index) {
@@ -200,7 +200,7 @@ static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
 }
 
 /* Does not check whether the index already exists */
-static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
+static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLdouble value) {
 	if(c->size == c->capacity) {
 		__nlRowColumnGrow(c);
 	}
@@ -229,7 +229,7 @@ typedef struct {
 	NLenum storage;
 	__NLRowColumn* row;
 	__NLRowColumn* column;
-	NLfloat*	  diag;
+	NLdouble*	  diag;
 } __NLSparseMatrix;
 
 
@@ -259,7 +259,7 @@ static void __nlSparseMatrixConstruct(
 	}
 
 	M->diag_size = MIN(m,n);
-	M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
+	M->diag = __NL_NEW_ARRAY(NLdouble, M->diag_size);
 }
 
 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
@@ -283,7 +283,7 @@ static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
 }
 
 static void __nlSparseMatrixAdd(
-	__NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
+	__NLSparseMatrix* M, NLuint i, NLuint j, NLdouble value
 ) {
 	__nl_parano_range_assert(i, 0, M->m - 1);
 	__nl_parano_range_assert(j, 0, M->n - 1);
@@ -313,7 +313,7 @@ static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
 			__nlRowColumnClear(&(M->column[i]));
 		}
 	}
-	__NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);	
+	__NL_CLEAR_ARRAY(NLdouble, M->diag, M->diag_size);	
 }
 
 /* Returns the number of non-zero coefficients */
@@ -338,7 +338,7 @@ static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
 /* SparseMatrix x Vector routines, internal helper routines */
 
 static void __nlSparseMatrix_mult_rows_symmetric(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint m = A->m;
 	NLuint i,ij;
@@ -358,7 +358,7 @@ static void __nlSparseMatrix_mult_rows_symmetric(
 }
 
 static void __nlSparseMatrix_mult_rows(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint m = A->m;
 	NLuint i,ij;
@@ -375,7 +375,7 @@ static void __nlSparseMatrix_mult_rows(
 }
 
 static void __nlSparseMatrix_mult_cols_symmetric(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint n = A->n;
 	NLuint j,ii;
@@ -395,13 +395,13 @@ static void __nlSparseMatrix_mult_cols_symmetric(
 }
 
 static void __nlSparseMatrix_mult_cols(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint n = A->n;
 	NLuint j,ii; 
 	__NLRowColumn* Cj = NULL;
 	__NLCoeff* c = NULL;
-	__NL_CLEAR_ARRAY(NLfloat, y, A->m);
+	__NL_CLEAR_ARRAY(NLdouble, y, A->m);
 	for(j=0; j<n; j++) {
 		Cj = &(A->column[j]);
 		for(ii=0; ii<Cj->size; ii++) {
@@ -414,7 +414,7 @@ static void __nlSparseMatrix_mult_cols(
 /************************************************************************************/
 /* SparseMatrix x Vector routines, main driver routine */
 
-static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
+static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLdouble* x, NLdouble* y) {
 	if(A->storage & __NL_ROWS) {
 		if(A->storage & __NL_SYMMETRIC) {
 			__nlSparseMatrix_mult_rows_symmetric(A, x, y);
@@ -440,7 +440,7 @@ static void __nlSparseMatrix_square(
 	NLuint i, j0, j1;
 	__NLRowColumn *Ri = NULL;
 	__NLCoeff *c0 = NULL, *c1 = NULL;
-	float value;
+	double value;
 
 	__nlSparseMatrixConstruct(AtA, n, n, A->storage);
 
@@ -460,7 +460,7 @@ static void __nlSparseMatrix_square(
 }
 
 static void __nlSparseMatrix_transpose_mult_rows(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint m = A->m;
 	NLuint n = A->n;
@@ -468,7 +468,7 @@ static void __nlSparseMatrix_transpose_mult_rows(
 	__NLRowColumn* Ri = NULL;
 	__NLCoeff* c = NULL;
 
-	__NL_CLEAR_ARRAY(NLfloat, y, n);
+	__NL_CLEAR_ARRAY(NLdouble, y, n);
 
 	for(i=0; i<m; i++) {
 		Ri = &(A->row[i]);
@@ -482,10 +482,10 @@ static void __nlSparseMatrix_transpose_mult_rows(
 /************************************************************************************/
 /* NLContext data structure */
 
-typedef void(*__NLMatrixFunc)(float* x, float* y);
+typedef void(*__NLMatrixFunc)(double* x, double* y);
 
 typedef struct {
-	NLfloat  value[4];
+	NLdouble  value[4];
 	NLboolean locked;
 	NLuint	index;
 	__NLRowColumn *a;
@@ -503,11 +503,11 @@ typedef struct {
 	NLuint			n;
 	NLuint			m;
 	__NLVariable*	variable;
-	NLfloat*		b;
-	NLfloat*		Mtb;
+	NLdouble*		b;
+	NLdouble*		Mtb;
 	__NLSparseMatrix M;
 	__NLSparseMatrix MtM;
-	NLfloat*		x;
+	NLdouble*		x;
 	NLuint			nb_variables;
 	NLuint			nb_rows;
 	NLboolean		least_squares;
@@ -520,7 +520,7 @@ typedef struct {
 	NLboolean		alloc_x;
 	NLboolean		alloc_b;
 	NLboolean		alloc_Mtb;
-	NLfloat			error;
+	NLdouble			error;
 	__NLMatrixFunc	matrix_vector_prod;
 
 	struct __NLSuperLUContext {
@@ -533,7 +533,7 @@ typedef struct {
 
 static __NLContext* __nlCurrentContext = NULL;
 
-static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
+static void __nlMatrixVectorProd_default(NLdouble* x, NLdouble* y) {
 	__nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
 }
 
@@ -611,7 +611,7 @@ static void __nlTransition(NLenum from_state, NLenum to_state) {
 /************************************************************************************/
 /* Get/Set parameters */
 
-void nlSolverParameterf(NLenum pname, NLfloat param) {
+void nlSolverParameterf(NLenum pname, NLdouble param) {
 	__nlCheckState(__NL_STATE_INITIAL);
 	switch(pname) {
 	case NL_NB_VARIABLES: {
@@ -677,22 +677,22 @@ void nlGetBooleanv(NLenum pname, NLboolean* params) {
 	}
 }
 
-void nlGetFloatv(NLenum pname, NLfloat* params) {
+void nlGetFloatv(NLenum pname, NLdouble* params) {
 	switch(pname) {
 	case NL_NB_VARIABLES: {
-		*params = (NLfloat)(__nlCurrentContext->nb_variables);
+		*params = (NLdouble)(__nlCurrentContext->nb_variables);
 	} break;
 	case NL_NB_ROWS: {
-		*params = (NLfloat)(__nlCurrentContext->nb_rows);
+		*params = (NLdouble)(__nlCurrentContext->nb_rows);
 	} break;
 	case NL_LEAST_SQUARES: {
-		*params = (NLfloat)(__nlCurrentContext->least_squares);
+		*params = (NLdouble)(__nlCurrentContext->least_squares);
 	} break;
 	case NL_SYMMETRIC: {
-		*params = (NLfloat)(__nlCurrentContext->symmetric);
+		*params = (NLdouble)(__nlCurrentContext->symmetric);
 	} break;
 	case NL_ERROR: {
-		*params = (NLfloat)(__nlCurrentContext->error);
+		*params = (NLdouble)(__nlCurrentContext->error);
 	} break;
 	default: {
 		__nl_assert_not_reached;
@@ -751,13 +751,13 @@ NLboolean nlIsEnabled(NLenum pname) {
 /************************************************************************************/
 /* Get/Set Lock/Unlock variables */
 
-void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
+void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value) {
 	__nlCheckState(__NL_STATE_SYSTEM);
 	__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
 	__nlCurrentContext->variable[index].value[rhsindex] = value;	
 }
 
-NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
+NLdouble nlGetVariable(NLuint rhsindex, NLuint index) {
 	__nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
 	__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
 	return __nlCurrentContext->variable[index].value[rhsindex];
@@ -870,15 +870,15 @@ static void __nlBeginMatrix() {
 		__nlSparseMatrixConstruct(&context->M, m, n, storage);
 		context->alloc_M = NL_TRUE;
 
-		context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
+		cont

@@ Diff output truncated at 10240 characters. @@