[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [12618] branches/cloth/blender/source/ blender/blenkernel/intern: fixed sse copy-paste glitch + #define __SSE3__ thanks to Lynx3d
Daniel Genrich
daniel.genrich at gmx.net
Mon Nov 19 01:01:34 CET 2007
Revision: 12618
http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=12618
Author: genscher
Date: 2007-11-19 01:01:33 +0100 (Mon, 19 Nov 2007)
Log Message:
-----------
fixed sse copy-paste glitch + #define __SSE3__ thanks to Lynx3d
Modified Paths:
--------------
branches/cloth/blender/source/blender/blenkernel/intern/cloth.c
branches/cloth/blender/source/blender/blenkernel/intern/implicit.c
Modified: branches/cloth/blender/source/blender/blenkernel/intern/cloth.c
===================================================================
--- branches/cloth/blender/source/blender/blenkernel/intern/cloth.c 2007-11-18 21:27:19 UTC (rev 12617)
+++ branches/cloth/blender/source/blender/blenkernel/intern/cloth.c 2007-11-19 00:01:33 UTC (rev 12618)
@@ -567,6 +567,8 @@
{
if ((clmd->clothObject == NULL) || (numverts != clmd->clothObject->numverts) )
{
+ cloth_clear_cache(ob, clmd, 0);
+
if(!cloth_from_object (ob, clmd, result, dm, framenr))
return result;
@@ -756,7 +758,7 @@
for (i = 0; i < numverts; i++)
{
VECCOPY (mvert[i].co, cloth->x[i]);
- Mat4MulVecfl (ob->imat, mvert[i].co); /* softbody is in global coords */
+ Mat4MulVecfl (ob->imat, mvert[i].co); /* cloth is in global coords */
}
}
}
Modified: branches/cloth/blender/source/blender/blenkernel/intern/implicit.c
===================================================================
--- branches/cloth/blender/source/blender/blenkernel/intern/implicit.c 2007-11-18 21:27:19 UTC (rev 12617)
+++ branches/cloth/blender/source/blender/blenkernel/intern/implicit.c 2007-11-19 00:01:33 UTC (rev 12618)
@@ -410,14 +410,15 @@
/* 3x3 matrix multiplied by a vector */
/* STATUS: verified */
-#ifdef SSE3
+#ifdef __SSE3__
DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][4], float *from) {
+ float temp[4];
__m128 v1, v2, v3, v4;
- v1 = _mm_load_ps(&matrix[0]);
- v2 = _mm_load_ps(&matrix[1]);
- v3 = _mm_load_ps(&matrix[2]);
- v4 = _mm_load_ps(from);
+ v1 = _mm_loadu_ps(&matrix[0][0]);
+ v2 = _mm_loadu_ps(&matrix[1][0]);
+ v3 = _mm_loadu_ps(&matrix[2][0]);
+ v4 = _mm_loadu_ps(from);
// stuff
v1 = _mm_mul_ps(v1, v4);
@@ -425,9 +426,9 @@
v3 = _mm_mul_ps(v3, v4);
v1 = _mm_hadd_ps(v1, v2);
v3 = _mm_hadd_ps(v3, _mm_setzero_ps());
- v1 = _mm_hadd_ps(v1, v3);
+ v4 = _mm_hadd_ps(v1, v3);
- _mm_store_ps(to, v4);
+ _mm_storeu_ps(to, v4);
}
#else
DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][4], float *from)
@@ -537,7 +538,7 @@
/* 3x3 matrix multiplied+added by a vector */
/* STATUS: verified */
-#ifdef SSE3
+#ifdef __SSE3__
DO_INLINE void muladd_fmatrix_fvector(float to[3], float matrix[3][4], float from[3]) {
__m128 v1, v2, v3, v4;
@@ -1053,145 +1054,11 @@
}
}
-/*
-// 1.0 working PCG, but slow and unstable for bigger epsilon + strong forces
+// version 1.3
int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S, fmatrix3x3 *P, fmatrix3x3 *Pinv)
{
unsigned int numverts = lA[0].vcount, iterations = 0, conjgrad_looplimit=100;
float delta0 = 0, deltaNew = 0, deltaOld = 0, alpha = 0;
- float conjgrad_epsilon=0.01f, conjgrad_lasterror=0;
- lfVector *filterb = create_lfvector(numverts);
- lfVector *p_fb = create_lfvector(numverts);
- lfVector *r = create_lfvector(numverts);
- lfVector *c = create_lfvector(numverts);
- lfVector *q = create_lfvector(numverts);
- lfVector *s = create_lfvector(numverts);
-
- BuildPPinv(lA, P, Pinv);
-
- cp_lfvector(dv, z, numverts);
- cp_lfvector(filterb, lB, numverts);
- filter(filterb, S);
- mul_bfmatrix_lfvector(p_fb, P, filterb);
- delta0 = dot_lfvector(filterb, p_fb, numverts);
-
- mul_bfmatrix_lfvector(r, lA, dv);
- mul_lfvectorS(r, r, -1.0, numverts);
- add_lfvector_lfvector(r, r, lB, numverts);
- filter(r, S);
-
- mul_bfmatrix_lfvector(c, Pinv, r);
- filter(c, S);
-
- deltaNew = dot_lfvector(r, c, numverts);
-
- do
- {
- iterations++;
-
- mul_bfmatrix_lfvector(q, lA, c);
- filter(q, S);
-
- alpha = deltaNew / dot_lfvector(c, q, numverts);
-
- add_lfvector_lfvectorS(dv, dv, c, alpha, numverts);
-
- add_lfvector_lfvectorS(r, r, q, -alpha, numverts);
-
- mul_bfmatrix_lfvector(s, Pinv, r);
-
-
- deltaOld = deltaNew;
-
- deltaNew = dot_lfvector(r, s, numverts);
-
- add_lfvector_lfvectorS(s, s, c, deltaNew / deltaOld, numverts);
- filter(s, S);
-
- cp_lfvector(c, s, numverts);
- } while ((deltaNew > (conjgrad_epsilon*conjgrad_epsilon*delta0)) && (iterations < conjgrad_looplimit));
-
- del_lfvector(s);
- del_lfvector(q);
- del_lfvector(c);
- del_lfvector(r);
- del_lfvector(p_fb);
- del_lfvector(filterb);
-
- printf("iterations: %d\n", iterations);
-
- return iterations<conjgrad_looplimit;
-}
-*/
-
-/*
-// 1.1 stable version frmo Box / Aschermann
-// but also very slow ;)
-int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S, fmatrix3x3 *P, fmatrix3x3 *Pinv)
-{
- unsigned int numverts = lA[0].vcount, iterations = 0, conjgrad_looplimit=100;
- float delta0 = 0, deltaNew = 0, deltaOld = 0, alpha = 0;
- float conjgrad_epsilon=0.01f, conjgrad_lasterror=0;
- lfVector *r = create_lfvector(numverts);
- lfVector *p = create_lfvector(numverts);
- lfVector *s = create_lfvector(numverts);
- lfVector *h = create_lfvector(numverts);
-
- BuildPPinv(lA, P, Pinv);
-
- cp_lfvector(dv, z, numverts);
-
- mul_bfmatrix_lfvector(r, lA, dv);
- mul_lfvectorS(r, r, -1.0, numverts);
- add_lfvector_lfvector(r, r, lB, numverts);
- filter(r, S);
-
- mul_bfmatrix_lfvector(p, Pinv, r);
- filter(p, S);
-
- deltaNew = delta0 = dot_lfvector(r, p, numverts);
-
- while ((deltaNew > (conjgrad_epsilon*conjgrad_epsilon*delta0)) && (iterations < conjgrad_looplimit))
- {
- iterations++;
-
- mul_bfmatrix_lfvector(s, lA, p);
- filter(s, S);
-
- alpha = deltaNew / dot_lfvector(p, s, numverts);
-
- add_lfvector_lfvectorS(dv, dv, p, alpha, numverts);
-
- add_lfvector_lfvectorS(r, r, s, -alpha, numverts);
-
- mul_bfmatrix_lfvector(h, Pinv, r);
-
-
- deltaOld = deltaNew;
-
- deltaNew = dot_lfvector(r, h, numverts);
-
- add_lfvector_lfvectorS(p, h, p, deltaNew / deltaOld, numverts);
- filter(p, S);
-
- }
-
- del_lfvector(h);
- del_lfvector(s);
- del_lfvector(p);
- del_lfvector(r);
-
- printf("iterations: %d\n", iterations);
-
- return iterations<conjgrad_looplimit;
-}
-*/
-
-// 1.3
-int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S, fmatrix3x3 *P, fmatrix3x3 *Pinv)
-{
- unsigned int numverts = lA[0].vcount, iterations = 0, conjgrad_looplimit=100;
- float delta0 = 0, deltaNew = 0, deltaOld = 0, alpha = 0;
float conjgrad_epsilon=0.0001; // 0.2 is dt for steps=5
lfVector *r = create_lfvector(numverts);
lfVector *p = create_lfvector(numverts);
@@ -1598,8 +1465,8 @@
add_lfvectorS_lfvectorS(B, lF, dt, dFdXmV, (dt*dt), numverts);
- cg_filtered(dV, A, B, z, S); // conjugate gradient algorithm to solve Ax=b
- // cg_filtered_pre(dV, A, B, z, S, P, Pinv);
+ // cg_filtered(dV, A, B, z, S); // conjugate gradient algorithm to solve Ax=b
+ cg_filtered_pre(dV, A, B, z, S, P, Pinv);
// advance velocities
add_lfvector_lfvector(Vnew, lV, dV, numverts);
More information about the Bf-blender-cvs
mailing list