[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [28839] branches/render25/source/blender: Render Branch: implicit solver for hair now calculates forces and does

Tue May 18 18:40:05 CEST 2010

Revision: 28839
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=28839
Author:   blendix
Date:     2010-05-18 18:40:05 +0200 (Tue, 18 May 2010)

Log Message:
-----------
Render Branch: implicit solver for hair now calculates forces and does
integration separately per hair. This increases the stability of the hair.

I found that usually the conjugate gradient takes about 4-8 steps, but
it sometimes takes 20 or even does not converge, and returns bogus
velocities. I'm not entirely sure why that is, but it seems logical
that a smaller system would be more numerically stable. What I suspect
is that since there are many hairs, and it tests for the average error
of those hairs, a few hairs may be able to hide their high error in this
average.

I've tried not to make the code too ugly, but there are a few stupid
hacks, maybe eventually it would be better to have a separate hair and
cloth function that calls common functions.

Also for threading, hairs can now be distributed over threads. There
still seems to be too much thread overhead though, should find a way
to keep all threads running for the entire timestep.

Modified Paths:
--------------
    branches/render25/source/blender/blenkernel/intern/cloth.c
    branches/render25/source/blender/blenkernel/intern/implicit.c
    branches/render25/source/blender/blenkernel/intern/particle_system.c
    branches/render25/source/blender/makesdna/DNA_cloth_types.h

Modified: branches/render25/source/blender/blenkernel/intern/cloth.c
===================================================================

--- branches/render25/source/blender/blenkernel/intern/cloth.c	2010-05-18 16:31:29 UTC (rev 28838)
+++ branches/render25/source/blender/blenkernel/intern/cloth.c	2010-05-18 16:40:05 UTC (rev 28839)
@@ -1280,6 +1280,9 @@
 		search2 = search->next;
 		while(search && search2)
 		{
+			LinkNode *nextsearch = search->next;
+			LinkNode *nextsearch2 = search2->next;
+
 			tspring = search->link;
 			tspring2 = search2->link;
 
@@ -1300,12 +1303,18 @@
 				spring->stiffness = (cloth->verts[spring->kl].bend_stiff + cloth->verts[spring->ij].bend_stiff) / 2.0;
 				bend_springs++;
 
-				BLI_linklist_prepend ( &cloth->springs, spring );
+				/* XXX hack to ensure springs for the same hair are together,
+				   so we can easily split them up per hair later */
+				BLI_linklist_insert_after ( &search, spring );
 			}
 			
-			search = search->next;
-			search2 = search2->next;
+			search = nextsearch;
+			search2 = nextsearch2;
 		}
+
+		/* XXX hack to ensure cloth springs are ordered
+		   correctly to be split up later per hair */
+		BLI_linklist_reverse( &cloth->springs );
 	}
 	
 	/* insert other near springs in edgehash AFTER bending springs are calculated (for selfcolls) */

Modified: branches/render25/source/blender/blenkernel/intern/implicit.c
===================================================================
--- branches/render25/source/blender/blenkernel/intern/implicit.c	2010-05-18 16:31:29 UTC (rev 28838)
+++ branches/render25/source/blender/blenkernel/intern/implicit.c	2010-05-18 16:40:05 UTC (rev 28839)
@@ -720,17 +720,121 @@
 {
 	lfVector *X, *V, *Xnew, *Vnew, *olddV, *F, *B, *dV, *z;
 	fmatrix3x3 *A, *dFdV, *dFdX, *S, *P, *Pinv, *bigI, *M; 
+
+	ClothVertex *verts;
+	LinkNode *springs;
+	int numverts, numsprings;
+
+	int totsubset;
+	int *vertoffset;
+	int *springoffset;
+	LinkNode **subspring;
 } Implicit_Data;
 
-int implicit_init (Object *ob, ClothModifierData *clmd)
+static void implicit_data_subset(Implicit_Data *idsub, Implicit_Data *id, int sub)
 {
+	int voffset = id->vertoffset[sub];
+	int soffset = id->springoffset[sub];
+
+	idsub->verts = id->verts + voffset;
+	idsub->springs = id->subspring[sub];
+	idsub->numverts = id->vertoffset[sub+1] - voffset;
+	idsub->numsprings = id->springoffset[sub+1] - soffset;
+
+	idsub->X = id->X + voffset;
+	idsub->V = id->V + voffset;
+	idsub->Xnew = id->Xnew + voffset;
+	idsub->Vnew = id->Vnew + voffset;
+	idsub->olddV = id->olddV + voffset;
+	idsub->F = id->F + voffset;
+	idsub->B = id->B + voffset;
+	idsub->dV = id->dV + voffset;
+	idsub->z = id->z + voffset;
+
+	idsub->A = id->A + voffset + soffset;
+	idsub->dFdV = id->dFdV + voffset + soffset;
+	idsub->dFdX = id->dFdX + voffset + soffset;
+	idsub->S = id->S + voffset;
+	idsub->P = id->P + voffset + soffset;
+	idsub->Pinv = id->Pinv + voffset + soffset;
+	idsub->bigI = id->bigI + voffset + soffset;
+	idsub->M = id->M + voffset + soffset;
+}
+
+static void implicit_init_mats(Implicit_Data *id, ClothVertex *verts, LinkNode *springs, int numverts, int numsprings, int voffset)
+{
+	ClothSpring *spring;
+	LinkNode *search = NULL;
 	unsigned int i = 0;
 	unsigned int pinned = 0;
+
+	id->A[0].vcount = numverts;
+	id->A[0].scount = numsprings;
+	id->dFdV[0].vcount = numverts;
+	id->dFdV[0].scount = numsprings;
+	id->dFdX[0].vcount = numverts;
+	id->dFdX[0].scount = numsprings;
+	id->Pinv[0].vcount = numverts;
+	id->Pinv[0].scount = numsprings;
+	id->P[0].vcount = numverts;
+	id->P[0].scount = numsprings;
+	id->bigI[0].vcount = numverts;
+	id->bigI[0].scount = numsprings;
+	id->M[0].vcount = numverts;
+	id->M[0].scount = numsprings;
+
+	for(i=0;i<numverts;i++) 
+	{
+		id->A[i].r = id->A[i].c = id->dFdV[i].r = id->dFdV[i].c = id->dFdX[i].r = id->dFdX[i].c = id->P[i].c = id->P[i].r = id->Pinv[i].c = id->Pinv[i].r = id->bigI[i].c = id->bigI[i].r = id->M[i].r = id->M[i].c = i;
+
+		if(verts [i].flags & CLOTH_VERT_FLAG_PINNED)
+		{
+			id->S[pinned].pinned = 1;
+			id->S[pinned].c = id->S[pinned].r = i;
+			pinned++;
+		}
+		
+		initdiag_fmatrixS(id->M[i].m, verts[i].mass);
+	}
+
+	// S is special and needs specific vcount and scount
+	id->S[0].vcount = pinned; id->S[0].scount = 0;
+
+	// init springs 
+	search = springs;
+	for(i=0;i<numsprings;i++) 
+	{
+		spring = search->link;
+
+		spring->ij -= voffset;
+		spring->kl -= voffset;
+
+		// dFdV_start[i].r = big_I[i].r = big_zero[i].r = 
+		id->A[i+numverts].r = id->dFdV[i+numverts].r = id->dFdX[i+numverts].r = 
+				id->P[i+numverts].r = id->Pinv[i+numverts].r = id->bigI[i+numverts].r = id->M[i+numverts].r = spring->ij;
+
+		// dFdV_start[i].c = big_I[i].c = big_zero[i].c = 
+		id->A[i+numverts].c = id->dFdV[i+numverts].c = id->dFdX[i+numverts].c = 
+				id->P[i+numverts].c = id->Pinv[i+numverts].c = id->bigI[i+numverts].c = id->M[i+numverts].c = spring->kl;
+
+		spring->matrix_index = i + numverts;
+		
+		search = search->next;
+	}
+	
+	initdiag_bfmatrix(id->bigI, I);
+
+	for(i = 0; i < numverts; i++)
+	{		
+		VECCOPY(id->X[i], verts[i].x);
+	}
+}
+
+int implicit_init (Object *ob, ClothModifierData *clmd)
+{
 	Cloth *cloth = NULL;
-	ClothVertex *verts = NULL;
-	ClothSpring *spring = NULL;
 	Implicit_Data *id = NULL;
-	LinkNode *search = NULL;
+	int sub;
 	
 	if(G.rt > 0)
 		printf("implicit_init\n");
@@ -739,7 +843,6 @@
 	// MEMORY_BASE.first = MEMORY_BASE.last = NULL;
 
 	cloth = (Cloth *)clmd->clothObject;
-	verts = cloth->verts;
 
 	// create implicit base
 	id = (Implicit_Data *)MEM_callocN (sizeof(Implicit_Data), "implicit vecmat");
@@ -764,52 +867,46 @@
 	id->B = create_lfvector(cloth->numverts);
 	id->dV = create_lfvector(cloth->numverts);
 	id->z = create_lfvector(cloth->numverts);
-	
-	for(i=0;i<cloth->numverts;i++) 
-	{
-		id->A[i].r = id->A[i].c = id->dFdV[i].r = id->dFdV[i].c = id->dFdX[i].r = id->dFdX[i].c = id->P[i].c = id->P[i].r = id->Pinv[i].c = id->Pinv[i].r = id->bigI[i].c = id->bigI[i].r = id->M[i].r = id->M[i].c = i;
 
-		if(verts [i].flags & CLOTH_VERT_FLAG_PINNED)
-		{
-			id->S[pinned].pinned = 1;
-			id->S[pinned].c = id->S[pinned].r = i;
-			pinned++;
-		}
-		
-		initdiag_fmatrixS(id->M[i].m, verts[i].mass);
+	/* set data */
+	id->verts = cloth->verts;
+	id->springs = cloth->springs;
+	id->numverts = cloth->numverts;
+	id->numsprings = cloth->numsprings;
+
+	/* create subsets for hair, to do individual solve for each hair strand,
+	   this should help increase stability and performance by fewer CG steps
+	   on average and better multithreading */
+	if(clmd->sim_parms->tothair) {
+		id->totsubset = clmd->sim_parms->tothair;
+		id->vertoffset = MEM_dupallocN(clmd->sim_parms->hair_vert_offset);
+		id->springoffset = MEM_dupallocN(clmd->sim_parms->hair_spring_offset);
 	}
+	else {
+		id->totsubset = 1;
+		id->vertoffset = MEM_callocN(sizeof(int)*(id->totsubset+1), "implicit vertoffset");
+		id->springoffset = MEM_callocN(sizeof(int)*(id->totsubset+1), "implicit springoffset");
+		id->vertoffset[1]= cloth->numverts;
+		id->springoffset[1]= cloth->numsprings;
+	}
 
-	// S is special and needs specific vcount and scount
-	id->S[0].vcount = pinned; id->S[0].scount = 0;
+	id->subspring = MEM_callocN(sizeof(LinkNode*)*id->totsubset, "implicit subspring");
 
-	// init springs 
-	search = cloth->springs;
-	for(i=0;i<cloth->numsprings;i++) 
-	{
-		spring = search->link;
+	/* init matrices for each subset */
+	for(sub=0; sub<id->totsubset; sub++) {
+		Implicit_Data idsub;
 		
-		// dFdV_start[i].r = big_I[i].r = big_zero[i].r = 
-		id->A[i+cloth->numverts].r = id->dFdV[i+cloth->numverts].r = id->dFdX[i+cloth->numverts].r = 
-				id->P[i+cloth->numverts].r = id->Pinv[i+cloth->numverts].r = id->bigI[i+cloth->numverts].r = id->M[i+cloth->numverts].r = spring->ij;
+		/* XXX this is slow, and the way we ensure springs are
+		   in the right order is also a major hack in cloth.c */
+		id->subspring[sub]= BLI_linklist_find(id->springs, id->springoffset[sub]);
 
-		// dFdV_start[i].c = big_I[i].c = big_zero[i].c = 
-		id->A[i+cloth->numverts].c = id->dFdV[i+cloth->numverts].c = id->dFdX[i+cloth->numverts].c = 
-				id->P[i+cloth->numverts].c = id->Pinv[i+cloth->numverts].c = id->bigI[i+cloth->numverts].c = id->M[i+cloth->numverts].c = spring->kl;
-
-		spring->matrix_index = i + cloth->numverts;
-		
-		search = search->next;
+		implicit_data_subset(&idsub, id, sub);
+		implicit_init_mats(&idsub, idsub.verts, idsub.springs, idsub.numverts, idsub.numsprings, idsub.verts - id->verts);
 	}
 	
-	initdiag_bfmatrix(id->bigI, I);
-
-	for(i = 0; i < cloth->numverts; i++)
-	{		
-		VECCOPY(id->X[i], verts[i].x);
-	}
-
 	return 1;
 }
+
 int	implicit_free (ClothModifierData *clmd)
 {
 	Implicit_Data *id;
@@ -841,6 +938,10 @@
 			del_lfvector(id->dV);
 			del_lfvector(id->z);
 
+			MEM_freeN(id->vertoffset);
+			MEM_freeN(id->springoffset);
+			MEM_freeN(id->subspring);
+
 			MEM_freeN(id);
 		}
 	}
@@ -1909,19 +2010,36 @@
 static void implicit_solve_forces(ClothModifierData *clmd, Implicit_Data *id, ListBase *effectors, float frame, float step, float dt)
 {
 	Cloth *cloth = clmd->clothObject;
-	ClothVertex *verts = cloth->verts;
-	LinkNode *springs = cloth->springs;
 	unsigned int numverts = cloth->numverts;
-	unsigned int numsprings = cloth->numsprings;
+	int sub;
 
-	cloth_calc_force(clmd, frame, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step, id->M, verts, springs, numverts, numsprings);
+	// call this so memory alloc/free is thread safe
+	BLI_init_threads(NULL, 0, 0);
 
-	// velocity smoothing
+	// calculate forces
+	#pragma omp parallel for private(sub) if(id->totsubset > 1)
+	for(sub=0; sub<id->totsubset; sub++) {
+		Implicit_Data idsub;
+		implicit_data_subset(&idsub, id, sub);
+
+		cloth_calc_force(clmd, frame, idsub.F, idsub.X, idsub.V, idsub.dFdV, idsub.dFdX, effectors, step, idsub.M, idsub.verts, idsub.springs, idsub.numverts, idsub.numsprings);
+	}
+
+	// velocity smoothing non-threaded
 	if(clmd->sim_parms->velocity_smooth > 0.0f || clmd->sim_parms->collider_friction > 0.0f)
 		hair_velocity_smoothing(clmd, id->F, id->X, id->V, numverts);
 
-	// calculate new velocity
-	simulate_implicit_euler(id->Vnew, id->X, id->V, id->F, id->dFdV, id->dFdX, dt, id->A, id->B, id->dV, id->S, id->z, id->olddV, id->P, id->Pinv, id->M, id->bigI);
+	// calculate integration
+	#pragma omp parallel for private(sub) if(id->totsubset > 1)
+	for(sub=0; sub<id->totsubset; sub++) {
+		Implicit_Data idsub;
+		implicit_data_subset(&idsub, id, sub);
+		
+		// calculate new velocity

@@ Diff output truncated at 10240 characters. @@