[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [59203] branches/ soc-2013-meshdata_transfer/source/blender/bmesh/tools/bmesh_data_transfer.c : multi layer UV transfer: making the l_grp be accessed per_vertex instead of per_mesh which speeds up the UV transfer in an obvious way ...

Sat Aug 17 10:55:41 CEST 2013

Revision: 59203
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=59203
Author:   walid
Date:     2013-08-17 08:55:41 +0000 (Sat, 17 Aug 2013)
Log Message:
-----------
multi layer UV transfer: the l_grp is now accessed per vertex instead of per mesh, which clearly speeds up the UV transfer ... the speed difference is easy to notice when compared with transferring a single layer, which hasn't been updated yet

Modified Paths:
--------------
    branches/soc-2013-meshdata_transfer/source/blender/bmesh/tools/bmesh_data_transfer.c

Modified: branches/soc-2013-meshdata_transfer/source/blender/bmesh/tools/bmesh_data_transfer.c
===================================================================

--- branches/soc-2013-meshdata_transfer/source/blender/bmesh/tools/bmesh_data_transfer.c	2013-08-17 08:21:40 UTC (rev 59202)
+++ branches/soc-2013-meshdata_transfer/source/blender/bmesh/tools/bmesh_data_transfer.c	2013-08-17 08:55:41 UTC (rev 59203)
@@ -1450,6 +1450,11 @@
 	int count;
 } BM_loop_pool;
 
+typedef struct BM_loop_group_pool {
+	struct BM_loop_pool *l_grp;
+	int count;
+} BM_loop_group_pool;
+
 bool BM_loop_in_loops(BMLoop **l_grp, int len, BMLoop *l)
 {
 	int i;
@@ -2333,7 +2338,7 @@
 
 		case CD_MLOOPUV:
 		{
-			int c, d, g, h, i, l_dst_iter;
+			int c, d, g, h, i;
 
 //			int const exp_loop_per_face = 4;
 //			int const exp_tolerance = 2;	//expectation tolerance of 2 would allocate double the ideal memory for each face
@@ -2341,9 +2346,11 @@
 //			int const dst_src_faces_ratio = ceil((float)bm_dst->totface /(float)bm_src->totface);	//we should ensure that
 																									//bm_src->totface != 0
 //			int const exp_dst_loops_per_src_face = dst_src_faces_ratio * exp_loop_per_face * exp_tolerance;
-			int const exp_loop_per_vert_double = 10;	//it should be 8 for a vertex surrounded by 4 faces (common scenario)
+			int const exp_loop_per_vert = 4 + 1;	//4 => the exp nmbr, 1 => tolerance
+			int const exp_loop_per_vert_double = exp_loop_per_vert * 2;	//it should be 8 for a vertex surrounded by 4 faces (common scenario)
 														//as the time penalty in realloc is considered severe, we're adding a
 														//tolerence of 5 faces per vertex ... more than that we would realloc
+			int const exp_vert_splits = exp_loop_per_vert;
 
 			BMFace *f_src;
 			BMFace **f_src_table = MEM_mallocN(sizeof(*f_src_table) * bm_src->totface, "f_src_table bmesh_data_transfer.c");
@@ -2353,8 +2360,8 @@
 			BMEdge *e;
 			BMIter eiter;
 //			BM_UV_per_face_mapping *fuv_table = MEM_mallocN(sizeof(*fuv_table) * bm_dst->totface, "fuv_table bmesh_data_transfer.c");
-			BM_loop_pool *l_grp = MEM_mallocN(sizeof(*l_grp) * bm_dst->totloop, "l_grp bmesh_data_transfer.c");
-			int l_grp_count;	//count of actual loop groups
+			//store the loop groups per vertex ... for a single island mesh each vertex would have a single l_grp
+			BM_loop_group_pool *v_l_grp = MEM_mallocN(sizeof(*v_l_grp) * bm_dst->totvert, "v_l_grp bmesh_data_transfer.c");
 			float (*uv_buffer)[2] = MEM_mallocN(sizeof(*uv_buffer) * exp_loop_per_vert_double, "uv_buffer bmesh_data_transfer.c");
 			float mid_uv[2];
 			float weight_accu[2];
@@ -2362,17 +2369,20 @@
 
 
 			//Alloc loops
-			l_dst_iter = 0;
-			BM_ITER_MESH (f_dst, &fiter, bm_dst, BM_FACES_OF_MESH) {
-//			BM_ITER_MESH_INDEX (f_dst, &fiter, bm_dst, BM_FACES_OF_MESH, b) {
-//				fuv_table[b].uv = MEM_mallocN(sizeof(*(fuv_table->uv)) * (f_dst->len), "fuv_table->uv bmesh_data_transfer.c");
-//				fuv_table[b].f = f_dst;
 
-				BM_ITER_ELEM (l, &liter, f_dst, BM_LOOPS_OF_FACE) {
-					l_grp[l_dst_iter].l = MEM_mallocN(sizeof(*(l_grp->l)) * exp_loop_per_vert_double, "l_grp[].l bmesh_data_transfer");
+			///would it be faster/better if we used BM_ITER_MESH_INDEX instead of v->head.index?
+			///no need to recalculate the sizeof and multiplication everytime!!
+			//the upcoming allocation is so protective ... as the exp_vert_splits assumes having a totally disconnected
+			//vertex and the exp_loop_per_vert_double assumes having a totally connected vertex
+			//this is considered a cost for minimilazing the usage of realloc
+			BM_ITER_MESH (v, &iter, bm_dst, BM_VERTS_OF_MESH) {
+				v_l_grp[v->head.index].l_grp = MEM_mallocN(sizeof(*(v_l_grp[v->head.index].l_grp)) * exp_vert_splits, "v_l_grp[].l_grp bmesh_data_transfer.c");
 
-					l_dst_iter++;
+				for (i = 0; i < exp_vert_splits; i++) {
+					v_l_grp[v->head.index].l_grp[i].l = MEM_mallocN(sizeof(*(v_l_grp[v->head.index].l_grp[i].l)) * exp_loop_per_vert_double, "v_l_grp[].l_grp.l bmesh_data_transfer.c");
 				}
+
+				v_l_grp[v->head.index].count = 0;
 			}
 
 			BM_ITER_MESH (f_src, &fiter, bm_src, BM_FACES_OF_MESH) {
@@ -2393,8 +2403,6 @@
 				CD_src = CustomData_get_n_offset(&bm_src->ldata, CD_MLOOPUV, src_lay_iter);	//get the offset of the
 				CD_dst = CustomData_get_n_offset(&bm_dst->ldata, CD_MLOOPUV, dst_lay_iter);	//lay_iter(th)CD_SHAPEKEY layer
 
-				l_grp_count = 0;
-//				f_src_count = 0;
 				b = 0;
 				//the way we do it is by looping over each face!!
 				BM_ITER_MESH (f_dst, &fiter, bm_dst, BM_FACES_OF_MESH) {
@@ -2442,10 +2450,16 @@
 											//we finally found loops that shall be averaged from different faces!
 											//we now shall average them into a buffer!
 
+											//----start rewrite----
+											int v_ind = fl_table[f_src->head.index].l[d]->v->head.index;
+
+											int *l_grp_count = &v_l_grp[v_ind].count;
 											//we've got 2 loops ... search for them in the loop groups ... if found
 											//add a new entry to l_grp and increment l_grp_count
 											//else append the other loop
-											for (h = 0; h < l_grp_count; h++) {
+											for (h = 0; h < *l_grp_count; h++) {
+												BM_loop_pool *l_grp = v_l_grp[v_ind].l_grp;
+
 												if (BM_loop_in_loops(l_grp[h].l, l_grp[h].count, fl_table[f_n->head.index].l[g])) {
 													//found the neighboring face's loop in the group
 													if (!BM_loop_in_loops(l_grp[h].l, l_grp[h].count, fl_table[f_src->head.index].l[d])) {
@@ -2465,14 +2479,14 @@
 													break;
 												}
 
-												else if (BM_loop_in_loops(l_grp[h].l,l_grp[h].count, fl_table[f_src->head.index].l[d])) {
+												else if (BM_loop_in_loops(l_grp[h].l, l_grp[h].count, fl_table[f_src->head.index].l[d])) {
 													//found the source face's loop in the group
-													if (!BM_loop_in_loops(l_grp[h].l,l_grp[h].count, fl_table[f_n->head.index].l[g])) {
+													if (!BM_loop_in_loops(l_grp[h].l, l_grp[h].count, fl_table[f_n->head.index].l[g])) {
 														//now reallocate memory for a new loop
 														(l_grp[h].count)++;
 
 														if (l_grp[h].count < exp_loop_per_vert_double) {
-															l_grp[h].l = MEM_reallocN(l_grp[h].l, sizeof(*(l_grp->l)) * (l_grp[h].count));
+															l_grp[h].l = MEM_reallocN(l_grp[h].l, sizeof(*(l_grp[h].l)) * (l_grp[h].count));
 														}
 														//and append it
 														l_grp[h].l[l_grp[h].count - 1] = fl_table[f_n->head.index].l[g];
@@ -2482,16 +2496,18 @@
 												}
 											}
 
-											if ( h == l_grp_count) {
+											if ( h == *l_grp_count) {
+												BM_loop_pool *l_grp = v_l_grp[v_ind].l_grp;
+
 												//the loops weren't found in any group
 												//make a new group entry and append it
-												l_grp[l_grp_count].count = 2;
+												l_grp[h].count = 2;
 												//adding a place for 2 loops
-		//										l_grp[l_grp_count].l = MEM_mallocN(sizeof(*(l_grp->l)) * 2, "l_grp[].l bmesh_data_transfer");
-												l_grp[l_grp_count].l[0] = fl_table[f_src->head.index].l[d];
-												l_grp[l_grp_count].l[1] = fl_table[f_n->head.index].l[g];
-												l_grp_count++;
+												l_grp[h].l[0] = fl_table[f_src->head.index].l[d];
+												l_grp[h].l[1] = fl_table[f_n->head.index].l[g];
+												(*l_grp_count)++;
 											}
+											//====end rewrite====
 										}
 									}
 								}
@@ -2515,51 +2531,56 @@
 				//get the mid of UVs into a buffer
 				//get the UV coords after looping over the src faces!!
 
-				for (h = 0; h < l_grp_count; h++) {
-					//average the loops' uvs
+				BM_ITER_MESH (v, &iter, bm_dst, BM_VERTS_OF_MESH) {
+					int v_ind = v->head.index;
+					int *l_grp_count = &v_l_grp[v_ind].count;
 
-					if (l_grp[h].count > exp_loop_per_vert_double) {	//expand the buffer size when needed
-						if (l_grp[h].count > l_grp_max_count) {
-							uv_buffer = MEM_reallocN(uv_buffer, sizeof(*uv_buffer) * l_grp[h].count);
-							l_grp_max_count = l_grp[h].count;
+					for (h = 0; h < *l_grp_count; h++) {
+						BM_loop_pool *l_grp = v_l_grp[v_ind].l_grp;
+						//average the loops' uvs
+
+						if (l_grp[h].count > exp_loop_per_vert_double) {	//expand the buffer size when needed
+							if (l_grp[h].count > l_grp_max_count) {
+								uv_buffer = MEM_reallocN(uv_buffer, sizeof(*uv_buffer) * l_grp[h].count);
+								l_grp_max_count = l_grp[h].count;
+							}
 						}
-					}
 
-					//prepare the uvs to be averaged
-					for (i = 0; i < l_grp[h].count; i++) {
-						//copying each element is really inefficient it -at least- doubles the time when we are already accessing
-						//the pointer for read only! .. we should copy the pointer itself
-						copy_v2_v2(uv_buffer[i], BM_ELEM_CD_GET_VOID_P(l_grp[h].l[i], CD_dst));
-					}
+						//prepare the uvs to be averaged
+						for (i = 0; i < l_grp[h].count; i++) {
+							//copying each element is really inefficient it -at least- doubles the time when we are already accessing
+							//the pointer for read only! .. we should copy the pointer itself
+							copy_v2_v2(uv_buffer[i], BM_ELEM_CD_GET_VOID_P(l_grp[h].l[i], CD_dst));
+						}
 
-					//get the mid value
-					zero_v2(mid_uv);
-					mid_poly_v2(mid_uv, uv_buffer, l_grp[h].count);
+						//get the mid value
+						zero_v2(mid_uv);
+						mid_poly_v2(mid_uv, uv_buffer, l_grp[h].count);
 
-					//copy the value to each of them
-					for (i = 0; i < l_grp[h].count; i++) {
-						//commenting this would leave us with the output of interpolation for each face ^_^
-						copy_v2_v2(BM_ELEM_CD_GET_VOID_P(l_grp[h].l[i], CD_dst), mid_uv);
+						//copy the value to each of them
+						for (i = 0; i < l_grp[h].count; i++) {
+							//commenting this would leave us with the output of interpolation for each face ^_^
+							copy_v2_v2(BM_ELEM_CD_GET_VOID_P(l_grp[h].l[i], CD_dst), mid_uv);
+						}
+						//this would make the l_grp[h].l hidden for the function; to avoid unnessecery realloc/memsetting
+						l_grp[h].count = 0;
 					}
-					//this would make the l_grp[h].l hidden for the function; to avoid unnessecery realloc/memsetting
-					l_grp[h].count = 0;
 				}
 			}
 
 			//Free loops
 
-			l_dst_iter = 0;
-			BM_ITER_MESH_INDEX (f_dst, &fiter, bm_dst, BM_FACES_OF_MESH, b) {
-//				MEM_freeN(fuv_table[b].uv);
+			BM_ITER_MESH (v, &iter, bm_dst, BM_VERTS_OF_MESH) {
 
-				BM_ITER_ELEM (l, &liter, f_dst, BM_LOOPS_OF_FACE) {

@@ Diff output truncated at 10240 characters. @@