[Bf-blender-cvs] [2e77c41] master: Sculpt: OMP -> BLI_task, step II.

Sun Jan 3 23:59:25 CET 2016

Commit: 2e77c413ab3cdc5880c99ba14436d56f8280d3fb
Author: Bastien Montagne
Date:   Sun Jan 3 23:54:44 2016 +0100
Branches: master
https://developer.blender.org/rB2e77c413ab3cdc5880c99ba14436d56f8280d3fb

Sculpt: OMP -> BLI_task, step II.

This time, all tools' code itself.

Not much to say, except that we can also get rid of the ugly OMP pre-allocated cache
that was used for multires smoothing.

Together with the previous commit, we get about a 5% average speedup on stroke execution
(though this varies a lot: up to 30% speedup in rare cases and, in even rarer cases, some
odd massive slowdowns...).

Tech note: we may want to add a feature similar to OpenMP's 'guided' scheduling to our BLI_task
threaded loop; I suspect the lack of it could explain the random massive slowdowns of the new code
(very rare, but annoying...).

===================================================================

M	source/blender/editors/sculpt_paint/sculpt.c

===================================================================

diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c
index a78a79a..d4afc6c 100644
--- a/source/blender/editors/sculpt_paint/sculpt.c
+++ b/source/blender/editors/sculpt_paint/sculpt.c
@@ -200,11 +200,6 @@ typedef struct StrokeCache {
 	float clip_tolerance[3];
 	float initial_mouse[2];
 
-	/* Pre-allocated temporary storage used during smoothing */
-	int num_threads, init_num_threads;
-	float (**tmpgrid_co)[3], (**tmprow_co)[3];
-	float **tmpgrid_mask, **tmprow_mask;
-
 	/* Variants */
 	float radius;
 	float radius_squared;
@@ -1528,36 +1523,71 @@ static float bmesh_neighbor_average_mask(BMVert *v, const int cd_vert_mask_offse
 	}
 }
 
-static void do_mesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node, float bstrength, int smooth_mask)
+typedef struct SculptDoBrushData {
+	Sculpt *sd;
+	Object *ob;
+	Brush *brush;
+    PBVHNode **nodes;
+
+	/* Data specific to some brushes. */
+	/* Note: even only one or two of those are used at a time, keeping them separated, names help figuring out
+	 *       what it is, and memory overhead is ridiculous anyway... */
+	SculptProjectVector *spvc;
+	float flippedbstrength;
+	float angle;
+	float *offset;
+	float *grab_delta;
+	float *cono;
+	float *area_no;
+	float *area_no_sp;
+	float *area_co;
+	float (*mat)[4];
+	float strength;
+	bool smooth_mask;
+
+	ThreadMutex mutex;
+} SculptDoBrushData;
+
+/* Note: uses after-struct allocated mem to store actual cache... */
+typedef struct SculptDoBrushSmoothGridDataChunk {
+	size_t tmpgrid_size;
+} SculptDoBrushSmoothGridDataChunk;
+
+static void do_smooth_brush_mesh_task_cb(void *userdata, void *UNUSED(userdata_chunk), int n)
 {
-	Brush *brush = BKE_paint_brush(&sd->paint);
+	SculptDoBrushData *data = userdata;
+	SculptSession *ss = data->ob->sculpt;
+	Sculpt *sd = data->sd;
+	Brush *brush = data->brush;
+	const bool smooth_mask = data->smooth_mask;
+	float bstrength = data->strength;
+
 	PBVHVertexIter vd;
 	SculptBrushTest test;
-	
+
 	CLAMP(bstrength, 0.0f, 1.0f);
 
 	sculpt_brush_test_init(ss, &test);
 
-	BKE_pbvh_vertex_iter_begin(ss->pbvh, node, vd, PBVH_ITER_UNIQUE)
+	BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE)
 	{
 		if (sculpt_brush_test(&test, vd.co)) {
-			const float fade = bstrength * tex_strength(ss, brush, vd.co, test.dist,
-			                                            vd.no, vd.fno,
-			                                            smooth_mask ? 0 : (vd.mask ? *vd.mask : 0.0f));
+			const float fade = bstrength * tex_strength(
+			                       ss, brush, vd.co, test.dist, vd.no, vd.fno,
+			                       smooth_mask ? 0.0f : (vd.mask ? *vd.mask : 0.0f));
 			if (smooth_mask) {
 				float val = neighbor_average_mask(ss, vd.vert_indices[vd.i]) - *vd.mask;
 				val *= fade * bstrength;
 				*vd.mask += val;
-				CLAMP(*vd.mask, 0, 1);
+				CLAMP(*vd.mask, 0.0f, 1.0f);
 			}
 			else {
 				float avg[3], val[3];
 
 				neighbor_average(ss, avg, vd.vert_indices[vd.i]);
 				sub_v3_v3v3(val, avg, vd.co);
-				mul_v3_fl(val, fade);
 
-				add_v3_v3(val, vd.co);
+				madd_v3_v3v3fl(val, vd.co, val, fade);
 
 				sculpt_clip(sd, ss, vd.co, val);
 			}
@@ -1569,36 +1599,40 @@ static void do_mesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node,
 	BKE_pbvh_vertex_iter_end;
 }
 
-static void do_bmesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node, float bstrength, int smooth_mask)
+static void do_smooth_brush_bmesh_task_cb(void *userdata, void *UNUSED(userdata_chunk), int n)
 {
-	Brush *brush = BKE_paint_brush(&sd->paint);
+	SculptDoBrushData *data = userdata;
+	SculptSession *ss = data->ob->sculpt;
+	Sculpt *sd = data->sd;
+	Brush *brush = data->brush;
+	const bool smooth_mask = data->smooth_mask;
+	float bstrength = data->strength;
+
 	PBVHVertexIter vd;
 	SculptBrushTest test;
-	
+
 	CLAMP(bstrength, 0.0f, 1.0f);
 
 	sculpt_brush_test_init(ss, &test);
 
-	BKE_pbvh_vertex_iter_begin(ss->pbvh, node, vd, PBVH_ITER_UNIQUE)
+	BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE)
 	{
 		if (sculpt_brush_test(&test, vd.co)) {
-			const float fade = bstrength * tex_strength(ss, brush, vd.co, test.dist,
-			                                            vd.no, vd.fno,
-			                                            smooth_mask ? 0 : *vd.mask);
+			const float fade = bstrength * tex_strength(
+			                       ss, brush, vd.co, test.dist, vd.no, vd.fno, smooth_mask ? 0.0f : *vd.mask);
 			if (smooth_mask) {
 				float val = bmesh_neighbor_average_mask(vd.bm_vert, vd.cd_vert_mask_offset) - *vd.mask;
 				val *= fade * bstrength;
 				*vd.mask += val;
-				CLAMP(*vd.mask, 0, 1);
+				CLAMP(*vd.mask, 0.0f, 1.0f);
 			}
 			else {
 				float avg[3], val[3];
 
 				bmesh_neighbor_average(avg, vd.bm_vert);
 				sub_v3_v3v3(val, avg, vd.co);
-				mul_v3_fl(val, fade);
 
-				add_v3_v3(val, vd.co);
+				madd_v3_v3v3fl(val, vd.co, val, fade);
 
 				sculpt_clip(sd, ss, vd.co, val);
 			}
@@ -1610,74 +1644,77 @@ static void do_bmesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node,
 	BKE_pbvh_vertex_iter_end;
 }
 
-static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node,
-                                     float bstrength, int smooth_mask)
+static void do_smooth_brush_multires_task_cb(void *userdata, void *userdata_chunk, int n)
 {
-	Brush *brush = BKE_paint_brush(&sd->paint);
+	SculptDoBrushData *data = userdata;
+	SculptDoBrushSmoothGridDataChunk *data_chunk = userdata_chunk;
+	SculptSession *ss = data->ob->sculpt;
+	Sculpt *sd = data->sd;
+	Brush *brush = data->brush;
+	const bool smooth_mask = data->smooth_mask;
+	float bstrength = data->strength;
+
 	SculptBrushTest test;
-	CCGElem **griddata, *data;
+	CCGElem **griddata, *gddata;
 	CCGKey key;
-	float (*tmpgrid_co)[3], (*tmprow_co)[3];
-	float *tmpgrid_mask, *tmprow_mask;
-	int v1, v2, v3, v4;
-	int thread_num;
+
+	float (*tmpgrid_co)[3] = NULL;
+	float tmprow_co[2][3];
+	float *tmpgrid_mask = NULL;
+	float tmprow_mask[2];
+
 	BLI_bitmap * const *grid_hidden;
-	int *grid_indices, totgrid, gridsize, i, x, y;
+	int *grid_indices, totgrid, gridsize;
+	int i, x, y;
 
 	sculpt_brush_test_init(ss, &test);
 
 	CLAMP(bstrength, 0.0f, 1.0f);
 
-	BKE_pbvh_node_get_grids(ss->pbvh, node, &grid_indices, &totgrid,
-	                        NULL, &gridsize, &griddata);
+	BKE_pbvh_node_get_grids(ss->pbvh, data->nodes[n], &grid_indices, &totgrid, NULL, &gridsize, &griddata);
 	BKE_pbvh_get_grid_key(ss->pbvh, &key);
 
 	grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
 
-#ifdef _OPENMP
-	thread_num = omp_get_thread_num();
-#else
-	thread_num = 0;
-#endif
-	tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
-	tmprow_co = ss->cache->tmprow_co[thread_num];
-	tmpgrid_mask = ss->cache->tmpgrid_mask[thread_num];
-	tmprow_mask = ss->cache->tmprow_mask[thread_num];
+	if (smooth_mask)
+		tmpgrid_mask = (void *)(data_chunk + 1);
+	else
+		tmpgrid_co = (void *)(data_chunk + 1);
 
-	for (i = 0; i < totgrid; ++i) {
+	for (i = 0; i < totgrid; i++) {
 		int gi = grid_indices[i];
 		const BLI_bitmap *gh = grid_hidden[gi];
-		data = griddata[gi];
+		gddata = griddata[gi];
 
 		if (smooth_mask)
-			memset(tmpgrid_mask, 0, sizeof(float) * gridsize * gridsize);
+			memset(tmpgrid_mask, 0, data_chunk->tmpgrid_size);
 		else
-			memset(tmpgrid_co, 0, sizeof(float) * 3 * gridsize * gridsize);
+			memset(tmpgrid_co, 0, data_chunk->tmpgrid_size);
 
 		for (y = 0; y < gridsize - 1; y++) {
-			v1 = y * gridsize;
+			const int v = y * gridsize;
 			if (smooth_mask) {
-				tmprow_mask[0] = (*CCG_elem_offset_mask(&key, data, v1) +
-				                  *CCG_elem_offset_mask(&key, data, v1 + gridsize));
+				tmprow_mask[0] = (*CCG_elem_offset_mask(&key, gddata, v) +
+				                  *CCG_elem_offset_mask(&key, gddata, v + gridsize));
 			}
 			else {
 				add_v3_v3v3(tmprow_co[0],
-				            CCG_elem_offset_co(&key, data, v1),
-				            CCG_elem_offset_co(&key, data, v1 + gridsize));
+				            CCG_elem_offset_co(&key, gddata, v),
+				            CCG_elem_offset_co(&key, gddata, v + gridsize));
 			}
 
 			for (x = 0; x < gridsize - 1; x++) {
-				v1 = x + y * gridsize;
-				v2 = v1 + 1;
-				v3 = v1 + gridsize;
-				v4 = v3 + 1;
+				const int v1 = x + y * gridsize;
+				const int v2 = v1 + 1;
+				const int v3 = v1 + gridsize;
+				const int v4 = v3 + 1;
 
 				if (smooth_mask) {
 					float tmp;
 
-					tmprow_mask[x + 1] = (*CCG_elem_offset_mask(&key, data, v2) +
-					                      *CCG_elem_offset_mask(&key, data, v4));
-					tmp = tmprow_mask[x + 1] + tmprow_mask[x];
+					tmprow_mask[(x + 1) % 2] = (*CCG_elem_offset_mask(&key, gddata, v2) +
+					                            *CCG_elem_offset_mask(&key, gddata, v4));
+					tmp = tmprow_mask[(x + 1) % 2] + tmprow_mask[x % 2];
 
 					tmpgrid_mask[v1] += tmp;
 					tmpgrid_mask[v2] += tmp;
@@ -1687,10 +1724,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
 				else {
 					float tmp[3];
 
-					add_v3_v3v3(tmprow_co[x + 1],
-					            CCG_elem_offset_co(&key, data, v2),
-					            CCG_elem_offset_co(&key, data, v4));
-					add_v3_v3v3(tmp, tmprow_co[x + 1], tmprow_co[x]);
+					add_v3_v3v3(tmprow_co[(x + 1) % 2],
+					            CCG_elem_offset_co(&key, gddata, v2),
+					            CCG_elem_offset_co(&key, gddata, v4));
+					add_v3_v3v3(tmp, tmprow_co[(x + 1) % 2], tmprow_co[x % 2]);
 
 					add_v3_v3(tmpgrid_co[v1], tmp);
 					add_v3_v3(tmpgrid_co[v2], tmp);
@@ -1701,49 +1738,43 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
 		}
 
 		/* blend with existing coordinates */
-		for (y = 0; y < gridsize; ++y) {
-			for (x = 0; x < gridsize; ++x) {
+		for (y = 0; y < gridsize; y++) {
+			for (x = 0; x < gridsize; x++) {
 				float *co;
 				const float *fno;
 				float *mask;
-				int index;
+				const int index = y * gridsize + x;
 
 				if (gh) {
-					if (BLI_BITMAP_TEST(gh, y * gridsize + x))
+					if (BLI_BITMAP_TEST(gh, index))
 						continue;
 				}
 
-				index = x + y * gridsize;
-				co = CCG_elem_offset_co(&key, data, index);
-				fno = CCG_elem_offset_no(&key, data, index);
-				mask = 

@@ Diff output truncated at 10240 characters. @@