[Bf-blender-cvs] [2e77c41] master: Sculpt: OMP -> BLI_task, step II.
Bastien Montagne
noreply at git.blender.org
Sun Jan 3 23:59:25 CET 2016
Commit: 2e77c413ab3cdc5880c99ba14436d56f8280d3fb
Author: Bastien Montagne
Date: Sun Jan 3 23:54:44 2016 +0100
Branches: master
https://developer.blender.org/rB2e77c413ab3cdc5880c99ba14436d56f8280d3fb
Sculpt: OMP -> BLI_task, step II.
This time, all tools' code itself.
Not much to say, except that we can also get rid of the ugly OMP caching pre-processing
for multires smoothing.
Together with the previous commit, we get about a 5% average speedup on stroke execution
(though this varies a lot: up to a 30% speedup in rare cases, and in even rarer cases some
odd massive slowdowns...).
Tech note: we may want to add a feature similar to OMP's 'guided' scheduling to our BLI_task threaded loop;
I suspect its absence could explain the random massive slowdowns of the new code (very rare, but annoying...).
===================================================================
M source/blender/editors/sculpt_paint/sculpt.c
===================================================================
diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c
index a78a79a..d4afc6c 100644
--- a/source/blender/editors/sculpt_paint/sculpt.c
+++ b/source/blender/editors/sculpt_paint/sculpt.c
@@ -200,11 +200,6 @@ typedef struct StrokeCache {
float clip_tolerance[3];
float initial_mouse[2];
- /* Pre-allocated temporary storage used during smoothing */
- int num_threads, init_num_threads;
- float (**tmpgrid_co)[3], (**tmprow_co)[3];
- float **tmpgrid_mask, **tmprow_mask;
-
/* Variants */
float radius;
float radius_squared;
@@ -1528,36 +1523,71 @@ static float bmesh_neighbor_average_mask(BMVert *v, const int cd_vert_mask_offse
}
}
-static void do_mesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node, float bstrength, int smooth_mask)
+typedef struct SculptDoBrushData {
+ Sculpt *sd;
+ Object *ob;
+ Brush *brush;
+ PBVHNode **nodes;
+
+ /* Data specific to some brushes. */
+ /* Note: even only one or two of those are used at a time, keeping them separated, names help figuring out
+ * what it is, and memory overhead is ridiculous anyway... */
+ SculptProjectVector *spvc;
+ float flippedbstrength;
+ float angle;
+ float *offset;
+ float *grab_delta;
+ float *cono;
+ float *area_no;
+ float *area_no_sp;
+ float *area_co;
+ float (*mat)[4];
+ float strength;
+ bool smooth_mask;
+
+ ThreadMutex mutex;
+} SculptDoBrushData;
+
+/* Note: uses after-struct allocated mem to store actual cache... */
+typedef struct SculptDoBrushSmoothGridDataChunk {
+ size_t tmpgrid_size;
+} SculptDoBrushSmoothGridDataChunk;
+
+static void do_smooth_brush_mesh_task_cb(void *userdata, void *UNUSED(userdata_chunk), int n)
{
- Brush *brush = BKE_paint_brush(&sd->paint);
+ SculptDoBrushData *data = userdata;
+ SculptSession *ss = data->ob->sculpt;
+ Sculpt *sd = data->sd;
+ Brush *brush = data->brush;
+ const bool smooth_mask = data->smooth_mask;
+ float bstrength = data->strength;
+
PBVHVertexIter vd;
SculptBrushTest test;
-
+
CLAMP(bstrength, 0.0f, 1.0f);
sculpt_brush_test_init(ss, &test);
- BKE_pbvh_vertex_iter_begin(ss->pbvh, node, vd, PBVH_ITER_UNIQUE)
+ BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE)
{
if (sculpt_brush_test(&test, vd.co)) {
- const float fade = bstrength * tex_strength(ss, brush, vd.co, test.dist,
- vd.no, vd.fno,
- smooth_mask ? 0 : (vd.mask ? *vd.mask : 0.0f));
+ const float fade = bstrength * tex_strength(
+ ss, brush, vd.co, test.dist, vd.no, vd.fno,
+ smooth_mask ? 0.0f : (vd.mask ? *vd.mask : 0.0f));
if (smooth_mask) {
float val = neighbor_average_mask(ss, vd.vert_indices[vd.i]) - *vd.mask;
val *= fade * bstrength;
*vd.mask += val;
- CLAMP(*vd.mask, 0, 1);
+ CLAMP(*vd.mask, 0.0f, 1.0f);
}
else {
float avg[3], val[3];
neighbor_average(ss, avg, vd.vert_indices[vd.i]);
sub_v3_v3v3(val, avg, vd.co);
- mul_v3_fl(val, fade);
- add_v3_v3(val, vd.co);
+ madd_v3_v3v3fl(val, vd.co, val, fade);
sculpt_clip(sd, ss, vd.co, val);
}
@@ -1569,36 +1599,40 @@ static void do_mesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node,
BKE_pbvh_vertex_iter_end;
}
-static void do_bmesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node, float bstrength, int smooth_mask)
+static void do_smooth_brush_bmesh_task_cb(void *userdata, void *UNUSED(userdata_chunk), int n)
{
- Brush *brush = BKE_paint_brush(&sd->paint);
+ SculptDoBrushData *data = userdata;
+ SculptSession *ss = data->ob->sculpt;
+ Sculpt *sd = data->sd;
+ Brush *brush = data->brush;
+ const bool smooth_mask = data->smooth_mask;
+ float bstrength = data->strength;
+
PBVHVertexIter vd;
SculptBrushTest test;
-
+
CLAMP(bstrength, 0.0f, 1.0f);
sculpt_brush_test_init(ss, &test);
- BKE_pbvh_vertex_iter_begin(ss->pbvh, node, vd, PBVH_ITER_UNIQUE)
+ BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE)
{
if (sculpt_brush_test(&test, vd.co)) {
- const float fade = bstrength * tex_strength(ss, brush, vd.co, test.dist,
- vd.no, vd.fno,
- smooth_mask ? 0 : *vd.mask);
+ const float fade = bstrength * tex_strength(
+ ss, brush, vd.co, test.dist, vd.no, vd.fno, smooth_mask ? 0.0f : *vd.mask);
if (smooth_mask) {
float val = bmesh_neighbor_average_mask(vd.bm_vert, vd.cd_vert_mask_offset) - *vd.mask;
val *= fade * bstrength;
*vd.mask += val;
- CLAMP(*vd.mask, 0, 1);
+ CLAMP(*vd.mask, 0.0f, 1.0f);
}
else {
float avg[3], val[3];
bmesh_neighbor_average(avg, vd.bm_vert);
sub_v3_v3v3(val, avg, vd.co);
- mul_v3_fl(val, fade);
- add_v3_v3(val, vd.co);
+ madd_v3_v3v3fl(val, vd.co, val, fade);
sculpt_clip(sd, ss, vd.co, val);
}
@@ -1610,74 +1644,77 @@ static void do_bmesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node,
BKE_pbvh_vertex_iter_end;
}
-static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *node,
- float bstrength, int smooth_mask)
+static void do_smooth_brush_multires_task_cb(void *userdata, void *userdata_chunk, int n)
{
- Brush *brush = BKE_paint_brush(&sd->paint);
+ SculptDoBrushData *data = userdata;
+ SculptDoBrushSmoothGridDataChunk *data_chunk = userdata_chunk;
+ SculptSession *ss = data->ob->sculpt;
+ Sculpt *sd = data->sd;
+ Brush *brush = data->brush;
+ const bool smooth_mask = data->smooth_mask;
+ float bstrength = data->strength;
+
SculptBrushTest test;
- CCGElem **griddata, *data;
+ CCGElem **griddata, *gddata;
CCGKey key;
- float (*tmpgrid_co)[3], (*tmprow_co)[3];
- float *tmpgrid_mask, *tmprow_mask;
- int v1, v2, v3, v4;
- int thread_num;
+
+ float (*tmpgrid_co)[3] = NULL;
+ float tmprow_co[2][3];
+ float *tmpgrid_mask = NULL;
+ float tmprow_mask[2];
+
BLI_bitmap * const *grid_hidden;
- int *grid_indices, totgrid, gridsize, i, x, y;
+ int *grid_indices, totgrid, gridsize;
+ int i, x, y;
sculpt_brush_test_init(ss, &test);
CLAMP(bstrength, 0.0f, 1.0f);
- BKE_pbvh_node_get_grids(ss->pbvh, node, &grid_indices, &totgrid,
- NULL, &gridsize, &griddata);
+ BKE_pbvh_node_get_grids(ss->pbvh, data->nodes[n], &grid_indices, &totgrid, NULL, &gridsize, &griddata);
BKE_pbvh_get_grid_key(ss->pbvh, &key);
grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
-#ifdef _OPENMP
- thread_num = omp_get_thread_num();
-#else
- thread_num = 0;
-#endif
- tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
- tmprow_co = ss->cache->tmprow_co[thread_num];
- tmpgrid_mask = ss->cache->tmpgrid_mask[thread_num];
- tmprow_mask = ss->cache->tmprow_mask[thread_num];
+ if (smooth_mask)
+ tmpgrid_mask = (void *)(data_chunk + 1);
+ else
+ tmpgrid_co = (void *)(data_chunk + 1);
- for (i = 0; i < totgrid; ++i) {
+ for (i = 0; i < totgrid; i++) {
int gi = grid_indices[i];
const BLI_bitmap *gh = grid_hidden[gi];
- data = griddata[gi];
+ gddata = griddata[gi];
if (smooth_mask)
- memset(tmpgrid_mask, 0, sizeof(float) * gridsize * gridsize);
+ memset(tmpgrid_mask, 0, data_chunk->tmpgrid_size);
else
- memset(tmpgrid_co, 0, sizeof(float) * 3 * gridsize * gridsize);
+ memset(tmpgrid_co, 0, data_chunk->tmpgrid_size);
for (y = 0; y < gridsize - 1; y++) {
- v1 = y * gridsize;
+ const int v = y * gridsize;
if (smooth_mask) {
- tmprow_mask[0] = (*CCG_elem_offset_mask(&key, data, v1) +
- *CCG_elem_offset_mask(&key, data, v1 + gridsize));
+ tmprow_mask[0] = (*CCG_elem_offset_mask(&key, gddata, v) +
+ *CCG_elem_offset_mask(&key, gddata, v + gridsize));
}
else {
add_v3_v3v3(tmprow_co[0],
- CCG_elem_offset_co(&key, data, v1),
- CCG_elem_offset_co(&key, data, v1 + gridsize));
+ CCG_elem_offset_co(&key, gddata, v),
+ CCG_elem_offset_co(&key, gddata, v + gridsize));
}
for (x = 0; x < gridsize - 1; x++) {
- v1 = x + y * gridsize;
- v2 = v1 + 1;
- v3 = v1 + gridsize;
- v4 = v3 + 1;
+ const int v1 = x + y * gridsize;
+ const int v2 = v1 + 1;
+ const int v3 = v1 + gridsize;
+ const int v4 = v3 + 1;
if (smooth_mask) {
float tmp;
- tmprow_mask[x + 1] = (*CCG_elem_offset_mask(&key, data, v2) +
- *CCG_elem_offset_mask(&key, data, v4));
- tmp = tmprow_mask[x + 1] + tmprow_mask[x];
+ tmprow_mask[(x + 1) % 2] = (*CCG_elem_offset_mask(&key, gddata, v2) +
+ *CCG_elem_offset_mask(&key, gddata, v4));
+ tmp = tmprow_mask[(x + 1) % 2] + tmprow_mask[x % 2];
tmpgrid_mask[v1] += tmp;
tmpgrid_mask[v2] += tmp;
@@ -1687,10 +1724,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
else {
float tmp[3];
- add_v3_v3v3(tmprow_co[x + 1],
- CCG_elem_offset_co(&key, data, v2),
- CCG_elem_offset_co(&key, data, v4));
- add_v3_v3v3(tmp, tmprow_co[x + 1], tmprow_co[x]);
+ add_v3_v3v3(tmprow_co[(x + 1) % 2],
+ CCG_elem_offset_co(&key, gddata, v2),
+ CCG_elem_offset_co(&key, gddata, v4));
+ add_v3_v3v3(tmp, tmprow_co[(x + 1) % 2], tmprow_co[x % 2]);
add_v3_v3(tmpgrid_co[v1], tmp);
add_v3_v3(tmpgrid_co[v2], tmp);
@@ -1701,49 +1738,43 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
}
/* blend with existing coordinates */
- for (y = 0; y < gridsize; ++y) {
- for (x = 0; x < gridsize; ++x) {
+ for (y = 0; y < gridsize; y++) {
+ for (x = 0; x < gridsize; x++) {
float *co;
const float *fno;
float *mask;
- int index;
+ const int index = y * gridsize + x;
if (gh) {
- if (BLI_BITMAP_TEST(gh, y * gridsize + x))
+ if (BLI_BITMAP_TEST(gh, index))
continue;
}
- index = x + y * gridsize;
- co = CCG_elem_offset_co(&key, data, index);
- fno = CCG_elem_offset_no(&key, data, index);
- mask =
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list