[Bf-blender-cvs] [4909e599e8a] newboolean: Some small speedups from parallelizing more parts.
Howard Trickey
noreply at git.blender.org
Mon Aug 24 13:32:39 CEST 2020
Commit: 4909e599e8a84b10f93cd1111dcd118de6ff069e
Author: Howard Trickey
Date: Sun Aug 23 10:12:55 2020 -0400
Branches: newboolean
https://developer.blender.org/rB4909e599e8a84b10f93cd1111dcd118de6ff069e
Some small speedups from parallelizing more parts.
Parallelized bounding box finding and degenerate triangle testing.
===================================================================
M source/blender/blenlib/intern/mesh_intersect.cc
===================================================================
diff --git a/source/blender/blenlib/intern/mesh_intersect.cc b/source/blender/blenlib/intern/mesh_intersect.cc
index 28cd9c216e6..6b0f8df7e62 100644
--- a/source/blender/blenlib/intern/mesh_intersect.cc
+++ b/source/blender/blenlib/intern/mesh_intersect.cc
@@ -39,6 +39,8 @@
# include "BLI_vector.hh"
# include "BLI_vector_set.hh"
+# include "PIL_time.h"
+
# include "BLI_mesh_intersect.hh"
// # define PERFDEBUG
@@ -763,36 +765,90 @@ static bool bbs_might_intersect(const BoundingBox &bb_a, const BoundingBox &bb_b
return isect_aabb_aabb_v3(bb_a.min, bb_a.max, bb_b.min, bb_b.max);
}
+/* Data and functions to calculate bounding boxes and pad them, in parallel.
+ * The bounding box calculation has the additional task of calculating the maximum
+ * absolute value of any coordinate in the mesh, which will be used to calculate
+ * the pad value.
+ */
+struct BBChunkData {
+ double max_abs_val = 0.0;
+};
+
+struct BBCalcData {
+ const IMesh &im;
+ Array<BoundingBox> *face_bounding_box;
+
+ BBCalcData(const IMesh &im, Array<BoundingBox> *fbb) : im(im), face_bounding_box(fbb){};
+};
+
+static void calc_face_bb_range_func(void *__restrict userdata,
+ const int iter,
+ const TaskParallelTLS *__restrict tls)
+{
+ BBCalcData *bbdata = static_cast<BBCalcData *>(userdata);
+ double max_abs = 0.0;
+ const Face &face = *bbdata->im.face(iter);
+ BoundingBox &bb = (*bbdata->face_bounding_box)[iter];
+ for (const Vert *v : face) {
+ bb.combine(v->co);
+ for (int i = 0; i < 3; ++i) {
+ max_abs = max_dd(max_abs, fabs(v->co[i]));
+ }
+ }
+ BBChunkData *chunk = static_cast<BBChunkData *>(tls->userdata_chunk);
+ chunk->max_abs_val = max_dd(max_abs, chunk->max_abs_val);
+}
+
+struct BBPadData {
+ Array<BoundingBox> *face_bounding_box;
+ double pad;
+
+ BBPadData(Array<BoundingBox> *fbb, double pad) : face_bounding_box(fbb), pad(pad){};
+};
+
+static void pad_face_bb_range_func(void *__restrict userdata,
+ const int iter,
+ const TaskParallelTLS *__restrict UNUSED(tls))
+{
+ BBPadData *pad_data = static_cast<BBPadData *>(userdata);
+ (*pad_data->face_bounding_box)[iter].expand(pad_data->pad);
+}
+
+static void calc_face_bb_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict chunk_join,
+ void *__restrict chunk)
+{
+ BBChunkData *bbchunk_join = static_cast<BBChunkData *>(chunk_join);
+ BBChunkData *bbchunk = static_cast<BBChunkData *>(chunk);
+ bbchunk_join->max_abs_val = max_dd(bbchunk_join->max_abs_val, bbchunk->max_abs_val);
+}
+
/* We will expand the bounding boxes by an epsilon on all sides so that
* the "less than" tests in isect_aabb_aabb_v3 are sufficient to detect
* touching or overlap.
*/
static Array<BoundingBox> calc_face_bounding_boxes(const IMesh &m)
{
- double max_abs_val = 0.0;
- Array<BoundingBox> ans(m.face_size());
- for (int f : m.face_index_range()) {
- const Face &face = *m.face(f);
- BoundingBox &bb = ans[f];
- for (const Vert *v : face) {
- bb.combine(v->co);
- for (int i = 0; i < 3; ++i) {
- max_abs_val = max_dd(max_abs_val, fabs(v->co[i]));
- }
- }
- }
- float pad;
+ int n = m.face_size();
+ Array<BoundingBox> ans(n);
+ TaskParallelSettings settings;
+ BBCalcData data(m, &ans);
+ BBChunkData chunk_data;
+ BLI_parallel_range_settings_defaults(&settings);
+ settings.userdata_chunk = &chunk_data;
+ settings.userdata_chunk_size = sizeof(chunk_data);
+ settings.func_reduce = calc_face_bb_reduce;
+ settings.min_iter_per_thread = 1000;
+ BLI_task_parallel_range(0, n, &data, calc_face_bb_range_func, &settings);
+ double max_abs_val = chunk_data.max_abs_val;
constexpr float pad_factor = 10.0f;
- if (max_abs_val == 0.0f) {
- pad = FLT_EPSILON;
- }
- else {
- pad = 2 * FLT_EPSILON * max_abs_val;
- }
+ float pad = max_abs_val == 0.0f ? FLT_EPSILON : 2 * FLT_EPSILON * max_abs_val;
pad *= pad_factor; /* For extra safety. */
- for (int f : m.face_index_range()) {
- ans[f].expand(pad);
- }
+ TaskParallelSettings pad_settings;
+ BLI_parallel_range_settings_defaults(&pad_settings);
+ settings.min_iter_per_thread = 1000;
+ BBPadData pad_data(&ans, pad);
+ BLI_task_parallel_range(0, n, &pad_data, pad_face_bb_range_func, &pad_settings);
return ans;
}
@@ -2821,15 +2877,48 @@ static bool face_is_degenerate(const Face *f)
return false;
}
+/* Data and functions to test triangle degeneracy in parallel. */
+struct DegenData {
+ const IMesh &tm;
+};
+
+struct DegenChunkData {
+ bool has_degenerate_tri = false;
+};
+
+static void degenerate_range_func(void *__restrict userdata,
+ const int iter,
+ const TaskParallelTLS *__restrict tls)
+{
+ DegenData *data = static_cast<DegenData *>(userdata);
+ DegenChunkData *chunk_data = static_cast<DegenChunkData *>(tls->userdata_chunk);
+ const Face *f = data->tm.face(iter);
+ bool is_degenerate = face_is_degenerate(f);
+ chunk_data->has_degenerate_tri |= is_degenerate;
+}
+
+static void degenerate_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict chunk_join,
+ void *__restrict chunk)
+{
+ DegenChunkData *degen_chunk_join = static_cast<DegenChunkData *>(chunk_join);
+ DegenChunkData *degen_chunk = static_cast<DegenChunkData *>(chunk);
+ degen_chunk_join->has_degenerate_tri |= degen_chunk->has_degenerate_tri;
+}
+
/* Does triangle IMesh tm have any triangles with zero area? */
static bool has_degenerate_tris(const IMesh &tm)
{
- for (const Face *f : tm.faces()) {
- if (face_is_degenerate(f)) {
- return true;
- }
- }
- return false;
+ DegenData degen_data = {tm};
+ DegenChunkData degen_chunk_data;
+ TaskParallelSettings settings;
+ BLI_parallel_range_settings_defaults(&settings);
+ settings.userdata_chunk = °en_chunk_data;
+ settings.userdata_chunk_size = sizeof(degen_chunk_data);
+ settings.func_reduce = degenerate_reduce;
+ settings.min_iter_per_thread = 1000;
+ BLI_task_parallel_range(0, tm.face_size(), °en_data, degenerate_range_func, &settings);
+ return degen_chunk_data.has_degenerate_tri;
}
static IMesh remove_degenerate_tris(const IMesh &tm_in)
@@ -2874,6 +2963,11 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
}
}
}
+# ifdef PERFDEBUG
+ perfdata_init();
+ double start_time = PIL_check_seconds_timer();
+ std::cout << "trimesh_nary_intersect start\n";
+# endif
/* Usually can use tm_in but if it has degenerate or illegal triangles,
* then need to work on a copy of it without those triangles.
*/
@@ -2889,14 +2983,28 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
std::cout << "cleaned input mesh:\n" << tm_cleaned;
}
}
+# ifdef PERFDEBUG
+ double clean_time = PIL_check_seconds_timer();
+ std::cout << "cleaned, time = " << clean_time - start_time << "\n";
+# endif
Array<BoundingBox> tri_bb = calc_face_bounding_boxes(*tm_clean);
+# ifdef PERFDEBUG
+ double bb_calc_time = PIL_check_seconds_timer();
+ std::cout << "bbs calculated, time = " << bb_calc_time - clean_time << "\n";
+# endif
TriOverlaps tri_ov(*tm_clean, tri_bb, nshapes, shape_fn, use_self);
+# ifdef PERFDEBUG
+ double overlap_time = PIL_check_seconds_timer();
+ std::cout << "intersect overlaps calculated, time = " << overlap_time - bb_calc_time << "\n";
+
+# endif
CoplanarClusterInfo clinfo = find_clusters(*tm_clean, tri_bb);
if (dbg_level > 1) {
std::cout << clinfo;
}
# ifdef PERFDEBUG
- perfdata_init();
+ double find_cluster_time = PIL_check_seconds_timer();
+ std::cout << "clusters found, time = " << find_cluster_time - overlap_time << "\n";
doperfmax(0, tm_in.face_size());
doperfmax(1, clinfo.tot_cluster());
doperfmax(2, tri_ov.overlap().size());
@@ -2907,12 +3015,25 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
Map<std::pair<int, int>, ITT_value> itt_map;
itt_map.reserve(tri_ov.overlap().size());
calc_overlap_itts(itt_map, *tm_clean, clinfo, tri_ov, arena);
+# ifdef PERFDEBUG
+ double itt_time = PIL_check_seconds_timer();
+ std::cout << "itts found, time = " << itt_time - find_cluster_time << "\n";
+# endif
Array<IMesh> tri_subdivided(tm_clean->face_size());
calc_subdivided_tris(tri_subdivided, *tm_clean, itt_map, clinfo, tri_ov, arena);
+# ifdef PERFDEBUG
+ double subdivided_tris_time = PIL_check_seconds_timer();
+ std::cout << "subdivided tris found, time = " << subdivided_tris_time - itt_time << "\n";
+# endif
Array<CDT_data> cluster_subdivided(clinfo.tot_cluster());
for (int c : clinfo.index_range()) {
cluster_subdivided[c] = calc_cluster_subdivided(clinfo, c, *tm_clean, tri_ov, itt_map, arena);
}
+# ifdef PERFDEBUG
+ double cluster_subdivide_time = PIL_check_seconds_timer();
+ std::cout << "subdivided clusters found, time = "
+ << cluster_subdivide_time - subdivided_tris_time << "\n";
+# endif
for (int t : tm_clean->face_index_range()) {
int c = clinfo.tri_cluster(t);
if (c != NO_INDEX) {
@@ -2923,12 +3044,19 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
tri_subdivided[t] = extract_single_tri(*tm_clean, t);
}
}
+# ifdef PERFDEBUG
+ double extract_time = PIL_check_seconds_timer();
+ std::cout << "triangles extracted, time = " << extract_time - cluster_subdivide_time << "\n";
+# endif
IMesh combined = union_tri_subdivides(tri_subdivided);
if (dbg_level > 1) {
std::cout << "TRIMESH_NARY_INTERSECT answer:\n";
std::cout << combined;
}
# ifdef PERFDEBUG
+ double end_time = PIL_check_seconds_timer();
+ std::cout << "triangles combined, time = " << end_time - extract_time << "\n";
+ std::cout << "trimesh_nary_intersect done, total time = " << end_time - start_time << "\n";
dump_perfdata();
# endif
return combined;
@@ -3031,67 +3159,72 @@ struct PerfCounts {
Vector<const char *> count_name;
Vector<int> max;
Vector<const char *> max_name;
-} perfdata;
+};
+
+static PerfCounts *perfdata = nullptr;
stat
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list