[Bf-blender-cvs] [4ba06ad0a8c] master: Edit Mesh: multi-thread auto-smooth & custom normal calculations
Campbell Barton
noreply at git.blender.org
Fri Jul 23 04:58:35 CEST 2021
Commit: 4ba06ad0a8cdec66d9a9cb06f982736d46c40f4c
Author: Campbell Barton
Date: Wed Jul 14 13:22:58 2021 +1000
Branches: master
https://developer.blender.org/rB4ba06ad0a8cdec66d9a9cb06f982736d46c40f4c
Edit Mesh: multi-thread auto-smooth & custom normal calculations
Supported multi-threading for bm_mesh_loops_calc_normals.
This is done by operating on vertex-loops instead of face-loops.
Single threaded operation still loops over faces since iterating
over vertices adds some overhead in the case of custom-normals
as the order used for accessing loops must be the same as iterating
of a faces loops.
>From isolated timing tests of bm_mesh_loops_calc_normals on high
poly models, this gives between 3.5x to 10x speedup,
with larger gains for meshes with custom-normals.
NOTE: this is part one of two patches for multi-threaded auto-smooth,
tagging edges as sharp is still single threaded.
Reviewed By: mont29
Ref D11928
===================================================================
M source/blender/blenkernel/BKE_mesh.h
M source/blender/blenkernel/intern/mesh_normals.cc
M source/blender/bmesh/intern/bmesh_mesh_normals.c
===================================================================
diff --git a/source/blender/blenkernel/BKE_mesh.h b/source/blender/blenkernel/BKE_mesh.h
index 7846619577e..e3be9cd8ef8 100644
--- a/source/blender/blenkernel/BKE_mesh.h
+++ b/source/blender/blenkernel/BKE_mesh.h
@@ -399,6 +399,12 @@ void BKE_lnor_spacearr_init(MLoopNorSpaceArray *lnors_spacearr,
const char data_type);
void BKE_lnor_spacearr_clear(MLoopNorSpaceArray *lnors_spacearr);
void BKE_lnor_spacearr_free(MLoopNorSpaceArray *lnors_spacearr);
+
+void BKE_lnor_spacearr_tls_init(MLoopNorSpaceArray *lnors_spacearr,
+ MLoopNorSpaceArray *lnors_spacearr_tls);
+void BKE_lnor_spacearr_tls_join(MLoopNorSpaceArray *lnors_spacearr,
+ MLoopNorSpaceArray *lnors_spacearr_tls);
+
MLoopNorSpace *BKE_lnor_space_create(MLoopNorSpaceArray *lnors_spacearr);
void BKE_lnor_space_define(MLoopNorSpace *lnor_space,
const float lnor[3],
diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc
index 2fe132fc684..f496d6eada1 100644
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@@ -530,6 +530,36 @@ void BKE_lnor_spacearr_init(MLoopNorSpaceArray *lnors_spacearr,
lnors_spacearr->data_type = data_type;
}
+/**
+ * Utility for multi-threaded calculation that ensures
+ * `lnors_spacearr_tls` doesn't share memory with `lnors_spacearr`
+ * that would cause it not to be thread safe.
+ *
+ * \note This works as long as threads never operate on the same loops at once.
+ */
+void BKE_lnor_spacearr_tls_init(MLoopNorSpaceArray *lnors_spacearr,
+ MLoopNorSpaceArray *lnors_spacearr_tls)
+{
+ *lnors_spacearr_tls = *lnors_spacearr;
+ lnors_spacearr_tls->mem = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
+}
+
+/**
+ * Utility for multi-threaded calculation
+ * that merges `lnors_spacearr_tls` into `lnors_spacearr`.
+ */
+void BKE_lnor_spacearr_tls_join(MLoopNorSpaceArray *lnors_spacearr,
+ MLoopNorSpaceArray *lnors_spacearr_tls)
+{
+ BLI_assert(lnors_spacearr->data_type == lnors_spacearr_tls->data_type);
+ BLI_assert(lnors_spacearr->mem != lnors_spacearr_tls->mem);
+ lnors_spacearr->num_spaces += lnors_spacearr_tls->num_spaces;
+ BLI_memarena_merge(lnors_spacearr->mem, lnors_spacearr_tls->mem);
+ BLI_memarena_free(lnors_spacearr_tls->mem);
+ lnors_spacearr_tls->mem = nullptr;
+ BKE_lnor_spacearr_clear(lnors_spacearr_tls);
+}
+
void BKE_lnor_spacearr_clear(MLoopNorSpaceArray *lnors_spacearr)
{
lnors_spacearr->num_spaces = 0;
diff --git a/source/blender/bmesh/intern/bmesh_mesh_normals.c b/source/blender/bmesh/intern/bmesh_mesh_normals.c
index dea6561fe9a..b4d8053bc28 100644
--- a/source/blender/bmesh/intern/bmesh_mesh_normals.c
+++ b/source/blender/bmesh/intern/bmesh_mesh_normals.c
@@ -525,6 +525,494 @@ bool BM_loop_check_cyclic_smooth_fan(BMLoop *l_curr)
}
}
+/**
+ * Called for all faces loops.
+ *
+ * - All loops must have #BM_ELEM_TAG cleared.
+ * - Loop indices must be valid.
+ *
+ * \note When custom normals are present, the order of loops can be important.
+ * Loops with lower indices must be passed before loops with higher indices (for each vertex).
+ * This is needed since the first loop sets the reference point for the custom normal offsets.
+ *
+ * \return The number of loops that were handled (for early exit when all have been handled).
+ */
+static int bm_mesh_loops_calc_normals_for_loop(BMesh *bm,
+ const float (*vcos)[3],
+ const float (*fnos)[3],
+ const short (*clnors_data)[2],
+ const int cd_loop_clnors_offset,
+ const bool has_clnors,
+ /* Cache. */
+ BLI_Stack *edge_vectors,
+ /* Iterate. */
+ BMLoop *l_curr,
+ /* Result. */
+ float (*r_lnos)[3],
+ MLoopNorSpaceArray *r_lnors_spacearr)
+{
+ BLI_assert((bm->elem_index_dirty & (BM_FACE | BM_LOOP)) == 0);
+ BLI_assert((vcos == NULL) || ((bm->elem_index_dirty & BM_VERT) == 0));
+ UNUSED_VARS_NDEBUG(bm);
+
+ int handled = 0;
+
+ /* Temp normal stack. */
+ BLI_SMALLSTACK_DECLARE(normal, float *);
+ /* Temp clnors stack. */
+ BLI_SMALLSTACK_DECLARE(clnors, short *);
+ /* Temp edge vectors stack, only used when computing lnor spacearr. */
+
+ /* A smooth edge, we have to check for cyclic smooth fan case.
+ * If we find a new, never-processed cyclic smooth fan, we can do it now using that loop/edge
+ * as 'entry point', otherwise we can skip it. */
+
+ /* NOTE: In theory, we could make bm_mesh_loop_check_cyclic_smooth_fan() store
+ * mlfan_pivot's in a stack, to avoid having to fan again around
+ * the vert during actual computation of clnor & clnorspace. However, this would complicate
+ * the code, add more memory usage, and
+ * BM_vert_step_fan_loop() is quite cheap in term of CPU cycles,
+ * so really think it's not worth it. */
+ if (BM_elem_flag_test(l_curr->e, BM_ELEM_TAG) &&
+ (BM_elem_flag_test(l_curr, BM_ELEM_TAG) || !BM_loop_check_cyclic_smooth_fan(l_curr))) {
+ }
+ else if (!BM_elem_flag_test(l_curr->e, BM_ELEM_TAG) &&
+ !BM_elem_flag_test(l_curr->prev->e, BM_ELEM_TAG)) {
+ /* Simple case (both edges around that vertex are sharp in related polygon),
+ * this vertex just takes its poly normal.
+ */
+ const int l_curr_index = BM_elem_index_get(l_curr);
+ const float *no = fnos ? fnos[BM_elem_index_get(l_curr->f)] : l_curr->f->no;
+ copy_v3_v3(r_lnos[l_curr_index], no);
+
+ /* If needed, generate this (simple!) lnor space. */
+ if (r_lnors_spacearr) {
+ float vec_curr[3], vec_prev[3];
+ MLoopNorSpace *lnor_space = BKE_lnor_space_create(r_lnors_spacearr);
+
+ {
+ const BMVert *v_pivot = l_curr->v;
+ const float *co_pivot = vcos ? vcos[BM_elem_index_get(v_pivot)] : v_pivot->co;
+ const BMVert *v_1 = l_curr->next->v;
+ const float *co_1 = vcos ? vcos[BM_elem_index_get(v_1)] : v_1->co;
+ const BMVert *v_2 = l_curr->prev->v;
+ const float *co_2 = vcos ? vcos[BM_elem_index_get(v_2)] : v_2->co;
+
+ BLI_assert(v_1 == BM_edge_other_vert(l_curr->e, v_pivot));
+ BLI_assert(v_2 == BM_edge_other_vert(l_curr->prev->e, v_pivot));
+
+ sub_v3_v3v3(vec_curr, co_1, co_pivot);
+ normalize_v3(vec_curr);
+ sub_v3_v3v3(vec_prev, co_2, co_pivot);
+ normalize_v3(vec_prev);
+ }
+
+ BKE_lnor_space_define(lnor_space, r_lnos[l_curr_index], vec_curr, vec_prev, NULL);
+ /* We know there is only one loop in this space,
+ * no need to create a linklist in this case... */
+ BKE_lnor_space_add_loop(r_lnors_spacearr, lnor_space, l_curr_index, l_curr, true);
+
+ if (has_clnors) {
+ const short(*clnor)[2] = clnors_data ? &clnors_data[l_curr_index] :
+ (const void *)BM_ELEM_CD_GET_VOID_P(
+ l_curr, cd_loop_clnors_offset);
+ BKE_lnor_space_custom_data_to_normal(lnor_space, *clnor, r_lnos[l_curr_index]);
+ }
+ }
+ handled = 1;
+ }
+ /* We *do not need* to check/tag loops as already computed!
+ * Due to the fact a loop only links to one of its two edges,
+ * a same fan *will never be walked more than once!*
+ * Since we consider edges having neighbor faces with inverted (flipped) normals as sharp,
+ * we are sure that no fan will be skipped, even only considering the case
+ * (sharp curr_edge, smooth prev_edge), and not the alternative
+ * (smooth curr_edge, sharp prev_edge).
+ * All this due/thanks to link between normals and loop ordering.
+ */
+ else {
+ /* We have to fan around current vertex, until we find the other non-smooth edge,
+ * and accumulate face normals into the vertex!
+ * Note in case this vertex has only one sharp edge,
+ * this is a waste because the normal is the same as the vertex normal,
+ * but I do not see any easy way to detect that (would need to count number of sharp edges
+ * per vertex, I doubt the additional memory usage would be worth it, especially as it
+ * should not be a common case in real-life meshes anyway).
+ */
+ BMVert *v_pivot = l_curr->v;
+ BMEdge *e_next;
+ const BMEdge *e_org = l_curr->e;
+ BMLoop *lfan_pivot, *lfan_pivot_next;
+ int lfan_pivot_index;
+ float lnor[3] = {0.0f, 0.0f, 0.0f};
+ float vec_curr[3], vec_next[3], vec_org[3];
+
+ /* We validate clnors data on the fly - cheapest way to do! */
+ int clnors_avg[2] = {0, 0};
+ const short(*clnor_ref)[2] = NULL;
+ int clnors_nbr = 0;
+ bool clnors_invalid = false;
+
+ const float *co_pivot = vcos ? vcos[BM_elem_index_get(v_pivot)] : v_pivot->co;
+
+ MLoopNorSpace *lnor_space = r_lnors_spacearr ? BKE_lnor_space_create(r_lnors_spacearr) : NULL;
+
+ BLI_assert((edge_vectors == NULL) || BLI_stack_is_empty(edge_vectors));
+
+ lfan_pivot = l_curr;
+ lfan_pivot_index = BM_elem_index_get(lfan_pivot);
+ e_next = lfan_pivot->e; /* Current edge here, actually! */
+
+ /* Only need to compute previous edge's vector once,
+ * then we can just reuse old current one! */
+ {
+ const BMVert *v_2 = lfan_pivot->next->v;
+ const float *co_2 = vcos ? vcos[BM_elem_index_get(v_2)] : v_2->co;
+
+ BLI_assert(v_2 == BM_edge_other_vert(e_next, v_pivot));
+
+ sub_v3_v3v3(vec_org, co_2, co_pivot);
+ normalize_v3(vec_org);
+ copy_v3_v3(vec_curr, vec_org);
+
+ if (r_lnors_spacearr) {
+ BLI_stack_push(edge_vectors, vec_org);
+ }
+ }
+
+ while (true) {
+ /* Much simpler than in sibling code with basic Mesh data! */
+ lfan_pivot_next =
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list