[Bf-blender-cvs] [4ba06ad0a8c] master: Edit Mesh: multi-thread auto-smooth & custom normal calculations

Fri Jul 23 04:58:35 CEST 2021

Commit: 4ba06ad0a8cdec66d9a9cb06f982736d46c40f4c
Author: Campbell Barton
Date:   Wed Jul 14 13:22:58 2021 +1000
Branches: master
https://developer.blender.org/rB4ba06ad0a8cdec66d9a9cb06f982736d46c40f4c

Edit Mesh: multi-thread auto-smooth & custom normal calculations

Support multi-threading for bm_mesh_loops_calc_normals.

This is done by operating on vertex-loops instead of face-loops.

Single-threaded operation still loops over faces, since iterating
over vertices adds some overhead in the case of custom-normals:
the order used for accessing loops must be the same as when iterating
over a face's loops.

From isolated timing tests of bm_mesh_loops_calc_normals on high
poly models, this gives a speedup of between 3.5x and 10x,
with larger gains for meshes with custom-normals.

NOTE: this is part one of two patches for multi-threaded auto-smooth,
tagging edges as sharp is still single threaded.

Reviewed By: mont29

Ref D11928

===================================================================

M	source/blender/blenkernel/BKE_mesh.h
M	source/blender/blenkernel/intern/mesh_normals.cc
M	source/blender/bmesh/intern/bmesh_mesh_normals.c

===================================================================

diff --git a/source/blender/blenkernel/BKE_mesh.h b/source/blender/blenkernel/BKE_mesh.h
index 7846619577e..e3be9cd8ef8 100644
--- a/source/blender/blenkernel/BKE_mesh.h
+++ b/source/blender/blenkernel/BKE_mesh.h
@@ -399,6 +399,12 @@ void BKE_lnor_spacearr_init(MLoopNorSpaceArray *lnors_spacearr,
                             const char data_type);
 void BKE_lnor_spacearr_clear(MLoopNorSpaceArray *lnors_spacearr);
 void BKE_lnor_spacearr_free(MLoopNorSpaceArray *lnors_spacearr);
+
+void BKE_lnor_spacearr_tls_init(MLoopNorSpaceArray *lnors_spacearr,
+                                MLoopNorSpaceArray *lnors_spacearr_tls);
+void BKE_lnor_spacearr_tls_join(MLoopNorSpaceArray *lnors_spacearr,
+                                MLoopNorSpaceArray *lnors_spacearr_tls);
+
 MLoopNorSpace *BKE_lnor_space_create(MLoopNorSpaceArray *lnors_spacearr);
 void BKE_lnor_space_define(MLoopNorSpace *lnor_space,
                            const float lnor[3],
diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc
index 2fe132fc684..f496d6eada1 100644
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@@ -530,6 +530,36 @@ void BKE_lnor_spacearr_init(MLoopNorSpaceArray *lnors_spacearr,
   lnors_spacearr->data_type = data_type;
 }
 
+/**
+ * Utility for multi-threaded calculation: makes `lnors_spacearr_tls` a shallow copy of
+ * `lnors_spacearr`, then gives the copy its own newly created memory arena so the two
+ * don't share one (sharing the arena would cause it not to be thread safe).
+ *
+ * \note This works as long as threads never operate on the same loops at once.
+ */
+void BKE_lnor_spacearr_tls_init(MLoopNorSpaceArray *lnors_spacearr,
+                                MLoopNorSpaceArray *lnors_spacearr_tls)
+{
+  *lnors_spacearr_tls = *lnors_spacearr;
+  lnors_spacearr_tls->mem = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
+}
+
+/**
+ * Utility for multi-threaded calculation that merges `lnors_spacearr_tls` into `lnors_spacearr`:
+ * adds its `num_spaces`, merges its memory arena, then frees that arena and clears the TLS array.
+ */
+void BKE_lnor_spacearr_tls_join(MLoopNorSpaceArray *lnors_spacearr,
+                                MLoopNorSpaceArray *lnors_spacearr_tls)
+{
+  BLI_assert(lnors_spacearr->data_type == lnors_spacearr_tls->data_type);
+  BLI_assert(lnors_spacearr->mem != lnors_spacearr_tls->mem);
+  lnors_spacearr->num_spaces += lnors_spacearr_tls->num_spaces;
+  BLI_memarena_merge(lnors_spacearr->mem, lnors_spacearr_tls->mem);
+  BLI_memarena_free(lnors_spacearr_tls->mem);
+  lnors_spacearr_tls->mem = nullptr;
+  BKE_lnor_spacearr_clear(lnors_spacearr_tls);
+}
+
 void BKE_lnor_spacearr_clear(MLoopNorSpaceArray *lnors_spacearr)
 {
   lnors_spacearr->num_spaces = 0;
diff --git a/source/blender/bmesh/intern/bmesh_mesh_normals.c b/source/blender/bmesh/intern/bmesh_mesh_normals.c
index dea6561fe9a..b4d8053bc28 100644
--- a/source/blender/bmesh/intern/bmesh_mesh_normals.c
+++ b/source/blender/bmesh/intern/bmesh_mesh_normals.c
@@ -525,6 +525,494 @@ bool BM_loop_check_cyclic_smooth_fan(BMLoop *l_curr)
   }
 }
 
+/**
+ * Called for all faces loops.
+ *
+ * - All loops must have #BM_ELEM_TAG cleared.
+ * - Loop indices must be valid.
+ *
+ * \note When custom normals are present, the order of loops can be important.
+ * Loops with lower indices must be passed before loops with higher indices (for each vertex).
+ * This is needed since the first loop sets the reference point for the custom normal offsets.
+ *
+ * \return The number of loops that were handled (for early exit when all have been handled).
+ */
+static int bm_mesh_loops_calc_normals_for_loop(BMesh *bm,
+                                               const float (*vcos)[3],
+                                               const float (*fnos)[3],
+                                               const short (*clnors_data)[2],
+                                               const int cd_loop_clnors_offset,
+                                               const bool has_clnors,
+                                               /* Cache. */
+                                               BLI_Stack *edge_vectors,
+                                               /* Iterate. */
+                                               BMLoop *l_curr,
+                                               /* Result. */
+                                               float (*r_lnos)[3],
+                                               MLoopNorSpaceArray *r_lnors_spacearr)
+{
+  BLI_assert((bm->elem_index_dirty & (BM_FACE | BM_LOOP)) == 0);
+  BLI_assert((vcos == NULL) || ((bm->elem_index_dirty & BM_VERT) == 0));
+  UNUSED_VARS_NDEBUG(bm);
+
+  int handled = 0;
+
+  /* Temp normal stack. */
+  BLI_SMALLSTACK_DECLARE(normal, float *);
+  /* Temp clnors stack. */
+  BLI_SMALLSTACK_DECLARE(clnors, short *);
+  /* Temp edge vectors stack, only used when computing lnor spacearr. */
+
+  /* A smooth edge, we have to check for cyclic smooth fan case.
+   * If we find a new, never-processed cyclic smooth fan, we can do it now using that loop/edge
+   * as 'entry point', otherwise we can skip it. */
+
+  /* NOTE: In theory, we could make bm_mesh_loop_check_cyclic_smooth_fan() store
+   * mlfan_pivot's in a stack, to avoid having to fan again around
+   * the vert during actual computation of clnor & clnorspace. However, this would complicate
+   * the code, add more memory usage, and
+   * BM_vert_step_fan_loop() is quite cheap in term of CPU cycles,
+   * so really think it's not worth it. */
+  if (BM_elem_flag_test(l_curr->e, BM_ELEM_TAG) &&
+      (BM_elem_flag_test(l_curr, BM_ELEM_TAG) || !BM_loop_check_cyclic_smooth_fan(l_curr))) {
+  }
+  else if (!BM_elem_flag_test(l_curr->e, BM_ELEM_TAG) &&
+           !BM_elem_flag_test(l_curr->prev->e, BM_ELEM_TAG)) {
+    /* Simple case (both edges around that vertex are sharp in related polygon),
+     * this vertex just takes its poly normal.
+     */
+    const int l_curr_index = BM_elem_index_get(l_curr);
+    const float *no = fnos ? fnos[BM_elem_index_get(l_curr->f)] : l_curr->f->no;
+    copy_v3_v3(r_lnos[l_curr_index], no);
+
+    /* If needed, generate this (simple!) lnor space. */
+    if (r_lnors_spacearr) {
+      float vec_curr[3], vec_prev[3];
+      MLoopNorSpace *lnor_space = BKE_lnor_space_create(r_lnors_spacearr);
+
+      {
+        const BMVert *v_pivot = l_curr->v;
+        const float *co_pivot = vcos ? vcos[BM_elem_index_get(v_pivot)] : v_pivot->co;
+        const BMVert *v_1 = l_curr->next->v;
+        const float *co_1 = vcos ? vcos[BM_elem_index_get(v_1)] : v_1->co;
+        const BMVert *v_2 = l_curr->prev->v;
+        const float *co_2 = vcos ? vcos[BM_elem_index_get(v_2)] : v_2->co;
+
+        BLI_assert(v_1 == BM_edge_other_vert(l_curr->e, v_pivot));
+        BLI_assert(v_2 == BM_edge_other_vert(l_curr->prev->e, v_pivot));
+
+        sub_v3_v3v3(vec_curr, co_1, co_pivot);
+        normalize_v3(vec_curr);
+        sub_v3_v3v3(vec_prev, co_2, co_pivot);
+        normalize_v3(vec_prev);
+      }
+
+      BKE_lnor_space_define(lnor_space, r_lnos[l_curr_index], vec_curr, vec_prev, NULL);
+      /* We know there is only one loop in this space,
+       * no need to create a linklist in this case... */
+      BKE_lnor_space_add_loop(r_lnors_spacearr, lnor_space, l_curr_index, l_curr, true);
+
+      if (has_clnors) {
+        const short(*clnor)[2] = clnors_data ? &clnors_data[l_curr_index] :
+                                               (const void *)BM_ELEM_CD_GET_VOID_P(
+                                                   l_curr, cd_loop_clnors_offset);
+        BKE_lnor_space_custom_data_to_normal(lnor_space, *clnor, r_lnos[l_curr_index]);
+      }
+    }
+    handled = 1;
+  }
+  /* We *do not need* to check/tag loops as already computed!
+   * Due to the fact a loop only links to one of its two edges,
+   * a same fan *will never be walked more than once!*
+   * Since we consider edges having neighbor faces with inverted (flipped) normals as sharp,
+   * we are sure that no fan will be skipped, even only considering the case
+   * (sharp curr_edge, smooth prev_edge), and not the alternative
+   * (smooth curr_edge, sharp prev_edge).
+   * All this due/thanks to link between normals and loop ordering.
+   */
+  else {
+    /* We have to fan around current vertex, until we find the other non-smooth edge,
+     * and accumulate face normals into the vertex!
+     * Note in case this vertex has only one sharp edge,
+     * this is a waste because the normal is the same as the vertex normal,
+     * but I do not see any easy way to detect that (would need to count number of sharp edges
+     * per vertex, I doubt the additional memory usage would be worth it, especially as it
+     * should not be a common case in real-life meshes anyway).
+     */
+    BMVert *v_pivot = l_curr->v;
+    BMEdge *e_next;
+    const BMEdge *e_org = l_curr->e;
+    BMLoop *lfan_pivot, *lfan_pivot_next;
+    int lfan_pivot_index;
+    float lnor[3] = {0.0f, 0.0f, 0.0f};
+    float vec_curr[3], vec_next[3], vec_org[3];
+
+    /* We validate clnors data on the fly - cheapest way to do! */
+    int clnors_avg[2] = {0, 0};
+    const short(*clnor_ref)[2] = NULL;
+    int clnors_nbr = 0;
+    bool clnors_invalid = false;
+
+    const float *co_pivot = vcos ? vcos[BM_elem_index_get(v_pivot)] : v_pivot->co;
+
+    MLoopNorSpace *lnor_space = r_lnors_spacearr ? BKE_lnor_space_create(r_lnors_spacearr) : NULL;
+
+    BLI_assert((edge_vectors == NULL) || BLI_stack_is_empty(edge_vectors));
+
+    lfan_pivot = l_curr;
+    lfan_pivot_index = BM_elem_index_get(lfan_pivot);
+    e_next = lfan_pivot->e; /* Current edge here, actually! */
+
+    /* Only need to compute previous edge's vector once,
+     * then we can just reuse old current one! */
+    {
+      const BMVert *v_2 = lfan_pivot->next->v;
+      const float *co_2 = vcos ? vcos[BM_elem_index_get(v_2)] : v_2->co;
+
+      BLI_assert(v_2 == BM_edge_other_vert(e_next, v_pivot));
+
+      sub_v3_v3v3(vec_org, co_2, co_pivot);
+      normalize_v3(vec_org);
+      copy_v3_v3(vec_curr, vec_org);
+
+      if (r_lnors_spacearr) {
+        BLI_stack_push(edge_vectors, vec_org);
+      }
+    }
+
+    while (true) {
+      /* Much simpler than in sibling code with basic Mesh data! */
+      lfan_pivot_next =

@@ Diff output truncated at 10240 characters. @@