[Bf-blender-cvs] [513f566b40a] master: Mesh: optimize object mode face tessellation

Sun Jun 20 06:41:32 CEST 2021

Commit: 513f566b40a5bde4d89797aecf7c0ad3e4d1a20b
Author: Campbell Barton
Date:   Sun Jun 20 13:21:11 2021 +1000
Branches: master
https://developer.blender.org/rB513f566b40a5bde4d89797aecf7c0ad3e4d1a20b

Mesh: optimize object mode face tessellation

- Multi-thread BKE_mesh_recalc_looptri.

- Add BKE_mesh_recalc_looptri_with_normals, which takes pre-calculated
  polygon normals so they don't have to be recalculated for ngons.

Exact performance depends on the number of faces, the size of ngons and
the number of available CPU cores.

For high poly meshes, the isolated improvement to BKE_mesh_recalc_looptri
in my tests was between 6.7x and 25.0x, with the largest gains seen in
meshes containing ngons with many sides.

The overall speedup for high poly meshes containing quads and triangles
is only ~20%, although ngon-heavy meshes can be much faster.

===================================================================

M	source/blender/blenkernel/BKE_mesh.h
M	source/blender/blenkernel/intern/mesh_tessellate.c
M	source/blender/draw/intern/draw_cache_extract_mesh_render_data.c
M	source/blender/render/intern/bake.c

===================================================================

diff --git a/source/blender/blenkernel/BKE_mesh.h b/source/blender/blenkernel/BKE_mesh.h
index 81d3fc683b7..87964307105 100644
--- a/source/blender/blenkernel/BKE_mesh.h
+++ b/source/blender/blenkernel/BKE_mesh.h
@@ -297,6 +297,13 @@ void BKE_mesh_recalc_looptri(const struct MLoop *mloop,
                              int totloop,
                              int totpoly,
                              struct MLoopTri *mlooptri);
+void BKE_mesh_recalc_looptri_with_normals(const struct MLoop *mloop,
+                                          const struct MPoly *mpoly,
+                                          const struct MVert *mvert,
+                                          int totloop,
+                                          int totpoly,
+                                          struct MLoopTri *mlooptri,
+                                          const float (*poly_normals)[3]);
 
 /* *** mesh_evaluate.c *** */
 
diff --git a/source/blender/blenkernel/intern/mesh_tessellate.c b/source/blender/blenkernel/intern/mesh_tessellate.c
index 358f3413104..98a93dbc4b5 100644
--- a/source/blender/blenkernel/intern/mesh_tessellate.c
+++ b/source/blender/blenkernel/intern/mesh_tessellate.c
@@ -36,6 +36,7 @@
 #include "BLI_math.h"
 #include "BLI_memarena.h"
 #include "BLI_polyfill_2d.h"
+#include "BLI_task.h"
 #include "BLI_utildefines.h"
 
 #include "BKE_customdata.h"
@@ -43,6 +44,9 @@
 
 #include "BLI_strict_flags.h"
 
+/** Compared against total loops. */
+#define MESH_FACE_TESSELLATE_THREADED_LIMIT 4096
+
 /* -------------------------------------------------------------------- */
 /** \name MFace Tessellation
  * \{ */
@@ -440,145 +444,307 @@ void BKE_mesh_tessface_calc(Mesh *mesh)
  * \{ */
 
 /**
- * Calculate tessellation into #MLoopTri which exist only for this purpose.
+ * \param face_normal: This will be optimized out as a constant.
  */
-void BKE_mesh_recalc_looptri(const MLoop *mloop,
-                             const MPoly *mpoly,
-                             const MVert *mvert,
-                             int totloop,
-                             int totpoly,
-                             MLoopTri *mlooptri)
+BLI_INLINE void mesh_calc_tessellation_for_face_impl(const MLoop *mloop,
+                                                     const MPoly *mpoly,
+                                                     const MVert *mvert,
+                                                     uint poly_index,
+                                                     MLoopTri *mlt,
+                                                     MemArena **pf_arena_p,
+                                                     const bool face_normal,
+                                                     const float normal_precalc[3])
 {
-  /* use this to avoid locking pthread for _every_ polygon
-   * and calling the fill function */
-
-#define USE_TESSFACE_SPEEDUP
-
-  const MPoly *mp;
-  const MLoop *ml;
-  MLoopTri *mlt;
-  MemArena *arena = NULL;
-  int poly_index, mlooptri_index;
-  uint j;
-
-  mlooptri_index = 0;
-  mp = mpoly;
-  for (poly_index = 0; poly_index < totpoly; poly_index++, mp++) {
-    const uint mp_loopstart = (uint)mp->loopstart;
-    const uint mp_totloop = (uint)mp->totloop;
-    uint l1, l2, l3;
-    if (mp_totloop < 3) {
-      /* do nothing */
-    }
-
-#ifdef USE_TESSFACE_SPEEDUP
-
-#  define ML_TO_MLT(i1, i2, i3) \
-    { \
-      mlt = &mlooptri[mlooptri_index]; \
-      l1 = mp_loopstart + i1; \
-      l2 = mp_loopstart + i2; \
-      l3 = mp_loopstart + i3; \
-      ARRAY_SET_ITEMS(mlt->tri, l1, l2, l3); \
-      mlt->poly = (uint)poly_index; \
-    } \
-    ((void)0)
-
-    else if (mp_totloop == 3) {
+  const uint mp_loopstart = (uint)mpoly[poly_index].loopstart;
+  const uint mp_totloop = (uint)mpoly[poly_index].totloop;
+
+#define ML_TO_MLT(i1, i2, i3) \
+  { \
+    ARRAY_SET_ITEMS(mlt->tri, mp_loopstart + i1, mp_loopstart + i2, mp_loopstart + i3); \
+    mlt->poly = poly_index; \
+  } \
+  ((void)0)
+
+  switch (mp_totloop) {
+    case 3: {
       ML_TO_MLT(0, 1, 2);
-      mlooptri_index++;
+      break;
     }
-    else if (mp_totloop == 4) {
+    case 4: {
       ML_TO_MLT(0, 1, 2);
-      MLoopTri *mlt_a = mlt;
-      mlooptri_index++;
+      MLoopTri *mlt_a = mlt++;
       ML_TO_MLT(0, 2, 3);
       MLoopTri *mlt_b = mlt;
-      mlooptri_index++;
 
       if (UNLIKELY(is_quad_flip_v3_first_third_fast(mvert[mloop[mlt_a->tri[0]].v].co,
                                                     mvert[mloop[mlt_a->tri[1]].v].co,
                                                     mvert[mloop[mlt_a->tri[2]].v].co,
                                                     mvert[mloop[mlt_b->tri[2]].v].co))) {
-        /* flip out of degenerate 0-2 state. */
+        /* Flip out of degenerate 0-2 state. */
         mlt_a->tri[2] = mlt_b->tri[2];
         mlt_b->tri[0] = mlt_a->tri[1];
       }
+      break;
     }
-#endif /* USE_TESSFACE_SPEEDUP */
-    else {
-      const float *co_curr, *co_prev;
-
-      float normal[3];
-
+    default: {
+      const MLoop *ml;
       float axis_mat[3][3];
-      float(*projverts)[2];
-      uint(*tris)[3];
-
-      const uint totfilltri = mp_totloop - 2;
 
-      if (UNLIKELY(arena == NULL)) {
-        arena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
+      /* Calculate `axis_mat` to project verts to 2D. */
+      if (face_normal == false) {
+        float normal[3];
+        const float *co_curr, *co_prev;
+
+        zero_v3(normal);
+
+        /* Calc normal, flipped: to get a positive 2D cross product. */
+        ml = mloop + mp_loopstart;
+        co_prev = mvert[ml[mp_totloop - 1].v].co;
+        for (uint j = 0; j < mp_totloop; j++, ml++) {
+          co_curr = mvert[ml->v].co;
+          add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
+          co_prev = co_curr;
+        }
+        if (UNLIKELY(normalize_v3(normal) == 0.0f)) {
+          normal[2] = 1.0f;
+        }
+        axis_dominant_v3_to_m3_negate(axis_mat, normal);
+      }
+      else {
+        axis_dominant_v3_to_m3_negate(axis_mat, normal_precalc);
       }
 
-      tris = BLI_memarena_alloc(arena, sizeof(*tris) * (size_t)totfilltri);
-      projverts = BLI_memarena_alloc(arena, sizeof(*projverts) * (size_t)mp_totloop);
-
-      zero_v3(normal);
+      const uint totfilltri = mp_totloop - 2;
 
-      /* calc normal, flipped: to get a positive 2d cross product */
-      ml = mloop + mp_loopstart;
-      co_prev = mvert[ml[mp_totloop - 1].v].co;
-      for (j = 0; j < mp_totloop; j++, ml++) {
-        co_curr = mvert[ml->v].co;
-        add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
-        co_prev = co_curr;
-      }
-      if (UNLIKELY(normalize_v3(normal) == 0.0f)) {
-        normal[2] = 1.0f;
+      MemArena *pf_arena = *pf_arena_p;
+      if (UNLIKELY(pf_arena == NULL)) {
+        pf_arena = *pf_arena_p = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
       }
 
-      /* project verts to 2d */
-      axis_dominant_v3_to_m3_negate(axis_mat, normal);
+      uint(*tris)[3] = tris = BLI_memarena_alloc(pf_arena, sizeof(*tris) * (size_t)totfilltri);
+      float(*projverts)[2] = projverts = BLI_memarena_alloc(
+          pf_arena, sizeof(*projverts) * (size_t)mp_totloop);
 
       ml = mloop + mp_loopstart;
-      for (j = 0; j < mp_totloop; j++, ml++) {
+      for (uint j = 0; j < mp_totloop; j++, ml++) {
         mul_v2_m3v3(projverts[j], axis_mat, mvert[ml->v].co);
       }
 
-      BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, arena);
+      BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, pf_arena);
 
-      /* apply fill */
-      for (j = 0; j < totfilltri; j++) {
-        uint *tri = tris[j];
+      /* Apply fill. */
+      for (uint j = 0; j < totfilltri; j++, mlt++) {
+        const uint *tri = tris[j];
+        ML_TO_MLT(tri[0], tri[1], tri[2]);
+      }
 
-        mlt = &mlooptri[mlooptri_index];
+      BLI_memarena_clear(pf_arena);
 
-        /* set loop indices, transformed to vert indices later */
-        l1 = mp_loopstart + tri[0];
-        l2 = mp_loopstart + tri[1];
-        l3 = mp_loopstart + tri[2];
+      break;
+    }
+  }
+#undef ML_TO_MLT
+}
 
-        ARRAY_SET_ITEMS(mlt->tri, l1, l2, l3);
-        mlt->poly = (uint)poly_index;
+static void mesh_calc_tessellation_for_face(const MLoop *mloop,
+                                            const MPoly *mpoly,
+                                            const MVert *mvert,
+                                            uint poly_index,
+                                            MLoopTri *mlt,
+                                            MemArena **pf_arena_p)
+{
+  mesh_calc_tessellation_for_face_impl(
+      mloop, mpoly, mvert, poly_index, mlt, pf_arena_p, false, NULL);
+}
 
-        mlooptri_index++;
-      }
+static void mesh_calc_tessellation_for_face_with_normal(const MLoop *mloop,
+                                                        const MPoly *mpoly,
+                                                        const MVert *mvert,
+                                                        uint poly_index,
+                                                        MLoopTri *mlt,
+                                                        MemArena **pf_arena_p,
+                                                        const float normal_precalc[3])
+{
+  mesh_calc_tessellation_for_face_impl(
+      mloop, mpoly, mvert, poly_index, mlt, pf_arena_p, true, normal_precalc);
+}
 
-      BLI_memarena_clear(arena);
+static void mesh_recalc_looptri__single_threaded(const MLoop *mloop,
+                                                 const MPoly *mpoly,
+                                                 const MVert *mvert,
+                                                 int totloop,
+                                                 int totpoly,
+                                                 MLoopTri *mlooptri,
+                                                 const float (*poly_normals)[3])
+{
+  MemArena *pf_arena = NULL;
+  const MPoly *mp = mpoly;
+  uint tri_index = 0;
+
+  if (poly_normals != NULL) {
+    for (uint poly_index = 0; poly_index < (uint)totpoly; poly_index++, mp++) {
+      mesh_calc_tessellation_for

@@ Diff output truncated at 10240 characters. @@