[Bf-blender-cvs] [6be2c079c12] temp_bmesh_multires: Did some profiling with VTune. * Sculpt code seems to be memory bandwidth bound. * Some key topology loops will have to be written manually instead of using BM_ITER.

Joseph Eagar noreply at git.blender.org
Sat May 15 01:05:42 CEST 2021


Commit: 6be2c079c1281173724ade93a2d9fe89917bc5f3
Author: Joseph Eagar
Date:   Fri May 14 15:56:04 2021 -0700
Branches: temp_bmesh_multires
https://developer.blender.org/rB6be2c079c1281173724ade93a2d9fe89917bc5f3

Did some profiling with VTune.
  * Sculpt code seems to be memory bandwidth bound.
  * Some key topology loops will have to be written manually
    instead of using BM_ITER.

I wrote a function, SCULPT_reorder_bmesh, that re-allocates a bmesh with
its elements ordered by PBVH leaf node.  It's currently disabled.

This is going to take more profiling, but my original proxy refactor
idea might be worth revisiting, since it might be more cache efficient.

The good news is that the worst case is the smooth code, which I can speed
up significantly by keeping a bit of state around.

===================================================================

M	source/blender/blenkernel/BKE_pbvh.h
M	source/blender/blenkernel/intern/customdata.c
M	source/blender/blenkernel/intern/paint.c
M	source/blender/blenkernel/intern/pbvh.c
M	source/blender/blenkernel/intern/pbvh_bmesh.c
M	source/blender/bmesh/intern/bmesh_log.c
M	source/blender/bmesh/intern/bmesh_log.h
M	source/blender/editors/sculpt_paint/sculpt.c
M	source/blender/editors/sculpt_paint/sculpt_dyntopo.c
M	source/blender/editors/sculpt_paint/sculpt_intern.h

===================================================================

diff --git a/source/blender/blenkernel/BKE_pbvh.h b/source/blender/blenkernel/BKE_pbvh.h
index 5912e52b079..f75f1491d68 100644
--- a/source/blender/blenkernel/BKE_pbvh.h
+++ b/source/blender/blenkernel/BKE_pbvh.h
@@ -715,6 +715,8 @@ struct BMVert *BKE_pbvh_vert_create_bmesh(
     PBVH *pbvh, float co[3], float no[3], PBVHNode *node, struct BMVert *v_example);
 PBVHNode *BKE_pbvh_node_from_face_bmesh(PBVH *pbvh, struct BMFace *f);
 
+struct BMesh *BKE_pbvh_reorder_bmesh(PBVH *pbvh);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/blenkernel/intern/customdata.c b/source/blender/blenkernel/intern/customdata.c
index c065c448328..3e3e5ca39af 100644
--- a/source/blender/blenkernel/intern/customdata.c
+++ b/source/blender/blenkernel/intern/customdata.c
@@ -35,6 +35,7 @@
 #include "DNA_meshdata_types.h"
 
 #include "BLI_bitmap.h"
+#include "BLI_compiler_attrs.h"
 #include "BLI_endian_switch.h"
 #include "BLI_math.h"
 #include "BLI_math_color_blend.h"
@@ -2943,15 +2944,17 @@ bool CustomData_is_referenced_layer(struct CustomData *data, int type)
   return (layer->flag & CD_FLAG_NOFREE) != 0;
 }
 
-void CustomData_unmark_temporary_nocopy(CustomData *data) {
-  for (int i=0; i<data->totlayer; i++) {
+void CustomData_unmark_temporary_nocopy(CustomData *data)
+{
+  for (int i = 0; i < data->totlayer; i++) {
     if (data->layers[i].flag & CD_FLAG_TEMPORARY) {
       data->layers[i].flag &= ~CD_FLAG_NOCOPY;
     }
   }
 }
 
-void CustomData_mark_temporary_nocopy(CustomData *data) {
+void CustomData_mark_temporary_nocopy(CustomData *data)
+{
   for (int i = 0; i < data->totlayer; i++) {
     if (data->layers[i].flag & CD_FLAG_TEMPORARY) {
       data->layers[i].flag |= CD_FLAG_NOCOPY;
@@ -3915,7 +3918,7 @@ void CustomData_bmesh_copy_data_exclude_by_type(const CustomData *source,
     }
   }
 
-  for (int dest_i=0; dest_i < dest->totlayer; dest_i++) {
+  for (int dest_i = 0; dest_i < dest->totlayer; dest_i++) {
     CustomData_bmesh_set_default_n(dest, dest_block, dest_i);
     dest_i++;
   }
diff --git a/source/blender/blenkernel/intern/paint.c b/source/blender/blenkernel/intern/paint.c
index 49fc04f7588..80467bef5f2 100644
--- a/source/blender/blenkernel/intern/paint.c
+++ b/source/blender/blenkernel/intern/paint.c
@@ -82,6 +82,7 @@
 // XXX todo: work our bad module cross ref
 void SCULPT_dynamic_topology_sync_layers(Object *ob, Mesh *me);
 void SCULPT_on_sculptsession_bmesh_free(SculptSession *ss);
+void SCULPT_reorder_bmesh(SculptSession *ss);
 
 static void palette_init_data(ID *id)
 {
@@ -1383,9 +1384,10 @@ static void sculptsession_bm_to_me_update_data_only(Object *ob, bool reorder)
 
   if (ss->bm) {
     if (ob->data) {
-      if (reorder) {
+      if (reorder && ss->bm_log) {
         BM_log_mesh_elems_reorder(ss->bm, ss->bm_log);
       }
+
       BM_mesh_bm_to_me(NULL,
                        NULL,
                        ss->bm,
@@ -1470,8 +1472,8 @@ void BKE_sculptsession_free(Object *ob)
   if (ob && ob->sculpt) {
     SculptSession *ss = ob->sculpt;
 
-    if (ss->bm_log) {
-      BM_log_free(ss->bm_log, true);
+    if (ss->bm_log && BM_log_free(ss->bm_log, true)) {
+      ss->bm_log = NULL;
     }
 
     /*try to save current mesh*/
@@ -2229,6 +2231,10 @@ PBVH *BKE_sculpt_object_pbvh_ensure(Depsgraph *depsgraph, Object *ob)
   if (ob->sculpt->bm != NULL) {
     /* Sculpting on a BMesh (dynamic-topology) gets a special PBVH. */
     pbvh = build_pbvh_for_dynamic_topology(ob);
+
+    ob->sculpt->pbvh = pbvh;
+    //reorder mesh elements to improve memory cache performance
+    SCULPT_reorder_bmesh(ob->sculpt);
   }
   else {
     Object *object_eval = DEG_get_evaluated_object(depsgraph, ob);
diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c
index 33c0894f1d3..c3e16f4a050 100644
--- a/source/blender/blenkernel/intern/pbvh.c
+++ b/source/blender/blenkernel/intern/pbvh.c
@@ -711,6 +711,10 @@ void BKE_pbvh_free(PBVH *pbvh)
         BLI_table_gset_free(node->bm_other_verts, NULL);
       }
 
+      if (node->tribuf) {
+        BKE_pbvh_bmesh_free_tris(pbvh, node);
+      }
+
 #ifdef PROXY_ADVANCED
       BKE_pbvh_free_proxyarray(pbvh, node);
 #endif
diff --git a/source/blender/blenkernel/intern/pbvh_bmesh.c b/source/blender/blenkernel/intern/pbvh_bmesh.c
index 7159973f6f8..5f2523ad752 100644
--- a/source/blender/blenkernel/intern/pbvh_bmesh.c
+++ b/source/blender/blenkernel/intern/pbvh_bmesh.c
@@ -74,7 +74,6 @@ Topology rake:
 /* Avoid skinny faces */
 #define USE_EDGEQUEUE_EVEN_SUBDIV
 
-
 /* How much longer we need to be to consider for subdividing
  * (avoids subdividing faces which are only *slightly* skinny) */
 #define EVEN_EDGELEN_THRESHOLD 1.2f
@@ -82,14 +81,14 @@ Topology rake:
  * (avoids performing subdivisions too far away). */
 #define EVEN_GENERATION_SCALE 1.1f
 
-//recursion depth to start applying front face test
+// recursion depth to start applying front face test
 #define DEPTH_START_LIMIT 5
 
 //#define FANCY_EDGE_WEIGHTS
 #define SKINNY_EDGE_FIX
 
-//slightly relax geometry by this factor along surface tangents
-//to improve convergence of remesher
+// slightly relax geometry by this factor along surface tangents
+// to improve convergence of remesher
 #define DYNTOPO_SAFE_SMOOTH_FAC 0.05f
 
 #ifdef USE_EDGEQUEUE_EVEN_SUBDIV
@@ -228,7 +227,7 @@ BLI_INLINE void surface_smooth_v_safe(BMVert *v)
   mul_v3_fl(co, 1.0f / tot);
   float x = v->co[0], y = v->co[1], z = v->co[2];
 
-  //conflicts here should be pretty rare.
+  // conflicts here should be pretty rare.
   atomic_cas_float(&v->co[0], x, x + co[0] * DYNTOPO_SAFE_SMOOTH_FAC);
   atomic_cas_float(&v->co[1], y, y + co[1] * DYNTOPO_SAFE_SMOOTH_FAC);
   atomic_cas_float(&v->co[2], z, z + co[2] * DYNTOPO_SAFE_SMOOTH_FAC);
@@ -1188,7 +1187,7 @@ BLI_INLINE float calc_weighted_edge_collapse(EdgeQueueContext *eq_ctx, BMVert *v
 #ifdef FANCY_EDGE_WEIGHTS
   float l = len_squared_v3v3(v1->co, v2->co);
   float val = (float)BM_vert_edge_count(v1) + (float)BM_vert_edge_count(v2);
-  val = MAX2(val*0.5 - 6.0f, 1.0f);
+  val = MAX2(val * 0.5 - 6.0f, 1.0f);
   val = powf(val, 0.5);
   l /= val;
 
@@ -1728,8 +1727,12 @@ static void long_edge_queue_edge_add(EdgeQueueContext *eq_ctx, BMEdge *e)
 }
 
 #ifdef USE_EDGEQUEUE_EVEN_SUBDIV
-static void long_edge_queue_edge_add_recursive(
-    EdgeQueueContext *eq_ctx, BMLoop *l_edge, BMLoop *l_end, const float len_sq, float limit_len, int depth)
+static void long_edge_queue_edge_add_recursive(EdgeQueueContext *eq_ctx,
+                                               BMLoop *l_edge,
+                                               BMLoop *l_end,
+                                               const float len_sq,
+                                               float limit_len,
+                                               int depth)
 {
   BLI_assert(len_sq > square_f(limit_len));
 
@@ -1766,8 +1769,12 @@ static void long_edge_queue_edge_add_recursive(
         float len_sq_other = BM_edge_calc_length_squared(l_adjacent[i]->e);
         if (len_sq_other > max_ff(len_sq_cmp, limit_len_sq)) {
           //                  edge_queue_insert(eq_ctx, l_adjacent[i]->e, -len_sq_other);
-          long_edge_queue_edge_add_recursive(
-              eq_ctx, l_adjacent[i]->radial_next, l_adjacent[i], len_sq_other, limit_len, depth+1);
+          long_edge_queue_edge_add_recursive(eq_ctx,
+                                             l_adjacent[i]->radial_next,
+                                             l_adjacent[i],
+                                             len_sq_other,
+                                             limit_len,
+                                             depth + 1);
         }
       }
     } while ((l_iter = l_iter->radial_next) != l_end);
@@ -1806,8 +1813,13 @@ static void long_edge_queue_face_add(EdgeQueueContext *eq_ctx, BMFace *f, bool i
 #ifdef USE_EDGEQUEUE_EVEN_SUBDIV
       const float len_sq = BM_edge_calc_length_squared(l_iter->e);
       if (len_sq > eq_ctx->q->limit_len_squared) {
-        long_edge_queue_edge_add_recursive(
-            eq_ctx, l_iter->radial_next, l_iter, len_sq, eq_ctx->q->limit_len, DEPTH_START_LIMIT+1);//ignore_frontface ? 0 : DEPTH_START_LIMIT+1);
+        long_edge_queue_edge_add_recursive(eq_ctx,
+                                           l_iter->radial_next,
+                                           l_iter,
+                                           len_sq,
+                                           eq_ctx->q->limit_len,
+                                           DEPTH_START_LIMIT +
+                                               1);  // ignore_frontface ? 0 : DEPTH_START_LIMIT+1);
       }
 #else
       long_edge_queue_edge_add(eq_ctx, l_iter->e);
@@ -1838,13 +1850,12 @@ static void short_edge_queue_face_add(EdgeQueueContext *eq_ctx, BMFace *f)
   }
 }
 
-
 static void short_edge_queue_edge_add_recursive_2(EdgeQueueThreadData *tdata,
-                                                 BMLoop *l_edge,
-                                                 BMLoop *l_end,
-                                                 const float len_sq,
-                                                 float limit_len,
-                                                 int depth)
+                                                  BMLoop *l_edge,
+                                                  BMLoop *l_end,
+                                                  const float len_sq,
+                                                  float limit_len,
+                                                  int depth)
 {
   BLI_assert(len_sq > square_f(limit_len));
 
@@ -1884,19 +1895,23 @@ static void short_edge_queue_edge_add_recursive_2(EdgeQueueThreadData *tdata,
         if (len_sq_other > max_ff(len_sq_cmp, limit_len_sq)) {
           //                  edge_queue_insert(eq_ctx, l_adjacent[i]->e, -len_sq_other);
           short_edge_queue_edge_add_recursive_2(tdata,
-                                               l_adjacent[i]->radial_next,
-                                               l_adjacent[i],
-                                               len_sq_other,
-                                               limit_len,
-                                          

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list