[Bf-blender-cvs] [6c302d6529e] master: Sculpt: use TBB instead of BLI_task for multithreading

Brecht Van Lommel noreply at git.blender.org
Thu Oct 10 17:39:04 CEST 2019


Commit: 6c302d6529ec5283334e5ef40d07cc46534f5020
Author: Brecht Van Lommel
Date:   Wed Oct 9 16:27:04 2019 +0200
Branches: master
https://developer.blender.org/rB6c302d6529ec5283334e5ef40d07cc46534f5020

Sculpt: use TBB instead of BLI_task for multithreading

This solves performance issues on some computers where there is significant
threading overhead. Rather than doing the complicated work of optimizing our
own task scheduler, use TBB, which appears to work well. The downside is that
we now have a second thread pool, but that pool already exists whenever the
OpenVDB voxel remesh is used.

For future releases we can switch to using TBB to replace our task scheduler
implementation entirely, and use the same thread pool for BLI_task, Cycles,
Mantaflow, etc.

Differential Revision: https://developer.blender.org/D6030

===================================================================

M	source/blender/blenkernel/BKE_pbvh.h
M	source/blender/blenkernel/CMakeLists.txt
M	source/blender/blenkernel/intern/pbvh.c
A	source/blender/blenkernel/intern/pbvh_parallel.cc
M	source/blender/editors/sculpt_paint/paint_mask.c
M	source/blender/editors/sculpt_paint/paint_vertex.c
M	source/blender/editors/sculpt_paint/sculpt.c
M	source/blender/editors/sculpt_paint/sculpt_intern.h
M	source/blender/editors/sculpt_paint/sculpt_undo.c

===================================================================

diff --git a/source/blender/blenkernel/BKE_pbvh.h b/source/blender/blenkernel/BKE_pbvh.h
index 042b78cd06e..13adb868c01 100644
--- a/source/blender/blenkernel/BKE_pbvh.h
+++ b/source/blender/blenkernel/BKE_pbvh.h
@@ -28,6 +28,10 @@
 /* For embedding CCGKey in iterator. */
 #include "BKE_ccg.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct BMLog;
 struct BMesh;
 struct CCGElem;
@@ -45,6 +49,7 @@ struct PBVH;
 struct PBVHNode;
 struct SubdivCCG;
 struct TaskParallelSettings;
+struct TaskParallelTLS;
 
 typedef struct PBVH PBVH;
 typedef struct PBVHNode PBVHNode;
@@ -432,14 +437,39 @@ void BKE_pbvh_node_get_bm_orco_data(PBVHNode *node,
 
 bool BKE_pbvh_node_vert_update_check_any(PBVH *bvh, PBVHNode *node);
 
-void BKE_pbvh_parallel_range_settings(struct TaskParallelSettings *settings,
-                                      bool use_threading,
-                                      int totnode);
-
 // void BKE_pbvh_node_BB_reset(PBVHNode *node);
 // void BKE_pbvh_node_BB_expand(PBVHNode *node, float co[3]);
 
 bool pbvh_has_mask(PBVH *bvh);
 void pbvh_show_mask_set(PBVH *bvh, bool show_mask);
 
+/* Parallelization */
+typedef void (*PBVHParallelRangeFunc)(void *__restrict userdata,
+                                      const int iter,
+                                      const struct TaskParallelTLS *__restrict tls);
+typedef void (*PBVHParallelReduceFunc)(const void *__restrict userdata,
+                                       void *__restrict chunk_join,
+                                       void *__restrict chunk);
+
+typedef struct PBVHParallelSettings {
+  bool use_threading;
+  void *userdata_chunk;
+  size_t userdata_chunk_size;
+  PBVHParallelReduceFunc func_reduce;
+} PBVHParallelSettings;
+
+void BKE_pbvh_parallel_range_settings(struct PBVHParallelSettings *settings,
+                                      bool use_threading,
+                                      int totnode);
+
+void BKE_pbvh_parallel_range(const int start,
+                             const int stop,
+                             void *userdata,
+                             PBVHParallelRangeFunc func,
+                             const struct PBVHParallelSettings *settings);
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* __BKE_PBVH_H__ */
diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt
index 47b44c3828a..ec4246f5dba 100644
--- a/source/blender/blenkernel/CMakeLists.txt
+++ b/source/blender/blenkernel/CMakeLists.txt
@@ -183,6 +183,7 @@ set(SRC
   intern/particle_system.c
   intern/pbvh.c
   intern/pbvh_bmesh.c
+  intern/pbvh_parallel.cc
   intern/pointcache.c
   intern/report.c
   intern/rigidbody.c
@@ -637,6 +638,14 @@ if(WITH_QUADRIFLOW)
   add_definitions(-DWITH_QUADRIFLOW)
 endif()
 
+if(WITH_TBB)
+  add_definitions(-DWITH_TBB)
+
+  list(APPEND INC_SYS
+    ${TBB_INCLUDE_DIRS}
+  )
+endif()
+
 ## Warnings as errors, this is too strict!
 #if(MSVC)
 #   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX")
diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c
index 4e193e35109..01612ded396 100644
--- a/source/blender/blenkernel/intern/pbvh.c
+++ b/source/blender/blenkernel/intern/pbvh.c
@@ -1096,12 +1096,11 @@ static void pbvh_faces_update_normals(PBVH *bvh, PBVHNode **nodes, int totnode)
       .vnors = vnors,
   };
 
-  TaskParallelSettings settings;
+  PBVHParallelSettings settings;
   BKE_pbvh_parallel_range_settings(&settings, true, totnode);
 
-  BLI_task_parallel_range(0, totnode, &data, pbvh_update_normals_accum_task_cb, &settings);
-
-  BLI_task_parallel_range(0, totnode, &data, pbvh_update_normals_store_task_cb, &settings);
+  BKE_pbvh_parallel_range(0, totnode, &data, pbvh_update_normals_accum_task_cb, &settings);
+  BKE_pbvh_parallel_range(0, totnode, &data, pbvh_update_normals_store_task_cb, &settings);
 
   MEM_freeN(vnors);
 }
@@ -1151,9 +1150,9 @@ static void pbvh_update_mask_redraw(PBVH *bvh, PBVHNode **nodes, int totnode, in
       .flag = flag,
   };
 
-  TaskParallelSettings settings;
+  PBVHParallelSettings settings;
   BKE_pbvh_parallel_range_settings(&settings, true, totnode);
-  BLI_task_parallel_range(0, totnode, &data, pbvh_update_mask_redraw_task_cb, &settings);
+  BKE_pbvh_parallel_range(0, totnode, &data, pbvh_update_mask_redraw_task_cb, &settings);
 }
 
 static void pbvh_update_BB_redraw_task_cb(void *__restrict userdata,
@@ -1189,9 +1188,9 @@ void pbvh_update_BB_redraw(PBVH *bvh, PBVHNode **nodes, int totnode, int flag)
       .flag = flag,
   };
 
-  TaskParallelSettings settings;
+  PBVHParallelSettings settings;
   BKE_pbvh_parallel_range_settings(&settings, true, totnode);
-  BLI_task_parallel_range(0, totnode, &data, pbvh_update_BB_redraw_task_cb, &settings);
+  BKE_pbvh_parallel_range(0, totnode, &data, pbvh_update_BB_redraw_task_cb, &settings);
 }
 
 static int pbvh_get_buffers_update_flags(PBVH *bvh, bool show_vcol)
@@ -1299,9 +1298,9 @@ static void pbvh_update_draw_buffers(
       .show_vcol = show_vcol,
   };
 
-  TaskParallelSettings settings;
+  PBVHParallelSettings settings;
   BKE_pbvh_parallel_range_settings(&settings, true, totnode);
-  BLI_task_parallel_range(0, totnode, &data, pbvh_update_draw_buffer_cb, &settings);
+  BKE_pbvh_parallel_range(0, totnode, &data, pbvh_update_draw_buffer_cb, &settings);
 }
 
 static int pbvh_flush_bb(PBVH *bvh, PBVHNode *node, int flag)
@@ -2742,13 +2741,10 @@ void pbvh_show_mask_set(PBVH *bvh, bool show_mask)
   bvh->show_mask = show_mask;
 }
 
-void BKE_pbvh_parallel_range_settings(TaskParallelSettings *settings,
+void BKE_pbvh_parallel_range_settings(PBVHParallelSettings *settings,
                                       bool use_threading,
                                       int totnode)
 {
-  const int threaded_limit = 1;
-  BLI_parallel_range_settings_defaults(settings);
-  settings->use_threading = use_threading && (totnode > threaded_limit);
-  settings->min_iter_per_thread = 1;
-  settings->scheduling_mode = TASK_SCHEDULING_DYNAMIC;
+  memset(settings, 0, sizeof(*settings));
+  settings->use_threading = use_threading && totnode > 1;
 }
diff --git a/source/blender/blenkernel/intern/pbvh_parallel.cc b/source/blender/blenkernel/intern/pbvh_parallel.cc
new file mode 100644
index 00000000000..60503c9bad0
--- /dev/null
+++ b/source/blender/blenkernel/intern/pbvh_parallel.cc
@@ -0,0 +1,140 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_task.h"
+#include "BLI_threads.h"
+
+#include "BKE_pbvh.h"
+
+#include "atomic_ops.h"
+
+#ifdef WITH_TBB
+
+#  include <tbb/tbb.h>
+
+/* Functor for running TBB parallel_for and parallel_reduce. */
+struct PBVHTask {
+  PBVHParallelRangeFunc func;
+  void *userdata;
+  const PBVHParallelSettings *settings;
+
+  void *userdata_chunk;
+  bool userdata_chunk_free;
+
+  PBVHTask()
+  {
+  }
+
+  PBVHTask(const PBVHTask &other)
+      : func(other.func),
+        userdata(other.userdata),
+        settings(other.settings),
+        userdata_chunk(0),
+        userdata_chunk_free(false)
+  {
+    if (other.userdata_chunk) {
+      userdata_chunk = MEM_mallocN(settings->userdata_chunk_size, "PBVHTask");
+      memcpy(userdata_chunk, other.userdata_chunk, settings->userdata_chunk_size);
+      userdata_chunk_free = true;
+    }
+  }
+
+  PBVHTask(PBVHTask &other, tbb::split) : PBVHTask(other)
+  {
+  }
+
+  ~PBVHTask()
+  {
+    if (userdata_chunk_free) {
+      MEM_freeN(userdata_chunk);
+    }
+  }
+
+  void operator()(const tbb::blocked_range<int> &r) const
+  {
+    TaskParallelTLS tls;
+    tls.thread_id = get_thread_id();
+    tls.userdata_chunk = userdata_chunk;
+    for (int i = r.begin(); i != r.end(); ++i) {
+      func(userdata, i, &tls);
+    }
+  }
+
+  void join(const PBVHTask &other)
+  {
+    settings->func_reduce(userdata, userdata_chunk, other.userdata_chunk);
+  }
+
+  int get_thread_id() const
+  {
+    /* Get a unique thread ID for texture nodes. In the future we should get rid
+     * of the thread ID and change texture evaluation to not require per-thread
+     * storage that can't be efficiently allocated on the stack. */
+    static tbb::enumerable_thread_specific<int> pbvh_thread_id(-1);
+    static int pbvh_thread_id_counter = 0;
+
+    int &thread_id = pbvh_thread_id.local();
+    if (thread_id == -1) {
+      thread_id = atomic_fetch_and_add_int32(&pbvh_thread_id_counter, 1);
+      if (thread_id >= BLENDER_MAX_THREADS) {
+        BLI_assert(!"Maximum number of threads exceeded for sculpting");
+        thread_id = thread_id % BLENDER_MAX_THREADS;
+      }
+    }
+    return thread_id;
+  }
+};
+
+#endif
+
+void BKE_pbvh_parallel_range(const int start,
+                             const int stop,
+                             void *userdata,
+                             PBVHParallelRangeFunc func,
+                             const struct PBVHParallelSettings *settings)
+{
+#ifdef WITH_TBB
+  /* Multithreading. */
+  if (settings->use_threading) {
+    PBVHTask task;
+    task.func = func;
+    task.userdata = userdata;
+    task.settings = settings;
+    task.userdata_chunk = settings->userdata_chunk;
+    task.userdata_chunk_free = false;
+
+    if (settings->func_reduce) {
+      parallel_reduce(tbb::blocked_range<int>(start, stop), task);
+    }
+    else {
+      parallel_for(tbb::blocked_range<int>(start, stop), task);
+    }
+
+    return;
+  }
+#endif
+
+  /* Single threaded. Nothing to reduce as everything is accumulated into the
+   * main userdata chunk directly. */
+  TaskParallelTLS tls;
+  tls.t

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list