[Bf-blender-cvs] [d8b8b4d7e29] master: BMesh: multi-thread face tessellation
Campbell Barton
noreply at git.blender.org
Wed Jun 9 17:23:08 CEST 2021
Commit: d8b8b4d7e297b5dceddeba3a60e71e13372484da
Author: Campbell Barton
Date: Thu Jun 10 01:03:22 2021 +1000
Branches: master
https://developer.blender.org/rBd8b8b4d7e297b5dceddeba3a60e71e13372484da
BMesh: multi-thread face tessellation
Use BM_iter_parallel for face tessellation, this gives around
6.5x speedup for BM_mesh_calc_tessellation on high poly meshes in my
tests, although exact speedup depends on available cores.
===================================================================
M source/blender/bmesh/intern/bmesh_mesh_tessellate.c
===================================================================
diff --git a/source/blender/bmesh/intern/bmesh_mesh_tessellate.c b/source/blender/bmesh/intern/bmesh_mesh_tessellate.c
index f2a5fbe3bde..7a95e52ce25 100644
--- a/source/blender/bmesh/intern/bmesh_mesh_tessellate.c
+++ b/source/blender/bmesh/intern/bmesh_mesh_tessellate.c
@@ -37,6 +37,15 @@
#include "bmesh.h"
#include "bmesh_tools.h"
+/**
+ * On systems with 32+ cores,
+ * only a very small number of faces has any advantage single threading (in the 100's).
+ * Note that between 500-2000 quads, the difference isn't so much
+ * (tessellation isn't a bottleneck in this case anyway).
+ * Avoid the slight overhead of using threads in this case.
+ */
+#define BM_FACE_TESSELLATE_THREADED_LIMIT 1024
+
/* -------------------------------------------------------------------- */
/** \name Default Mesh Tessellation
* \{ */
@@ -125,7 +134,7 @@ static int mesh_calc_tessellation_for_face(BMLoop *(*looptris)[3],
*
* \note \a looptris Must be pre-allocated to at least the size of given by: poly_to_tri_count
*/
-void BM_mesh_calc_tessellation(BMesh *bm, BMLoop *(*looptris)[3])
+static void bm_mesh_calc_tessellation__single_threaded(BMesh *bm, BMLoop *(*looptris)[3])
{
#ifndef NDEBUG
const int looptris_tot = poly_to_tri_count(bm->totface, bm->totloop);
@@ -150,6 +159,54 @@ void BM_mesh_calc_tessellation(BMesh *bm, BMLoop *(*looptris)[3])
BLI_assert(i <= looptris_tot);
}
+struct TessellationUserTLS {
+ MemArena *pf_arena;
+};
+
+static void mesh_calc_tessellation_for_face_fn(void *__restrict userdata,
+ MempoolIterData *mp_f,
+ const TaskParallelTLS *__restrict tls)
+{
+ struct TessellationUserTLS *tls_data = tls->userdata_chunk;
+ BMLoop *(*looptris)[3] = userdata;
+ BMFace *f = (BMFace *)mp_f;
+ BMLoop *l = BM_FACE_FIRST_LOOP(f);
+ const int offset = BM_elem_index_get(l) - (BM_elem_index_get(f) * 2);
+ mesh_calc_tessellation_for_face(looptris + offset, f, &tls_data->pf_arena);
+}
+
+static void mesh_calc_tessellation_for_face_free_fn(const void *__restrict UNUSED(userdata),
+ void *__restrict tls_v)
+{
+ struct TessellationUserTLS *tls_data = tls_v;
+ if (tls_data->pf_arena) {
+ BLI_memarena_free(tls_data->pf_arena);
+ }
+}
+
+static void bm_mesh_calc_tessellation__multi_threaded(BMesh *bm, BMLoop *(*looptris)[3])
+{
+ BM_mesh_elem_index_ensure(bm, BM_LOOP | BM_FACE);
+
+ TaskParallelSettings settings;
+ struct TessellationUserTLS tls_dummy = {NULL};
+ BLI_parallel_mempool_settings_defaults(&settings);
+ settings.userdata_chunk = &tls_dummy;
+ settings.userdata_chunk_size = sizeof(tls_dummy);
+ settings.func_free = mesh_calc_tessellation_for_face_free_fn;
+ BM_iter_parallel(bm, BM_FACES_OF_MESH, mesh_calc_tessellation_for_face_fn, looptris, &settings);
+}
+
+void BM_mesh_calc_tessellation(BMesh *bm, BMLoop *(*looptris)[3])
+{
+ if (bm->totface < BM_FACE_TESSELLATE_THREADED_LIMIT) {
+ bm_mesh_calc_tessellation__single_threaded(bm, looptris);
+ }
+ else {
+ bm_mesh_calc_tessellation__multi_threaded(bm, looptris);
+ }
+}
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -236,12 +293,7 @@ void BM_mesh_calc_tessellation_with_partial(BMesh *bm,
BM_mesh_elem_index_ensure(bm, BM_LOOP | BM_FACE);
- /* On systems with 32+ cores,
- * only a very small number of faces has any advantage single threading (in the 100's).
- * Note that between 500-2000 quads, the difference isn't so much
- * (tessellation isn't a bottleneck in this case anyway).
- * Avoid the slight overhead of using threads in this case. */
- if (bmpinfo->faces_len < 1024) {
+ if (bmpinfo->faces_len < BM_FACE_TESSELLATE_THREADED_LIMIT) {
bm_mesh_calc_tessellation_with_partial__single_threaded(looptris, bmpinfo);
}
else {
More information about the Bf-blender-cvs
mailing list