[Bf-blender-cvs] [78fd3bc8af1] temp-tbb-task-scheduler: Task: remove local queue and scheduling mode optimizations
Brecht Van Lommel
noreply at git.blender.org
Tue Nov 5 15:10:57 CET 2019
Commit: 78fd3bc8af13348dc83071e83e086299a5ab9d1c
Author: Brecht Van Lommel
Date: Sat Oct 12 17:11:36 2019 +0200
Branches: temp-tbb-task-scheduler
https://developer.blender.org/rB78fd3bc8af13348dc83071e83e086299a5ab9d1c
Task: remove local queue and scheduling mode optimizations
Hopefully no longer needed with TBB, but needs to be tested.
===================================================================
M source/blender/blenkernel/intern/particle.c
M source/blender/blenkernel/intern/smoke.c
M source/blender/blenlib/BLI_task.h
M source/blender/blenlib/intern/task_iterator.c
M source/blender/blenlib/intern/task_pool.cc
M source/blender/depsgraph/intern/eval/deg_eval.cc
M source/blender/editors/physics/particle_edit.c
===================================================================
diff --git a/source/blender/blenkernel/intern/particle.c b/source/blender/blenkernel/intern/particle.c
index e87bd5f320d..5b08baae2c3 100644
--- a/source/blender/blenkernel/intern/particle.c
+++ b/source/blender/blenkernel/intern/particle.c
@@ -3337,7 +3337,6 @@ void psys_cache_edit_paths(Depsgraph *depsgraph,
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
- settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
BLI_task_parallel_range(0, edit->totpoint, &iter_data, psys_cache_edit_paths_iter, &settings);
edit->totcached = totpart;
diff --git a/source/blender/blenkernel/intern/smoke.c b/source/blender/blenkernel/intern/smoke.c
index 3db51c95fcb..1a865c03b6c 100644
--- a/source/blender/blenkernel/intern/smoke.c
+++ b/source/blender/blenkernel/intern/smoke.c
@@ -976,7 +976,6 @@ static void obstacles_from_mesh(Object *coll_ob,
};
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
- settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
BLI_task_parallel_range(
sds->res_min[2], sds->res_max[2], &data, obstacles_from_mesh_task_cb, &settings);
}
@@ -1569,7 +1568,6 @@ static void emit_from_particles(Object *flow_ob,
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
- settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
BLI_task_parallel_range(min[2], max[2], &data, emit_from_particles_task_cb, &settings);
}
@@ -1977,7 +1975,6 @@ static void emit_from_mesh(
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
- settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
BLI_task_parallel_range(min[2], max[2], &data, emit_from_mesh_task_cb, &settings);
}
/* free bvh tree */
@@ -2967,7 +2964,6 @@ static void update_effectors(
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
- settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
BLI_task_parallel_range(0, sds->res[0], &data, update_effectors_task_cb, &settings);
}
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index a398972b7f5..0ed4eaec214 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -93,12 +93,6 @@ void BLI_task_pool_push(TaskPool *pool,
void *taskdata,
bool free_taskdata,
TaskFreeFunction freedata);
-void BLI_task_pool_push_from_thread(TaskPool *pool,
- TaskRunFunction run,
- void *taskdata,
- bool free_taskdata,
- TaskFreeFunction freedata,
- int thread_id);
/* work and wait until all tasks are done */
void BLI_task_pool_work_and_wait(TaskPool *pool);
@@ -116,32 +110,8 @@ void *BLI_task_pool_userdata(TaskPool *pool);
/* optional mutex to use from run function */
ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool);
-/* Thread ID of thread that created the task pool. */
-int BLI_task_pool_creator_thread_id(TaskPool *pool);
-
-/* Delayed push, use that to reduce thread overhead by accumulating
- * all new tasks into local queue first and pushing it to scheduler
- * from within a single mutex lock.
- */
-void BLI_task_pool_delayed_push_begin(TaskPool *pool, int thread_id);
-void BLI_task_pool_delayed_push_end(TaskPool *pool, int thread_id);
-
/* Parallel for routines */
-typedef enum eTaskSchedulingMode {
- /* Task scheduler will divide overall work into equal chunks, scheduling
- * even chunks to all worker threads.
- * Least run time benefit, ideal for cases when each task requires equal
- * amount of compute power.
- */
- TASK_SCHEDULING_STATIC,
- /* Task scheduler will schedule small amount of work to each worker thread.
- * Has more run time overhead, but deals much better with cases when each
- * part of the work requires totally different amount of compute power.
- */
- TASK_SCHEDULING_DYNAMIC,
-} eTaskSchedulingMode;
-
/* Per-thread specific data passed to the callback. */
typedef struct TaskParallelTLS {
/* Identifier of the thread who this data belongs to. */
@@ -170,8 +140,6 @@ typedef struct TaskParallelSettings {
* is higher than a chunk size. As in, threading will always be performed.
*/
bool use_threading;
- /* Scheduling mode to use for this parallel range invocation. */
- eTaskSchedulingMode scheduling_mode;
/* Each instance of looping chunks will get a copy of this data
* (similar to OpenMP's firstprivate).
*/
@@ -258,7 +226,6 @@ BLI_INLINE void BLI_parallel_range_settings_defaults(TaskParallelSettings *setti
{
memset(settings, 0, sizeof(*settings));
settings->use_threading = true;
- settings->scheduling_mode = TASK_SCHEDULING_STATIC;
/* Use default heuristic to define actual chunk size. */
settings->min_iter_per_thread = 0;
}
diff --git a/source/blender/blenlib/intern/task_iterator.c b/source/blender/blenlib/intern/task_iterator.c
index 25eed7d65fb..7d9ea048543 100644
--- a/source/blender/blenlib/intern/task_iterator.c
+++ b/source/blender/blenlib/intern/task_iterator.c
@@ -224,15 +224,13 @@ static void task_parallel_iterator_do(const TaskParallelSettings *settings,
userdata_chunk_array = MALLOCA(userdata_chunk_size * num_tasks);
}
- const int thread_id = BLI_task_pool_creator_thread_id(task_pool);
for (size_t i = 0; i < num_tasks; i++) {
if (use_userdata_chunk) {
userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i);
memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
}
/* Use this pool's pre-allocated tasks. */
- BLI_task_pool_push_from_thread(
- task_pool, parallel_iterator_func, userdata_chunk_local, false, NULL, thread_id);
+ BLI_task_pool_push(task_pool, parallel_iterator_func, userdata_chunk_local, false, NULL);
}
BLI_task_pool_work_and_wait(task_pool);
@@ -412,11 +410,9 @@ void BLI_task_parallel_mempool(BLI_mempool *mempool,
BLI_mempool_iter *mempool_iterators = BLI_mempool_iter_threadsafe_create(mempool,
(size_t)num_tasks);
- const int thread_id = BLI_task_pool_creator_thread_id(task_pool);
for (i = 0; i < num_tasks; i++) {
/* Use this pool's pre-allocated tasks. */
- BLI_task_pool_push_from_thread(
- task_pool, parallel_mempool_func, &mempool_iterators[i], false, NULL, thread_id);
+ BLI_task_pool_push(task_pool, parallel_mempool_func, &mempool_iterators[i], false, NULL);
}
BLI_task_pool_work_and_wait(task_pool);
diff --git a/source/blender/blenlib/intern/task_pool.cc b/source/blender/blenlib/intern/task_pool.cc
index 27cacd4aa4d..f0711cb8708 100644
--- a/source/blender/blenlib/intern/task_pool.cc
+++ b/source/blender/blenlib/intern/task_pool.cc
@@ -45,19 +45,6 @@
*/
#define MEMPOOL_SIZE 256
-/* Number of tasks which are pushed directly to local thread queue.
- *
- * This allows thread to fetch next task without locking the whole queue.
- */
-#define LOCAL_QUEUE_SIZE 1
-
-/* Number of tasks which are allowed to be scheduled in a delayed manner.
- *
- * This allows to use less locks per graph node children schedule. More details
- * could be found at TaskThreadLocalStorage::do_delayed_push.
- */
-#define DELAYED_QUEUE_SIZE 4096
-
#ifndef NDEBUG
# define ASSERT_THREAD_ID(scheduler, thread_id) \
do { \
@@ -132,31 +119,6 @@ typedef struct TaskMemPoolStats {
} TaskMemPoolStats;
#endif
-typedef struct TaskThreadLocalStorage {
- /* Memory pool for faster task allocation.
- * The idea is to re-use memory of finished/discarded tasks by this thread.
- */
- TaskMemPool task_mempool;
-
- /* Local queue keeps thread alive by keeping small amount of tasks ready
- * to be picked up without causing global thread locks for synchronization.
- */
- int num_local_queue;
- Task *local_queue[LOCAL_QUEUE_SIZE];
-
- /* Thread can be marked for delayed tasks push. This is helpful when it's
- * know that lots of subsequent task pushed will happen from the same thread
- * without "interrupting" for task execution.
- *
- * We try to accumulate as much tasks as possible in a local queue without
- * any locks first, and then we push all of them into a scheduler's queue
- * from within a single mutex lock.
- */
- bool do_delayed_push;
- int num_delayed_queue;
- Task *delayed_queue[DELAYED_QUEUE_SIZE];
-} TaskThreadLocalStorage;
-
struct TaskPool {
TaskScheduler *scheduler;
@@ -188,19 +150,9 @@ struct TaskPool {
*/
int thread_id;
- /* For the pools which are created from non-main thread which is not a
- * scheduler worker thread we can't re-use any of scheduler's threads TLS
- * and have to use our own one.
- */
- bool use_local_tls;
- TaskThreadLocalStorage local_tls;
#ifndef NDEBUG
pthread_t creator_thread_id;
#endif
-
-#ifdef DEBUG_STATS
- TaskMemPoolStats *mempool_stats;
-#endif
};
struct TaskScheduler {
@@ -226,7 +178,6 @@ struct TaskScheduler {
typedef struct TaskThread {
TaskScheduler *scheduler;
int id;
- TaskThreadLocalStorage tls;
} TaskThread;
/* Helper */
@@ -242,90 +193,15 @@ BLI_INLINE void task_data_free(Task *task, const int thread_id)
}
}
-BLI_INLINE void initialize_task_tls(TaskThreadLocalStorage *tls)
-{
- memset(tls, 0, sizeof(TaskThreadLocalStorage));
-}
-
-BLI_INLINE TaskThreadLocalStorage *get_task_tls(TaskPool *pool, const int thread_id)
-{
- TaskScheduler *scheduler = pool->scheduler;
- BLI_assert(thread_id >= 0);
- BLI_assert(thread_id <= scheduler->num_threads);
- if (pool->use_local_tls && thread_id == 0) {
- BLI_assert(pool->thread_id == 0);
- BLI_assert(!BLI_thread_is_main());
- BLI_assert(pthread_equal(pthread_self(), pool->creator_thread_id));
- return &pool->local_tls;
- }
- if (thread_id == 0) {
- BLI_assert(BLI_thread_is_main());
- return &scheduler->task_threads[pool->thread_id].tls;
- }
- return &scheduler->task_threads[thread_id].tls;
-}
-
-BLI_
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list