[Bf-blender-cvs] [78fd3bc8af1] temp-tbb-task-scheduler: Task: remove local queue and scheduling mode optimizations

Brecht Van Lommel noreply at git.blender.org
Tue Nov 5 15:10:57 CET 2019


Commit: 78fd3bc8af13348dc83071e83e086299a5ab9d1c
Author: Brecht Van Lommel
Date:   Sat Oct 12 17:11:36 2019 +0200
Branches: temp-tbb-task-scheduler
https://developer.blender.org/rB78fd3bc8af13348dc83071e83e086299a5ab9d1c

Task: remove local queue and scheduling mode optimizations

Hopefully these are no longer needed with TBB, but this needs to be tested.
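
For callers of the parallel range API the change is mechanical: the
scheduling_mode assignment is dropped and the remaining defaults are used
as-is. A minimal sketch of an updated call site follows; the userdata struct
and function names are hypothetical, only TaskParallelSettings,
BLI_parallel_range_settings_defaults and BLI_task_parallel_range come from the
diff below, and the callback signature is assumed to match
TaskParallelRangeFunc as declared in BLI_task.h at this point in the branch.

  #include "BLI_task.h"
  #include "BLI_utildefines.h"

  typedef struct ExampleData {
    float *values; /* Hypothetical per-call userdata. */
  } ExampleData;

  /* Assumed callback signature (see note above). */
  static void example_range_cb(void *__restrict userdata,
                               const int i,
                               const TaskParallelTLS *__restrict UNUSED(tls))
  {
    ExampleData *data = userdata;
    data->values[i] *= 2.0f;
  }

  static void example_run(ExampleData *data, int totvalues)
  {
    TaskParallelSettings settings;
    BLI_parallel_range_settings_defaults(&settings);
    /* No settings.scheduling_mode to set anymore; how the range is chunked
     * is left entirely to the scheduler. */
    BLI_task_parallel_range(0, totvalues, data, example_range_cb, &settings);
  }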

===================================================================

M	source/blender/blenkernel/intern/particle.c
M	source/blender/blenkernel/intern/smoke.c
M	source/blender/blenlib/BLI_task.h
M	source/blender/blenlib/intern/task_iterator.c
M	source/blender/blenlib/intern/task_pool.cc
M	source/blender/depsgraph/intern/eval/deg_eval.cc
M	source/blender/editors/physics/particle_edit.c

===================================================================

diff --git a/source/blender/blenkernel/intern/particle.c b/source/blender/blenkernel/intern/particle.c
index e87bd5f320d..5b08baae2c3 100644
--- a/source/blender/blenkernel/intern/particle.c
+++ b/source/blender/blenkernel/intern/particle.c
@@ -3337,7 +3337,6 @@ void psys_cache_edit_paths(Depsgraph *depsgraph,
 
   TaskParallelSettings settings;
   BLI_parallel_range_settings_defaults(&settings);
-  settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
   BLI_task_parallel_range(0, edit->totpoint, &iter_data, psys_cache_edit_paths_iter, &settings);
 
   edit->totcached = totpart;
diff --git a/source/blender/blenkernel/intern/smoke.c b/source/blender/blenkernel/intern/smoke.c
index 3db51c95fcb..1a865c03b6c 100644
--- a/source/blender/blenkernel/intern/smoke.c
+++ b/source/blender/blenkernel/intern/smoke.c
@@ -976,7 +976,6 @@ static void obstacles_from_mesh(Object *coll_ob,
       };
       TaskParallelSettings settings;
       BLI_parallel_range_settings_defaults(&settings);
-      settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
       BLI_task_parallel_range(
           sds->res_min[2], sds->res_max[2], &data, obstacles_from_mesh_task_cb, &settings);
     }
@@ -1569,7 +1568,6 @@ static void emit_from_particles(Object *flow_ob,
 
       TaskParallelSettings settings;
       BLI_parallel_range_settings_defaults(&settings);
-      settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
       BLI_task_parallel_range(min[2], max[2], &data, emit_from_particles_task_cb, &settings);
     }
 
@@ -1977,7 +1975,6 @@ static void emit_from_mesh(
 
       TaskParallelSettings settings;
       BLI_parallel_range_settings_defaults(&settings);
-      settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
       BLI_task_parallel_range(min[2], max[2], &data, emit_from_mesh_task_cb, &settings);
     }
     /* free bvh tree */
@@ -2967,7 +2964,6 @@ static void update_effectors(
 
     TaskParallelSettings settings;
     BLI_parallel_range_settings_defaults(&settings);
-    settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
     BLI_task_parallel_range(0, sds->res[0], &data, update_effectors_task_cb, &settings);
   }
 
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index a398972b7f5..0ed4eaec214 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -93,12 +93,6 @@ void BLI_task_pool_push(TaskPool *pool,
                         void *taskdata,
                         bool free_taskdata,
                         TaskFreeFunction freedata);
-void BLI_task_pool_push_from_thread(TaskPool *pool,
-                                    TaskRunFunction run,
-                                    void *taskdata,
-                                    bool free_taskdata,
-                                    TaskFreeFunction freedata,
-                                    int thread_id);
 
 /* work and wait until all tasks are done */
 void BLI_task_pool_work_and_wait(TaskPool *pool);
@@ -116,32 +110,8 @@ void *BLI_task_pool_userdata(TaskPool *pool);
 /* optional mutex to use from run function */
 ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool);
 
-/* Thread ID of thread that created the task pool. */
-int BLI_task_pool_creator_thread_id(TaskPool *pool);
-
-/* Delayed push, use that to reduce thread overhead by accumulating
- * all new tasks into local queue first and pushing it to scheduler
- * from within a single mutex lock.
- */
-void BLI_task_pool_delayed_push_begin(TaskPool *pool, int thread_id);
-void BLI_task_pool_delayed_push_end(TaskPool *pool, int thread_id);
-
 /* Parallel for routines */
 
-typedef enum eTaskSchedulingMode {
-  /* Task scheduler will divide overall work into equal chunks, scheduling
-   * even chunks to all worker threads.
-   * Least run time benefit, ideal for cases when each task requires equal
-   * amount of compute power.
-   */
-  TASK_SCHEDULING_STATIC,
-  /* Task scheduler will schedule small amount of work to each worker thread.
-   * Has more run time overhead, but deals much better with cases when each
-   * part of the work requires totally different amount of compute power.
-   */
-  TASK_SCHEDULING_DYNAMIC,
-} eTaskSchedulingMode;
-
 /* Per-thread specific data passed to the callback. */
 typedef struct TaskParallelTLS {
   /* Identifier of the thread who this data belongs to. */
@@ -170,8 +140,6 @@ typedef struct TaskParallelSettings {
    * is higher than a chunk size. As in, threading will always be performed.
    */
   bool use_threading;
-  /* Scheduling mode to use for this parallel range invocation. */
-  eTaskSchedulingMode scheduling_mode;
   /* Each instance of looping chunks will get a copy of this data
    * (similar to OpenMP's firstprivate).
    */
@@ -258,7 +226,6 @@ BLI_INLINE void BLI_parallel_range_settings_defaults(TaskParallelSettings *setti
 {
   memset(settings, 0, sizeof(*settings));
   settings->use_threading = true;
-  settings->scheduling_mode = TASK_SCHEDULING_STATIC;
   /* Use default heuristic to define actual chunk size. */
   settings->min_iter_per_thread = 0;
 }
diff --git a/source/blender/blenlib/intern/task_iterator.c b/source/blender/blenlib/intern/task_iterator.c
index 25eed7d65fb..7d9ea048543 100644
--- a/source/blender/blenlib/intern/task_iterator.c
+++ b/source/blender/blenlib/intern/task_iterator.c
@@ -224,15 +224,13 @@ static void task_parallel_iterator_do(const TaskParallelSettings *settings,
     userdata_chunk_array = MALLOCA(userdata_chunk_size * num_tasks);
   }
 
-  const int thread_id = BLI_task_pool_creator_thread_id(task_pool);
   for (size_t i = 0; i < num_tasks; i++) {
     if (use_userdata_chunk) {
       userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i);
       memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
     }
     /* Use this pool's pre-allocated tasks. */
-    BLI_task_pool_push_from_thread(
-        task_pool, parallel_iterator_func, userdata_chunk_local, false, NULL, thread_id);
+    BLI_task_pool_push(task_pool, parallel_iterator_func, userdata_chunk_local, false, NULL);
   }
 
   BLI_task_pool_work_and_wait(task_pool);
@@ -412,11 +410,9 @@ void BLI_task_parallel_mempool(BLI_mempool *mempool,
   BLI_mempool_iter *mempool_iterators = BLI_mempool_iter_threadsafe_create(mempool,
                                                                            (size_t)num_tasks);
 
-  const int thread_id = BLI_task_pool_creator_thread_id(task_pool);
   for (i = 0; i < num_tasks; i++) {
     /* Use this pool's pre-allocated tasks. */
-    BLI_task_pool_push_from_thread(
-        task_pool, parallel_mempool_func, &mempool_iterators[i], false, NULL, thread_id);
+    BLI_task_pool_push(task_pool, parallel_mempool_func, &mempool_iterators[i], false, NULL);
   }
 
   BLI_task_pool_work_and_wait(task_pool);
diff --git a/source/blender/blenlib/intern/task_pool.cc b/source/blender/blenlib/intern/task_pool.cc
index 27cacd4aa4d..f0711cb8708 100644
--- a/source/blender/blenlib/intern/task_pool.cc
+++ b/source/blender/blenlib/intern/task_pool.cc
@@ -45,19 +45,6 @@
  */
 #define MEMPOOL_SIZE 256
 
-/* Number of tasks which are pushed directly to local thread queue.
- *
- * This allows thread to fetch next task without locking the whole queue.
- */
-#define LOCAL_QUEUE_SIZE 1
-
-/* Number of tasks which are allowed to be scheduled in a delayed manner.
- *
- * This allows to use less locks per graph node children schedule. More details
- * could be found at TaskThreadLocalStorage::do_delayed_push.
- */
-#define DELAYED_QUEUE_SIZE 4096
-
 #ifndef NDEBUG
 #  define ASSERT_THREAD_ID(scheduler, thread_id) \
     do { \
@@ -132,31 +119,6 @@ typedef struct TaskMemPoolStats {
 } TaskMemPoolStats;
 #endif
 
-typedef struct TaskThreadLocalStorage {
-  /* Memory pool for faster task allocation.
-   * The idea is to re-use memory of finished/discarded tasks by this thread.
-   */
-  TaskMemPool task_mempool;
-
-  /* Local queue keeps thread alive by keeping small amount of tasks ready
-   * to be picked up without causing global thread locks for synchronization.
-   */
-  int num_local_queue;
-  Task *local_queue[LOCAL_QUEUE_SIZE];
-
-  /* Thread can be marked for delayed tasks push. This is helpful when it's
-   * know that lots of subsequent task pushed will happen from the same thread
-   * without "interrupting" for task execution.
-   *
-   * We try to accumulate as much tasks as possible in a local queue without
-   * any locks first, and then we push all of them into a scheduler's queue
-   * from within a single mutex lock.
-   */
-  bool do_delayed_push;
-  int num_delayed_queue;
-  Task *delayed_queue[DELAYED_QUEUE_SIZE];
-} TaskThreadLocalStorage;
-
 struct TaskPool {
   TaskScheduler *scheduler;
 
@@ -188,19 +150,9 @@ struct TaskPool {
    */
   int thread_id;
 
-  /* For the pools which are created from non-main thread which is not a
-   * scheduler worker thread we can't re-use any of scheduler's threads TLS
-   * and have to use our own one.
-   */
-  bool use_local_tls;
-  TaskThreadLocalStorage local_tls;
 #ifndef NDEBUG
   pthread_t creator_thread_id;
 #endif
-
-#ifdef DEBUG_STATS
-  TaskMemPoolStats *mempool_stats;
-#endif
 };
 
 struct TaskScheduler {
@@ -226,7 +178,6 @@ struct TaskScheduler {
 typedef struct TaskThread {
   TaskScheduler *scheduler;
   int id;
-  TaskThreadLocalStorage tls;
 } TaskThread;
 
 /* Helper */
@@ -242,90 +193,15 @@ BLI_INLINE void task_data_free(Task *task, const int thread_id)
   }
 }
 
-BLI_INLINE void initialize_task_tls(TaskThreadLocalStorage *tls)
-{
-  memset(tls, 0, sizeof(TaskThreadLocalStorage));
-}
-
-BLI_INLINE TaskThreadLocalStorage *get_task_tls(TaskPool *pool, const int thread_id)
-{
-  TaskScheduler *scheduler = pool->scheduler;
-  BLI_assert(thread_id >= 0);
-  BLI_assert(thread_id <= scheduler->num_threads);
-  if (pool->use_local_tls && thread_id == 0) {
-    BLI_assert(pool->thread_id == 0);
-    BLI_assert(!BLI_thread_is_main());
-    BLI_assert(pthread_equal(pthread_self(), pool->creator_thread_id));
-    return &pool->local_tls;
-  }
-  if (thread_id == 0) {
-    BLI_assert(BLI_thread_is_main());
-    return &scheduler->task_threads[pool->thread_id].tls;
-  }
-  return &scheduler->task_threads[thread_id].tls;
-}
-
-BLI_

@@ Diff output truncated at 10240 characters. @@
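
The task pool side mirrors this: with the local and delayed queues gone, call
sites no longer pass a creator thread id or bracket pushes with
BLI_task_pool_delayed_push_begin/end; they simply push and wait, as the
task_iterator.c hunks above show. A minimal sketch under the same caveats: the
run function, scheduling helper and task_data array are hypothetical, only
BLI_task_pool_push and BLI_task_pool_work_and_wait are taken from the diff,
and the run callback is assumed to still take a thread id (matching
TaskRunFunction at this point in the branch).

  #include "BLI_task.h"

  /* Hypothetical run function; signature assumed per the note above. */
  static void example_task_func(TaskPool *__restrict pool, void *taskdata, int thread_id)
  {
    /* Do the actual work on taskdata here. */
    (void)pool;
    (void)taskdata;
    (void)thread_id;
  }

  static void example_schedule(TaskPool *pool, void **task_data, int num_tasks)
  {
    for (int i = 0; i < num_tasks; i++) {
      /* No per-thread push variant and no delayed-push bracketing anymore. */
      BLI_task_pool_push(pool, example_task_func, task_data[i], false, NULL);
    }
    BLI_task_pool_work_and_wait(pool);
  }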


