[Bf-blender-cvs] [585d6a51c07] functions: initial api for threading with thread local data
Jacques Lucke
noreply at git.blender.org
Mon Jul 1 11:11:59 CEST 2019
Commit: 585d6a51c0777e2c4a70a2898a173f6116f6889c
Author: Jacques Lucke
Date: Mon Jul 1 11:08:00 2019 +0200
Branches: functions
https://developer.blender.org/rB585d6a51c0777e2c4a70a2898a173f6116f6889c
Initial API for threading with thread-local data.
This still looks quite ugly, but it is better than doing it manually every time.
===================================================================
M source/blender/blenlib/BLI_task.hpp
M source/blender/simulations/bparticles/simulate.cpp
===================================================================
diff --git a/source/blender/blenlib/BLI_task.hpp b/source/blender/blenlib/BLI_task.hpp
index 87e2bb7abc4..e872c237b7d 100644
--- a/source/blender/blenlib/BLI_task.hpp
+++ b/source/blender/blenlib/BLI_task.hpp
@@ -1,7 +1,10 @@
#pragma once
+#include <mutex>
+
#include "BLI_task.h"
#include "BLI_array_ref.hpp"
+#include "BLI_small_map.hpp"
namespace BLI {
namespace Task {
@@ -12,12 +15,14 @@ namespace Task {
*
* For debugging/profiling purposes the threading can be disabled.
*/
-template<typename T, typename Func>
-static void parallel_array_elements(ArrayRef<T> array, Func function, bool use_threading = false)
+template<typename T, typename ProcessElement>
+static void parallel_array_elements(ArrayRef<T> array,
+ ProcessElement process_element,
+ bool use_threading = false)
{
if (!use_threading) {
for (T &element : array) {
- function(element);
+ process_element(element);
}
return;
}
@@ -28,8 +33,8 @@ static void parallel_array_elements(ArrayRef<T> array, Func function, bool use_t
struct ParallelData {
ArrayRef<T> array;
- Func &function;
- } data = {array, function};
+ ProcessElement &process_element;
+ } data = {array, process_element};
BLI_task_parallel_range(0,
array.size(),
@@ -39,10 +44,67 @@ static void parallel_array_elements(ArrayRef<T> array, Func function, bool use_t
const ParallelRangeTLS *__restrict UNUSED(tls)) {
ParallelData &data = *(ParallelData *)userdata;
T &element = data.array[index];
- data.function(element);
+ data.process_element(element);
},
&settings);
}
+/**
+ * Call `process_element(element, local_data)` for every element of `array`, where
+ * `local_data` is a value produced by `create_thread_local()`.
+ *
+ * - Without threading, `create_thread_local()` is called exactly once, the resulting value
+ *   is passed to every `process_element` call and finally handed to `free_thread_local`.
+ * - With threading, the work is dispatched through `BLI_task_parallel_range`. One value is
+ *   created lazily for every distinct `tls->thread_id` that processes an element and is
+ *   stored in a map shared by all tasks. After `BLI_task_parallel_range` returns, every
+ *   stored value is passed to `free_thread_local`.
+ *
+ * `process_element` receives a copy of the stored value, so changes made to that copy are
+ * not written back. `LocalData` is therefore presumably meant to be a cheap handle such as
+ * a pointer to the actual per-thread state.
+ *
+ * For debugging/profiling purposes the threading can be disabled.
+ */
+template<typename T, typename ProcessElement, typename CreateThreadLocal, typename FreeThreadLocal>
+static void parallel_array_elements(ArrayRef<T> array,
+                                    ProcessElement process_element,
+                                    CreateThreadLocal create_thread_local,
+                                    FreeThreadLocal free_thread_local,
+                                    bool use_threading = false)
+{
+  using LocalData = decltype(create_thread_local());
+
+  if (!use_threading) {
+    LocalData local_data = create_thread_local();
+    for (T &element : array) {
+      process_element(element, local_data);
+    }
+    free_thread_local(local_data);
+    return;
+  }
+
+  ParallelRangeSettings settings = {0};
+  BLI_parallel_range_settings_defaults(&settings);
+  settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
+
+  /* State shared by all tasks; passed to the callback through the `userdata` pointer. */
+  struct ParallelData {
+    ArrayRef<T> array;
+    ProcessElement &process_element;
+    CreateThreadLocal &create_thread_local;
+    SmallMap<int, LocalData> thread_locals;
+    std::mutex thread_locals_mutex;
+  } data = {array, process_element, create_thread_local, {}, {}};
+
+  BLI_task_parallel_range(
+      0,
+      array.size(),
+      (void *)&data,
+      [](void *__restrict userdata, const int index, const ParallelRangeTLS *__restrict tls) {
+        ParallelData &data = *(ParallelData *)userdata;
+        const int thread_id = tls->thread_id;
+
+        /* The map is shared between all tasks, so lookup and insertion have to be guarded.
+         * The lock object releases the mutex even if creating or inserting the data throws,
+         * which a bare lock()/unlock() pair would not. */
+        std::unique_lock<std::mutex> lock(data.thread_locals_mutex);
+        LocalData *local_data_ptr = data.thread_locals.lookup_ptr(thread_id);
+        LocalData local_data = (local_data_ptr == nullptr) ? data.create_thread_local() :
+                                                             *local_data_ptr;
+        if (local_data_ptr == nullptr) {
+          data.thread_locals.add_new(thread_id, local_data);
+        }
+        /* Release before the actual work so that elements are processed concurrently. */
+        lock.unlock();
+
+        T &element = data.array[index];
+        data.process_element(element, local_data);
+      },
+      &settings);
+
+  /* Hand every value that was created during the parallel run back to the caller. */
+  for (LocalData local_data : data.thread_locals.values()) {
+    free_thread_local(local_data);
+  }
+}
+
} // namespace Task
} // namespace BLI
diff --git a/source/blender/simulations/bparticles/simulate.cpp b/source/blender/simulations/bparticles/simulate.cpp
index f00b79a7cdb..6a02519bd80 100644
--- a/source/blender/simulations/bparticles/simulate.cpp
+++ b/source/blender/simulations/bparticles/simulate.cpp
@@ -517,51 +517,6 @@ struct ThreadLocalData {
}
};
-struct SimulateTimeSpanData {
- ArrayRef<ParticlesBlock *> blocks;
- ArrayRef<float> all_durations;
- float end_time;
- BlockAllocators &block_allocators;
- StepDescription &step_description;
-
- std::mutex data_per_thread_mutex;
- SmallMap<uint, ThreadLocalData *> data_per_thread;
-};
-
-BLI_NOINLINE static void simulate_block_time_span_cb(void *__restrict userdata,
- const int index,
- const ParallelRangeTLS *__restrict tls)
-{
- SimulateTimeSpanData *data = (SimulateTimeSpanData *)userdata;
-
- ThreadLocalData *my_data;
- {
- std::lock_guard<std::mutex> lock(data->data_per_thread_mutex);
- if (!data->data_per_thread.contains(tls->thread_id)) {
- ThreadLocalData *new_data = new ThreadLocalData(BLOCK_SIZE,
- data->block_allocators.new_allocator());
- data->data_per_thread.add_new(tls->thread_id, new_data);
- }
-
- my_data = data->data_per_thread.lookup(tls->thread_id);
- }
-
- BlockAllocator &block_allocator = my_data->block_allocator;
- FixedArrayAllocator &array_allocator = my_data->array_allocator;
-
- ParticlesBlock &block = *data->blocks[index];
- ParticlesState &state = block_allocator.particles_state();
- uint particle_type_id = state.particle_container_id(block.container());
- ParticleType &particle_type = data->step_description.particle_type(particle_type_id);
-
- simulate_block(array_allocator,
- block_allocator,
- block,
- particle_type,
- data->all_durations.take_back(block.active_amount()),
- data->end_time);
-}
-
BLI_NOINLINE static void simulate_blocks_for_time_span(BlockAllocators &block_allocators,
ArrayRef<ParticlesBlock *> blocks,
StepDescription &step_description,
@@ -571,69 +526,33 @@ BLI_NOINLINE static void simulate_blocks_for_time_span(BlockAllocators &block_al
return;
}
- ParallelRangeSettings settings;
- BLI_parallel_range_settings_defaults(&settings);
- settings.use_threading = USE_THREADING;
-
uint block_size = blocks[0]->container().block_size();
SmallVector<float> all_durations(block_size);
all_durations.fill(time_span.duration());
- SimulateTimeSpanData data = {
- blocks, all_durations, time_span.end(), block_allocators, step_description, {}, {}};
-
- BLI_task_parallel_range(0, blocks.size(), (void *)&data, simulate_block_time_span_cb, &settings);
-
- for (ThreadLocalData *local_data : data.data_per_thread.values()) {
- delete local_data;
- }
-}
-
-struct SimulateFromBirthData {
- ArrayRef<ParticlesBlock *> blocks;
- float end_time;
- BlockAllocators &block_allocators;
- StepDescription &step_description;
-
- std::mutex data_per_thread_mutex;
- SmallMap<uint, ThreadLocalData *> data_per_thread;
-};
-
-BLI_NOINLINE static void simulate_block_from_birth_cb(void *__restrict userdata,
- const int index,
- const ParallelRangeTLS *__restrict tls)
-{
- SimulateFromBirthData *data = (SimulateFromBirthData *)userdata;
-
- ThreadLocalData *my_data;
- {
- std::lock_guard<std::mutex> lock(data->data_per_thread_mutex);
- if (!data->data_per_thread.contains(tls->thread_id)) {
- ThreadLocalData *new_data = new ThreadLocalData(BLOCK_SIZE,
- data->block_allocators.new_allocator());
- data->data_per_thread.add_new(tls->thread_id, new_data);
- }
-
- my_data = data->data_per_thread.lookup(tls->thread_id);
- }
-
- FixedArrayAllocator &array_allocator = my_data->array_allocator;
- BlockAllocator &block_allocator = my_data->block_allocator;
-
- ParticlesBlock &block = *data->blocks[index];
- ParticlesState &state = block_allocator.particles_state();
-
- uint particle_type_id = state.particle_container_id(block.container());
- ParticleType &particle_type = data->step_description.particle_type(particle_type_id);
-
- uint active_amount = block.active_amount();
- SmallVector<float> durations(active_amount);
- auto birth_times = block.slice_active().get_float("Birth Time");
- for (uint i = 0; i < active_amount; i++) {
- durations[i] = data->end_time - birth_times[i];
- }
- simulate_block(
- array_allocator, block_allocator, block, particle_type, durations, data->end_time);
+ BLI::Task::parallel_array_elements(
+ blocks,
+ /* Process individual element. */
+ [&step_description, &all_durations, time_span](ParticlesBlock *block,
+ ThreadLocalData *local_data) {
+ ParticlesState &state = local_data->block_allocator.particles_state();
+ uint particle_type_id = state.particle_container_id(block->container());
+ ParticleType &particle_type = step_description.particle_type(particle_type_id);
+
+ simulate_block(local_data->array_allocator,
+ local_data->block_allocator,
+ *block,
+ particle_type,
+ ArrayRef<float>(all_durations).take_back(block->active_amount()),
+ time_span.end());
+ },
+ /* Create thread-local data. */
+ [&block_allocators]() {
+ return new ThreadLocalData(BLOCK_SIZE, block_allocators.new_allocator());
+ },
+ /* Free thread-local data. */
+ [](ThreadLocalData *local_data) { delete local_data; },
+ USE_THREADING);
}
BLI_NOINLINE static void simulate_blocks_from_birth_to_current_time(
@@ -646,17 +565,34 @@ BLI_NOINLINE static void simulate_blocks_from_birth_to_current_time(
return;
}
- ParallelRange
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list