[Bf-blender-cvs] [8d860d4a9b3] functions: use shared particle allocator instead of one per thread

Jacques Lucke noreply at git.blender.org
Fri Sep 20 16:38:24 CEST 2019


Commit: 8d860d4a9b3ca4c0bd7b97d3087caf63ce7f22ba
Author: Jacques Lucke
Date:   Fri Sep 20 16:00:26 2019 +0200
Branches: functions
https://developer.blender.org/rB8d860d4a9b3ca4c0bd7b97d3087caf63ce7f22ba

use shared particle allocator instead of one per thread

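In outline, the commit replaces the previous one-allocator-per-thread design with a single ParticleAllocator that all worker threads share, serializing its request path behind the new m_request_mutex member (see the hunks below). The following self-contained sketch illustrates that pattern in isolation; SharedAllocator and its members are illustrative stand-ins, not the Blender API.

// A minimal sketch of the pattern this commit adopts: one allocator shared
// by all worker threads, with its request path serialized by a mutex,
// instead of one allocator instance per thread.
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class SharedAllocator {
 private:
  std::mutex m_request_mutex;
  std::vector<int *> m_allocated_blocks;

 public:
  SharedAllocator() = default;
  SharedAllocator(const SharedAllocator &) = delete;
  SharedAllocator(SharedAllocator &&) = delete;

  ~SharedAllocator()
  {
    for (int *block : m_allocated_blocks) {
      delete[] block;
    }
  }

  /* Every thread calls this on the same instance; the lock makes the
   * bookkeeping safe without per-thread caches. */
  int *request(int size)
  {
    std::lock_guard<std::mutex> lock(m_request_mutex);
    int *block = new int[size];
    m_allocated_blocks.push_back(block);
    return block;
  }
};

int main()
{
  SharedAllocator allocator;
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; t++) {
    threads.emplace_back([&allocator]() {
      int *buffer = allocator.request(128);
      buffer[0] = 42; /* Use the buffer outside the lock. */
    });
  }
  for (std::thread &thread : threads) {
    thread.join();
  }
  std::printf("all threads allocated from one shared allocator\n");
  return 0;
}

The lock only guards the bookkeeping; the returned buffer is then filled without contention, which is also how the real request()/initialize_new_particles() split works below.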
===================================================================

M	source/blender/blenlib/BLI_task_cxx.h
M	source/blender/simulations/bparticles/particle_allocator.cpp
M	source/blender/simulations/bparticles/particle_allocator.hpp
M	source/blender/simulations/bparticles/simulate.cpp

===================================================================

diff --git a/source/blender/blenlib/BLI_task_cxx.h b/source/blender/blenlib/BLI_task_cxx.h
index 74fcb2a7c78..045df2ebdda 100644
--- a/source/blender/blenlib/BLI_task_cxx.h
+++ b/source/blender/blenlib/BLI_task_cxx.h
@@ -44,7 +44,7 @@ static void parallel_array_elements(ArrayRef<T> array,
                                     bool use_threading = true)
 {
   if (!use_threading) {
-    for (T &element : array) {
+    for (const T &element : array) {
       process_element(element);
     }
     return;
@@ -66,70 +66,12 @@ static void parallel_array_elements(ArrayRef<T> array,
                              const int index,
                              const TaskParallelTLS *__restrict UNUSED(tls)) {
                             ParallelData &data = *(ParallelData *)userdata;
-                            T &element = data.array[index];
+                            const T &element = data.array[index];
                             data.process_element(element);
                           },
                           &settings);
 }
 
-template<typename T, typename ProcessElement, typename CreateThreadLocal, typename FreeThreadLocal>
-static void parallel_array_elements(ArrayRef<T> array,
-                                    ProcessElement process_element,
-                                    CreateThreadLocal create_thread_local,
-                                    FreeThreadLocal free_thread_local,
-                                    bool use_threading = true)
-{
-  using LocalData = decltype(create_thread_local());
-
-  if (!use_threading) {
-    LocalData local_data = create_thread_local();
-    for (const T &element : array) {
-      process_element(element, local_data);
-    }
-    free_thread_local(local_data);
-    return;
-  }
-
-  TaskParallelSettings settings;
-  BLI_parallel_range_settings_defaults(&settings);
-  settings.scheduling_mode = TASK_SCHEDULING_STATIC;
-  settings.min_iter_per_thread = 1;
-
-  struct ParallelData {
-    ArrayRef<T> array;
-    ProcessElement &process_element;
-    CreateThreadLocal &create_thread_local;
-    Map<int, LocalData> thread_locals;
-    std::mutex thread_locals_mutex;
-  } data = {array, process_element, create_thread_local, {}, {}};
-
-  BLI_task_parallel_range(
-      0,
-      array.size(),
-      (void *)&data,
-      [](void *__restrict userdata, const int index, const TaskParallelTLS *__restrict tls) {
-        ParallelData &data = *(ParallelData *)userdata;
-        int thread_id = tls->thread_id;
-
-        data.thread_locals_mutex.lock();
-        LocalData *local_data_ptr = data.thread_locals.lookup_ptr(thread_id);
-        LocalData local_data = (local_data_ptr == nullptr) ? data.create_thread_local() :
-                                                             *local_data_ptr;
-        if (local_data_ptr == nullptr) {
-          data.thread_locals.add_new(thread_id, local_data);
-        }
-        data.thread_locals_mutex.unlock();
-
-        const T &element = data.array[index];
-        data.process_element(element, local_data);
-      },
-      &settings);
-
-  for (LocalData local_data : data.thread_locals.values()) {
-    free_thread_local(local_data);
-  }
-}
-
 template<typename ProcessRange>
 static void parallel_range(IndexRange total_range,
                            uint chunk_size,
diff --git a/source/blender/simulations/bparticles/particle_allocator.cpp b/source/blender/simulations/bparticles/particle_allocator.cpp
index 45ef8c507ff..77cf965508a 100644
--- a/source/blender/simulations/bparticles/particle_allocator.cpp
+++ b/source/blender/simulations/bparticles/particle_allocator.cpp
@@ -6,83 +6,71 @@ ParticleAllocator::ParticleAllocator(ParticlesState &state) : m_state(state)
 {
 }
 
-AttributesBlock &ParticleAllocator::get_non_full_block(AttributesBlockContainer &container)
+void ParticleAllocator::allocate_buffer_ranges(AttributesBlockContainer &container,
+                                               uint size,
+                                               Vector<ArrayRef<void *>> &r_buffers,
+                                               Vector<IndexRange> &r_ranges)
 {
-  AttributesBlock *cached_block = m_non_full_cache.lookup_default(&container, nullptr);
-  if (cached_block != nullptr) {
-    if (cached_block->remaining_capacity() > 0) {
-      return *cached_block;
-    }
-
-    m_non_full_cache.remove(&container);
-  }
-
-  AttributesBlock *block = container.new_block();
-  m_non_full_cache.add_new(&container, block);
-  m_allocated_blocks.append(block);
-  return *block;
-}
-
-void ParticleAllocator::allocate_block_ranges(StringRef particle_system_name,
-                                              uint size,
-                                              Vector<AttributesBlock *> &r_blocks,
-                                              Vector<IndexRange> &r_ranges)
-{
-  AttributesBlockContainer &container = m_state.particle_container(particle_system_name);
+  std::lock_guard<std::mutex> lock(m_request_mutex);
 
   uint remaining_size = size;
   while (remaining_size > 0) {
-    AttributesBlock &block = this->get_non_full_block(container);
-
-    uint size_to_use = std::min(block.remaining_capacity(), remaining_size);
-    IndexRange range(block.size(), size_to_use);
-    block.set_size(block.size() + size_to_use);
-
-    r_blocks.append(&block);
-    r_ranges.append(range);
-
-    this->initialize_new_particles(block, container, range);
-
-    remaining_size -= size_to_use;
+    AttributesBlock *cached_block = m_non_full_cache.lookup_default(&container, nullptr);
+    if (cached_block != nullptr) {
+      uint remaining_in_block = cached_block->remaining_capacity();
+      BLI_assert(remaining_in_block > 0);
+      uint size_to_use = std::min(remaining_size, remaining_in_block);
+
+      IndexRange range(cached_block->size(), size_to_use);
+      r_buffers.append(cached_block->as_ref__all().buffers());
+      r_ranges.append(range);
+      remaining_size -= size_to_use;
+
+      cached_block->set_size(range.one_after_last());
+      if (cached_block->remaining_capacity() == 0) {
+        m_non_full_cache.remove(&container);
+      }
+      continue;
+    }
+    else {
+      AttributesBlock *new_block = container.new_block();
+      m_non_full_cache.add_new(&container, new_block);
+      m_allocated_blocks.append(new_block);
+    }
   }
 }
 
-void ParticleAllocator::initialize_new_particles(AttributesBlock &block,
-                                                 AttributesBlockContainer &container,
-                                                 IndexRange pindices)
+void ParticleAllocator::initialize_new_particles(AttributesBlockContainer &container,
+                                                 AttributesRefGroup &attributes_group)
 {
-  AttributesRef attributes = block.as_ref().slice(pindices);
-  for (uint i : attributes.info().attribute_indices()) {
-    attributes.init_default(i);
-  }
+  for (AttributesRef attributes : attributes_group) {
+    for (uint i : attributes.info().attribute_indices()) {
+      attributes.init_default(i);
+    }
 
-  MutableArrayRef<int32_t> particle_ids = block.as_ref__all().get<int32_t>("ID");
-  IndexRange new_ids = container.new_ids(pindices.size());
-  for (uint i = 0; i < pindices.size(); i++) {
-    uint pindex = pindices[i];
-    particle_ids[pindex] = new_ids[i];
+    MutableArrayRef<int32_t> particle_ids = attributes.get<int32_t>("ID");
+    IndexRange new_ids = container.new_ids(attributes.size());
+    BLI_assert(particle_ids.size() == new_ids.size());
+    for (uint i = 0; i < new_ids.size(); i++) {
+      particle_ids[i] = new_ids[i];
+    }
   }
 }
 
-const AttributesInfo &ParticleAllocator::attributes_info(StringRef particle_system_name)
-{
-  return m_state.particle_container(particle_system_name).attributes_info();
-}
-
 AttributesRefGroup ParticleAllocator::request(StringRef particle_system_name, uint size)
 {
-  Vector<AttributesBlock *> blocks;
+  AttributesBlockContainer &container = m_state.particle_container(particle_system_name);
+
+  Vector<ArrayRef<void *>> buffers;
   Vector<IndexRange> ranges;
-  this->allocate_block_ranges(particle_system_name, size, blocks, ranges);
+  this->allocate_buffer_ranges(container, size, buffers, ranges);
 
-  const AttributesInfo &attributes_info = this->attributes_info(particle_system_name);
+  const AttributesInfo &attributes_info = container.attributes_info();
+  AttributesRefGroup attributes_group(attributes_info, std::move(buffers), std::move(ranges));
 
-  Vector<ArrayRef<void *>> buffers;
-  for (uint i = 0; i < blocks.size(); i++) {
-    buffers.append(blocks[i]->as_ref().buffers());
-  }
+  this->initialize_new_particles(container, attributes_group);
 
-  return AttributesRefGroup(attributes_info, std::move(buffers), std::move(ranges));
+  return attributes_group;
 }
 
 }  // namespace BParticles
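The new allocate_buffer_ranges above carves a request for `size` particles into (buffer, range) pairs under one lock, reusing the cached non-full block first and creating fresh blocks as needed. The sketch below reproduces that splitting loop with simplified stand-in types; Block, its fixed capacity of 1000, and allocate_ranges are illustrative assumptions, not the Blender containers.

// A sketch of the range-splitting loop: a request is satisfied from the
// current non-full block, and a fresh block is appended whenever the
// current one runs out, mirroring the commit's while loop.
#include <algorithm>
#include <cstdio>
#include <tuple>
#include <vector>

struct Block {
  static constexpr unsigned capacity = 1000;
  unsigned size = 0;

  unsigned remaining_capacity() const
  {
    return capacity - size;
  }
};

/* Returns (block index, start, length) triples covering `size` new elements. */
static std::vector<std::tuple<size_t, unsigned, unsigned>> allocate_ranges(
    std::vector<Block> &blocks, unsigned size)
{
  std::vector<std::tuple<size_t, unsigned, unsigned>> ranges;
  unsigned remaining_size = size;
  while (remaining_size > 0) {
    if (blocks.empty() || blocks.back().remaining_capacity() == 0) {
      blocks.push_back(Block());
      continue; /* Retry with the fresh block, as in the commit's loop. */
    }
    Block &block = blocks.back();
    unsigned size_to_use = std::min(remaining_size, block.remaining_capacity());
    ranges.emplace_back(blocks.size() - 1, block.size, size_to_use);
    block.size += size_to_use;
    remaining_size -= size_to_use;
  }
  return ranges;
}

int main()
{
  std::vector<Block> blocks;
  /* A request for 2500 particles spans three 1000-capacity blocks. */
  for (auto [block, start, length] : allocate_ranges(blocks, 2500)) {
    std::printf("block %zu: [%u, %u)\n", block, start, start + length);
  }
  return 0;
}
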
diff --git a/source/blender/simulations/bparticles/particle_allocator.hpp b/source/blender/simulations/bparticles/particle_allocator.hpp
index c63ce4bc6c7..79b41363e61 100644
--- a/source/blender/simulations/bparticles/particle_allocator.hpp
+++ b/source/blender/simulations/bparticles/particle_allocator.hpp
@@ -6,28 +6,24 @@ namespace BParticles {
 
 using BKE::AttributesRefGroup;
 
-/**
- * This class allows allocating new blocks from different particle containers.
- * A single instance is not thread safe, but multiple allocator instances can
- * be used by multiple threads at the same time.
- * It might hand out the same block more than once until it is full.
- */
-class ParticleAllocator {
+class ParticleAllocator : BLI::NonCopyable, BLI::NonMovable {
  private:
   ParticlesState &m_state;
   Map<AttributesBlockContainer *, AttributesBlock *> m_non_full_cache;
   Vector<AttributesBlock *> m_allocated_blocks;
+  std::mutex m_request_mutex;
 
  public:
   ParticleAllocator(ParticlesState &state);
-  ParticleAllocator(ParticleAllocator &other) = delete;
-  ParticleAllocator(ParticleAllocator &&other) = delete;
 
   /**
    * Access all blocks that have been allocated by this allocator.
    */
   ArrayRef<AttributesBlock *> allocated_blocks();
 
+  /**
+   * Get memory buffers for new particles.
+   */
   AttributesRefGroup request(StringRef particle_

@@ Diff output truncated at 10240 characters. @@
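One detail visible in the header hunk before the truncation: the hand-written deleted constructors are replaced by inheriting from BLI::NonCopyable and BLI::NonMovable, which fits a class that now owns a std::mutex. A minimal sketch of that mixin idiom follows; the two base classes here are stand-ins, since their BLI definitions are not part of this diff.

// A sketch of the mixin idiom the header switches to: small base classes
// delete copy/move once, instead of repeating deleted constructors in
// every class.
#include <mutex>

class NonCopyable {
 public:
  NonCopyable() = default;
  NonCopyable(const NonCopyable &) = delete;
  NonCopyable &operator=(const NonCopyable &) = delete;
};

class NonMovable {
 public:
  NonMovable() = default;
  NonMovable(NonMovable &&) = delete;
  NonMovable &operator=(NonMovable &&) = delete;
};

/* A class holding a mutex cannot be copied or moved anyway; the mixins
 * turn accidental copies into clear compile errors. */
class ParticleAllocatorLike : NonCopyable, NonMovable {
 private:
  std::mutex m_request_mutex;

 public:
  ParticleAllocatorLike() = default;
};

int main()
{
  ParticleAllocatorLike allocator;
  // ParticleAllocatorLike copy = allocator;  /* would not compile */
  return 0;
}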


