[Bf-blender-cvs] [7a23c8a8cf6] temp-compositor-single-threaded-operation: Compositor: Keep WorkPackages and Data Around.

Fri Apr 2 11:29:17 CEST 2021

Commit: 7a23c8a8cf696ba290e99638886d4a926f5d3ca4
Author: Jeroen Bakker
Date:   Wed Mar 31 12:45:42 2021 +0200
Branches: temp-compositor-single-threaded-operation
https://developer.blender.org/rB7a23c8a8cf696ba290e99638886d4a926f5d3ca4

Compositor: Keep WorkPackages and Data Around.

Previously, a WorkPackage struct was created when a chunk was scheduled.
This patch keeps the WorkPackages around and stores additional data with them.

The speedup is too small to notice, but it is needed as preparation
to introduce a faster scheduling method.

===================================================================

M	source/blender/compositor/COM_compositor.h
M	source/blender/compositor/COM_defines.h
M	source/blender/compositor/intern/COM_CPUDevice.cc
M	source/blender/compositor/intern/COM_ExecutionGroup.cc
M	source/blender/compositor/intern/COM_ExecutionGroup.h
M	source/blender/compositor/intern/COM_OpenCLDevice.cc
M	source/blender/compositor/intern/COM_WorkPackage.cc
M	source/blender/compositor/intern/COM_WorkPackage.h
M	source/blender/compositor/intern/COM_WorkScheduler.cc
M	source/blender/compositor/intern/COM_WorkScheduler.h

===================================================================

diff --git a/source/blender/compositor/COM_compositor.h b/source/blender/compositor/COM_compositor.h
index 8e3caf7aaf5..300a06005ac 100644
--- a/source/blender/compositor/COM_compositor.h
+++ b/source/blender/compositor/COM_compositor.h
@@ -113,11 +113,11 @@ extern "C" {
  *
  * When the chunk-order is determined, the first few chunks will be checked if they can be scheduled.
  * Chunks can have three states:
- *  - [@ref eChunkExecutionState.NOT_SCHEDULED]:
+ *  - [@ref eChunkExecutionState.NotScheduled]:
  *    Chunk is not yet scheduled, or dependencies are not met.
- *  - [@ref eChunkExecutionState.SCHEDULED]:
+ *  - [@ref eChunkExecutionState.Scheduled]:
  *    All dependencies are met, chunk is scheduled, but not finished.
- *  - [@ref eChunkExecutionState.EXECUTED]:
+ *  - [@ref eChunkExecutionState.Executed]:
  *    Chunk is finished.
  *
  * \see ExecutionGroup.execute
diff --git a/source/blender/compositor/COM_defines.h b/source/blender/compositor/COM_defines.h
index b8f10448030..5a5868f1909 100644
--- a/source/blender/compositor/COM_defines.h
+++ b/source/blender/compositor/COM_defines.h
@@ -79,6 +79,25 @@ enum class CompositorPriority {
   Low = 0,
 };
 
+/**
+ * \brief the execution state of a chunk in an ExecutionGroup
+ * \ingroup Execution
+ */
+enum class eChunkExecutionState {
+  /**
+   * \brief chunk is not yet scheduled
+   */
+  NotScheduled = 0,
+  /**
+   * \brief chunk is scheduled, but not yet executed
+   */
+  Scheduled = 1,
+  /**
+   * \brief chunk is executed.
+   */
+  Executed = 2,
+};
+
 // configurable items
 
 // chunk size determination
diff --git a/source/blender/compositor/intern/COM_CPUDevice.cc b/source/blender/compositor/intern/COM_CPUDevice.cc
index e3081ca7bf4..29a82bec636 100644
--- a/source/blender/compositor/intern/COM_CPUDevice.cc
+++ b/source/blender/compositor/intern/COM_CPUDevice.cc
@@ -18,22 +18,22 @@
 
 #include "COM_CPUDevice.h"
 
+#include "COM_ExecutionGroup.h"
+
+#include "BLI_rect.h"
+
 namespace blender::compositor {
 
 CPUDevice::CPUDevice(int thread_id) : m_thread_id(thread_id)
 {
 }
 
-void CPUDevice::execute(WorkPackage *work)
+void CPUDevice::execute(WorkPackage *work_package)
 {
-  const unsigned int chunkNumber = work->chunk_number;
-  ExecutionGroup *executionGroup = work->execution_group;
-  rcti rect;
-
-  executionGroup->determineChunkRect(&rect, chunkNumber);
-
-  executionGroup->getOutputOperation()->executeRegion(&rect, chunkNumber);
+  const unsigned int chunkNumber = work_package->chunk_number;
+  ExecutionGroup *executionGroup = work_package->execution_group;
 
+  executionGroup->getOutputOperation()->executeRegion(&work_package->rect, chunkNumber);
   executionGroup->finalizeChunkExecution(chunkNumber, nullptr);
 }
 
diff --git a/source/blender/compositor/intern/COM_ExecutionGroup.cc b/source/blender/compositor/intern/COM_ExecutionGroup.cc
index 87c9e6e8a69..7ba787e2f52 100644
--- a/source/blender/compositor/intern/COM_ExecutionGroup.cc
+++ b/source/blender/compositor/intern/COM_ExecutionGroup.cc
@@ -123,12 +123,19 @@ NodeOperation *ExecutionGroup::getOutputOperation() const
 
 void ExecutionGroup::initExecution()
 {
-  m_chunk_execution_states.clear();
+  m_work_packages.clear();
   determineNumberOfChunks();
 
   if (this->m_chunks_len != 0) {
-    m_chunk_execution_states.resize(this->m_chunks_len);
-    m_chunk_execution_states.fill(eChunkExecutionState::NOT_SCHEDULED);
+    m_work_packages.resize(this->m_chunks_len);
+    for (unsigned int index = 0; index < m_chunks_len; index++) {
+      m_work_packages[index] = {
+          .state = eChunkExecutionState::NotScheduled,
+          .execution_group = this,
+          .chunk_number = index,
+      };
+      determineChunkRect(&m_work_packages[index].rect, index);
+    }
   }
 
   unsigned int max_offset = 0;
@@ -146,7 +153,7 @@ void ExecutionGroup::initExecution()
 
 void ExecutionGroup::deinitExecution()
 {
-  m_chunk_execution_states.clear();
+  m_work_packages.clear();
   this->m_chunks_len = 0;
   this->m_x_chunks_len = 0;
   this->m_y_chunks_len = 0;
@@ -214,11 +221,10 @@ blender::Array<unsigned int> ExecutionGroup::determine_chunk_execution_order() c
       ChunkOrderHotspot hotspot(border_width * centerX, border_height * centerY, 0.0f);
       blender::Array<ChunkOrder> chunk_orders(m_chunks_len);
       for (index = 0; index < this->m_chunks_len; index++) {
-        rcti rect;
-        determineChunkRect(&rect, index);
+        const WorkPackage &work_package = m_work_packages[index];
         chunk_orders[index].index = index;
-        chunk_orders[index].x = rect.xmin - this->m_viewerBorder.xmin;
-        chunk_orders[index].y = rect.ymin - this->m_viewerBorder.ymin;
+        chunk_orders[index].x = work_package.rect.xmin - this->m_viewerBorder.xmin;
+        chunk_orders[index].y = work_package.rect.ymin - this->m_viewerBorder.ymin;
         chunk_orders[index].update_distance(&hotspot, 1);
       }
 
@@ -252,11 +258,10 @@ blender::Array<unsigned int> ExecutionGroup::determine_chunk_execution_order() c
 
       blender::Array<ChunkOrder> chunk_orders(m_chunks_len);
       for (index = 0; index < this->m_chunks_len; index++) {
-        rcti rect;
-        determineChunkRect(&rect, index);
+        const WorkPackage &work_package = m_work_packages[index];
         chunk_orders[index].index = index;
-        chunk_orders[index].x = rect.xmin - this->m_viewerBorder.xmin;
-        chunk_orders[index].y = rect.ymin - this->m_viewerBorder.ymin;
+        chunk_orders[index].x = work_package.rect.xmin - this->m_viewerBorder.xmin;
+        chunk_orders[index].y = work_package.rect.ymin - this->m_viewerBorder.ymin;
         chunk_orders[index].update_distance(hotspots, 9);
       }
 
@@ -320,8 +325,9 @@ void ExecutionGroup::execute(ExecutionSystem *graph)
       chunk_index = chunk_order[index];
       int yChunk = chunk_index / this->m_x_chunks_len;
       int xChunk = chunk_index - (yChunk * this->m_x_chunks_len);
-      switch (m_chunk_execution_states[chunk_index]) {
-        case eChunkExecutionState::NOT_SCHEDULED: {
+      const WorkPackage &work_package = m_work_packages[chunk_index];
+      switch (work_package.state) {
+        case eChunkExecutionState::NotScheduled: {
           scheduleChunkWhenPossible(graph, xChunk, yChunk);
           finished = false;
           startEvaluated = true;
@@ -332,13 +338,13 @@ void ExecutionGroup::execute(ExecutionSystem *graph)
           }
           break;
         }
-        case eChunkExecutionState::SCHEDULED: {
+        case eChunkExecutionState::Scheduled: {
           finished = false;
           startEvaluated = true;
           numberEvaluated++;
           break;
         }
-        case eChunkExecutionState::EXECUTED: {
+        case eChunkExecutionState::Executed: {
           if (!startEvaluated) {
             startIndex = index + 1;
           }
@@ -358,15 +364,14 @@ void ExecutionGroup::execute(ExecutionSystem *graph)
 
 MemoryBuffer **ExecutionGroup::getInputBuffersOpenCL(int chunkNumber)
 {
-  rcti rect;
-  determineChunkRect(&rect, chunkNumber);
+  WorkPackage &work_package = m_work_packages[chunkNumber];
 
   MemoryBuffer **memoryBuffers = (MemoryBuffer **)MEM_callocN(
       sizeof(MemoryBuffer *) * this->m_max_read_buffer_offset, __func__);
   rcti output;
   for (ReadBufferOperation *readOperation : m_read_operations) {
     MemoryProxy *memoryProxy = readOperation->getMemoryProxy();
-    this->determineDependingAreaOfInterest(&rect, readOperation, &output);
+    this->determineDependingAreaOfInterest(&work_package.rect, readOperation, &output);
     MemoryBuffer *memoryBuffer = memoryProxy->getExecutor()->constructConsolidatedMemoryBuffer(
         *memoryProxy, output);
     memoryBuffers[readOperation->getOffset()] = memoryBuffer;
@@ -385,8 +390,9 @@ MemoryBuffer *ExecutionGroup::constructConsolidatedMemoryBuffer(MemoryProxy &mem
 
 void ExecutionGroup::finalizeChunkExecution(int chunkNumber, MemoryBuffer **memoryBuffers)
 {
-  if (this->m_chunk_execution_states[chunkNumber] == eChunkExecutionState::SCHEDULED) {
-    this->m_chunk_execution_states[chunkNumber] = eChunkExecutionState::EXECUTED;
+  WorkPackage &work_package = m_work_packages[chunkNumber];
+  if (work_package.state == eChunkExecutionState::Scheduled) {
+    work_package.state = eChunkExecutionState::Executed;
   }
 
   atomic_add_and_fetch_u(&this->m_chunks_finished, 1);
@@ -418,7 +424,7 @@ void ExecutionGroup::finalizeChunkExecution(int chunkNumber, MemoryBuffer **memo
   }
 }
 
-inline void ExecutionGroup::determineChunkRect(rcti *rect,
+inline void ExecutionGroup::determineChunkRect(rcti *r_rect,
                                                const unsigned int xChunk,
                                                const unsigned int yChunk) const
 {
@@ -427,14 +433,14 @@ inline void ExecutionGroup::determineChunkRect(rcti *rect,
 
   if (this->m_flags.single_threaded) {
     BLI_rcti_init(
-        rect, this->m_viewerBorder.xmin, border_width, this->m_viewerBorder.ymin, border_height);
+        r_rect, this->m_viewerBorder.xmin, border_width, this->m_viewerBorder.ymin, border_height);
   }
   else {
     const unsigned int minx = xChunk * this->m_chunkSize + this->m_viewerBorder.xmin;
     const unsigned int miny = yChunk * this->m_chunkSize + this->m_viewerBorder.ymin;
     const unsigned int width = MIN2((unsigned int)this->m_viewerBorder.xmax, this->m_width);
     const unsigned int height = MIN2((unsigned int)this->m_viewerBorder.ymax, this->m_height);
-    BLI_rcti_init(rect,
+    BLI_rcti_init(r_rect,
                   MIN2(minx, this->m_width),
                   MIN2(minx + this->m_chunkSize, width),
                   MIN2(miny, this->m_height),
@@ -442,11 +448,11 @@ inline void ExecutionGroup::determineChunkRect(rcti *rect,
   }
 }
 
-void ExecutionGroup::determineChunkRect(rcti *rect, const unsigned int chunkNumber) const
+void ExecutionGroup::determineChunkRect(rcti *r_rect, const unsigned int chunkNumber) const
 {
   const unsigned int yChunk = chunkNumber / this->m_x_chunks_len;
   const unsigned int xChunk = chunkNumber - (yChunk * this->m_x_chunks_len);
-  determineChunkRect(rect, xChunk, yChunk);
+  determineChunkRect(r_rect, xChunk, yChunk);
 }
 
 Memo

@@ Diff output truncated at 10240 characters. @@