[Bf-blender-cvs] [e80b8934381] cycles-x: Cycles X: Ground work for work balancing

Sergey Sharybin noreply at git.blender.org
Fri Jul 2 18:01:22 CEST 2021


Commit: e80b8934381d8ce52924aed71a389788cd1ff0c6
Author: Sergey Sharybin
Date:   Wed Jun 30 16:37:43 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBe80b8934381d8ce52924aed71a389788cd1ff0c6

Cycles X: Ground work for work balancing

No actual logic implementation yet, just taking gradual steps towards
the desired goal.

===================================================================

M	intern/cycles/integrator/CMakeLists.txt
M	intern/cycles/integrator/path_trace.cpp
M	intern/cycles/integrator/path_trace.h
M	intern/cycles/integrator/render_scheduler.cpp
M	intern/cycles/integrator/render_scheduler.h
A	intern/cycles/integrator/work_balancer.cpp
A	intern/cycles/integrator/work_balancer.h

===================================================================

diff --git a/intern/cycles/integrator/CMakeLists.txt b/intern/cycles/integrator/CMakeLists.txt
index e8950445bb7..bfabd35d7c3 100644
--- a/intern/cycles/integrator/CMakeLists.txt
+++ b/intern/cycles/integrator/CMakeLists.txt
@@ -32,6 +32,7 @@ set(SRC
   path_trace_work_gpu.cpp
   render_scheduler.cpp
   shader_eval.cpp
+  work_balancer.cpp
   work_tile_scheduler.cpp
 )
 
@@ -51,6 +52,7 @@ set(SRC_HEADERS
   path_trace_work_gpu.h
   render_scheduler.h
   shader_eval.h
+  work_balancer.h
   work_tile_scheduler.h
 )
 
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index ffbd7b19db6..b2baf60d9d8 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -41,6 +41,9 @@ PathTrace::PathTrace(Device *device, DeviceScene *device_scene, RenderScheduler
     path_trace_works_.emplace_back(
         PathTraceWork::create(path_trace_device, device_scene, &render_cancel_.is_requested));
   });
+
+  work_balance_infos_.resize(path_trace_works_.size());
+  work_balance_do_initial(work_balance_infos_);
 }
 
 void PathTrace::load_kernels()
@@ -139,6 +142,7 @@ void PathTrace::render_pipeline(RenderWork render_work)
   }
 
   update_display(render_work);
+  rebalance(render_work);
 
   progress_update_if_needed();
 
@@ -159,6 +163,7 @@ void PathTrace::render_init_kernel_execution()
  * smaller. */
 template<typename Callback>
 static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
+                                         const vector<WorkBalanceInfo> &work_balance_infos,
                                          const BufferParams &buffer_params,
                                          const Callback &callback)
 {
@@ -167,9 +172,7 @@ static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>>
 
   int current_y = 0;
   for (int i = 0; i < num_works; ++i) {
-    /* TODO(sergey): Support adaptive weight based on an observed device performance. */
-    const float weight = 1.0f / num_works;
-
+    const double weight = work_balance_infos[i].weight;
     const int slice_height = max(lround(height * weight), 1);
 
     /* Disallow negative values to deal with situations when there are more compute devices than
@@ -196,6 +199,7 @@ static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>>
 void PathTrace::update_allocated_work_buffer_params()
 {
   foreach_sliced_buffer_params(path_trace_works_,
+                               work_balance_infos_,
                                big_tile_params_,
                                [](PathTraceWork *path_trace_work, const BufferParams &params) {
                                  RenderBuffers *buffers = path_trace_work->get_render_buffers();
@@ -227,6 +231,7 @@ void PathTrace::update_effective_work_buffer_params(const RenderWork &render_wor
                                                                   resolution_divider);
 
   foreach_sliced_buffer_params(path_trace_works_,
+                               work_balance_infos_,
                                scaled_big_tile_params,
                                [&](PathTraceWork *path_trace_work, const BufferParams params) {
                                  path_trace_work->set_effective_buffer_params(
@@ -277,9 +282,13 @@ void PathTrace::path_trace(RenderWork &render_work)
 
   const double start_time = time_dt();
 
-  tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+  const int num_works = path_trace_works_.size();
+  tbb::parallel_for(0, num_works, [&](int i) {
+    const double work_start_time = time_dt();
+    PathTraceWork *path_trace_work = path_trace_works_[i].get();
     path_trace_work->render_samples(render_work.path_trace.start_sample,
                                     render_work.path_trace.num_samples);
+    work_balance_infos_[i].time_spent += time_dt() - work_start_time;
   });
 
   render_scheduler_.report_path_trace_time(
@@ -493,6 +502,27 @@ void PathTrace::update_display(const RenderWork &render_work)
   render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
 }
 
+void PathTrace::rebalance(const RenderWork &render_work)
+{
+  if (!render_work.rebalance) {
+    return;
+  }
+
+  if (path_trace_works_.size() == 1) {
+    VLOG(3) << "Ignoring rebalance work due to single device render.";
+    return;
+  }
+
+  VLOG(3) << "Perform rebalance work.";
+
+  if (!work_balance_do_rebalance(work_balance_infos_)) {
+    VLOG(3) << "Balance in path trace works did not change.";
+    return;
+  }
+
+  /* TODO(sergey): Update buffer allocation, and copy data across devices as needed. */
+}
+
 void PathTrace::cancel()
 {
   thread_scoped_lock lock(render_cancel_.mutex);
diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h
index 57f9ae885ca..39e81c854e3 100644
--- a/intern/cycles/integrator/path_trace.h
+++ b/intern/cycles/integrator/path_trace.h
@@ -19,6 +19,7 @@
 #include "integrator/denoiser.h"
 #include "integrator/pass_accessor.h"
 #include "integrator/path_trace_work.h"
+#include "integrator/work_balancer.h"
 #include "render/buffers.h"
 #include "util/util_function.h"
 #include "util/util_thread.h"
@@ -180,6 +181,7 @@ class PathTrace {
   void adaptive_sample(RenderWork &render_work);
   void denoise(const RenderWork &render_work);
   void update_display(const RenderWork &render_work);
+  void rebalance(const RenderWork &render_work);
 
   /* Get number of samples in the current state of the render buffers. */
   int get_num_samples_in_buffer();
@@ -209,6 +211,9 @@ class PathTrace {
    * device. */
   vector<unique_ptr<PathTraceWork>> path_trace_works_;
 
+  /* Per-path trace work information needed for multi-device balancing. */
+  vector<WorkBalanceInfo> work_balance_infos_;
+
   /* Render buffer parameters of the  the big tile. */
   BufferParams big_tile_params_;
 
diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp
index 8e9d4019899..14b1ba69fdd 100644
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -115,6 +115,8 @@ void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)
   state_.last_display_update_time = 0.0;
   state_.last_display_update_sample = -1;
 
+  state_.last_rebalance_time = 0.0;
+
   /* TODO(sergey): Choose better initial value. */
   /* NOTE: The adaptive sampling settings might not be available here yet. */
   state_.adaptive_sampling_threshold = 0.4f;
@@ -201,9 +203,11 @@ RenderWork RenderScheduler::get_render_work()
 {
   check_time_limit_reached();
 
+  const double time_now = time_dt();
+
   if (done()) {
     if (state_.end_render_time == 0.0) {
-      state_.end_render_time = time_dt();
+      state_.end_render_time = time_now;
     }
     return RenderWork();
   }
@@ -241,10 +245,15 @@ RenderWork RenderScheduler::get_render_work()
   /* A fallback display update time, for the case there is an error of display update, or when
    * there is no display at all. */
   if (render_work.update_display) {
-    state_.last_display_update_time = time_dt();
+    state_.last_display_update_time = time_now;
     state_.last_display_update_sample = state_.num_rendered_samples;
   }
 
+  render_work.rebalance = work_need_rebalance();
+  if (render_work.rebalance) {
+    state_.last_rebalance_time = time_now;
+  }
+
   return render_work;
 }
 
@@ -699,6 +708,22 @@ bool RenderScheduler::work_need_update_display(const bool denoiser_delayed)
   return (time_dt() - state_.last_display_update_time) > update_interval;
 }
 
+bool RenderScheduler::work_need_rebalance()
+{
+  /* This is the minimum time, as the rebalancing can not happen more often than the path trace
+   * work. */
+  static const double kRebalanceIntervalInSeconds = 5;
+
+  if (state_.resolution_divider != pixel_size_) {
+    /* Don't rebalance at a non-final resolution divider. Some reasons for this:
+     *  - It will introduce unnecessary overhead during navigation.
+     *  - Per-render device timing information is not very reliable yet. */
+    return false;
+  }
+
+  return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
+}
+
 void RenderScheduler::update_start_resolution_divider()
 {
   if (start_resolution_divider_ == 0) {
diff --git a/intern/cycles/integrator/render_scheduler.h b/intern/cycles/integrator/render_scheduler.h
index ffb5ee8c947..8551c806ec6 100644
--- a/intern/cycles/integrator/render_scheduler.h
+++ b/intern/cycles/integrator/render_scheduler.h
@@ -53,6 +53,11 @@ class RenderWork {
   /* Display which is used to visualize render result is to be updated for the new render. */
   bool update_display = false;
 
+  /* Re-balance multi-device scheduling after rendering this work.
+   * Note that the scheduler does not know anything about devices, so if there is only a single
+   * device used, then it is up to PathTrace to ignore the balancing. */
+  bool rebalance = false;
+
   /* Conversion to bool, to simplify checks about whether there is anything to be done for this
    * work. */
   inline operator bool() const
@@ -188,6 +193,9 @@ class RenderScheduler {
    * The `denoiser_delayed` is what `work_need_denoise()` returned as delayed denoiser flag. */
   bool work_need_update_display(const bool denoiser_delayed);
 
+  /* Check whether it is time to perform rebalancing for the render work. */
+  bool work_need_rebalance();
+
   /* Check whether timing of the given work are usable to store timings in the `first_render_time_`
    * for the resolution divider calculation. */
   bool work_is_usable_for_first_render_estimation(const RenderWork &render_work);
@@ -258,6 +266,9 @@ class RenderScheduler {
     /* Value of -1 means display was never updated. */
     int last_display_update_sample = -1;
 
+    /* Point in time at which last rebalance has been performed. */
+    double last_rebalance_time = 0.0;
+
     /* Threshold for adaptive sampling which will be scheduled to work when not using progressive
      * noise floor. */
     float adaptive_sampling_threshold = 0.0f;
diff --git a/intern/cycles/integrator/work_balanc

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list