[Bf-blender-cvs] [e80b8934381] cycles-x: Cycles X: Ground work for work balancing
Sergey Sharybin
noreply at git.blender.org
Fri Jul 2 18:01:22 CEST 2021
Commit: e80b8934381d8ce52924aed71a389788cd1ff0c6
Author: Sergey Sharybin
Date: Wed Jun 30 16:37:43 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBe80b8934381d8ce52924aed71a389788cd1ff0c6
Cycles X: Ground work for work balancing
No actual logic implementation yet, just taking gradual steps towards
the desired goal.
===================================================================
M intern/cycles/integrator/CMakeLists.txt
M intern/cycles/integrator/path_trace.cpp
M intern/cycles/integrator/path_trace.h
M intern/cycles/integrator/render_scheduler.cpp
M intern/cycles/integrator/render_scheduler.h
A intern/cycles/integrator/work_balancer.cpp
A intern/cycles/integrator/work_balancer.h
===================================================================
diff --git a/intern/cycles/integrator/CMakeLists.txt b/intern/cycles/integrator/CMakeLists.txt
index e8950445bb7..bfabd35d7c3 100644
--- a/intern/cycles/integrator/CMakeLists.txt
+++ b/intern/cycles/integrator/CMakeLists.txt
@@ -32,6 +32,7 @@ set(SRC
path_trace_work_gpu.cpp
render_scheduler.cpp
shader_eval.cpp
+ work_balancer.cpp
work_tile_scheduler.cpp
)
@@ -51,6 +52,7 @@ set(SRC_HEADERS
path_trace_work_gpu.h
render_scheduler.h
shader_eval.h
+ work_balancer.h
work_tile_scheduler.h
)
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index ffbd7b19db6..b2baf60d9d8 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -41,6 +41,9 @@ PathTrace::PathTrace(Device *device, DeviceScene *device_scene, RenderScheduler
path_trace_works_.emplace_back(
PathTraceWork::create(path_trace_device, device_scene, &render_cancel_.is_requested));
});
+
+ work_balance_infos_.resize(path_trace_works_.size());
+ work_balance_do_initial(work_balance_infos_);
}
void PathTrace::load_kernels()
@@ -139,6 +142,7 @@ void PathTrace::render_pipeline(RenderWork render_work)
}
update_display(render_work);
+ rebalance(render_work);
progress_update_if_needed();
@@ -159,6 +163,7 @@ void PathTrace::render_init_kernel_execution()
* smaller. */
template<typename Callback>
static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
+ const vector<WorkBalanceInfo> &work_balance_infos,
const BufferParams &buffer_params,
const Callback &callback)
{
@@ -167,9 +172,7 @@ static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>>
int current_y = 0;
for (int i = 0; i < num_works; ++i) {
- /* TODO(sergey): Support adaptive weight based on an observed device performance. */
- const float weight = 1.0f / num_works;
-
+ const double weight = work_balance_infos[i].weight;
const int slice_height = max(lround(height * weight), 1);
/* Disallow negative values to deal with situations when there are more compute devices than
@@ -196,6 +199,7 @@ static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>>
void PathTrace::update_allocated_work_buffer_params()
{
foreach_sliced_buffer_params(path_trace_works_,
+ work_balance_infos_,
big_tile_params_,
[](PathTraceWork *path_trace_work, const BufferParams &params) {
RenderBuffers *buffers = path_trace_work->get_render_buffers();
@@ -227,6 +231,7 @@ void PathTrace::update_effective_work_buffer_params(const RenderWork &render_wor
resolution_divider);
foreach_sliced_buffer_params(path_trace_works_,
+ work_balance_infos_,
scaled_big_tile_params,
[&](PathTraceWork *path_trace_work, const BufferParams params) {
path_trace_work->set_effective_buffer_params(
@@ -277,9 +282,13 @@ void PathTrace::path_trace(RenderWork &render_work)
const double start_time = time_dt();
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ const int num_works = path_trace_works_.size();
+ tbb::parallel_for(0, num_works, [&](int i) {
+ const double work_start_time = time_dt();
+ PathTraceWork *path_trace_work = path_trace_works_[i].get();
path_trace_work->render_samples(render_work.path_trace.start_sample,
render_work.path_trace.num_samples);
+ work_balance_infos_[i].time_spent += time_dt() - work_start_time;
});
render_scheduler_.report_path_trace_time(
@@ -493,6 +502,27 @@ void PathTrace::update_display(const RenderWork &render_work)
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
}
+void PathTrace::rebalance(const RenderWork &render_work)
+{
+ if (!render_work.rebalance) {
+ return;
+ }
+
+ if (path_trace_works_.size() == 1) {
+ VLOG(3) << "Ignoring rebalance work due to single device render.";
+ return;
+ }
+
+ VLOG(3) << "Perform rebalance work.";
+
+ if (!work_balance_do_rebalance(work_balance_infos_)) {
+ VLOG(3) << "Balance in path trace works did not change.";
+ return;
+ }
+
+ /* TODO(sergey): Update buffer allocation, and copy data across devices as needed. */
+}
+
void PathTrace::cancel()
{
thread_scoped_lock lock(render_cancel_.mutex);
diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h
index 57f9ae885ca..39e81c854e3 100644
--- a/intern/cycles/integrator/path_trace.h
+++ b/intern/cycles/integrator/path_trace.h
@@ -19,6 +19,7 @@
#include "integrator/denoiser.h"
#include "integrator/pass_accessor.h"
#include "integrator/path_trace_work.h"
+#include "integrator/work_balancer.h"
#include "render/buffers.h"
#include "util/util_function.h"
#include "util/util_thread.h"
@@ -180,6 +181,7 @@ class PathTrace {
void adaptive_sample(RenderWork &render_work);
void denoise(const RenderWork &render_work);
void update_display(const RenderWork &render_work);
+ void rebalance(const RenderWork &render_work);
/* Get number of samples in the current state of the render buffers. */
int get_num_samples_in_buffer();
@@ -209,6 +211,9 @@ class PathTrace {
* device. */
vector<unique_ptr<PathTraceWork>> path_trace_works_;
+ /* Per-path trace work information needed for multi-device balancing. */
+ vector<WorkBalanceInfo> work_balance_infos_;
+
/* Render buffer parameters of the big tile. */
BufferParams big_tile_params_;
diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp
index 8e9d4019899..14b1ba69fdd 100644
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -115,6 +115,8 @@ void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples)
state_.last_display_update_time = 0.0;
state_.last_display_update_sample = -1;
+ state_.last_rebalance_time = 0.0;
+
/* TODO(sergey): Choose better initial value. */
/* NOTE: The adaptive sampling settings might not be available here yet. */
state_.adaptive_sampling_threshold = 0.4f;
@@ -201,9 +203,11 @@ RenderWork RenderScheduler::get_render_work()
{
check_time_limit_reached();
+ const double time_now = time_dt();
+
if (done()) {
if (state_.end_render_time == 0.0) {
- state_.end_render_time = time_dt();
+ state_.end_render_time = time_now;
}
return RenderWork();
}
@@ -241,10 +245,15 @@ RenderWork RenderScheduler::get_render_work()
/* A fallback display update time, for the case there is an error of display update, or when
* there is no display at all. */
if (render_work.update_display) {
- state_.last_display_update_time = time_dt();
+ state_.last_display_update_time = time_now;
state_.last_display_update_sample = state_.num_rendered_samples;
}
+ render_work.rebalance = work_need_rebalance();
+ if (render_work.rebalance) {
+ state_.last_rebalance_time = time_now;
+ }
+
return render_work;
}
@@ -699,6 +708,22 @@ bool RenderScheduler::work_need_update_display(const bool denoiser_delayed)
return (time_dt() - state_.last_display_update_time) > update_interval;
}
+bool RenderScheduler::work_need_rebalance()
+{
+ /* This is the minimum time, as the rebalancing can not happen more often than the path trace
+ * work. */
+ static const double kRebalanceIntervalInSeconds = 5;
+
+ if (state_.resolution_divider != pixel_size_) {
+ /* Don't rebalance at a non-final resolution divider. Some reasons for this:
+ * - It will introduce unnecessary overhead during navigation.
+ * - Per-render device timing information is not very reliable yet. */
+ return false;
+ }
+
+ return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
+}
+
void RenderScheduler::update_start_resolution_divider()
{
if (start_resolution_divider_ == 0) {
diff --git a/intern/cycles/integrator/render_scheduler.h b/intern/cycles/integrator/render_scheduler.h
index ffb5ee8c947..8551c806ec6 100644
--- a/intern/cycles/integrator/render_scheduler.h
+++ b/intern/cycles/integrator/render_scheduler.h
@@ -53,6 +53,11 @@ class RenderWork {
/* Display which is used to visualize render result is to be updated for the new render. */
bool update_display = false;
+ /* Re-balance multi-device scheduling after rendering this work.
+ * Note that the scheduler does not know anything about devices, so if there is only a single
+ * device used, then it is up to the PathTracer to ignore the balancing. */
+ bool rebalance = false;
+
/* Conversion to bool, to simplify checks about whether there is anything to be done for this
* work. */
inline operator bool() const
@@ -188,6 +193,9 @@ class RenderScheduler {
* The `denoiser_delayed` is what `work_need_denoise()` returned as delayed denoiser flag. */
bool work_need_update_display(const bool denoiser_delayed);
+ /* Check whether it is time to perform rebalancing for the render work. */
+ bool work_need_rebalance();
+
/* Check whether timing of the given work are usable to store timings in the `first_render_time_`
* for the resolution divider calculation. */
bool work_is_usable_for_first_render_estimation(const RenderWork &render_work);
@@ -258,6 +266,9 @@ class RenderScheduler {
/* Value of -1 means display was never updated. */
int last_display_update_sample = -1;
+ /* Point in time at which last rebalance has been performed. */
+ double last_rebalance_time = 0.0;
+
/* Threshold for adaptive sampling which will be scheduled to work when not using progressive
* noise floor. */
float adaptive_sampling_threshold = 0.0f;
diff --git a/intern/cycles/integrator/work_balanc
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list