[Bf-blender-cvs] [a6eae9213f2] cycles-x: Cycles X: Multi-device re-balancing
Sergey Sharybin
noreply at git.blender.org
Fri Jul 2 18:01:22 CEST 2021
Commit: a6eae9213f237650b289751df37e3e4b67360c31
Author: Sergey Sharybin
Date: Wed Jun 30 18:19:33 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBa6eae9213f237650b289751df37e3e4b67360c31
Cycles X: Multi-device re-balancing
This is an initial implementation which seems to give better
device utilization here when using two non-matched GPUs, as
well as multi-GPU and CPU.
General idea is to balance amount of work based on an
observed performance of devices, and "re-slice" the big tile.
Things which are known to be not final and are considered for further
development:
- The balancing algorithm might need some tweaks for the
objective function and weight modification to converge to
the ideal balance quicker.
- The "re-slicing" might also be optimized memory-wise.
- Headless rendering needs to give a few iterations of smaller
work to allow the multi-device to settle into the balance.
The balancing logic is in its own little file, which simplifies the
process of experimentation.
Differential Revision: https://developer.blender.org/D11774
===================================================================
M intern/cycles/integrator/path_trace.cpp
M intern/cycles/integrator/render_scheduler.cpp
M intern/cycles/integrator/work_balancer.cpp
===================================================================
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index 366030df6aa..3d434c5beda 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -516,23 +516,56 @@ void PathTrace::update_display(const RenderWork &render_work)
void PathTrace::rebalance(const RenderWork &render_work)
{
+ static const int kLogLevel = 3;
+
+ scoped_timer timer;
+
+ const int num_works = path_trace_works_.size();
+
if (!render_work.rebalance) {
return;
}
- if (path_trace_works_.size() == 1) {
- VLOG(3) << "Ignoring rebalance work due to single device render.";
+ if (num_works == 1) {
+ VLOG(kLogLevel) << "Ignoring rebalance work due to single device render.";
return;
}
- VLOG(3) << "Perform rebalance work.";
+ if (VLOG_IS_ON(kLogLevel)) {
+ VLOG(kLogLevel) << "Perform rebalance work.";
+ VLOG(kLogLevel) << "Per-device path tracing time (seconds):";
+ for (int i = 0; i < num_works; ++i) {
+ VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": "
+ << work_balance_infos_[i].time_spent;
+ }
+ }
+
+ const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_);
- if (!work_balance_do_rebalance(work_balance_infos_)) {
- VLOG(3) << "Balance in path trace works did not change.";
+ if (VLOG_IS_ON(kLogLevel)) {
+ VLOG(kLogLevel) << "Calculated per-device weights for works:";
+ for (int i = 0; i < num_works; ++i) {
+ LOG(INFO) << path_trace_works_[i]->get_device()->info.description << ": "
+ << work_balance_infos_[i].weight;
+ }
+ }
+
+ if (!did_rebalance) {
+ VLOG(kLogLevel) << "Balance in path trace works did not change.";
return;
}
- /* TODO(sergey): Update buffer allocation, and copy data across devices as needed. */
+ TempCPURenderBuffers big_tile_cpu_buffers(device_);
+ big_tile_cpu_buffers.buffers->reset(render_state_.effective_big_tile_params);
+
+ copy_to_render_buffers(big_tile_cpu_buffers.buffers.get());
+
+ render_state_.need_reset_params = true;
+ update_work_buffer_params_if_needed(render_work);
+
+ copy_from_render_buffers(big_tile_cpu_buffers.buffers.get());
+
+ VLOG(kLogLevel) << "Rebalance time (seconds): " << timer.get_time();
}
void PathTrace::cancel()
diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp
index 14b1ba69fdd..e9586075a5b 100644
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -712,7 +712,7 @@ bool RenderScheduler::work_need_rebalance()
{
/* This is the minimum time, as the rebalancing can not happen more often than the path trace
* work. */
- static const double kRebalanceIntervalInSeconds = 5;
+ static const double kRebalanceIntervalInSeconds = 1;
if (state_.resolution_divider != pixel_size_) {
/* Don't rebalance at a non-final resolution divider. Some reasons for this:
@@ -721,6 +721,10 @@ bool RenderScheduler::work_need_rebalance()
return false;
}
+ if (state_.num_rendered_samples == 1) {
+ return true;
+ }
+
return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
}
diff --git a/intern/cycles/integrator/work_balancer.cpp b/intern/cycles/integrator/work_balancer.cpp
index b2a69866982..3edb8ba5598 100644
--- a/intern/cycles/integrator/work_balancer.cpp
+++ b/intern/cycles/integrator/work_balancer.cpp
@@ -16,6 +16,8 @@
#include "integrator/work_balancer.h"
+#include "util/util_math.h"
+
CCL_NAMESPACE_BEGIN
void work_balance_do_initial(vector<WorkBalanceInfo> &work_balance_infos)
@@ -27,17 +29,108 @@ void work_balance_do_initial(vector<WorkBalanceInfo> &work_balance_infos)
return;
}
+ /* There is no statistics available, so start with an equal distribution. */
const double weight = 1.0 / num_infos;
for (WorkBalanceInfo &balance_info : work_balance_infos) {
balance_info.weight = weight;
}
}
+/* Calculate the time it takes for every work to complete a unit of work.
+ * The resulting times are normalized so that their sum is 1. */
+static vector<double> calculate_normalized_times_per_unit(
+ const vector<WorkBalanceInfo> &work_balance_infos)
+{
+ const int num_infos = work_balance_infos.size();
+
+ vector<double> times_per_unit;
+ times_per_unit.reserve(num_infos);
+
+ double total_time_per_unit = 0;
+ for (const WorkBalanceInfo &work_balance_info : work_balance_infos) {
+ /* The work did `total_work * weight`, and the time per unit is
+ * `time_spent / (total_work * weight)`. The total amount of work is not known here, but it will
+ * get cancelled out during normalization anyway.
+ *
+ * Note that in some degenerate cases (when the amount of work is smaller than the number of
+ * workers) it is possible that the time and/or weight of the work is 0. */
+ const double time_per_unit = work_balance_info.weight != 0 ?
+ work_balance_info.time_spent / work_balance_info.weight :
+ 0;
+ times_per_unit.push_back(time_per_unit);
+ total_time_per_unit += time_per_unit;
+ }
+
+ const double total_time_per_unit_inv = 1.0 / total_time_per_unit;
+ for (double &time_per_unit : times_per_unit) {
+ time_per_unit *= total_time_per_unit_inv;
+ }
+
+ return times_per_unit;
+}
+
+/* Calculate weights for the more ideal distribution of work.
+ * The calculation here is based on the observed performance of every worker: the amount of work
+ * scheduled is proportional to the performance of the worker. Performance of the worker is the
+ * inverse of the time-per-unit-work. */
+static vector<double> calculate_normalized_weights(
+ const vector<WorkBalanceInfo> &work_balance_infos)
+{
+ const int num_infos = work_balance_infos.size();
+
+ const vector<double> times_per_unit = calculate_normalized_times_per_unit(work_balance_infos);
+
+ vector<double> weights;
+ weights.reserve(num_infos);
+
+ double total_weight = 0;
+ for (double time_per_unit : times_per_unit) {
+ /* Note that in some degenerate cases (when the amount of work is smaller than the number of
+ * workers) it is possible that the time and/or weight of the work is 0. */
+ const double weight = time_per_unit != 0 ? 1.0 / time_per_unit : 0;
+ total_weight += weight;
+ weights.push_back(weight);
+ }
+
+ const double total_weight_inv = 1.0 / total_weight;
+ for (double &weight : weights) {
+ weight *= total_weight_inv;
+ }
+
+ return weights;
+}
+
+static bool apply_new_weights(vector<WorkBalanceInfo> &work_balance_infos,
+ const vector<double> &new_weights)
+{
+ const int num_infos = work_balance_infos.size();
+
+ bool has_big_difference = false;
+ for (int i = 0; i < num_infos; ++i) {
+ /* Apparently, there is no `ccl::fabs()`. */
+ if (std::fabs(work_balance_infos[i].weight - new_weights[i]) > 0.02) {
+ has_big_difference = true;
+ }
+ }
+
+ if (!has_big_difference) {
+ return false;
+ }
+
+ for (int i = 0; i < num_infos; ++i) {
+ WorkBalanceInfo &info = work_balance_infos[i];
+ info.weight = new_weights[i];
+ info.time_spent = 0;
+ }
+
+ return true;
+}
+
bool work_balance_do_rebalance(vector<WorkBalanceInfo> &work_balance_infos)
{
- /* TODO(sergey): Needs implementation. */
- (void)work_balance_infos;
- return false;
+ const vector<double> new_weights = calculate_normalized_weights(work_balance_infos);
+
+ return apply_new_weights(work_balance_infos, new_weights);
}
CCL_NAMESPACE_END
More information about the Bf-blender-cvs
mailing list