[Bf-blender-cvs] [062287596e1] cycles-x: Cycles X: Reduce OIDN memory usage for shadow catcher and multi-device
Sergey Sharybin
noreply at git.blender.org
Thu Jul 8 09:06:11 CEST 2021
Commit: 062287596e1c06bf8fa09c942959725d72647b16
Author: Sergey Sharybin
Date: Wed Jul 7 10:35:16 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB062287596e1c06bf8fa09c942959725d72647b16
Cycles X: Reduce OIDN memory usage for shadow catcher and multi-device
Read compositing passes in-place, avoiding extra memory allocation for
the OIDN pass.
Differential Revision: https://developer.blender.org/D11840
===================================================================
M intern/cycles/integrator/denoiser_oidn.cpp
M intern/cycles/integrator/pass_accessor.h
M intern/cycles/integrator/pass_accessor_cpu.cpp
===================================================================
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index 9f3e4b0b1de..12a0a1bad69 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -146,8 +146,10 @@ class OIDNPass {
/* For the scaled passes, the data which holds values of scaled pixels. */
array<float> scaled_buffer;
- /* For the in-place usable passes denotes whether the underlying data has been scaled. */
- bool is_scaled = false;
+ /* For the in-place usable passes denotes whether the data is prepared to be used as-is.
+ * For example, for compositing passes this means that the compositing result has been read,
+ * and for scaling passes means that scaling has been performed. */
+ bool is_inplace_ready = false;
};
class OIDNDenoiseContext {
@@ -240,14 +242,8 @@ class OIDNDenoiseContext {
stride * pass_stride * sizeof(float));
}
- void read_pass_pixels(OIDNPass &oidn_pass)
+ void read_pass_pixels(OIDNPass &oidn_pass, const PassAccessor::Destination &destination)
{
- const int64_t width = buffer_params_.width;
- const int64_t height = buffer_params_.height;
-
- array<float> &scaled_buffer = oidn_pass.scaled_buffer;
- scaled_buffer.resize(width * height * 3);
-
PassAccessor::PassAccessInfo pass_access_info;
pass_access_info.type = oidn_pass.type;
pass_access_info.mode = oidn_pass.mode;
@@ -263,17 +259,50 @@ class OIDNDenoiseContext {
* by users. What is important is to use same exposure for read and write access of the pass
* pixels. */
const PassAccessorCPU pass_accessor(pass_access_info, 1.0f, num_samples_);
- const PassAccessor::Destination destination(scaled_buffer.data(), 3);
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params_, destination);
}
- void set_pass_scaled(OIDNPass &oidn_pass)
+ void read_pass_pixels_inplace_if_needed(OIDNPass &oidn_pass)
+ {
+ if (oidn_pass.is_inplace_ready) {
+ return;
+ }
+ oidn_pass.is_inplace_ready = true;
+
+ float *buffer_data = render_buffers_->buffer.data();
+ float *pass_data = buffer_data + oidn_pass.offset;
+
+ PassAccessor::Destination destination(pass_data, 3);
+ destination.pixel_stride = buffer_params_.pass_stride;
+
+ read_pass_pixels(oidn_pass, destination);
+ }
+
+ void read_pass_pixels_into_buffer_if_needed(OIDNPass &oidn_pass)
{
- if (oidn_pass.scaled_buffer.empty()) {
- read_pass_pixels(oidn_pass);
+ if (!oidn_pass.scaled_buffer.empty()) {
+ return;
}
+ VLOG(3) << "Allocating temporary buffer for pass " << oidn_pass.name << " ("
+ << pass_type_as_string(oidn_pass.type) << ")";
+
+ const int64_t width = buffer_params_.width;
+ const int64_t height = buffer_params_.height;
+
+ array<float> &scaled_buffer = oidn_pass.scaled_buffer;
+ scaled_buffer.resize(width * height * 3);
+
+ const PassAccessor::Destination destination(scaled_buffer.data(), 3);
+
+ read_pass_pixels(oidn_pass, destination);
+ }
+
+ void set_pass_scaled(OIDNPass &oidn_pass)
+ {
+ read_pass_pixels_into_buffer_if_needed(oidn_pass);
+
const int64_t width = buffer_params_.width;
const int64_t height = buffer_params_.height;
@@ -289,23 +318,23 @@ class OIDNDenoiseContext {
void set_pass(OIDNPass &oidn_pass)
{
- if (oidn_pass.use_compositing) {
- /* TODO(sergey): Avoid extra memory for compositing passes. */
- set_pass_scaled(oidn_pass);
- return;
- }
+ const bool use_compositing = oidn_pass.use_compositing;
- /* When adaptive sampling is involved scaling is always needed.
- * If the avoid scaling if there is only one sample, to save up time (so we dont divide buffer
- * by 1). */
- if (pass_sample_count_ == PASS_UNUSED && (!oidn_pass.need_scale || num_samples_ == 1)) {
+ /* Simple case: no compositing is involved, no scaling is needed. Reference the pass from the
+ * render buffers without extra compute. */
+ if (!use_compositing && !is_pass_scale_needed(oidn_pass)) {
set_pass_referenced(oidn_pass);
return;
}
if (allow_inplace_modification_) {
set_pass_referenced(oidn_pass);
- scale_pass_if_needed(oidn_pass);
+ if (use_compositing) {
+ read_pass_pixels_inplace_if_needed(oidn_pass);
+ }
+ else {
+ scale_pass_inplace_if_needed(oidn_pass);
+ }
return;
}
@@ -384,15 +413,37 @@ class OIDNDenoiseContext {
}
}
- void scale_pass_if_needed(OIDNPass &oidn_pass)
+ bool is_pass_scale_needed(OIDNPass &oidn_pass) const
{
+ if (oidn_pass.is_inplace_ready) {
+ return false;
+ }
+
+ if (pass_sample_count_ != PASS_UNUSED) {
+ /* With adaptive sampling pixels will have different number of samples in them, so need to
+ * always scale the pass to make pixels uniformly sampled. */
+ return true;
+ }
+
if (!oidn_pass.need_scale) {
- return;
+ return false;
}
- if (oidn_pass.is_scaled) {
+
+ if (num_samples_ == 1) {
+ /* Avoid scaling if there is only one sample, to save time (so we don't divide
+ * buffer by 1). */
+ return false;
+ }
+
+ return true;
+ }
+
+ void scale_pass_inplace_if_needed(OIDNPass &oidn_pass)
+ {
+ if (!is_pass_scale_needed(oidn_pass)) {
return;
}
- oidn_pass.is_scaled = true;
+ oidn_pass.is_inplace_ready = true;
const int64_t x = buffer_params_.full_x;
const int64_t y = buffer_params_.full_y;
diff --git a/intern/cycles/integrator/pass_accessor.h b/intern/cycles/integrator/pass_accessor.h
index 401917e744a..9610ba0d1fb 100644
--- a/intern/cycles/integrator/pass_accessor.h
+++ b/intern/cycles/integrator/pass_accessor.h
@@ -76,6 +76,9 @@ class PassAccessor {
/* Offset in pixels from the beginning of pixels storage.
* Allows to get pixels of render buffer into a partial slice of the destination. */
int offset = 0;
+
+ /* Number of floats per pixel. When zero is the same as `num_components`. */
+ int pixel_stride = 0;
};
class Source {
diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp
index 7591d8bf643..af6890f304b 100644
--- a/intern/cycles/integrator/pass_accessor_cpu.cpp
+++ b/intern/cycles/integrator/pass_accessor_cpu.cpp
@@ -98,14 +98,15 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const Processor &processor) const
{
const float *buffer_data = render_buffers->buffer.data();
+ const int pixel_stride = destination.pixel_stride ? destination.pixel_stride :
+ destination.num_components;
tbb::parallel_for(0, buffer_params.height, [&](int y) {
int64_t pixel_index = int64_t(y) * buffer_params.width;
for (int x = 0; x < buffer_params.width; ++x, ++pixel_index) {
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
const float *buffer = buffer_data + input_pixel_offset;
- float *pixel = destination.pixels +
- (pixel_index + destination.offset) * destination.num_components;
+ float *pixel = destination.pixels + (pixel_index + destination.offset) * pixel_stride;
processor(kfilm_convert, buffer, pixel);
}
More information about the Bf-blender-cvs
mailing list