[Bf-blender-cvs] [3ea9646c180] cycles-x: Cycles X: Reduce OIDN memory usage with multi-device render

Sergey Sharybin noreply at git.blender.org
Tue Jul 6 17:41:19 CEST 2021


Commit: 3ea9646c1802016d23f3e6b69e06570c0c9b0cc0
Author: Sergey Sharybin
Date:   Tue Jul 6 13:00:57 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB3ea9646c1802016d23f3e6b69e06570c0c9b0cc0

Cycles X: Reduce OIDN memory usage with multi-device render

Allow OIDN to modify render buffers in-place, without allocating
extra temporary buffers.

Currently memory is only saved for non-composited passes (combined,
shadow catcher matte). Memory allocation could be avoided for the
composited passes as well, but that requires passing a row stride to
the pass accessor, which is not yet possible.

Differential Revision: https://developer.blender.org/D11826

===================================================================

M	intern/cycles/integrator/denoiser.h
M	intern/cycles/integrator/denoiser_device.cpp
M	intern/cycles/integrator/denoiser_device.h
M	intern/cycles/integrator/denoiser_oidn.cpp
M	intern/cycles/integrator/denoiser_oidn.h
M	intern/cycles/integrator/path_trace.cpp

===================================================================

diff --git a/intern/cycles/integrator/denoiser.h b/intern/cycles/integrator/denoiser.h
index cac799f02ca..5870da694f9 100644
--- a/intern/cycles/integrator/denoiser.h
+++ b/intern/cycles/integrator/denoiser.h
@@ -70,10 +70,16 @@ class Denoiser {
    *
    * The `num_samples` corresponds to the number of samples in the render buffers. It is used
    * to scale buffers down to the "final" value in algorithms which don't do automatic exposure,
-   * or which needs "final" value for data passes. */
+   * or which needs "final" value for data passes.
+   *
+   * The `allow_inplace_modification` means that the denoiser is allowed to do in-place
+   * modification of the input passes (e.g. scaling them down). This will lower the memory
+   * footprint of the denoiser but will make input passes "invalid" from the path tracer's
+   * point of view. */
   virtual void denoise_buffer(const BufferParams &buffer_params,
                               RenderBuffers *render_buffers,
-                              const int num_samples) = 0;
+                              const int num_samples,
+                              bool allow_inplace_modification) = 0;
 
   /* Get a device which is used to perform actual denoising.
    *
diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp
index 186b1f690b4..6d472f9883e 100644
--- a/intern/cycles/integrator/denoiser_device.cpp
+++ b/intern/cycles/integrator/denoiser_device.cpp
@@ -38,8 +38,12 @@ DeviceDenoiser::~DeviceDenoiser()
 
 void DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
                                     RenderBuffers *render_buffers,
-                                    const int num_samples)
+                                    const int num_samples,
+                                    bool allow_inplace_modification)
 {
+  /* TODO(sergey): Support in-place modification to lower memory footprint. */
+  (void)allow_inplace_modification;
+
   Device *denoiser_device = get_denoiser_device();
   if (!denoiser_device) {
     return;
diff --git a/intern/cycles/integrator/denoiser_device.h b/intern/cycles/integrator/denoiser_device.h
index 2bf6cb48172..282cee2bfe3 100644
--- a/intern/cycles/integrator/denoiser_device.h
+++ b/intern/cycles/integrator/denoiser_device.h
@@ -33,7 +33,8 @@ class DeviceDenoiser : public Denoiser {
 
   virtual void denoise_buffer(const BufferParams &buffer_params,
                               RenderBuffers *render_buffers,
-                              const int num_samples) override;
+                              const int num_samples,
+                              bool allow_inplace_modification) override;
 };
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index 72bd7cfec61..9f3e4b0b1de 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -145,6 +145,9 @@ class OIDNPass {
 
   /* For the scaled passes, the data which holds values of scaled pixels. */
   array<float> scaled_buffer;
+
+  /* For the in-place usable passes denotes whether the underlying data has been scaled. */
+  bool is_scaled = false;
 };
 
 class OIDNDenoiseContext {
@@ -153,12 +156,14 @@ class OIDNDenoiseContext {
                      const BufferParams &buffer_params,
                      RenderBuffers *render_buffers,
                      oidn::FilterRef *oidn_filter,
-                     const int num_samples)
+                     const int num_samples,
+                     const bool allow_inplace_modification)
       : denoise_params_(denoise_params),
         buffer_params_(buffer_params),
         render_buffers_(render_buffers),
         oidn_filter_(oidn_filter),
         num_samples_(num_samples),
+        allow_inplace_modification_(allow_inplace_modification),
         pass_sample_count_(buffer_params_.get_pass_offset(PASS_SAMPLE_COUNT))
   {
     if (denoise_params_.use_pass_albedo) {
@@ -285,6 +290,7 @@ class OIDNDenoiseContext {
   void set_pass(OIDNPass &oidn_pass)
   {
     if (oidn_pass.use_compositing) {
+      /* TODO(sergey): Avoid extra memory for compositing passes. */
       set_pass_scaled(oidn_pass);
       return;
     }
@@ -297,6 +303,12 @@ class OIDNDenoiseContext {
       return;
     }
 
+    if (allow_inplace_modification_) {
+      set_pass_referenced(oidn_pass);
+      scale_pass_if_needed(oidn_pass);
+      return;
+    }
+
     set_pass_scaled(oidn_pass);
   }
 
@@ -372,11 +384,55 @@ class OIDNDenoiseContext {
     }
   }
 
+  void scale_pass_if_needed(OIDNPass &oidn_pass)
+  {
+    if (!oidn_pass.need_scale) {
+      return;
+    }
+    if (oidn_pass.is_scaled) {
+      return;
+    }
+    oidn_pass.is_scaled = true;
+
+    const int64_t x = buffer_params_.full_x;
+    const int64_t y = buffer_params_.full_y;
+    const int64_t width = buffer_params_.width;
+    const int64_t height = buffer_params_.height;
+    const int64_t offset = buffer_params_.offset;
+    const int64_t stride = buffer_params_.stride;
+    const int64_t pass_stride = buffer_params_.pass_stride;
+    const int64_t row_stride = stride * pass_stride;
+
+    const int64_t pixel_offset = offset + x + y * stride;
+    const int64_t buffer_offset = (pixel_offset * pass_stride);
+
+    float *buffer_data = render_buffers_->buffer.data();
+
+    const bool has_pass_sample_count = (pass_sample_count_ != PASS_UNUSED);
+
+    for (int y = 0; y < height; ++y) {
+      float *buffer_row = buffer_data + buffer_offset + y * row_stride;
+      for (int x = 0; x < width; ++x) {
+        float *buffer_pixel = buffer_row + x * pass_stride;
+        float *pass_pixel = buffer_pixel + oidn_pass.offset;
+
+        const float pixel_scale = 1.0f / (has_pass_sample_count ?
+                                              __float_as_uint(buffer_pixel[pass_sample_count_]) :
+                                              num_samples_);
+
+        pass_pixel[0] = pass_pixel[0] * pixel_scale;
+        pass_pixel[1] = pass_pixel[1] * pixel_scale;
+        pass_pixel[2] = pass_pixel[2] * pixel_scale;
+      }
+    }
+  }
+
   const DenoiseParams &denoise_params_;
   const BufferParams &buffer_params_;
   RenderBuffers *render_buffers_;
   oidn::FilterRef *oidn_filter_;
   int num_samples_;
+  bool allow_inplace_modification_;
   int pass_sample_count_;
 
   /* Optional albedo and normal passes, reused by denoising of different pass types. */
@@ -389,7 +445,8 @@ class OIDNDenoiseContext {
 
 void OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
                                   RenderBuffers *render_buffers,
-                                  const int num_samples)
+                                  const int num_samples,
+                                  bool allow_inplace_modification)
 {
   thread_scoped_lock lock(mutex_);
 
@@ -399,7 +456,12 @@ void OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
 #ifdef WITH_OPENIMAGEDENOISE
   oidn::FilterRef *oidn_filter = &state_->oidn_filter;
 
-  OIDNDenoiseContext context(params_, buffer_params, render_buffers, oidn_filter, num_samples);
+  OIDNDenoiseContext context(params_,
+                             buffer_params,
+                             render_buffers,
+                             oidn_filter,
+                             num_samples,
+                             allow_inplace_modification);
   context.denoise(PASS_COMBINED);
   context.denoise(PASS_SHADOW_CATCHER);
   context.denoise(PASS_SHADOW_CATCHER_MATTE);
diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h
index dcbd0403f53..91ae38801ea 100644
--- a/intern/cycles/integrator/denoiser_oidn.h
+++ b/intern/cycles/integrator/denoiser_oidn.h
@@ -36,7 +36,8 @@ class OIDNDenoiser : public Denoiser {
 
   virtual void denoise_buffer(const BufferParams &buffer_params,
                               RenderBuffers *render_buffers,
-                              const int num_samples) override;
+                              const int num_samples,
+                              bool allow_inplace_modification) override;
 
  protected:
   virtual uint get_device_type_mask() const override;
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index c04fb4e4838..87e5a6fb53d 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -418,6 +418,7 @@ void PathTrace::denoise(const RenderWork &render_work)
   RenderBuffers *buffer_to_denoise = nullptr;
 
   unique_ptr<RenderBuffers> multi_devoice_buffers;
+  bool allow_inplace_modification = false;
 
   if (path_trace_works_.size() == 1) {
     buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
@@ -434,10 +435,14 @@ void PathTrace::denoise(const RenderWork &render_work)
     buffer_to_denoise = multi_devoice_buffers.get();
 
     copy_to_render_buffers(multi_devoice_buffers.get());
+
+    allow_inplace_modification = true;
   }
 
-  denoiser_->denoise_buffer(
-      render_state_.effective_big_tile_params, buffer_to_denoise, get_num_samples_in_buffer());
+  denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
+                            buffer_to_denoise,
+                            get_num_samples_in_buffer(),
+                            allow_inplace_modification);
 
   if (multi_devoice_buffers) {
     multi_devoice_buffers->copy_from_device();



More information about the Bf-blender-cvs mailing list