[Bf-blender-cvs] [0e8bfe1f596] cycles-x: Cycles X: Reduce memory usage when denoising in multi-device render

Sergey Sharybin noreply at git.blender.org
Tue Jul 6 11:16:07 CEST 2021


Commit: 0e8bfe1f5969c0a1ee1aabcbe8a6ae8627ad971c
Author: Sergey Sharybin
Date:   Mon Jul 5 15:35:17 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB0e8bfe1f5969c0a1ee1aabcbe8a6ae8627ad971c

Cycles X: Reduce memory usage when denoising in multi-device render

The idea is to create a full big tile buffer on the actual device
which will be used for denoising. This avoids OptiX creating yet
another copy of the render buffers on the actual device.

Mainly moving some lines around from DeviceDenoiser to Denoiser
to make the logic accessible to all denoisers and to the path
tracer.

Assume allocation is cheaper than data transfer, so that some TODOs
are marked as done.

It's possible to reduce memory even further by allowing OIDN and
OptiX to modify the copy of the render buffers in-place, as it can
be thrown away. This is considered an independent further improvement
which is not tackled in this change.

Differential Revision: https://developer.blender.org/D11814

===================================================================

M	intern/cycles/integrator/denoiser.cpp
M	intern/cycles/integrator/denoiser.h
M	intern/cycles/integrator/denoiser_device.cpp
M	intern/cycles/integrator/denoiser_device.h
M	intern/cycles/integrator/denoiser_oidn.cpp
M	intern/cycles/integrator/denoiser_oidn.h
M	intern/cycles/integrator/denoiser_optix.cpp
M	intern/cycles/integrator/denoiser_optix.h
M	intern/cycles/integrator/path_trace.cpp

===================================================================

diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp
index 76f781971ae..768f805ce63 100644
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -21,19 +21,20 @@
 #include "integrator/denoiser_optix.h"
 #include "render/buffers.h"
 #include "util/util_logging.h"
+#include "util/util_progress.h"
 
 CCL_NAMESPACE_BEGIN
 
-unique_ptr<Denoiser> Denoiser::create(Device *device, const DenoiseParams &params)
+unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoiseParams &params)
 {
   DCHECK(params.use);
 
   switch (params.type) {
     case DENOISER_OPTIX:
-      return make_unique<OptiXDenoiser>(device, params);
+      return make_unique<OptiXDenoiser>(path_trace_device, params);
 
     case DENOISER_OPENIMAGEDENOISE:
-      return make_unique<OIDNDenoiser>(device, params);
+      return make_unique<OIDNDenoiser>(path_trace_device, params);
 
     case DENOISER_NUM:
     case DENOISER_NONE:
@@ -47,7 +48,8 @@ unique_ptr<Denoiser> Denoiser::create(Device *device, const DenoiseParams &param
   return nullptr;
 }
 
-Denoiser::Denoiser(Device *device, const DenoiseParams &params) : device_(device), params_(params)
+Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)
+    : path_trace_device_(path_trace_device), params_(params)
 {
   DCHECK(params.use);
 }
@@ -69,4 +71,137 @@ const DenoiseParams &Denoiser::get_params() const
   return params_;
 }
 
+bool Denoiser::load_kernels(Progress *progress)
+{
+  const Device *denoiser_device = ensure_denoiser_device(progress);
+
+  if (!denoiser_device) {
+    path_trace_device_->set_error("No device available to denoise on");
+    return false;
+  }
+
+  VLOG(3) << "Will denoise on " << denoiser_device->info.description << " ("
+          << denoiser_device->info.id << ")";
+
+  return true;
+}
+
+Device *Denoiser::get_denoiser_device() const
+{
+  return denoiser_device_;
+}
+
+/* Check whether given device is single (not a MultiDevice) and supports requested denoiser. */
+static bool is_single_supported_device(Device *device, DenoiserType type)
+{
+  if (device->info.type == DEVICE_MULTI) {
+    /* Assume multi-device is never created with a single sub-device.
+     * If one requests such configuration it should be checked on the session level. */
+    return false;
+  }
+
+  if (!device->info.multi_devices.empty()) {
+    /* Some configurations will use multi_devices, but keep the type of an individual device.
+     * This does simplify checks for homogenous setups, but here we really need a single device. */
+    return false;
+  }
+
+  /* Check the denoiser type is supported. */
+  return (device->info.denoisers & type);
+}
+
+/* Find best suitable device to perform denoiser on. Will iterate over possible sub-devices of
+ * multi-device.
+ *
+ * If there is no device available which supports given denoiser type nullptr is returned. */
+static Device *find_best_device(Device *device, DenoiserType type)
+{
+  Device *best_device = nullptr;
+
+  device->foreach_device([&](Device *sub_device) {
+    if ((sub_device->info.denoisers & type) == 0) {
+      return;
+    }
+    if (!best_device) {
+      best_device = sub_device;
+    }
+    else {
+      /* TODO(sergey): Choose fastest device from available ones. Taking into account performance
+       * of the device and data transfer cost. */
+    }
+  });
+
+  return best_device;
+}
+
+static unique_ptr<Device> create_denoiser_device(Device *path_trace_device,
+                                                 const uint device_type_mask)
+{
+  const vector<DeviceInfo> device_infos = Device::available_devices(device_type_mask);
+  if (device_infos.empty()) {
+    return nullptr;
+  }
+
+  /* TODO(sergey): Use one of the already configured devices, so that OptiX denoising can happen on
+   * a physical CUDA device which is already used for rendering. */
+
+  /* TODO(sergey): Choose fastest device for denoising. */
+
+  const DeviceInfo denoiser_device_info = device_infos.front();
+
+  unique_ptr<Device> denoiser_device(
+      Device::create(denoiser_device_info, path_trace_device->stats, path_trace_device->profiler));
+
+  if (!denoiser_device) {
+    return nullptr;
+  }
+
+  if (denoiser_device->have_error()) {
+    return nullptr;
+  }
+
+  /* Only need denoising feature, everything else is unused. */
+  DeviceRequestedFeatures denoising_features;
+  denoising_features.use_denoising = true;
+  denoising_features.use_path_tracing = false;
+  if (!denoiser_device->load_kernels(denoising_features)) {
+    return nullptr;
+  }
+
+  return denoiser_device;
+}
+
+Device *Denoiser::ensure_denoiser_device(Progress *progress)
+{
+  /* The best device has been found already, avoid sequential lookups.
+   * Additionally, avoid device re-creation if it has failed once. */
+  if (denoiser_device_ || device_creation_attempted_) {
+    return denoiser_device_;
+  }
+
+  /* Simple case: rendering happens on a single device which also supports denoiser. */
+  if (is_single_supported_device(path_trace_device_, params_.type)) {
+    denoiser_device_ = path_trace_device_;
+    return denoiser_device_;
+  }
+
+  /* Find best device from the ones which are already used for rendering. */
+  denoiser_device_ = find_best_device(path_trace_device_, params_.type);
+  if (denoiser_device_) {
+    return denoiser_device_;
+  }
+
+  if (progress) {
+    progress->set_status("Loading denoising kernels (may take a few minutes the first time)");
+  }
+
+  device_creation_attempted_ = true;
+
+  const uint device_type_mask = get_device_type_mask();
+  local_denoiser_device_ = create_denoiser_device(path_trace_device_, device_type_mask);
+  denoiser_device_ = local_denoiser_device_.get();
+
+  return denoiser_device_;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/integrator/denoiser.h b/intern/cycles/integrator/denoiser.h
index 07ede300434..cac799f02ca 100644
--- a/intern/cycles/integrator/denoiser.h
+++ b/intern/cycles/integrator/denoiser.h
@@ -38,23 +38,24 @@ class Progress;
  * TODO(sergey): Are we better with device or a queue here? */
 class Denoiser {
  public:
-  virtual ~Denoiser() = default;
-
-  void set_params(const DenoiseParams &params);
-  const DenoiseParams &get_params() const;
-
-  /* Create denoiser for the given device.
+  /* Create denoiser for the given path trace device.
+   *
    * Notes:
    * - The denoiser must be configured. This means that `params.use` must be true.
    *   This is checked in debug builds.
    * - The device might be MultiDevice. */
-  static unique_ptr<Denoiser> create(Device *device, const DenoiseParams &params);
+  static unique_ptr<Denoiser> create(Device *path_trace_device, const DenoiseParams &params);
+
+  virtual ~Denoiser() = default;
+
+  void set_params(const DenoiseParams &params);
+  const DenoiseParams &get_params() const;
 
   /* Create devices and load kernels needed for denoising.
    * The progress is used to communicate state when kenrels actually needs to be loaded.
    *
    * NOTE: The `progress` is an optional argument, can be nullptr. */
-  virtual void load_kernels(Progress *progress) = 0;
+  virtual bool load_kernels(Progress *progress);
 
   /* Denoise the entire buffer.
    *
@@ -74,25 +75,41 @@ class Denoiser {
                               RenderBuffers *render_buffers,
                               const int num_samples) = 0;
 
-  /* Get access to the device information which is used to perform actual denoising.
-   * Note that this device:
+  /* Get a device which is used to perform actual denoising.
+   *
+   * Notes:
    *
-   * - Can be different from the device used during denoiser creation. This happens, for example,
-   *   when using OptiX denoiser and rendering on CPU.
+   * - The device is lazily initialized via `load_kernels()`, so it will be nullptr until then,
    *
-   * - The denoising device is lazily initialized, so if no denoising was perfoemed yet it is
-   *   possible that device info of type DEVICE_NONE will be returned.
+   * - The device can be different from the path tracing device. This happens, for example, when
+   *   using OptiX denoiser and rendering on CPU.
    *
    * - No threading safety is ensured in this call. This means, that it is up to caller to ensure
    *   that there is no threadingconflict between denoising task lazily initializing the device and
    *   access to this device happen. */
-  virtual DeviceInfo get_denoiser_device_info() const = 0;
+  Device *get_denoiser_device() const;
 
  protected:
-  Denoiser(Device *device, const DenoiseParams &params);
+  Denoiser(Device *path_trace_device, const DenoiseParams &params);
 
-  Device *device_;
+  /* Make sure denoising device is initialized. */
+  virtual Device *ensure_denoiser_device(Progress *progress);
+
+  /* Get device type mask which is used to filter available devices when new device needs to be
+   * created. */
+  virtual uint get_device_type_mask() const = 0;
+
+  Device *path_trace_device_;
   DenoiseParams params_;
+
+  /* Cached pointer to the device on which denoising will happen.
+   * Used to avoid lookup of a device for every denoising request. */
+  Device *denoiser_device_ = nullptr;
+
+  /* Denoiser device which was created to perform denoising in the case the none of the rendering
+   * devices are capable of denoising. */
+  unique_ptr<Device> local_denoiser_device_;
+  bool device_creation_attempted_ = false;
 };
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp
index 8b11947a028..186b1f690b4 100644
--- a/intern/cycles/integrator/denoiser_device.cpp
+++ b/intern/cycles/integrator/denoiser_device.cpp
@@ -26,8 +26,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-DeviceDenoiser::DeviceDenoiser(Device *device, const DenoiseParams &params)
-    : Denoiser(device, params)
+DeviceDenoiser::DeviceDenoiser(Device *path_trace_device, const DenoiseParams &params)
+    : Denoiser(path_trace_device, params)
 {
 }
 
@@ -36,158 +36,32 @@ DeviceDenoiser::~DeviceDenoiser()
   /* Explicit implementation, to allow forward declaration of Device in the header. */
 }
 
-void DeviceDenoiser::load_kernels(Progress *progress)
-{
-  get_denoiser

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list