[Bf-blender-cvs] [8988fcc4943] cycles-x: Cycles X: Reduce memory usage of OptiX denoiser and CPU render

Sergey Sharybin noreply at git.blender.org
Mon Jul 12 15:03:53 CEST 2021


Commit: 8988fcc49436226c247a5a99b3486906d1738a64
Author: Sergey Sharybin
Date:   Fri Jul 9 17:46:10 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB8988fcc49436226c247a5a99b3486906d1738a64

Cycles X: Reduce memory usage of OptiX denoiser and CPU render

Allow in-place modification of the temporary buffer created in the
DeviceDenoiser.

Moved the denoised pass copy to a more accessible function. It is not
very flexible yet, and its performance might be improved, but it fits
the current usage and is only called from much more compute-heavy
steps of the render.

Differential Revision: https://developer.blender.org/D11874

===================================================================

M	intern/cycles/integrator/denoiser_device.cpp
M	intern/cycles/integrator/path_trace_work.cpp
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/buffers.h

===================================================================

diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp
index 05cc400f02a..d843700c323 100644
--- a/intern/cycles/integrator/denoiser_device.cpp
+++ b/intern/cycles/integrator/denoiser_device.cpp
@@ -86,17 +86,17 @@ void DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
     queue->copy_to_device(local_render_buffers.buffer);
 
     task.render_buffers = &local_render_buffers;
+    task.allow_inplace_modification = true;
   }
 
   denoiser_device->denoise_buffer(task);
 
   if (local_buffer_used) {
-    /* TODO(sergey): Only copy denoised passes. This will also allow to reduce memory usage by
-     * allowing in-place modification of the temporary render buffer. */
     local_render_buffers.copy_from_device();
-    memcpy(render_buffers->buffer.data(),
-           local_render_buffers.buffer.data(),
-           sizeof(float) * local_render_buffers.buffer.size());
+
+    render_buffers_host_copy_denoised(
+        render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params);
+
     render_buffers->copy_to_device();
   }
 }
diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp
index b569b7b6100..fe53b470fa6 100644
--- a/intern/cycles/integrator/path_trace_work.cpp
+++ b/intern/cycles/integrator/path_trace_work.cpp
@@ -115,46 +115,11 @@ void PathTraceWork::copy_from_render_buffers(const RenderBuffers *render_buffers
 void PathTraceWork::copy_from_denoised_render_buffers(const RenderBuffers *render_buffers)
 {
   const int64_t width = effective_buffer_params_.width;
-  const int64_t height = effective_buffer_params_.height;
-  const int64_t pass_stride = effective_buffer_params_.pass_stride;
-  const int64_t row_stride = width * pass_stride;
-  const int64_t num_pixels = width * height;
-
   const int64_t offset_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y;
-  const int64_t offset_in_floats = offset_y * row_stride;
+  const int64_t offset = offset_y * width;
 
-  const float *src = render_buffers->buffer.data() + offset_in_floats;
-  float *dst = buffers_->buffer.data();
-
-  /* Gather pass offsets which are to be copied. */
-  /* TODO(sergey): Somehow de-duplicate logic with OptiX and OpenImage denoisers, so that we don't
-   * have duplicated list of passes in multiple places. */
-  const PassType pass_types[] = {
-      PASS_COMBINED, PASS_SHADOW_CATCHER, PASS_SHADOW_CATCHER_MATTE, PASS_NONE};
-  int pass_offsets[PASS_NUM];
-  int num_passes = 0;
-  for (int i = 0; i < PASS_NUM; ++i) {
-    if (pass_types[i] == PASS_NONE) {
-      break;
-    }
-    pass_offsets[i] = render_buffers->params.get_pass_offset(pass_types[i], PassMode::DENOISED);
-    ++num_passes;
-  }
-
-  for (int i = 0; i < num_pixels; ++i, src += pass_stride, dst += pass_stride) {
-    for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) {
-      const int pass_offset = pass_offsets[pass_offset_idx];
-      if (pass_offset == PASS_UNUSED) {
-        continue;
-      }
-
-      /* TODO(sergey): Support non-RGBA passes. */
-      dst[pass_offset + 0] = src[pass_offset + 0];
-      dst[pass_offset + 1] = src[pass_offset + 1];
-      dst[pass_offset + 2] = src[pass_offset + 2];
-      dst[pass_offset + 3] = src[pass_offset + 3];
-    }
-  }
+  render_buffers_host_copy_denoised(
+      buffers_.get(), effective_buffer_params_, render_buffers, effective_buffer_params_, offset);
 
   copy_render_buffers_to_device();
 }
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 5ce31e4e308..31fe7378924 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -70,11 +70,11 @@ void BufferParams::update_passes(vector<Pass> &passes)
   for (const Pass &pass : passes) {
     const int index = pass_to_index(pass);
 
-    if (pass_offset_[index] == PASS_UNUSED) {
-      pass_offset_[index] = pass_stride;
-    }
-
     if (pass.is_written()) {
+      if (pass_offset_[index] == PASS_UNUSED) {
+        pass_offset_[index] = pass_stride;
+      }
+
       pass_stride += pass.get_info().num_components;
     }
   }
@@ -183,4 +183,71 @@ void RenderBuffers::copy_to_device()
   buffer.copy_to_device();
 }
 
+void render_buffers_host_copy_denoised(RenderBuffers *dst,
+                                       const BufferParams &dst_params,
+                                       const RenderBuffers *src,
+                                       const BufferParams &src_params,
+                                       const size_t src_offset)
+{
+  DCHECK_EQ(dst_params.width, src_params.width);
+  /* TODO(sergey): More sanity checks to avoid buffer overrun. */
+
+  /* Create a map of pass offsets to be copied.
+   * Assume offsets are different to allow copying passes between buffers with different sets of
+   * passes. */
+
+  struct {
+    int dst_offset;
+    int src_offset;
+  } pass_offsets[PASS_NUM];
+
+  int num_passes = 0;
+
+  for (int i = 0; i < PASS_NUM; ++i) {
+    const PassType pass_type = static_cast<PassType>(i);
+
+    const int dst_pass_offset = dst_params.get_pass_offset(pass_type, PassMode::DENOISED);
+    if (dst_pass_offset == PASS_UNUSED) {
+      continue;
+    }
+
+    const int src_pass_offset = src_params.get_pass_offset(pass_type, PassMode::DENOISED);
+    if (src_pass_offset == PASS_UNUSED) {
+      continue;
+    }
+
+    pass_offsets[num_passes].dst_offset = dst_pass_offset;
+    pass_offsets[num_passes].src_offset = src_pass_offset;
+    ++num_passes;
+  }
+
+  /* Copy passes. */
+  /* TODO(sergey): Make it more reusable, allowing to implement the copy of noisy passes. */
+
+  const int64_t dst_width = dst_params.width;
+  const int64_t dst_height = dst_params.height;
+  const int64_t dst_pass_stride = dst_params.pass_stride;
+  const int64_t dst_num_pixels = dst_width * dst_height;
+
+  const int64_t src_pass_stride = src_params.pass_stride;
+  const int64_t src_offset_in_floats = src_offset * src_pass_stride;
+
+  const float *src_pixel = src->buffer.data() + src_offset_in_floats;
+  float *dst_pixel = dst->buffer.data();
+
+  for (int i = 0; i < dst_num_pixels;
+       ++i, src_pixel += src_pass_stride, dst_pixel += dst_pass_stride) {
+    for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) {
+      const int dst_pass_offset = pass_offsets[pass_offset_idx].dst_offset;
+      const int src_pass_offset = pass_offsets[pass_offset_idx].src_offset;
+
+      /* TODO(sergey): Support non-RGBA passes. */
+      dst_pixel[dst_pass_offset + 0] = src_pixel[src_pass_offset + 0];
+      dst_pixel[dst_pass_offset + 1] = src_pixel[src_pass_offset + 1];
+      dst_pixel[dst_pass_offset + 2] = src_pixel[src_pass_offset + 2];
+      dst_pixel[dst_pass_offset + 3] = src_pixel[src_pass_offset + 3];
+    }
+  }
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 93ea5393554..8077a30b87a 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -98,6 +98,21 @@ class RenderBuffers {
   void copy_to_device();
 };
 
+/* Copy denoised passes from source to destination.
+ *
+ * Buffer parameters are provided explicitly, which allows copying pixels between render buffers
+ * whose content corresponds to a render result at a non-unit resolution divider.
+ *
+ * `src_offset` offsets the source pixel index, which is used when only a fraction of the source
+ * buffer is to be copied.
+ *
+ * The number of pixels copied is the number of pixels in the destination. */
+void render_buffers_host_copy_denoised(RenderBuffers *dst,
+                                       const BufferParams &dst_params,
+                                       const RenderBuffers *src,
+                                       const BufferParams &src_params,
+                                       const size_t src_offset = 0);
+
 /* Render Tile
  * Rendering task on a buffer */



More information about the Bf-blender-cvs mailing list