[Bf-blender-cvs] [8988fcc4943] cycles-x: Cycles X: Reduce memory usage of OptiX denoiser and CPU render
Sergey Sharybin
noreply at git.blender.org
Mon Jul 12 15:03:53 CEST 2021
Commit: 8988fcc49436226c247a5a99b3486906d1738a64
Author: Sergey Sharybin
Date: Fri Jul 9 17:46:10 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB8988fcc49436226c247a5a99b3486906d1738a64
Cycles X: Reduce memory usage of OptiX denoiser and CPU render
Allow in-place modification of the temporary buffer created in the DeviceDenoiser.
Moved the denoised pass copy to a more accessible function. It is not
very flexible yet, and performance might be improved, but it fits
current usage and is only used from much more compute-heavy steps
of the render.
Differential Revision: https://developer.blender.org/D11874
===================================================================
M intern/cycles/integrator/denoiser_device.cpp
M intern/cycles/integrator/path_trace_work.cpp
M intern/cycles/render/buffers.cpp
M intern/cycles/render/buffers.h
===================================================================
diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp
index 05cc400f02a..d843700c323 100644
--- a/intern/cycles/integrator/denoiser_device.cpp
+++ b/intern/cycles/integrator/denoiser_device.cpp
@@ -86,17 +86,17 @@ void DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
queue->copy_to_device(local_render_buffers.buffer);
task.render_buffers = &local_render_buffers;
+ task.allow_inplace_modification = true;
}
denoiser_device->denoise_buffer(task);
if (local_buffer_used) {
- /* TODO(sergey): Only copy denoised passes. This will also allow to reduce memory usage by
- * allowing in-place modification of the temporary render buffer. */
local_render_buffers.copy_from_device();
- memcpy(render_buffers->buffer.data(),
- local_render_buffers.buffer.data(),
- sizeof(float) * local_render_buffers.buffer.size());
+
+ render_buffers_host_copy_denoised(
+ render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params);
+
render_buffers->copy_to_device();
}
}
diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp
index b569b7b6100..fe53b470fa6 100644
--- a/intern/cycles/integrator/path_trace_work.cpp
+++ b/intern/cycles/integrator/path_trace_work.cpp
@@ -115,46 +115,11 @@ void PathTraceWork::copy_from_render_buffers(const RenderBuffers *render_buffers
void PathTraceWork::copy_from_denoised_render_buffers(const RenderBuffers *render_buffers)
{
const int64_t width = effective_buffer_params_.width;
- const int64_t height = effective_buffer_params_.height;
- const int64_t pass_stride = effective_buffer_params_.pass_stride;
- const int64_t row_stride = width * pass_stride;
- const int64_t num_pixels = width * height;
-
const int64_t offset_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y;
- const int64_t offset_in_floats = offset_y * row_stride;
+ const int64_t offset = offset_y * width;
- const float *src = render_buffers->buffer.data() + offset_in_floats;
- float *dst = buffers_->buffer.data();
-
- /* Gather pass offsets which are to be copied. */
- /* TODO(sergey): Somehow de-duplicate logic with OptiX and OpenImage denoisers, so that we don't
- * have duplicated list of passes in multiple places. */
- const PassType pass_types[] = {
- PASS_COMBINED, PASS_SHADOW_CATCHER, PASS_SHADOW_CATCHER_MATTE, PASS_NONE};
- int pass_offsets[PASS_NUM];
- int num_passes = 0;
- for (int i = 0; i < PASS_NUM; ++i) {
- if (pass_types[i] == PASS_NONE) {
- break;
- }
- pass_offsets[i] = render_buffers->params.get_pass_offset(pass_types[i], PassMode::DENOISED);
- ++num_passes;
- }
-
- for (int i = 0; i < num_pixels; ++i, src += pass_stride, dst += pass_stride) {
- for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) {
- const int pass_offset = pass_offsets[pass_offset_idx];
- if (pass_offset == PASS_UNUSED) {
- continue;
- }
-
- /* TODO(sergey): Support non-RGBA passes. */
- dst[pass_offset + 0] = src[pass_offset + 0];
- dst[pass_offset + 1] = src[pass_offset + 1];
- dst[pass_offset + 2] = src[pass_offset + 2];
- dst[pass_offset + 3] = src[pass_offset + 3];
- }
- }
+ render_buffers_host_copy_denoised(
+ buffers_.get(), effective_buffer_params_, render_buffers, effective_buffer_params_, offset);
copy_render_buffers_to_device();
}
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 5ce31e4e308..31fe7378924 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -70,11 +70,11 @@ void BufferParams::update_passes(vector<Pass> &passes)
for (const Pass &pass : passes) {
const int index = pass_to_index(pass);
- if (pass_offset_[index] == PASS_UNUSED) {
- pass_offset_[index] = pass_stride;
- }
-
if (pass.is_written()) {
+ if (pass_offset_[index] == PASS_UNUSED) {
+ pass_offset_[index] = pass_stride;
+ }
+
pass_stride += pass.get_info().num_components;
}
}
@@ -183,4 +183,71 @@ void RenderBuffers::copy_to_device()
buffer.copy_to_device();
}
+void render_buffers_host_copy_denoised(RenderBuffers *dst,
+ const BufferParams &dst_params,
+ const RenderBuffers *src,
+ const BufferParams &src_params,
+ const size_t src_offset)
+{
+ DCHECK_EQ(dst_params.width, src_params.width);
+ /* TODO(sergey): More sanity checks to avoid buffer overrun. */
+
+ /* Create a map of pass offsets to be copied.
+ * Assume offsets are different to allow copying passes between buffers with different sets of
+ * passes. */
+
+ struct {
+ int dst_offset;
+ int src_offset;
+ } pass_offsets[PASS_NUM];
+
+ int num_passes = 0;
+
+ for (int i = 0; i < PASS_NUM; ++i) {
+ const PassType pass_type = static_cast<PassType>(i);
+
+ const int dst_pass_offset = dst_params.get_pass_offset(pass_type, PassMode::DENOISED);
+ if (dst_pass_offset == PASS_UNUSED) {
+ continue;
+ }
+
+ const int src_pass_offset = src_params.get_pass_offset(pass_type, PassMode::DENOISED);
+ if (src_pass_offset == PASS_UNUSED) {
+ continue;
+ }
+
+ pass_offsets[num_passes].dst_offset = dst_pass_offset;
+ pass_offsets[num_passes].src_offset = src_pass_offset;
+ ++num_passes;
+ }
+
+ /* Copy passes. */
+ /* TODO(sergey): Make it more reusable, allowing to implement copying of noisy passes. */
+
+ const int64_t dst_width = dst_params.width;
+ const int64_t dst_height = dst_params.height;
+ const int64_t dst_pass_stride = dst_params.pass_stride;
+ const int64_t dst_num_pixels = dst_width * dst_height;
+
+ const int64_t src_pass_stride = src_params.pass_stride;
+ const int64_t src_offset_in_floats = src_offset * src_pass_stride;
+
+ const float *src_pixel = src->buffer.data() + src_offset_in_floats;
+ float *dst_pixel = dst->buffer.data();
+
+ for (int i = 0; i < dst_num_pixels;
+ ++i, src_pixel += src_pass_stride, dst_pixel += dst_pass_stride) {
+ for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) {
+ const int dst_pass_offset = pass_offsets[pass_offset_idx].dst_offset;
+ const int src_pass_offset = pass_offsets[pass_offset_idx].src_offset;
+
+ /* TODO(sergey): Support non-RGBA passes. */
+ dst_pixel[dst_pass_offset + 0] = src_pixel[src_pass_offset + 0];
+ dst_pixel[dst_pass_offset + 1] = src_pixel[src_pass_offset + 1];
+ dst_pixel[dst_pass_offset + 2] = src_pixel[src_pass_offset + 2];
+ dst_pixel[dst_pass_offset + 3] = src_pixel[src_pass_offset + 3];
+ }
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 93ea5393554..8077a30b87a 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -98,6 +98,21 @@ class RenderBuffers {
void copy_to_device();
};
+/* Copy denoised passes from source to destination.
+ *
+ * Buffer parameters are provided explicitly, allowing to copy pixels between render buffers whose
+ * content corresponds to a render result at a non-unit resolution divider.
+ *
+ * `src_offset` allows to offset source pixel index which is used when a fraction of the source
+ * buffer is to be copied.
+ *
+ * The number of pixels copied is the number of pixels in the destination. */
+void render_buffers_host_copy_denoised(RenderBuffers *dst,
+ const BufferParams &dst_params,
+ const RenderBuffers *src,
+ const BufferParams &src_params,
+ const size_t src_offset = 0);
+
/* Render Tile
* Rendering task on a buffer */
More information about the Bf-blender-cvs
mailing list