[Bf-blender-cvs] [9485a2cabeb] cycles-x: Cycles X: Only copy denoised passes for multi-device render
Sergey Sharybin
noreply at git.blender.org
Tue Jul 6 12:25:29 CEST 2021
Commit: 9485a2cabebfb24d49de13606fbbbc862e253d71
Author: Sergey Sharybin
Date: Mon Jul 5 17:35:42 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB9485a2cabebfb24d49de13606fbbbc862e253d71
Cycles X: Only copy denoised passes for multi-device render
No functional changes, and timing of the denoising process should be
quite the same. The change opens the doors to allow denoisers to modify
data in-place, avoiding extra allocation in the denoisers, lowering
memory peak of the denoising process.
Differential Revision: https://developer.blender.org/D11815
===================================================================
M intern/cycles/integrator/path_trace.cpp
M intern/cycles/integrator/path_trace_work.cpp
M intern/cycles/integrator/path_trace_work.h
===================================================================
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index 8f2aa5fd2e4..c04fb4e4838 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -440,7 +440,11 @@ void PathTrace::denoise(const RenderWork &render_work)
render_state_.effective_big_tile_params, buffer_to_denoise, get_num_samples_in_buffer());
if (multi_devoice_buffers) {
- copy_from_render_buffers(multi_devoice_buffers.get());
+ multi_devoice_buffers->copy_from_device();
+ tbb::parallel_for_each(
+ path_trace_works_, [&multi_devoice_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_from_denoised_render_buffers(multi_devoice_buffers.get());
+ });
}
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp
index 90ff2f86d3f..b569b7b6100 100644
--- a/intern/cycles/integrator/path_trace_work.cpp
+++ b/intern/cycles/integrator/path_trace_work.cpp
@@ -112,6 +112,53 @@ void PathTraceWork::copy_from_render_buffers(const RenderBuffers *render_buffers
copy_render_buffers_to_device();
}
+void PathTraceWork::copy_from_denoised_render_buffers(const RenderBuffers *render_buffers)
+{
+  /* Copy only the denoised passes of this work's slice of `render_buffers` into the work's own
+   * buffer, leaving all other passes as they are, then copy the work's buffer to its device. */
+  const int64_t width = effective_buffer_params_.width;
+  const int64_t height = effective_buffer_params_.height;
+  const int64_t pass_stride = effective_buffer_params_.pass_stride;
+  const int64_t row_stride = width * pass_stride;
+  const int64_t num_pixels = width * height;
+
+  /* The work's slice starts `offset_y` rows into the given (big tile) buffer. */
+  const int64_t offset_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y;
+  const int64_t offset_in_floats = offset_y * row_stride;
+
+  const float *src = render_buffers->buffer.data() + offset_in_floats;
+  float *dst = buffers_->buffer.data();
+
+  /* Gather offsets of the denoised passes which are present in the buffer, so that the per-pixel
+   * loop does not need to re-check for unused passes. */
+  /* TODO(sergey): Somehow de-duplicate logic with OptiX and OpenImage denoisers, so that we don't
+   * have duplicated list of passes in multiple places. */
+  const PassType pass_types[] = {PASS_COMBINED, PASS_SHADOW_CATCHER, PASS_SHADOW_CATCHER_MATTE};
+  int pass_offsets[PASS_NUM];
+  int num_passes = 0;
+  for (const PassType pass_type : pass_types) {
+    const int pass_offset = render_buffers->params.get_pass_offset(pass_type, PassMode::DENOISED);
+    if (pass_offset != PASS_UNUSED) {
+      pass_offsets[num_passes++] = pass_offset;
+    }
+  }
+
+  /* The pixel index is 64-bit to match `num_pixels`; a narrower counter could overflow. */
+  for (int64_t i = 0; i < num_pixels; ++i, src += pass_stride, dst += pass_stride) {
+    for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) {
+      const int pass_offset = pass_offsets[pass_offset_idx];
+      /* TODO(sergey): Support non-RGBA passes. */
+      /* Every pass is copied as 4 consecutive floats. */
+      dst[pass_offset + 0] = src[pass_offset + 0];
+      dst[pass_offset + 1] = src[pass_offset + 1];
+      dst[pass_offset + 2] = src[pass_offset + 2];
+      dst[pass_offset + 3] = src[pass_offset + 3];
+    }
+  }
+
+  copy_render_buffers_to_device();
+}
+
bool PathTraceWork::get_render_tile_pixels(const PassAccessor &pass_accessor,
const PassAccessor::Destination &destination)
{
diff --git a/intern/cycles/integrator/path_trace_work.h b/intern/cycles/integrator/path_trace_work.h
index b69694c20f0..3ab8e371156 100644
--- a/intern/cycles/integrator/path_trace_work.h
+++ b/intern/cycles/integrator/path_trace_work.h
@@ -91,6 +91,12 @@ class PathTraceWork {
* - Copies work's render buffer to its device. */
void copy_from_render_buffers(const RenderBuffers *render_buffers);
+ /* Special version of the `copy_from_render_buffers()` which only copies denoised passes from the
+ * given render buffers, leaving the rest of the passes untouched.
+ *
+ * The same notes about device copying apply to this call as well. */
+ void copy_from_denoised_render_buffers(const RenderBuffers *render_buffers);
+
/* Copy render buffers to/from device using an appropriate device queue when needed so that
* things are executed in order with the `render_samples()`. */
virtual bool copy_render_buffers_from_device() = 0;
More information about the Bf-blender-cvs
mailing list