[Bf-blender-cvs] [0c5c22f2ce4] cycles-x: Fix missing tiles with GPU render and OIDN denoiser in Cycles X
Sergey Sharybin
noreply at git.blender.org
Tue Aug 31 15:21:30 CEST 2021
Commit: 0c5c22f2ce4a1445a6745121387ad5abc9fbb45c
Author: Sergey Sharybin
Date: Fri Aug 27 11:46:07 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB0c5c22f2ce4a1445a6745121387ad5abc9fbb45c
Fix missing tiles with GPU render and OIDN denoiser in Cycles X
Was noticed when testing upcoming tiled rendering with small tiles
sizes which stresses data transfers more than single tile rendering.
What happens is OIDN denoiser uses `copy_to_device()` using default
queue, which is not guaranteed to be synchronized before next call to
`copy_from_device()` happening via queue.
What we really want in the OIDN denoiser is synchronous data transfer
which is easiest to be achieved by creating a queue and synchronizing
it.
The issue is reported by Alaska in D12309#320253
Differential Revision: https://developer.blender.org/D12333
===================================================================
M intern/cycles/device/cuda/device.cpp
M intern/cycles/device/device.h
M intern/cycles/integrator/denoiser_oidn.cpp
M intern/cycles/integrator/denoiser_oidn.h
===================================================================
diff --git a/intern/cycles/device/cuda/device.cpp b/intern/cycles/device/cuda/device.cpp
index 95b8022aa23..84becd6d081 100644
--- a/intern/cycles/device/cuda/device.cpp
+++ b/intern/cycles/device/cuda/device.cpp
@@ -148,6 +148,8 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.has_nanovdb = true;
info.denoisers = 0;
+ info.has_gpu_queue = true;
+
/* Check if the device has P2P access to any other device in the system. */
for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
if (num != peer_num) {
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 1397214c897..02b6edb56d0 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -75,6 +75,7 @@ class DeviceInfo {
bool has_osl; /* Support Open Shading Language. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
+ bool has_gpu_queue; /* Device supports GPU queue. */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
@@ -92,6 +93,7 @@ class DeviceInfo {
has_osl = false;
has_profiling = false;
has_peer_memory = false;
+ has_gpu_queue = false;
denoisers = DENOISER_NONE;
}
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index 1a573be5367..1b5a012ec87 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -19,6 +19,7 @@
#include <array>
#include "device/device.h"
+#include "device/device_queue.h"
#include "integrator/pass_accessor_cpu.h"
#include "render/buffers.h"
#include "util/util_array.h"
@@ -40,12 +41,6 @@ OIDNDenoiser::OIDNDenoiser(Device *path_trace_device, const DenoiseParams ¶m
DCHECK(openimagedenoise_supported()) << "OpenImageDenoiser is not supported on this platform.";
}
-OIDNDenoiser::~OIDNDenoiser()
-{
- /* NOTE: Keep the destructor here so that it has access to the State which is an incomplete as
- * per the OIDN denoiser header. */
-}
-
#ifdef WITH_OPENIMAGEDENOISE
static bool oidn_progress_monitor_function(void *user_ptr, double /*n*/)
{
@@ -547,6 +542,39 @@ class OIDNDenoiseContext {
};
#endif
+static unique_ptr<DeviceQueue> create_device_queue(const RenderBuffers *render_buffers)
+{
+ Device *device = render_buffers->buffer.device;
+ if (device->info.has_gpu_queue) {
+ return device->gpu_queue_create();
+ }
+ return nullptr;
+}
+
+static void copy_render_buffers_from_device(unique_ptr<DeviceQueue> &queue,
+ RenderBuffers *render_buffers)
+{
+ if (queue) {
+ queue->copy_from_device(render_buffers->buffer);
+ queue->synchronize();
+ }
+ else {
+ render_buffers->copy_from_device();
+ }
+}
+
+static void copy_render_buffers_to_device(unique_ptr<DeviceQueue> &queue,
+ RenderBuffers *render_buffers)
+{
+ if (queue) {
+ queue->copy_to_device(render_buffers->buffer);
+ queue->synchronize();
+ }
+ else {
+ render_buffers->copy_to_device();
+ }
+}
+
bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
@@ -554,8 +582,9 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
{
thread_scoped_lock lock(mutex_);
- /* Copy pixels from compute device to CPU (no-op for CPU device). */
- render_buffers->buffer.copy_from_device();
+ /* Make sure the host-side data is available for denoising. */
+ unique_ptr<DeviceQueue> queue = create_device_queue(render_buffers);
+ copy_render_buffers_from_device(queue, render_buffers);
#ifdef WITH_OPENIMAGEDENOISE
OIDNDenoiseContext context(
@@ -582,7 +611,7 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
/* TODO: It may be possible to avoid this copy, but we have to ensure that when other code
* copies data from the device it doesn't overwrite the denoiser buffers. */
- render_buffers->buffer.copy_to_device();
+ copy_render_buffers_to_device(queue, render_buffers);
}
#endif
diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h
index 2448a4c362b..566e761ae79 100644
--- a/intern/cycles/integrator/denoiser_oidn.h
+++ b/intern/cycles/integrator/denoiser_oidn.h
@@ -30,7 +30,6 @@ class OIDNDenoiser : public Denoiser {
class State;
OIDNDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
- ~OIDNDenoiser();
virtual bool denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
More information about the Bf-blender-cvs
mailing list