[Bf-blender-cvs] [0c5c22f2ce4] cycles-x: Fix missing tiles with GPU render and OIDN denoiser in Cycles X

Sergey Sharybin noreply at git.blender.org
Tue Aug 31 15:21:30 CEST 2021


Commit: 0c5c22f2ce4a1445a6745121387ad5abc9fbb45c
Author: Sergey Sharybin
Date:   Fri Aug 27 11:46:07 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB0c5c22f2ce4a1445a6745121387ad5abc9fbb45c

Fix missing tiles with GPU render and OIDN denoiser in Cycles X

Was noticed when testing upcoming tiled rendering with small tiles
sizes which stresses data transfers more than single tile rendering.

What happens is OIDN denoiser uses `copy_to_device()` using default
queue, which is not guaranteed to be synchronized before next call to
`copy_from_device()` happening via queue.

What we really want in the OIDN denoiser is synchronous data transfer
which is easiest to be achieved by creating a queue and synchronizing
it.

The issue is reported by Alaska in D12309#320253

Differential Revision: https://developer.blender.org/D12333

===================================================================

M	intern/cycles/device/cuda/device.cpp
M	intern/cycles/device/device.h
M	intern/cycles/integrator/denoiser_oidn.cpp
M	intern/cycles/integrator/denoiser_oidn.h

===================================================================

diff --git a/intern/cycles/device/cuda/device.cpp b/intern/cycles/device/cuda/device.cpp
index 95b8022aa23..84becd6d081 100644
--- a/intern/cycles/device/cuda/device.cpp
+++ b/intern/cycles/device/cuda/device.cpp
@@ -148,6 +148,8 @@ void device_cuda_info(vector<DeviceInfo> &devices)
     info.has_nanovdb = true;
     info.denoisers = 0;
 
+    info.has_gpu_queue = true;
+
     /* Check if the device has P2P access to any other device in the system. */
     for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
       if (num != peer_num) {
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 1397214c897..02b6edb56d0 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -75,6 +75,7 @@ class DeviceInfo {
   bool has_osl;               /* Support Open Shading Language. */
   bool has_profiling;         /* Supports runtime collection of profiling info. */
   bool has_peer_memory;       /* GPU has P2P access to memory of another GPU. */
+  bool has_gpu_queue;         /* Device supports GPU queue. */
   DenoiserTypeMask denoisers; /* Supported denoiser types. */
   int cpu_threads;
   vector<DeviceInfo> multi_devices;
@@ -92,6 +93,7 @@ class DeviceInfo {
     has_osl = false;
     has_profiling = false;
     has_peer_memory = false;
+    has_gpu_queue = false;
     denoisers = DENOISER_NONE;
   }
 
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index 1a573be5367..1b5a012ec87 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -19,6 +19,7 @@
 #include <array>
 
 #include "device/device.h"
+#include "device/device_queue.h"
 #include "integrator/pass_accessor_cpu.h"
 #include "render/buffers.h"
 #include "util/util_array.h"
@@ -40,12 +41,6 @@ OIDNDenoiser::OIDNDenoiser(Device *path_trace_device, const DenoiseParams &param
   DCHECK(openimagedenoise_supported()) << "OpenImageDenoiser is not supported on this platform.";
 }
 
-OIDNDenoiser::~OIDNDenoiser()
-{
-  /* NOTE: Keep the destructor here so that it has access to the State which is an incomplete as
-   * per the OIDN denoiser header. */
-}
-
 #ifdef WITH_OPENIMAGEDENOISE
 static bool oidn_progress_monitor_function(void *user_ptr, double /*n*/)
 {
@@ -547,6 +542,39 @@ class OIDNDenoiseContext {
 };
 #endif
 
+static unique_ptr<DeviceQueue> create_device_queue(const RenderBuffers *render_buffers)
+{
+  Device *device = render_buffers->buffer.device;
+  if (device->info.has_gpu_queue) {
+    return device->gpu_queue_create();
+  }
+  return nullptr;
+}
+
+static void copy_render_buffers_from_device(unique_ptr<DeviceQueue> &queue,
+                                            RenderBuffers *render_buffers)
+{
+  if (queue) {
+    queue->copy_from_device(render_buffers->buffer);
+    queue->synchronize();
+  }
+  else {
+    render_buffers->copy_from_device();
+  }
+}
+
+static void copy_render_buffers_to_device(unique_ptr<DeviceQueue> &queue,
+                                          RenderBuffers *render_buffers)
+{
+  if (queue) {
+    queue->copy_to_device(render_buffers->buffer);
+    queue->synchronize();
+  }
+  else {
+    render_buffers->copy_to_device();
+  }
+}
+
 bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
                                   RenderBuffers *render_buffers,
                                   const int num_samples,
@@ -554,8 +582,9 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
 {
   thread_scoped_lock lock(mutex_);
 
-  /* Copy pixels from compute device to CPU (no-op for CPU device). */
-  render_buffers->buffer.copy_from_device();
+  /* Make sure the host-side data is available for denoising. */
+  unique_ptr<DeviceQueue> queue = create_device_queue(render_buffers);
+  copy_render_buffers_from_device(queue, render_buffers);
 
 #ifdef WITH_OPENIMAGEDENOISE
   OIDNDenoiseContext context(
@@ -582,7 +611,7 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
 
     /* TODO: It may be possible to avoid this copy, but we have to ensure that when other code
      * copies data from the device it doesn't overwrite the denoiser buffers. */
-    render_buffers->buffer.copy_to_device();
+    copy_render_buffers_to_device(queue, render_buffers);
   }
 #endif
 
diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h
index 2448a4c362b..566e761ae79 100644
--- a/intern/cycles/integrator/denoiser_oidn.h
+++ b/intern/cycles/integrator/denoiser_oidn.h
@@ -30,7 +30,6 @@ class OIDNDenoiser : public Denoiser {
   class State;
 
   OIDNDenoiser(Device *path_trace_device, const DenoiseParams &params);
-  ~OIDNDenoiser();
 
   virtual bool denoise_buffer(const BufferParams &buffer_params,
                               RenderBuffers *render_buffers,



More information about the Bf-blender-cvs mailing list