[Bf-blender-cvs] [af54bbd61c7] master: Cycles: Rework tile scheduling for denoising

Patrick Mours noreply at git.blender.org
Fri Feb 28 16:12:55 CET 2020


Commit: af54bbd61c769c69891c9b39df19eb3cad9dafe2
Author: Patrick Mours
Date:   Wed Feb 26 16:30:42 2020 +0100
Branches: master
https://developer.blender.org/rBaf54bbd61c769c69891c9b39df19eb3cad9dafe2

Cycles: Rework tile scheduling for denoising

This fixes denoising being delayed until after all rendering has finished. Instead, tile-based
denoising is now part of the "RENDER" task again, so that it is all in one task and does not
cause issues with dedicated task pools where tasks are serialized.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D6940

===================================================================

M	intern/cycles/device/cuda/device_cuda_impl.cpp
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_multi.cpp
M	intern/cycles/device/device_optix.cpp
M	intern/cycles/device/device_task.cpp
M	intern/cycles/device/device_task.h
M	intern/cycles/device/opencl/device_opencl_impl.cpp
M	intern/cycles/render/buffers.h
M	intern/cycles/render/session.cpp
M	intern/cycles/render/session.h
M	intern/cycles/render/tile.cpp
M	intern/cycles/render/tile.h

===================================================================

diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index f6a4f93a690..4a7c45d8b93 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -2144,7 +2144,7 @@ void CUDADevice::thread_run(DeviceTask *task)
 {
   CUDAContextScope scope(this);
 
-  if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
+  if (task->type == DeviceTask::RENDER) {
     DeviceRequestedFeatures requested_features;
     if (use_split_kernel()) {
       if (split_kernel == NULL) {
@@ -2159,7 +2159,7 @@ void CUDADevice::thread_run(DeviceTask *task)
     RenderTile tile;
     DenoisingTask denoising(this, *task);
 
-    while (task->acquire_tile(this, tile)) {
+    while (task->acquire_tile(this, tile, task->tile_types)) {
       if (tile.task == RenderTile::PATH_TRACE) {
         if (use_split_kernel()) {
           device_only_memory<uchar> void_buffer(this, "void_buffer");
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 795781ee072..1c9d2227ac3 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -511,7 +511,7 @@ class CPUDevice : public Device {
 
   void thread_run(DeviceTask *task)
   {
-    if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE)
+    if (task->type == DeviceTask::RENDER)
       thread_render(*task);
     else if (task->type == DeviceTask::SHADER)
       thread_shader(*task);
@@ -927,7 +927,7 @@ class CPUDevice : public Device {
     DenoisingTask denoising(this, task);
     denoising.profiler = &kg->profiler;
 
-    while (task.acquire_tile(this, tile)) {
+    while (task.acquire_tile(this, tile, task.tile_types)) {
       if (tile.task == RenderTile::PATH_TRACE) {
         if (use_split_kernel) {
           device_only_memory<uchar> void_buffer(this, "void_buffer");
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 8226221ea08..0044610eeb4 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -482,11 +482,24 @@ class MultiDevice : public Device {
 
   void task_add(DeviceTask &task)
   {
-    list<SubDevice> &task_devices = denoising_devices.empty() ||
-                                            (task.type != DeviceTask::DENOISE &&
-                                             task.type != DeviceTask::DENOISE_BUFFER) ?
-                                        devices :
-                                        denoising_devices;
+    list<SubDevice> task_devices = devices;
+    if (!denoising_devices.empty()) {
+      if (task.type == DeviceTask::DENOISE_BUFFER) {
+        /* Denoising tasks should be redirected to the denoising devices entirely. */
+        task_devices = denoising_devices;
+      }
+      else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
+        const uint tile_types = task.tile_types;
+        /* For normal rendering tasks only redirect the denoising part to the denoising devices.
+         * Do not need to split the task here, since they all run through 'acquire_tile'. */
+        task.tile_types = RenderTile::DENOISE;
+        foreach (SubDevice &sub, denoising_devices) {
+          sub.device->task_add(task);
+        }
+        /* Rendering itself should still be executed on the rendering devices. */
+        task.tile_types = tile_types ^ RenderTile::DENOISE;
+      }
+    }
 
     list<DeviceTask> tasks;
     task.split(tasks, task_devices.size());
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 0121b89e9d8..61a5c74f69e 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -569,9 +569,14 @@ class OptiXDevice : public CUDADevice {
     if (have_error())
       return;  // Abort early if there was an error previously
 
-    if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) {
+    if (task.type == DeviceTask::RENDER) {
+      if (thread_index != 0) {
+        // Only execute denoising in a single thread (see also 'task_add')
+        task.tile_types &= ~RenderTile::DENOISE;
+      }
+
       RenderTile tile;
-      while (task.acquire_tile(this, tile)) {
+      while (task.acquire_tile(this, tile, task.tile_types)) {
         if (tile.task == RenderTile::PATH_TRACE)
           launch_render(task, tile, thread_index);
         else if (tile.task == RenderTile::DENOISE)
@@ -1451,7 +1456,7 @@ class OptiXDevice : public CUDADevice {
       return;
     }
 
-    if (task.type == DeviceTask::DENOISE || task.type == DeviceTask::DENOISE_BUFFER) {
+    if (task.type == DeviceTask::DENOISE_BUFFER) {
       // Execute denoising in a single thread (e.g. to avoid race conditions during creation)
       task_pool.push(new OptiXDeviceTask(this, task, 0));
       return;
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 8f15e8c8c1e..36522b874ab 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
   if (type == SHADER) {
     num = min(shader_w, num);
   }
-  else if (type == RENDER || type == DENOISE) {
+  else if (type == RENDER) {
   }
   else {
     num = min(h, num);
@@ -94,7 +94,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
       tasks.push_back(task);
     }
   }
-  else if (type == RENDER || type == DENOISE) {
+  else if (type == RENDER) {
     for (int i = 0; i < num; i++)
       tasks.push_back(*this);
   }
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 0f718528b86..972f6131092 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -64,7 +64,7 @@ class DenoiseParams {
 
 class DeviceTask : public Task {
  public:
-  typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type;
+  typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type;
   Type type;
 
   int x, y, w, h;
@@ -90,7 +90,7 @@ class DeviceTask : public Task {
 
   void update_progress(RenderTile *rtile, int pixel_samples = -1);
 
-  function<bool(Device *device, RenderTile &)> acquire_tile;
+  function<bool(Device *device, RenderTile &, uint)> acquire_tile;
   function<void(long, int)> update_progress_sample;
   function<void(RenderTile &)> update_tile_sample;
   function<void(RenderTile &)> release_tile;
@@ -98,6 +98,7 @@ class DeviceTask : public Task {
   function<void(RenderTile *, Device *)> map_neighbor_tiles;
   function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
 
+  uint tile_types;
   DenoiseParams denoising;
   bool denoising_from_render;
   vector<int> denoising_frames;
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index 012f6dbe114..68cdfd5238c 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -1308,7 +1308,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
 {
   flush_texture_buffers();
 
-  if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
+  if (task->type == DeviceTask::RENDER) {
     RenderTile tile;
     DenoisingTask denoising(this, *task);
 
@@ -1317,7 +1317,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
     kgbuffer.alloc_to_device(1);
 
     /* Keep rendering tiles until done. */
-    while (task->acquire_tile(this, tile)) {
+    while (task->acquire_tile(this, tile, task->tile_types)) {
       if (tile.task == RenderTile::PATH_TRACE) {
         assert(tile.task == RenderTile::PATH_TRACE);
         scoped_timer timer(&tile.buffers->render_time);
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 1042b42810f..42efb031843 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -130,7 +130,7 @@ class DisplayBuffer {
 
 class RenderTile {
  public:
-  typedef enum { PATH_TRACE, DENOISE } Task;
+  typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;
 
   Task task;
   int x, y, w, h;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index f45e6d68c97..0d1f8df3610 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -301,12 +301,7 @@ void Session::run_gpu()
       update_status_time();
 
       /* render */
-      render();
-
-      /* denoise */
-      if (need_denoise) {
-        denoise();
-      }
+      render(need_denoise);
 
       device->task_wait();
 
@@ -384,7 +379,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
   return false;
 }
 
-bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::Task task)
+bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types)
 {
   if (progress.get_cancel()) {
     if (params.progressive_refine == false) {
@@ -399,9 +394,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::T
   Tile *tile;
   int device_num = device->device_number(tile_device);
 
-  while (!tile_manager.next_tile(tile, device_num, task == RenderTile::DENOISE)) {
+  while (!tile_manager.next_tile(tile, device_num, tile_types)) {
     /* Wait for denoising tiles to become available */
-    if (task == RenderTile::DENOISE && !progress.get_cancel() && tile_manager.has_tiles()) {
+    if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) {
       denoising_cond.wait(tile_lock);
       continue;
     }
@@ -417,7 +412,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::T
   rtile.num_samples = tile_manager.state.num_samples;
   rtile.resolution = tile_manager.state.resolution_divider;
   rtile.tile_index = tile->index;
-  rtile.task = task;
+  rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
 
   tile_lock.unlock();
 
@@ -700,12 +695,7 @@ void Session::run_cpu()
       update_status_time();
 
       /* render */
-      render();
-
-

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list