[Bf-blender-cvs] [c77529091f8] cycles-x: Cycles X: Initial implementation of shadow catcher

Sergey Sharybin noreply at git.blender.org
Fri May 7 17:24:13 CEST 2021


Commit: c77529091f8d658f7bf5590d136a2c19c3c02559
Author: Sergey Sharybin
Date:   Tue May 4 15:17:44 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBc77529091f8d658f7bf5590d136a2c19c3c02559

Cycles X: Initial implementation of shadow catcher

It is re-implemented in a way which does differential rendering: the
path is split at the first non-transparent bounce when a shadow catcher
object is hit. One path tracks the state of the shadow catcher object
without other objects affecting it. The other path renders the scene
as usual.
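
A minimal conceptual sketch of that split in C++ (the struct, flag
names and helper below are illustrative only, not the kernel's actual
integrator state API):

  /* Illustrative stand-in for the per-path integrator state. */
  struct PathState {
    unsigned flags = 0;
    /* ... throughput, random number state, ray, etc. ... */
  };

  /* Hypothetical flags: the catcher copy ignores everything that is
   * not a shadow catcher and writes to the dedicated catcher pass. */
  enum : unsigned {
    PATH_CATCHER_ONLY_VISIBILITY = 1u << 0,
    PATH_WRITE_SHADOW_CATCHER_PASS = 1u << 1,
  };

  /* Called when a shadow catcher object is hit before any
   * non-transparent bounce: duplicate the state so both branches
   * continue from the same point. */
  void split_at_shadow_catcher(const PathState &state,
                               PathState *catcher_state)
  {
    *catcher_state = state;
    catcher_state->flags |= PATH_CATCHER_ONLY_VISIBILITY |
                            PATH_WRITE_SHADOW_CATCHER_PASS;
  }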

When the user accesses the shadow catcher pass, the result is
calculated as the combined pass divided by the pass that contains only
the shadow catcher object. This gives a pass which is to be multiplied
with the backdrop to transfer the caught light and shadows.
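
Per pixel that works out to a simple ratio; a hedged sketch (names are
illustrative, not the actual pass accessor code), assuming both passes
are already averaged over their samples:

  /* combined: the scene rendered as usual, so shadows and bounced
   * light from other objects affect the catcher.
   * catcher_only: the split path where nothing else affects the
   * catcher. */
  float shadow_catcher_ratio(float combined, float catcher_only)
  {
    /* Avoid division by zero where the catcher receives no light. */
    if (catcher_only <= 0.0f) {
      return 1.0f;
    }
    /* < 1 where objects shadow the catcher, > 1 where they bounce
     * extra light onto it. */
    return combined / catcher_only;
  }

Multiplying the backdrop by this ratio per channel darkens shadowed
regions and brightens regions that receive bounced light.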

For the artificial objects which are to be added to the backdrop
there is an internal matte pass. This pass is presented to artists as
the combined pass, so they can alpha-over it onto the footage as usual.
In the future we might implement a shadow approximation to regain the
old-style shadow catcher behavior (maybe for compatibility reasons,
maybe for artistic reasons).
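
The composite is then the usual alpha-over of that matte pass onto the
relit backdrop; a per-channel sketch, assuming premultiplied alpha
(illustrative, not the actual compositor setup):

  /* matte: the internal matte pass presented as "combined"
   * (premultiplied by alpha); backdrop: footage already multiplied by
   * the shadow catcher ratio. */
  float alpha_over(float matte, float alpha, float backdrop)
  {
    return matte + (1.0f - alpha) * backdrop;
  }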

This process is a bit more involved than the old single-pass approach,
but it allows dealing with indirect light and caustics.

Example file: {F10059890}
Example result: {F10055172}

The footage is the Old_Factory MVI_4005.mov from

  https://cloud.blender.org/training/track-match-blend/56040f9b044a2a00ad6c660d

Differential Revision: https://developer.blender.org/D11172

===================================================================

M	intern/cycles/blender/addon/engine.py
M	intern/cycles/blender/addon/properties.py
M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/blender_sync.cpp
M	intern/cycles/integrator/path_trace_work_cpu.cpp
M	intern/cycles/integrator/path_trace_work_gpu.cpp
M	intern/cycles/integrator/path_trace_work_gpu.h
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/bvh/bvh_util.h
M	intern/cycles/kernel/device/cuda/kernel.cu
M	intern/cycles/kernel/integrator/integrator_init_from_camera.h
M	intern/cycles/kernel/integrator/integrator_intersect_closest.h
M	intern/cycles/kernel/integrator/integrator_shade_surface.h
M	intern/cycles/kernel/integrator/integrator_state.h
M	intern/cycles/kernel/integrator/integrator_state_flow.h
M	intern/cycles/kernel/integrator/integrator_state_util.h
M	intern/cycles/kernel/kernel_accumulate.h
M	intern/cycles/kernel/kernel_emission.h
M	intern/cycles/kernel/kernel_passes.h
M	intern/cycles/kernel/kernel_path_state.h
A	intern/cycles/kernel/kernel_shadow_catcher.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/film.cpp
M	intern/cycles/render/integrator.cpp
M	intern/cycles/render/pass_accessor.cpp
M	intern/cycles/render/pass_accessor.h
M	intern/cycles/render/scene.cpp
M	intern/cycles/render/scene.h

===================================================================

diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index 8863261f108..c2bee86f7b6 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -264,6 +264,7 @@ def list_render_passes(scene, srl):
     if crl.pass_debug_sample_count:            yield ("Debug Sample Count",            "X",   'VALUE')
     if crl.use_pass_volume_direct:             yield ("VolumeDir",                     "RGB", 'COLOR')
     if crl.use_pass_volume_indirect:           yield ("VolumeInd",                     "RGB", 'COLOR')
+    if crl.use_pass_shadow_catcher:            yield ("Shadow Catcher",                "RGBA", 'COLOR')
 
     # Cryptomatte passes.
     crypto_depth = (srl.pass_cryptomatte_depth + 1) // 2
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index f8f30167e03..a6d27b58f24 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1209,6 +1209,13 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
         update=update_render_passes,
     )
 
+    use_pass_shadow_catcher: BoolProperty(
+        name="Shadow Catcher",
+        description="Pass containing shadows and light which is to be multiplied into backdrop",
+        default=False,
+        update=update_render_passes,
+    )
+
     use_denoising: BoolProperty(
         name="Use Denoising",
         description="Denoise the rendered image",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 55f8b13c7af..64f4bc7d08b 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -784,6 +784,7 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
         col.prop(view_layer, "use_pass_environment")
         col.prop(view_layer, "use_pass_shadow")
         col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
+        col.prop(cycles_view_layer, "use_pass_shadow_catcher")
 
 
 class CYCLES_RENDER_PT_passes_crypto(CyclesButtonsPanel, ViewLayerCryptomattePanel, Panel):
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 9b2ff9a3312..f98eb393373 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -522,6 +522,8 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
   MAP_PASS("Denoising Normal", PASS_DENOISING_NORMAL);
   MAP_PASS("Denoising Albedo", PASS_DENOISING_ALBEDO);
 
+  MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
+
   MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
 
   MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
@@ -604,6 +606,11 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
     Pass::add(PASS_VOLUME_INDIRECT, passes, "VolumeInd");
   }
 
+  if (get_boolean(crl, "use_pass_shadow_catcher")) {
+    b_engine.add_pass("Shadow Catcher", 4, "RGBA", b_view_layer.name().c_str());
+    Pass::add(PASS_SHADOW_CATCHER, passes, "Shadow Catcher");
+  }
+
   /* Cryptomatte stores two ID/weight pairs per RGBA layer.
    * User facing parameter is the number of pairs. */
   int crypto_depth = divide_up(min(16, b_view_layer.pass_cryptomatte_depth()), 2);
diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp
index 2eab45ab266..247cbe7b054 100644
--- a/intern/cycles/integrator/path_trace_work_cpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_cpu.cpp
@@ -21,6 +21,7 @@
 
 #include "render/buffers.h"
 #include "render/gpu_display.h"
+#include "render/scene.h"
 
 #include "util/util_logging.h"
 #include "util/util_tbb.h"
@@ -101,8 +102,12 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
                                                     const KernelWorkTile &work_tile,
                                                     const int samples_num)
 {
-  IntegratorState integrator_state;
-  IntegratorState *state = &integrator_state;
+  const bool has_shadow_catcher = device_scene_->data.integrator.has_shadow_catcher;
+
+  IntegratorState integrator_states[2];
+
+  IntegratorState *state = &integrator_states[0];
+  IntegratorState *shadow_catcher_state = &integrator_states[1];
 
   KernelWorkTile sample_work_tile = work_tile;
   float *render_buffer = render_buffers_->buffer.data();
@@ -119,6 +124,10 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
 
     kernels_.integrator_megakernel(kernel_globals, state, render_buffer);
 
+    if (has_shadow_catcher) {
+      kernels_.integrator_megakernel(kernel_globals, shadow_catcher_state, render_buffer);
+    }
+
     ++sample_work_tile.start_sample;
   }
 }
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 0289c634ae6..5d37cd2caf4 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -47,7 +47,6 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
       max_active_path_index_(0)
 {
   memset(&integrator_state_gpu_, 0, sizeof(integrator_state_gpu_));
-  work_tile_scheduler_.set_max_num_path_states(max_num_paths_);
 }
 
 void PathTraceWorkGPU::alloc_integrator_soa()
@@ -141,6 +140,8 @@ void PathTraceWorkGPU::init_execution()
   alloc_integrator_queue();
   alloc_integrator_sorting();
 
+  integrator_state_gpu_.shadow_catcher_state_offset = get_shadow_catcher_state_offset();
+
   /* Copy to device side struct in constant memory. */
   device_->const_copy_to(
       "__integrator_state", &integrator_state_gpu_, sizeof(integrator_state_gpu_));
@@ -148,6 +149,10 @@ void PathTraceWorkGPU::init_execution()
 
 void PathTraceWorkGPU::render_samples(int start_sample, int samples_num)
 {
+  /* Update number of available states based on the updated content of the scene (shadow catcher
+   * object might have been added or removed). */
+  work_tile_scheduler_.set_max_num_path_states(get_max_num_camera_paths());
+
   work_tile_scheduler_.reset(effective_buffer_params_, start_sample, samples_num);
 
   /* TODO: set a hard limit in case of undetected kernel failures? */
@@ -195,13 +200,19 @@ bool PathTraceWorkGPU::enqueue_path_iteration()
     return false;
   }
 
-  const float megakernel_threshold = 0.02f;
-  const bool use_megakernel = queue_->kernel_available(DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) &&
-                              (num_paths < megakernel_threshold * max_num_paths_);
+  /* Megakernel does not support state split, so disable for the shadow catcher.
+   * It is possible to make it work, but currently we are planning to make the megakernel
+   * obsolete for the GPU rendering, so we don't spend time on making shadow catcher to work
+   * there */
+  if (!has_shadow_catcher()) {
+    const float megakernel_threshold = 0.02f;
+    const bool use_megakernel = queue_->kernel_available(DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) &&
+                                (num_paths < megakernel_threshold * max_num_paths_);
 
-  if (use_megakernel) {
-    enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL);
-    return true;
+    if (use_megakernel) {
+      enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL);
+      return true;
+    }
   }
 
   /* Find kernel to execute, with max number of queued paths. */
@@ -378,7 +389,7 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, int queued_kern
   /* TODO: this could be smaller for terminated paths based on amount of work we want
    * to schedule. */
   const int work_size = (kernel == DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY) ?
-                            max_num_paths_ :
+                            min(max_num_paths_, get_max_num_camera_paths()) :
                             max_active_path_index_;
 
   void *d_queued_paths = (void *)queued_paths_.device_pointer;
@@ -410,13 +421,15 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
 
   vector<KernelWorkTile> work_tiles;
 
+  const int max_num_camera_paths = get_max_num_camera_paths();
+
   /* Schedule when we're out of paths or there are too few paths to keep the
    * device occupied. */
-  if (num_paths == 0 || num_paths < regenerate_threshold * max_num_paths_) {
+  if (num_paths == 0 || num_paths < regenerate_threshold * max_num_camera_paths) {
     /* Get work tiles until the maximum number of path is reached. */
-    while (num_paths < max_num_paths_) {
+    while (num_paths < max_num_camera_paths) {
       KernelWorkTile work_tile;
-      if (work_tile_scheduler_.get_work(&work_tile, max_num_paths_ - num_paths)) {
+      if (work_tile_scheduler_.get_work(&work_tile, max_num_camera_paths - num_paths)) {
         work_tiles.push_back(work_tile);
         num_paths += work_tile.w * work_tile.h * work_tile.num_samples;
       }
@@ -490,7 +503,7 @@ void PathTraceWorkGPU::enqueue_work_tiles(DeviceKernel kernel,
 
     /* Offset work tile and path index pointers for next tile. */
     num_paths += tile_work_size;
-    DCHECK_LE(num_paths, max_num_paths_);
+    DCHECK_LE(num_paths, get_max_num_camera_paths());
 
     /* TODO: this pointer manipulation won't work for OpenCL. */
     d_work_tile = (void *)(((KernelWorkTile *)d_work_tile) + 1);
@@ -501,7 +514,13 @@ void PathTraceWorkGPU::enqueue_work_tiles(DeviceKernel kernel,
 
   /* TODO: this could be computed more accurately using on the last entry
    * in the queued_paths array passed to the kernel? */
-  max_active_path_index_ = min(max_active_path_index_ + num_paths, max_num_paths_);
+  /* When there is a shadow catcher in the scene provision that the shadow catcher state will
+   * become active at some point.
+   *
+   * TODO: What is more accurate approach here? What if the shadow catcher is hit after some
+   * transparent bounce? Do we need to calculate this somewhere else as well? */
+  max_active_path_index_ = min(
+      max_active_path_index_ + num_paths + get_shadow_catcher_state_offset(), max_num_paths_);
 }
 
 int PathTraceWorkGPU::get_num_active_paths()
@@ -511,12 +530,26 @@ int PathTraceWorkGPU::get_num_active_paths()
 
   int num_paths = 0;
   for (int i = 0; i < DEVICE

@@ Diff output truncated at 10240 characters. @@


