[Bf-blender-cvs] [632a7747cf5] cycles-x: Cycles X: Support approximate shadow catcher with background

Wed Jul 21 14:37:37 CEST 2021

Commit: 632a7747cf5aba1ddb7a352a4564eff53ccdc5a6
Author: Sergey Sharybin
Date:   Mon Jul 19 18:44:01 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB632a7747cf5aba1ddb7a352a4564eff53ccdc5a6

Cycles X: Support approximate shadow catcher with background

At the user level, this change behaves as if the approximate shadow
catcher matte pass is alpha-overed on top of the background pass.
This makes it easier to combine artificial objects with HDRI
footage.

The general idea is to detour the shadow catcher matte path through
the shade background kernel when needed.

The scheduling logic after the intersect closest kernel is not
particularly elegant, but it seems to be the best approach so far
from a performance point of view.

While performance of the shadow catcher case might be improved by
avoiding some redundant atomic operations, there is no performance
impact for the "regular" case (numbers are from an RTX 5000):

```
                              new                           cycles-x
bmw27.blend                   12.2091                       12.2334
classroom.blend               24.2948                       24.2565
pabellon.blend                11.1832                       11.1941
monster.blend                 13.2831                       13.1683
barbershop_interior.blend     18.8203                       18.904
junkshop.blend                26.5066                       26.4452
pvt_flat.blend                22.6359                       22.5914
```

There seems to be a deviation of about 2%: sometimes the new code is
2% slower, and sometimes the old code is 2% slower.

Differential Revision: https://developer.blender.org/D11971

===================================================================

M	intern/cycles/device/optix/device_impl.cpp
M	intern/cycles/integrator/denoiser_oidn.cpp
M	intern/cycles/integrator/pass_accessor.cpp
M	intern/cycles/integrator/pass_accessor.h
M	intern/cycles/integrator/path_trace_work.cpp
M	intern/cycles/kernel/bvh/bvh_util.h
M	intern/cycles/kernel/integrator/integrator_intersect_closest.h
M	intern/cycles/kernel/integrator/integrator_shade_background.h
M	intern/cycles/kernel/integrator/integrator_state_util.h
M	intern/cycles/kernel/kernel_accumulate.h
M	intern/cycles/kernel/kernel_film.h
M	intern/cycles/kernel/kernel_shadow_catcher.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/scene.cpp
M	intern/cycles/render/session.cpp

===================================================================

diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 5b9d99dd463..42efa690acf 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -742,6 +742,7 @@ void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass
    * on the approximation. The latter is not even possible because OptiX does not support
    * denoising of semi-transparent pixels. */
   pass_access_info.use_approximate_shadow_catcher = false;
+  pass_access_info.use_approximate_shadow_catcher_background = false;
   pass_access_info.show_active_pixels = false;
 
   /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index ca062ee3545..405587d4c0b 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -295,6 +295,7 @@ class OIDNDenoiseContext {
      * on the approximation. The latter is not even possible because OIDN does not support
      * denoising of semi-transparent pixels. */
     pass_access_info.use_approximate_shadow_catcher = false;
+    pass_access_info.use_approximate_shadow_catcher_background = false;
     pass_access_info.show_active_pixels = false;
 
     /* OIDN will perform an auto-exposure, so it is not required to know exact exposure configured
diff --git a/intern/cycles/integrator/pass_accessor.cpp b/intern/cycles/integrator/pass_accessor.cpp
index c80e73a13c6..c7c5e7ad303 100644
--- a/intern/cycles/integrator/pass_accessor.cpp
+++ b/intern/cycles/integrator/pass_accessor.cpp
@@ -16,7 +16,9 @@
 
 #include "integrator/pass_accessor.h"
 
+#include "render/background.h"
 #include "render/buffers.h"
+#include "render/film.h"
 #include "util/util_logging.h"
 
 // clang-format off
@@ -32,11 +34,14 @@ CCL_NAMESPACE_BEGIN
 
 PassAccessor::PassAccessInfo::PassAccessInfo(const Pass &pass,
                                              const Film &film,
+                                             const Background &background,
                                              const vector<Pass> &passes)
     : type(pass.type),
       mode(pass.mode),
       offset(Pass::get_offset(passes, pass)),
-      use_approximate_shadow_catcher(film.get_use_approximate_shadow_catcher())
+      use_approximate_shadow_catcher(film.get_use_approximate_shadow_catcher()),
+      use_approximate_shadow_catcher_background(use_approximate_shadow_catcher &&
+                                                !background.get_transparent())
 {
 }
 
@@ -263,6 +268,9 @@ void PassAccessor::init_kernel_film_convert(KernelFilmConvert *kfilm_convert,
   kfilm_convert->pass_shadow_catcher_matte = buffer_params.get_pass_offset(
       PASS_SHADOW_CATCHER_MATTE, mode);
 
+  /* Background is not denoised, so always use noisy pass. */
+  kfilm_convert->pass_background = buffer_params.get_pass_offset(PASS_BACKGROUND);
+
   if (pass_info.use_filter) {
     kfilm_convert->scale = 1.0f / num_samples_;
   }
@@ -280,6 +288,8 @@ void PassAccessor::init_kernel_film_convert(KernelFilmConvert *kfilm_convert,
   kfilm_convert->scale_exposure = kfilm_convert->scale * kfilm_convert->exposure;
 
   kfilm_convert->use_approximate_shadow_catcher = pass_access_info_.use_approximate_shadow_catcher;
+  kfilm_convert->use_approximate_shadow_catcher_background =
+      pass_access_info_.use_approximate_shadow_catcher_background;
   kfilm_convert->show_active_pixels = pass_access_info_.show_active_pixels;
 
   kfilm_convert->num_components = destination.num_components;
diff --git a/intern/cycles/integrator/pass_accessor.h b/intern/cycles/integrator/pass_accessor.h
index 964a51016f2..6ca7c8d9c05 100644
--- a/intern/cycles/integrator/pass_accessor.h
+++ b/intern/cycles/integrator/pass_accessor.h
@@ -24,6 +24,7 @@
 
 CCL_NAMESPACE_BEGIN
 
+class Background;
 class Film;
 class RenderBuffers;
 class BufferParams;
@@ -37,7 +38,10 @@ class PassAccessor {
   class PassAccessInfo {
    public:
     PassAccessInfo() = default;
-    PassAccessInfo(const Pass &pass, const Film &film, const vector<Pass> &passes);
+    PassAccessInfo(const Pass &pass,
+                   const Film &film,
+                   const Background &background,
+                   const vector<Pass> &passes);
 
     PassType type = PASS_NONE;
     PassMode mode = PassMode::NOISY;
@@ -48,6 +52,9 @@ class PassAccessor {
      */
     bool use_approximate_shadow_catcher = false;
 
+    /* When approximate shadow catcher matte is used alpha-over the result on top of background. */
+    bool use_approximate_shadow_catcher_background = false;
+
     bool show_active_pixels = false;
   };
 
diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp
index fe53b470fa6..e7a026d472c 100644
--- a/intern/cycles/integrator/path_trace_work.cpp
+++ b/intern/cycles/integrator/path_trace_work.cpp
@@ -151,6 +151,7 @@ bool PathTraceWork::set_render_tile_pixels(PassAccessor &pass_accessor,
 PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMode pass_mode) const
 {
   const KernelFilm &kfilm = device_scene_->data.film;
+  const KernelBackground &kbackground = device_scene_->data.background;
 
   PassAccessor::PassAccessInfo pass_access_info;
   pass_access_info.type = static_cast<PassType>(kfilm.display_pass_type);
@@ -165,6 +166,8 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
   }
 
   pass_access_info.use_approximate_shadow_catcher = kfilm.use_approximate_shadow_catcher;
+  pass_access_info.use_approximate_shadow_catcher_background =
+      kfilm.use_approximate_shadow_catcher && !kbackground.transparent;
   pass_access_info.show_active_pixels = kfilm.show_active_pixels;
 
   return pass_access_info;
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index 09b5d4d2852..21384457b16 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -137,14 +137,14 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc
   return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 }
 
-ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg,
-                                                   const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader_from_isect_prim(
+    const KernelGlobals *ccl_restrict kg, const int isect_prim)
 {
-  const int prim = kernel_tex_fetch(__prim_index, isect->prim);
+  const int prim = kernel_tex_fetch(__prim_index, isect_prim);
   int shader = 0;
 
 #ifdef __HAIR__
-  if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE)
+  if (kernel_tex_fetch(__prim_type, isect_prim) & PRIMITIVE_ALL_TRIANGLE)
 #endif
   {
     shader = kernel_tex_fetch(__tri_shader, prim);
@@ -159,6 +159,12 @@ ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_rest
   return shader & SHADER_MASK;
 }
 
+ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg,
+                                                   const Intersection *ccl_restrict isect)
+{
+  return intersection_get_shader_from_isect_prim(kg, isect->prim);
+}
+
 ccl_device_forceinline int intersection_get_object(const KernelGlobals *ccl_restrict kg,
                                                    const Intersection *ccl_restrict isect)
 {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index dad08de8590..e90a7b3f0f3 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -90,8 +90,29 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
     const int shader,
     const int shader_flags)
 {
-  /* Setup next kernel to execute. */
-  if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) {
+  /* Note on scheduling.
+   *
+   * When there is no shadow catcher split the scheduling is simple: schedule surface shading with
+   * or without raytrace support, depending on the shader used.
+   *
+   * When there is a shadow catcher split the general idea is to have the following configuration:
+   *
+   *  - Schedule surface shading kernel (with corresponding raytrace support) for the ray which
+   *    will trace shadow catcher object.
+   *
+   *  - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for
+   *    the matte ray.
+   *
+   *  - Otherwise schedule background shading kernel, so that we have a background to alpha-over
+   *    on. The background kernel will then schedule surface shading for the matte ray.
+   *
+   * Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for
+   * the matte path. */
+
+  const bool use_raytrace_kernel = ((shader_flags & SD_HAS_RAYTRACE) ||
+                                    (kernel_data.film.pass_ao != PASS_UNUSED));
+
+  if (use_raytrace_kernel) {
     INTEGRATOR_PATH_NEXT_SORTED(
         current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
   }
@@ -99,9 +120,27 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
     INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
   }
 
-  /* Setup shadow catcher. */
+#ifdef __SHADOW_CATCHER__
   const int object_flags = intersection_get_object_flags(kg, isect);
-  kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags);
+  if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
+    if (kernel_data.film.use_approximate_shadow_catcher && !kernel_data.background.transparent) {
+      INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+
+      if (use_raytrace_kernel) {
+        INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+      }
+      else {
+        INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+      }
+    }
+    else if 

@@ Diff output truncated at 10240 characters. @@