[Bf-blender-cvs] [501048d57e5] cycles-x: Fixes for pass handling in Cycles X

Thu Jul 29 15:50:48 CEST 2021

Commit: 501048d57e563487d2e240cd0d77a3d54080c2a5
Author: Sergey Sharybin
Date:   Thu Jul 29 15:33:24 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB501048d57e563487d2e240cd0d77a3d54080c2a5

Fixes for pass handling in Cycles X

- Register Shadow Catcher pass as 3-component RGB color.
  This matches how the pass is stored and fixes the uninitialized alpha channel.

- Don't write alpha channel in denoiser when the denoising input is a
  3-component color pass.

- Add safety asserts in the film conversion kernels.

Differential Revision: https://developer.blender.org/D12078

===================================================================

M	intern/cycles/blender/addon/engine.py
M	intern/cycles/blender/blender_sync.cpp
M	intern/cycles/device/optix/device_impl.cpp
M	intern/cycles/integrator/denoiser_oidn.cpp
M	intern/cycles/kernel/device/cuda/kernel.cu
M	intern/cycles/kernel/kernel_film.h

===================================================================

diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index 6eed1277a31..98c032da4e9 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -214,7 +214,7 @@ def list_render_passes(scene, srl):
     if crl.pass_debug_sample_count:            yield ("Debug Sample Count",  "X",    'VALUE')
     if crl.use_pass_volume_direct:             yield ("VolumeDir",           "RGB",  'COLOR')
     if crl.use_pass_volume_indirect:           yield ("VolumeInd",           "RGB",  'COLOR')
-    if crl.use_pass_shadow_catcher:            yield ("Shadow Catcher",      "RGBA", 'COLOR')
+    if crl.use_pass_shadow_catcher:            yield ("Shadow Catcher",      "RGB",  'COLOR')
 
     # Cryptomatte passes.
     crypto_depth = (srl.pass_cryptomatte_depth + 1) // 2
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index ebea976f134..ded8a7cdd1f 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -642,11 +642,11 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
   }
 
   if (get_boolean(crl, "use_pass_shadow_catcher")) {
-    b_engine.add_pass("Shadow Catcher", 4, "RGBA", b_view_layer.name().c_str());
+    b_engine.add_pass("Shadow Catcher", 3, "RGB", b_view_layer.name().c_str());
     Pass::add_denoising_read(passes, PASS_SHADOW_CATCHER, "Shadow Catcher");
 
     if (add_denoised_passes) {
-      b_engine.add_pass("Noisy Shadow Catcher", 4, "RGBA", b_view_layer.name().c_str());
+      b_engine.add_pass("Noisy Shadow Catcher", 3, "RGB", b_view_layer.name().c_str());
       Pass::add(passes, PASS_SHADOW_CATCHER, "Noisy Shadow Catcher");
     }
   }
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 42efa690acf..60c2175937a 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -598,6 +598,7 @@ class OptiXDevice::DenoisePass {
     denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
 
     const PassInfo pass_info = Pass::get_info(type);
+    num_components = pass_info.num_components;
     use_compositing = pass_info.use_compositing;
     use_denoising_albedo = pass_info.use_denoising_albedo;
   }
@@ -607,6 +608,7 @@ class OptiXDevice::DenoisePass {
   int noisy_offset;
   int denoised_offset;
 
+  int num_components;
   bool use_compositing;
   bool use_denoising_albedo;
 };
@@ -797,6 +799,7 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
                   const_cast<int *>(&pass.noisy_offset),
                   const_cast<int *>(&pass.denoised_offset),
                   const_cast<int *>(&context.pass_sample_count),
+                  const_cast<int *>(&pass.num_components),
                   const_cast<bool *>(&pass.use_compositing)};
 
   return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index 214be37f5ca..bc1fb8c112a 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -129,6 +129,7 @@ class OIDNPass {
     need_scale = (type == PASS_DENOISING_ALBEDO || type == PASS_DENOISING_NORMAL);
 
     const PassInfo pass_info = Pass::get_info(type);
+    num_components = pass_info.num_components;
     use_compositing = pass_info.use_compositing;
     use_denoising_albedo = pass_info.use_denoising_albedo;
   }
@@ -145,6 +146,7 @@ class OIDNPass {
 
   PassType type = PASS_NONE;
   PassMode mode = PassMode::NOISY;
+  int num_components = -1;
   bool use_compositing = false;
   bool use_denoising_albedo = true;
 
@@ -426,6 +428,8 @@ class OIDNDenoiseContext {
    * back. */
   void postprocess_output(const OIDNPass &oidn_input_pass, const OIDNPass &oidn_output_pass)
   {
+    kernel_assert(oidn_input_pass.num_components == oidn_output_pass.num_components);
+
     const int64_t x = buffer_params_.full_x;
     const int64_t y = buffer_params_.full_y;
     const int64_t width = buffer_params_.width;
@@ -459,10 +463,13 @@ class OIDNDenoiseContext {
           denoised_pixel[2] = denoised_pixel[2] * pixel_scale;
         }
 
-        /* Currently compositing passes are either 3-component (derived by dividing light passes)
-         * or do not have transparency (shadow catcher). Implicitly rely on this logic, as it
-         * simplifies logic and avoids extra memory allocation. */
-        if (!oidn_input_pass.use_compositing) {
+        if (oidn_output_pass.num_components == 3) {
+          /* Pass without alpha channel. */
+        }
+        else if (!oidn_input_pass.use_compositing) {
+          /* Currently compositing passes are either 3-component (derived by dividing light passes)
+           * or do not have transparency (shadow catcher). Implicitly rely on this logic, as it
+           * simplifies logic and avoids extra memory allocation. */
           const float *noisy_pixel = buffer_pixel + oidn_input_pass.offset;
           denoised_pixel[3] = noisy_pixel[3];
         }
diff --git a/intern/cycles/kernel/device/cuda/kernel.cu b/intern/cycles/kernel/device/cuda/kernel.cu
index a43d41ff5c7..556ea0552ad 100644
--- a/intern/cycles/kernel/device/cuda/kernel.cu
+++ b/intern/cycles/kernel/device/cuda/kernel.cu
@@ -779,6 +779,7 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_KERNEL_BLOCK_NUM_THREADS,
                                          int pass_noisy,
                                          int pass_denoised,
                                          int pass_sample_count,
+                                         int num_components,
                                          bool use_compositing)
 {
   const int work_index = ccl_global_id(0);
@@ -806,10 +807,13 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_KERNEL_BLOCK_NUM_THREADS,
   denoised_pixel[1] *= pixel_scale;
   denoised_pixel[2] *= pixel_scale;
 
-  /* Currently compositing passes are either 3-component (derived by dividing light passes)
-   * or do not have transparency (shadow catcher). Implicitly rely on this logic, as it
-   * simplifies logic and avoids extra memory allocation. */
-  if (!use_compositing) {
+  if (num_components == 3) {
+    /* Pass without alpha channel. */
+  }
+  else if (!use_compositing) {
+    /* Currently compositing passes are either 3-component (derived by dividing light passes)
+     * or do not have transparency (shadow catcher). Implicitly rely on this logic, as it
+     * simplifies logic and avoids extra memory allocation. */
     const float *noisy_pixel = buffer + pass_noisy;
     denoised_pixel[3] = noisy_pixel[3];
   }
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index 00607d7a970..0dae7a86de1 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -99,6 +99,7 @@ ccl_device_inline void film_get_pass_pixel_depth(const KernelFilmConvert *ccl_re
                                                  ccl_global const float *ccl_restrict buffer,
                                                  float *ccl_restrict pixel)
 {
+  kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
@@ -114,6 +115,7 @@ ccl_device_inline void film_get_pass_pixel_mist(const KernelFilmConvert *ccl_res
                                                 ccl_global const float *ccl_restrict buffer,
                                                 float *ccl_restrict pixel)
 {
+  kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
@@ -136,6 +138,7 @@ ccl_device_inline void film_get_pass_pixel_sample_count(
    * number of samples was reached (for examples when number of samples is set to 0 in
    * viewport). */
 
+  kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float *in = buffer + kfilm_convert->pass_offset;
@@ -149,6 +152,7 @@ ccl_device_inline void film_get_pass_pixel_float(const KernelFilmConvert *ccl_re
                                                  ccl_global const float *ccl_restrict buffer,
                                                  float *ccl_restrict pixel)
 {
+  kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
@@ -168,6 +172,7 @@ ccl_device_inline void film_get_pass_pixel_divide_even_color(
     ccl_global const float *ccl_restrict buffer,
     float *ccl_restrict pixel)
 {
+  kernel_assert(kfilm_convert->num_components >= 3);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
   kernel_assert(kfilm_convert->pass_divide != PASS_UNUSED);
 
@@ -188,6 +193,7 @@ ccl_device_inline void film_get_pass_pixel_float3(const KernelFilmConvert *ccl_r
                                                   ccl_global const float *ccl_restrict buffer,
                                                   float *ccl_restrict pixel)
 {
+  kernel_assert(kfilm_convert->num_components >= 3);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
@@ -210,6 +216,7 @@ ccl_device_inline void film_get_pass_pixel_motion(const KernelFilmConvert *ccl_r
                                                   ccl_global const float *ccl_restrict buffer,
                                                   float *ccl_restrict pixel)
 {
+  kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
   kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED);
 
@@ -232,6 +239,7 @@ ccl_device_inline void film_get_pass_pixel_cryptomatte(const KernelFilmConvert *
                                                        ccl_global const float *

@@ Diff output truncated at 10240 characters. @@