[Bf-blender-cvs] [0b5a7c2e97c] cycles-x: Fix alpha in denoised shadow catcher pass in Cycles X

Tue Jul 6 17:38:23 CEST 2021

Commit: 0b5a7c2e97c0424d91d023c065ccc11998c78e67
Author: Sergey Sharybin
Date:   Tue Jul 6 14:34:56 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB0b5a7c2e97c0424d91d023c065ccc11998c78e67

Fix alpha in denoised shadow catcher pass in Cycles X

Seems it was wrong since the initial implementation.

The issue is that for the shadow catcher pass we can not copy alpha
from input pass as the pass is calculated based on other passes.

Decided to go with an implicit knowledge that composited passes are
always opaque. Saves some complications and memory by storing full
RGBA buffer for noisy composited passes.

Differential Revision: https://developer.blender.org/D11825

===================================================================

M	intern/cycles/device/optix/device_impl.cpp
M	intern/cycles/integrator/denoiser_oidn.cpp
M	intern/cycles/kernel/device/cuda/kernel.cu

===================================================================

diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 85b1934399c..49b5b7bfa7e 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -26,6 +26,7 @@
 #  include "render/hair.h"
 #  include "render/mesh.h"
 #  include "render/object.h"
+#  include "render/pass.h"
 #  include "render/scene.h"
 
 #  include "util/util_debug.h"
@@ -701,6 +702,7 @@ bool OptiXDevice::denoise_filter_convert_to_rgb(DenoiseContext &context, const D
 bool OptiXDevice::denoise_filter_convert_from_rgb(DenoiseContext &context, const DenoisePass &pass)
 {
   const BufferParams &buffer_params = context.buffer_params;
+  const PassInfo pass_info = Pass::get_info(pass.type);
 
   const int work_size = buffer_params.width * buffer_params.height;
 
@@ -716,7 +718,8 @@ bool OptiXDevice::denoise_filter_convert_from_rgb(DenoiseContext &context, const
                   const_cast<int *>(&context.num_samples),
                   const_cast<int *>(&pass.noisy_offset),
                   const_cast<int *>(&pass.denoised_offset),
-                  const_cast<int *>(&context.pass_sample_count)};
+                  const_cast<int *>(&context.pass_sample_count),
+                  const_cast<bool *>(&pass_info.use_compositing)};
 
   return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_CONVERT_FROM_RGB, work_size, args);
 }
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index fbb3939f65f..72bd7cfec61 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -344,20 +344,30 @@ class OIDNDenoiseContext {
       float *buffer_row = buffer_data + buffer_offset + y * row_stride;
       for (int x = 0; x < width; ++x) {
         float *buffer_pixel = buffer_row + x * pass_stride;
-        float *noisy_pixel = buffer_pixel + oidn_input_pass.offset;
         float *denoised_pixel = buffer_pixel + oidn_output_pass.offset;
 
         if (need_scale) {
-          float pixel_scale = has_pass_sample_count ?
-                                  __float_as_uint(buffer_pixel[pass_sample_count_]) :
-                                  num_samples_;
+          const float pixel_scale = has_pass_sample_count ?
+                                        __float_as_uint(buffer_pixel[pass_sample_count_]) :
+                                        num_samples_;
 
           denoised_pixel[0] = denoised_pixel[0] * pixel_scale;
           denoised_pixel[1] = denoised_pixel[1] * pixel_scale;
           denoised_pixel[2] = denoised_pixel[2] * pixel_scale;
         }
 
-        denoised_pixel[3] = noisy_pixel[3];
+        /* Currently compositing passes are either 3-component (derived by dividing light passes)
+         * or do not have transparency (shadow catcher). Implicitly rely on this logic, as it
+         * simplifies logic and avoids extra memory allocation. */
+        if (!oidn_input_pass.use_compositing) {
+          const float *noisy_pixel = buffer_pixel + oidn_input_pass.offset;
+          denoised_pixel[3] = noisy_pixel[3];
+        }
+        else {
+          /* Assigning to zero since this is a default alpha value for 3-component passes, and it
+           * is an opaque pixel for 4 component passes. */
+          denoised_pixel[3] = 0;
+        }
       }
     }
   }
diff --git a/intern/cycles/kernel/device/cuda/kernel.cu b/intern/cycles/kernel/device/cuda/kernel.cu
index 4fd0749f8f2..9d0106e0108 100644
--- a/intern/cycles/kernel/device/cuda/kernel.cu
+++ b/intern/cycles/kernel/device/cuda/kernel.cu
@@ -735,7 +735,8 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_KERNEL_BLOCK_NUM_THREADS,
                                         int num_samples,
                                         int pass_noisy,
                                         int pass_denoised,
-                                        int pass_sample_count)
+                                        int pass_sample_count,
+                                        bool use_compositing)
 {
   const int work_index = ccl_global_id(0);
   const int y = work_index / sw;
@@ -758,13 +759,25 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_KERNEL_BLOCK_NUM_THREADS,
     pixel_scale = __float_as_uint(buffer[pass_sample_count]);
   }
 
-  const float *noisy_pixel = buffer + pass_noisy;
   float *denoised_pixel = buffer + pass_denoised;
 
   denoised_pixel[0] = in[0] * pixel_scale;
   denoised_pixel[1] = in[1] * pixel_scale;
   denoised_pixel[2] = in[2] * pixel_scale;
-  denoised_pixel[3] = noisy_pixel[3];
+
+  /* Currently compositing passes are either 3-component (derived by dividing light passes)
+   * or do not have transparency (shadow catcher). Implicitly rely on this logic, as it
+   * simplifies logic and avoids extra memory allocation. */
+  if (!use_compositing) {
+    const float *noisy_pixel = buffer + pass_noisy;
+    denoised_pixel[3] = noisy_pixel[3];
+  }
+  else {
+    /* Assigning to zero since this is a default alpha value for 3-component passes, and it
+     * is an opaque pixel for 4 component passes. */
+
+    denoised_pixel[3] = 0;
+  }
 }
 
 #endif