[Bf-blender-cvs] [d26d3cfe193] blender-v3.0-release: Fix T92868: Cycles catcher with transparency crashes

Sergey Sharybin noreply at git.blender.org
Thu Nov 11 15:24:54 CET 2021


Commit: d26d3cfe193793728cac77be9b44463a84a0f57e
Author: Sergey Sharybin
Date:   Wed Nov 10 17:18:55 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rBd26d3cfe193793728cac77be9b44463a84a0f57e

Fix T92868: Cycles catcher with transparency crashes

The issue was caused by splitting happening twice.

Fixed by checking for split flag which is assigned to the both states
during split.

The tricky part was to write catcher data at the moment of split: the
transparency and shadow catcher sample count is to be accumulated at
that point. Now it is happening in the `intersect_closest` kernel.
The downside is that render buffer is to be passed to the kernel, but
the benefit is that extra split bounce check is not needed now.

Had to move the passes write to shadow catcher header, since include
of `film/passes.h` causes all the fun of requirement to have BSDF
data structures available.

Differential Revision: https://developer.blender.org/D13177

===================================================================

M	intern/cycles/device/cpu/kernel.h
M	intern/cycles/device/optix/queue.cpp
M	intern/cycles/integrator/path_trace_work_gpu.cpp
M	intern/cycles/kernel/device/cpu/kernel_arch.h
M	intern/cycles/kernel/device/cpu/kernel_arch_impl.h
M	intern/cycles/kernel/device/gpu/kernel.h
M	intern/cycles/kernel/device/optix/kernel.cu
M	intern/cycles/kernel/film/passes.h
M	intern/cycles/kernel/integrator/intersect_closest.h
M	intern/cycles/kernel/integrator/megakernel.h
M	intern/cycles/kernel/integrator/shade_surface.h
M	intern/cycles/kernel/integrator/shade_volume.h
M	intern/cycles/kernel/integrator/shadow_catcher.h

===================================================================

diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h
index 406bd07ab3d..2d1de975c2b 100644
--- a/intern/cycles/device/cpu/kernel.h
+++ b/intern/cycles/device/cpu/kernel.h
@@ -42,7 +42,7 @@ class CPUKernels {
 
   IntegratorInitFunction integrator_init_from_camera;
   IntegratorInitFunction integrator_init_from_bake;
-  IntegratorFunction integrator_intersect_closest;
+  IntegratorShadeFunction integrator_intersect_closest;
   IntegratorFunction integrator_intersect_shadow;
   IntegratorFunction integrator_intersect_subsurface;
   IntegratorFunction integrator_intersect_volume_stack;
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index f5bfd916ccf..e3946d94f5d 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
                         sizeof(device_ptr),
                         cuda_stream_));
 
-  if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+  if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
     cuda_device_assert(
         cuda_device_,
         cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index dfc1362ab09..b9784f68f56 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -437,7 +437,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
   DCHECK_LE(work_size, max_num_paths_);
 
   switch (kernel) {
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
+    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
+      /* Closest ray intersection kernels with integrator state and render buffer. */
+      void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
+      void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+
+      queue_->enqueue(kernel, work_size, args);
+      break;
+    }
+
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h
index 2f9a3f7c59d..61f62f3136b 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch.h
@@ -37,7 +37,7 @@
 
 KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
 KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
-KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
+KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
 KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
 KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
 KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 1ea5002e300..747c47c34c9 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN
 
 DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
 DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
-DEFINE_INTEGRATOR_KERNEL(intersect_closest)
+DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
 DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
 DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
 DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 844bbf90f67..56fcc38b907 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -116,13 +116,15 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 }
 
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
-    kernel_gpu_integrator_intersect_closest(const int *path_index_array, const int work_size)
+    kernel_gpu_integrator_intersect_closest(const int *path_index_array,
+                                            ccl_global float *render_buffer,
+                                            const int work_size)
 {
   const int global_index = ccl_gpu_global_id_x();
 
   if (global_index < work_size) {
     const int state = (path_index_array) ? path_index_array[global_index] : global_index;
-    integrator_intersect_closest(NULL, state);
+    integrator_intersect_closest(NULL, state, render_buffer);
   }
 }
 
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 6989219cd9f..b987aa7a817 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
   const int global_index = optixGetLaunchIndex().x;
   const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
                                                        global_index;
-  integrator_intersect_closest(nullptr, path_index);
+  integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
 }
 
 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h
index 22b4b779a17..77761709a78 100644
--- a/intern/cycles/kernel/film/passes.h
+++ b/intern/cycles/kernel/film/passes.h
@@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
 }
 #endif /* __DENOISING_FEATURES__ */
 
-#ifdef __SHADOW_CATCHER__
-
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-    KernelGlobals kg,
-    IntegratorState state,
-    ccl_private const ShaderData *sd,
-    ccl_global float *ccl_restrict render_buffer)
-{
-  if (!kernel_data.integrator.has_shadow_catcher) {
-    return;
-  }
-
-  kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
-  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
-  if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
-    return;
-  }
-
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
-
-  /* Count sample for the shadow catcher object. */
-  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
-  /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
-   * transparency to the matte. */
-  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
-                          average(throughput));
-}
-
-#endif /* __SHADOW_CATCHER__ */
-
 ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
                                               size_t depth,
                                               float id,
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index 5522b46205b..366bfba7aca 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -88,7 +88,10 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
 #ifdef __SHADOW_CATCHER__
 /* Split path if a shadow catcher was hit. */
 ccl_device_forceinline void integrator_split_shadow_catcher(
-    KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg,
+    IntegratorState state,
+    ccl_private const Intersection *ccl_restrict isect,
+    ccl_global float *ccl_restrict render_buffer)
 {
   /* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
    * paths from here. */
@@ -97,6 +100,8 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
     return;
   }
 
+  kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
+
   /* Mark state as having done a shadow catcher split so that it stops contributing to
    * the shadow catcher matte pass, but keeps contributing to the combined pass. */
   INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
@@ -191,6 +196,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
     KernelGlobals kg,
     IntegratorState state,
     ccl_private const Intersection *ccl_restrict isect,
+    ccl_global float *ccl_restrict render_buffer,
     const bool hit)
 {
   /* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
@@ -233,7 +239,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
 
 #ifdef __SHADOW_CATCHER__
         /* Handle shadow catcher. */
-        integrator_split_shadow_catcher(kg, state, isect);
+        integrator_split_shadow_catcher(kg, state, isect, render_buffer);
 #endif
       }
       else {
@@ -253,7 +259,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
  * volume shading and termination testing have already been done. */
 template<uint32_t current_kernel>
 ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
-    KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg,
+    IntegratorState state,
+    ccl_private const Intersection *ccl_restrict isect,
+    ccl_global float *ccl_restrict render_buffer)
 {
   if (isect->prim != PRIM_NONE) {
     /* Hit a surface, continue with light or surface kernel. */
@@ -278,7 +287,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
 
 #ifdef __SHADOW_CATCHER__
       /* Handle shadow catcher. */
-      integrator_split_shadow_catcher(kg, state, isect);
+      integrator_split_shadow_catcher(kg, state, isect, render_buffer);
 #endif
       return;
     }
@@ -290,7 +299,9 @@ 

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list