[Bf-blender-cvs] [cad00ba01b4] cycles-x: Cycles X: Improve performance of transparent shadows with OptiX

Patrick Mours noreply at git.blender.org
Fri Sep 17 13:38:23 CEST 2021


Commit: cad00ba01b4ac7412c51c6d0143e402dc56967d7
Author: Patrick Mours
Date:   Thu Sep 16 18:16:38 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBcad00ba01b4ac7412c51c6d0143e402dc56967d7

Cycles X: Improve performance of transparent shadows with OptiX

This changes the shadow record-all any-hit program so that, once
the maximum number of hits that can be recorded has been reached,
it accepts all hits (returns without calling
`optixIgnoreIntersection`) beyond the furthest distance recorded.
OptiX no longer calls the any-hit program for hits beyond the
distance of an accepted hit, and it also reduces the current
ray length behind the scenes. As a result, performance improves
drastically in scenes where shadow rays can hit a lot of
transparent objects, like the "koro" benchmark scene.

With this applied I now get similar performance with both CUDA
and OptiX in "koro". Not quite perfect yet, but much better than
before.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D12524

===================================================================

M	intern/cycles/kernel/device/optix/kernel.cu
M	intern/cycles/kernel/integrator/integrator_shade_shadow.h

===================================================================

diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 8c68522289f..a4603b53150 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -169,11 +169,13 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
 extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
 {
 #ifdef __SHADOW_RECORD_ALL__
+  bool ignore_intersection = false;
+
   const uint prim = optixGetPrimitiveIndex();
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
   if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
-    return optixIgnoreIntersection();
+    ignore_intersection = true;
   }
 #  endif
 
@@ -190,7 +192,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
 
     // Filter out curve endcaps
     if (u == 0.0f || u == 1.0f) {
-      return optixIgnoreIntersection();
+      ignore_intersection = true;
     }
   }
 #  endif
@@ -199,7 +201,9 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
   int record_index = num_hits;
   const int max_hits = optixGetPayload_3();
 
-  optixSetPayload_2(num_hits + 1);
+  if (!ignore_intersection) {
+    optixSetPayload_2(num_hits + 1);
+  }
 
   Intersection *const isect_array = get_payload_ptr_0<Intersection>();
 
@@ -218,37 +222,37 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
     }
 
     if (optixGetRayTmax() >= max_recorded_t) {
-      return optixIgnoreIntersection();
+      /* Accept hit, so that OptiX won't consider any more hits beyond it anymore. */
+      return;
     }
 
     record_index = max_recorded_hit;
   }
-
-  /* TODO: is there a way to shorten the ray length when max_hits is reached, so Optix
-   * can discard triangles beyond it? */
 #  endif
 
-  Intersection *const isect = isect_array + record_index;
-  isect->u = u;
-  isect->v = v;
-  isect->t = optixGetRayTmax();
-  isect->prim = prim;
-  isect->object = get_object_id();
-  isect->type = kernel_tex_fetch(__prim_type, prim);
+  if (!ignore_intersection) {
+    Intersection *const isect = isect_array + record_index;
+    isect->u = u;
+    isect->v = v;
+    isect->t = optixGetRayTmax();
+    isect->prim = prim;
+    isect->object = get_object_id();
+    isect->type = kernel_tex_fetch(__prim_type, prim);
 
 #  ifdef __TRANSPARENT_SHADOWS__
-  // Detect if this surface has a shader with transparent shadows
-  if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
+    // Detect if this surface has a shader with transparent shadows
+    if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
 #  endif
-    // If no transparent shadows, all light is blocked and we can stop immediately
-    optixSetPayload_5(true);
-    return optixTerminateRay();
+      // If no transparent shadows, all light is blocked and we can stop immediately
+      optixSetPayload_5(true);
+      return optixTerminateRay();
 #  ifdef __TRANSPARENT_SHADOWS__
+    }
+#  endif
   }
 
   // Continue tracing
   optixIgnoreIntersection();
-#  endif
 #endif
 }
 
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index fb836191c94..fd3c3ae1653 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -23,6 +23,11 @@
 
 CCL_NAMESPACE_BEGIN
 
+ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
+{
+  return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE;
+}
+
 #ifdef __TRANSPARENT_SHADOWS__
 ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_ARGS, const int hit)
 {
@@ -93,11 +98,6 @@ ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS
 }
 #  endif
 
-ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
-{
-  return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE;
-}
-
 ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const int num_hits)
 {
   /* Accumulate shadow for transparent surfaces. */



More information about the Bf-blender-cvs mailing list