[Bf-blender-cvs] [5d565062edc] master: Cleanup: refactor OptiX shadow intersection for upcoming changes

Brecht Van Lommel noreply at git.blender.org
Fri Oct 15 15:59:43 CEST 2021


Commit: 5d565062edc25575bbabf173a4e26f184103944b
Author: Brecht Van Lommel
Date:   Wed Oct 13 18:19:51 2021 +0200
Branches: master
https://developer.blender.org/rB5d565062edc25575bbabf173a4e26f184103944b

Cleanup: refactor OptiX shadow intersection for upcoming changes

===================================================================

M	intern/cycles/bvh/bvh_embree.cpp
M	intern/cycles/kernel/bvh/bvh_shadow_all.h
M	intern/cycles/kernel/bvh/bvh_util.h
M	intern/cycles/kernel/device/optix/kernel.cu
M	intern/cycles/kernel/geom/geom_motion_curve.h
M	intern/cycles/kernel/geom/geom_motion_triangle.h
M	intern/cycles/kernel/geom/geom_motion_triangle_shader.h
M	intern/cycles/kernel/kernel_shader.h

===================================================================

diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index ae5b7dd426a..76fcdf539ea 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -81,7 +81,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
       kernel_embree_convert_hit(kg, ray, hit, &current_isect);
 
       /* If no transparent shadows, all light is blocked. */
-      const int flags = intersection_get_shader_flags(kg, &current_isect);
+      const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
       if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
         ctx->opaque_hit = true;
         return;
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index ea1ee26b863..4f2164a86ae 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -197,7 +197,7 @@ ccl_device_inline
 
               /* todo: optimize so primitive visibility flag indicates if
                * the primitive has a transparent shadow shader? */
-              const int flags = intersection_get_shader_flags(kg, isect);
+              const int flags = intersection_get_shader_flags(kg, isect->prim, isect->type);
 
               if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || max_hits == 0) {
                 /* If no transparent shadows, all light is blocked and we can
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index fb546f568f3..31aae389da0 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -140,14 +140,12 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
 /* Utility to quickly get flags from an intersection. */
 
 ccl_device_forceinline int intersection_get_shader_flags(
-    ccl_global const KernelGlobals *ccl_restrict kg,
-    ccl_private const Intersection *ccl_restrict isect)
+    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int type)
 {
-  const int prim = isect->prim;
   int shader = 0;
 
 #ifdef __HAIR__
-  if (isect->type & PRIMITIVE_ALL_TRIANGLE)
+  if (type & PRIMITIVE_ALL_TRIANGLE)
 #endif
   {
     shader = kernel_tex_fetch(__tri_shader, prim);
@@ -195,4 +193,33 @@ ccl_device_forceinline int intersection_get_object_flags(
   return kernel_tex_fetch(__object_flag, isect->object);
 }
 
+/* TODO: find a better (faster) solution for this. Maybe store offset per object for
+ * attributes needed in intersection? */
+ccl_device_inline int intersection_find_attribute(ccl_global const KernelGlobals *kg,
+                                                  const int object,
+                                                  const uint id)
+{
+  uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset;
+  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+  while (attr_map.x != id) {
+    if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
+      if (UNLIKELY(attr_map.y == 0)) {
+        return (int)ATTR_STD_NOT_FOUND;
+      }
+      else {
+        /* Chain jump to a different part of the table. */
+        attr_offset = attr_map.z;
+      }
+    }
+    else {
+      attr_offset += ATTR_PRIM_TYPES;
+    }
+    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+  }
+
+  /* return result */
+  return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index c9577bb2aa2..e97b25d31a2 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -172,14 +172,12 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
 extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
 {
 #ifdef __SHADOW_RECORD_ALL__
-  bool ignore_intersection = false;
-
   int prim = optixGetPrimitiveIndex();
   const uint object = get_object_id();
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
   if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
-    ignore_intersection = true;
+    return optixIgnoreIntersection();
   }
 #  endif
 
@@ -202,29 +200,39 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
 
     /* Filter out curve endcaps. */
     if (u == 0.0f || u == 1.0f) {
-      ignore_intersection = true;
+      return optixIgnoreIntersection();
     }
   }
 #  endif
 
-  int num_hits = optixGetPayload_2();
-  int record_index = num_hits;
+#  ifndef __TRANSPARENT_SHADOWS__
+  /* No transparent shadows support compiled in, make opaque. */
+  optixSetPayload_5(true);
+  return optixTerminateRay();
+#  else
   const int max_hits = optixGetPayload_3();
 
-  if (!ignore_intersection) {
-    optixSetPayload_2(num_hits + 1);
+  /* If no transparent shadows, all light is blocked and we can stop immediately. */
+  if (max_hits == 0 ||
+      !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+    optixSetPayload_5(true);
+    return optixTerminateRay();
   }
 
+  /* Record transparent intersection. */
+  const int num_hits = optixGetPayload_2();
+  int record_index = num_hits;
+
+  optixSetPayload_2(num_hits + 1);
+
   Intersection *const isect_array = get_payload_ptr_0<Intersection>();
 
-#  ifdef __TRANSPARENT_SHADOWS__
-  if (num_hits >= max_hits) {
+  if (record_index >= max_hits) {
     /* If maximum number of hits reached, find a hit to replace. */
-    const int num_recorded_hits = min(max_hits, num_hits);
     float max_recorded_t = isect_array[0].t;
     int max_recorded_hit = 0;
 
-    for (int i = 1; i < num_recorded_hits; i++) {
+    for (int i = 1; i < max_hits; i++) {
       if (isect_array[i].t > max_recorded_t) {
         max_recorded_t = isect_array[i].t;
         max_recorded_hit = i;
@@ -232,39 +240,25 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
     }
 
     if (optixGetRayTmax() >= max_recorded_t) {
-      /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the current
-       * hit anymore. */
+      /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
+       * current hit anymore. */
       return;
     }
 
     record_index = max_recorded_hit;
   }
-#  endif
 
-  if (!ignore_intersection) {
-    Intersection *const isect = isect_array + record_index;
-    isect->u = u;
-    isect->v = v;
-    isect->t = optixGetRayTmax();
-    isect->prim = prim;
-    isect->object = object;
-    isect->type = type;
-
-#  ifdef __TRANSPARENT_SHADOWS__
-    /* Detect if this surface has a shader with transparent shadows. */
-    if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
-#  endif
-      /* If no transparent shadows, all light is blocked and we can stop immediately. */
-      optixSetPayload_5(true);
-      return optixTerminateRay();
-#  ifdef __TRANSPARENT_SHADOWS__
-    }
-#  endif
-  }
+  Intersection *const isect = isect_array + record_index;
+  isect->u = u;
+  isect->v = v;
+  isect->t = optixGetRayTmax();
+  isect->prim = prim;
+  isect->object = object;
+  isect->type = type;
 
-  /* Continue tracing. */
   optixIgnoreIntersection();
-#endif
+#  endif /* __TRANSPARENT_SHADOWS__ */
+#endif   /* __SHADOW_RECORD_ALL__ */
 }
 
 extern "C" __global__ void __anyhit__kernel_optix_volume_test()
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 8e32df439cd..5754608a69b 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -27,31 +27,6 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-ccl_device_inline int find_attribute_curve_motion(ccl_global const KernelGlobals *kg,
-                                                  int object,
-                                                  uint id,
-                                                  ccl_private AttributeElement *elem)
-{
-  /* todo: find a better (faster) solution for this, maybe store offset per object.
-   *
-   * NOTE: currently it's not a bottleneck because in test scenes the loop below runs
-   * zero iterations and rendering is really slow with motion curves. For until other
-   * areas are speed up it's probably not so crucial to optimize this out.
-   */
-  uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY;
-  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
-  while (attr_map.x != id) {
-    attr_offset += ATTR_PRIM_TYPES;
-    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-  }
-
-  *elem = (AttributeElement)attr_map.y;
-
-  /* return result */
-  return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
-}
-
 ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg,
                                                          int offset,
                                                          int numkeys,
@@ -92,13 +67,12 @@ ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *
   object_motion_info(kg, object, &numsteps, NULL, &numkeys);
 
   /* figure out which steps we need to fetch and their interpolation factor */
-  int maxstep = numsteps * 2;
-  int step = min((int)(time * maxstep), maxstep - 1);
-  float t = time * maxstep - step;
+  const int maxstep = numsteps * 2;
+  const int step = min((int)(time * maxstep), maxstep - 1);
+  const float t = time * maxstep - step;
 
   /* find attribute */
-  AttributeElement elem;
-  int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+  const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
   kernel_assert(offset != ATTR_STD_NOT_FOUND);
 
   /* fetch key coordinates */
@@ -160,13 +134,12 @@ ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg,
   object_motion_info(kg, object, &numsteps, NULL, &numkeys);
 
   /* figure out which steps we need to fetch and their interpolation factor */
-  int maxstep = numsteps *

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list