[Bf-blender-cvs] [5d565062edc] master: Cleanup: refactor OptiX shadow intersection for upcoming changes
Brecht Van Lommel
noreply at git.blender.org
Fri Oct 15 15:59:43 CEST 2021
Commit: 5d565062edc25575bbabf173a4e26f184103944b
Author: Brecht Van Lommel
Date: Wed Oct 13 18:19:51 2021 +0200
Branches: master
https://developer.blender.org/rB5d565062edc25575bbabf173a4e26f184103944b
Cleanup: refactor OptiX shadow intersection for upcoming changes
===================================================================
M intern/cycles/bvh/bvh_embree.cpp
M intern/cycles/kernel/bvh/bvh_shadow_all.h
M intern/cycles/kernel/bvh/bvh_util.h
M intern/cycles/kernel/device/optix/kernel.cu
M intern/cycles/kernel/geom/geom_motion_curve.h
M intern/cycles/kernel/geom/geom_motion_triangle.h
M intern/cycles/kernel/geom/geom_motion_triangle_shader.h
M intern/cycles/kernel/kernel_shader.h
===================================================================
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index ae5b7dd426a..76fcdf539ea 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -81,7 +81,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
/* If no transparent shadows, all light is blocked. */
- const int flags = intersection_get_shader_flags(kg, &current_isect);
+ const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
ctx->opaque_hit = true;
return;
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index ea1ee26b863..4f2164a86ae 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -197,7 +197,7 @@ ccl_device_inline
/* todo: optimize so primitive visibility flag indicates if
* the primitive has a transparent shadow shader? */
- const int flags = intersection_get_shader_flags(kg, isect);
+ const int flags = intersection_get_shader_flags(kg, isect->prim, isect->type);
if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || max_hits == 0) {
/* If no transparent shadows, all light is blocked and we can
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index fb546f568f3..31aae389da0 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -140,14 +140,12 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
/* Utility to quickly get flags from an intersection. */
ccl_device_forceinline int intersection_get_shader_flags(
- ccl_global const KernelGlobals *ccl_restrict kg,
- ccl_private const Intersection *ccl_restrict isect)
+ ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int type)
{
- const int prim = isect->prim;
int shader = 0;
#ifdef __HAIR__
- if (isect->type & PRIMITIVE_ALL_TRIANGLE)
+ if (type & PRIMITIVE_ALL_TRIANGLE)
#endif
{
shader = kernel_tex_fetch(__tri_shader, prim);
@@ -195,4 +193,33 @@ ccl_device_forceinline int intersection_get_object_flags(
return kernel_tex_fetch(__object_flag, isect->object);
}
+/* TODO: find a better (faster) solution for this. Maybe store offset per object for
+ * attributes needed in intersection? */
+ccl_device_inline int intersection_find_attribute(ccl_global const KernelGlobals *kg,
+ const int object,
+ const uint id)
+{
+ uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset;
+ uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+ while (attr_map.x != id) {
+ if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
+ if (UNLIKELY(attr_map.y == 0)) {
+ return (int)ATTR_STD_NOT_FOUND;
+ }
+ else {
+ /* Chain jump to a different part of the table. */
+ attr_offset = attr_map.z;
+ }
+ }
+ else {
+ attr_offset += ATTR_PRIM_TYPES;
+ }
+ attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ }
+
+ /* return result */
+ return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index c9577bb2aa2..e97b25d31a2 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -172,14 +172,12 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
{
#ifdef __SHADOW_RECORD_ALL__
- bool ignore_intersection = false;
-
int prim = optixGetPrimitiveIndex();
const uint object = get_object_id();
# ifdef __VISIBILITY_FLAG__
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
- ignore_intersection = true;
+ return optixIgnoreIntersection();
}
# endif
@@ -202,29 +200,39 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
- ignore_intersection = true;
+ return optixIgnoreIntersection();
}
}
# endif
- int num_hits = optixGetPayload_2();
- int record_index = num_hits;
+# ifndef __TRANSPARENT_SHADOWS__
+ /* No transparent shadows support compiled in, make opaque. */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+# else
const int max_hits = optixGetPayload_3();
- if (!ignore_intersection) {
- optixSetPayload_2(num_hits + 1);
+ /* If no transparent shadows, all light is blocked and we can stop immediately. */
+ if (max_hits == 0 ||
+ !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
}
+ /* Record transparent intersection. */
+ const int num_hits = optixGetPayload_2();
+ int record_index = num_hits;
+
+ optixSetPayload_2(num_hits + 1);
+
Intersection *const isect_array = get_payload_ptr_0<Intersection>();
-# ifdef __TRANSPARENT_SHADOWS__
- if (num_hits >= max_hits) {
+ if (record_index >= max_hits) {
/* If maximum number of hits reached, find a hit to replace. */
- const int num_recorded_hits = min(max_hits, num_hits);
float max_recorded_t = isect_array[0].t;
int max_recorded_hit = 0;
- for (int i = 1; i < num_recorded_hits; i++) {
+ for (int i = 1; i < max_hits; i++) {
if (isect_array[i].t > max_recorded_t) {
max_recorded_t = isect_array[i].t;
max_recorded_hit = i;
@@ -232,39 +240,25 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
}
if (optixGetRayTmax() >= max_recorded_t) {
- /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the current
- * hit anymore. */
+ /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
+ * current hit anymore. */
return;
}
record_index = max_recorded_hit;
}
-# endif
- if (!ignore_intersection) {
- Intersection *const isect = isect_array + record_index;
- isect->u = u;
- isect->v = v;
- isect->t = optixGetRayTmax();
- isect->prim = prim;
- isect->object = object;
- isect->type = type;
-
-# ifdef __TRANSPARENT_SHADOWS__
- /* Detect if this surface has a shader with transparent shadows. */
- if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
-# endif
- /* If no transparent shadows, all light is blocked and we can stop immediately. */
- optixSetPayload_5(true);
- return optixTerminateRay();
-# ifdef __TRANSPARENT_SHADOWS__
- }
-# endif
- }
+ Intersection *const isect = isect_array + record_index;
+ isect->u = u;
+ isect->v = v;
+ isect->t = optixGetRayTmax();
+ isect->prim = prim;
+ isect->object = object;
+ isect->type = type;
- /* Continue tracing. */
optixIgnoreIntersection();
-#endif
+# endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __SHADOW_RECORD_ALL__ */
}
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 8e32df439cd..5754608a69b 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -27,31 +27,6 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
-ccl_device_inline int find_attribute_curve_motion(ccl_global const KernelGlobals *kg,
- int object,
- uint id,
- ccl_private AttributeElement *elem)
-{
- /* todo: find a better (faster) solution for this, maybe store offset per object.
- *
- * NOTE: currently it's not a bottleneck because in test scenes the loop below runs
- * zero iterations and rendering is really slow with motion curves. For until other
- * areas are speed up it's probably not so crucial to optimize this out.
- */
- uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY;
- uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
- while (attr_map.x != id) {
- attr_offset += ATTR_PRIM_TYPES;
- attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
- }
-
- *elem = (AttributeElement)attr_map.y;
-
- /* return result */
- return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
-}
-
ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg,
int offset,
int numkeys,
@@ -92,13 +67,12 @@ ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *
object_motion_info(kg, object, &numsteps, NULL, &numkeys);
/* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps * 2;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
+ const int maxstep = numsteps * 2;
+ const int step = min((int)(time * maxstep), maxstep - 1);
+ const float t = time * maxstep - step;
/* find attribute */
- AttributeElement elem;
- int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch key coordinates */
@@ -160,13 +134,12 @@ ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg,
object_motion_info(kg, object, &numsteps, NULL, &numkeys);
/* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps *
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list