[Bf-blender-cvs] [21a31fd] cycles_bvh: Cycles: Optimization for non-hair regular BVH traversal
Sergey Sharybin
noreply at git.blender.org
Fri Jun 17 16:21:58 CEST 2016
Commit: 21a31fd1d2791bedfec2f81c0bcee131a90642af
Author: Sergey Sharybin
Date: Fri Jun 17 14:45:45 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rB21a31fd1d2791bedfec2f81c0bcee131a90642af
Cycles: Optimization for non-hair regular BVH traversal
===================================================================
M intern/cycles/kernel/geom/geom_bvh_shadow.h
M intern/cycles/kernel/geom/geom_bvh_subsurface.h
M intern/cycles/kernel/geom/geom_bvh_traversal.h
M intern/cycles/kernel/geom/geom_bvh_volume.h
M intern/cycles/kernel/geom/geom_bvh_volume_all.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index cbc92c0..3b374ce 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -21,6 +21,12 @@
# include "geom_qbvh_shadow.h"
#endif
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
+#endif
+
/* This is a template BVH traversal function, where various features can be
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
@@ -41,7 +47,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
* - likely and unlikely for if() statements
* - test restrict attribute for pointers
*/
-
+
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
@@ -72,7 +78,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
+
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
ssef tnear(0.0f), tfar(isect_t);
@@ -100,27 +106,29 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
#if !defined(__KERNEL_SSE2__)
- traverse_mask = bvh_node_intersect(kg,
- P,
- dir,
- idir,
- isect_t,
- PATH_RAY_SHADOW,
- nodeAddr,
- dist);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+ idir,
+ isect_t,
+ PATH_RAY_SHADOW,
+ nodeAddr,
+ dist);
#else // __KERNEL_SSE2__
- traverse_mask = bvh_node_intersect(kg,
- P,
- dir,
- tnear,
- tfar,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- PATH_RAY_SHADOW,
- nodeAddr,
- dist);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ PATH_RAY_SHADOW,
+ nodeAddr,
+ dist);
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
@@ -193,7 +201,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
else
hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
@@ -367,3 +375,4 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index f42cca0..2a9fc11 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -21,6 +21,12 @@
# include "geom_qbvh_subsurface.h"
#endif
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
+#endif
+
/* This is a template BVH traversal function for subsurface scattering, where
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
@@ -109,27 +115,29 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
#if !defined(__KERNEL_SSE2__)
- traverse_mask = bvh_node_intersect(kg,
- P,
- dir,
- idir,
- isect_t,
- PATH_RAY_ALL_VISIBILITY,
- nodeAddr,
- dist);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+ idir,
+ isect_t,
+ PATH_RAY_ALL_VISIBILITY,
+ nodeAddr,
+ dist);
#else // __KERNEL_SSE2__
- traverse_mask = bvh_node_intersect(kg,
- P,
- dir,
- tnear,
- tfar,
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- PATH_RAY_ALL_VISIBILITY,
- nodeAddr,
- dist);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+# if BVH_FEATURE(BVH_HAIR)
+ tnear,
+ tfar,
+# endif
+ tsplat,
+ Psplat,
+ idirsplat,
+ shufflexyz,
+ PATH_RAY_ALL_VISIBILITY,
+ nodeAddr,
+ dist);
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
@@ -251,3 +259,4 @@ ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index c6c41b5..b1e21fa 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -21,6 +21,14 @@
# include "geom_qbvh_traversal.h"
#endif
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT bvh_node_intersect
+# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
+#else
+# define NODE_INTERSECT bvh_aligned_node_intersect
+# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
+#endif
+
/* This is a template BVH traversal function, where various features can be
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
@@ -109,60 +117,64 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if !defined(__KERNEL_SSE2__)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
- traverse_mask = bvh_node_intersect_robust(kg,
- P,
- dir,
- idir,
- isect->t,
- difl,
- visibility,
- nodeAddr,
- dist);
+ traverse_mask = NODE_INTERSECT_ROBUST(kg,
+ P,
+ dir,
+ idir,
+ isect->t,
+ difl,
+ visibility,
+ nodeAddr,
+ dist);
}
else
# endif
{
- traverse_mask = bvh_node_intersect(kg,
- P,
- dir,
- idir,
- isect->t,
- visibility,
- nodeAddr,
- dist);
+ traverse_mask = NODE_INTERSECT(kg,
+ P,
+ dir,
+ idir,
+ isect->t,
+ visibility,
+ nodeAddr,
+ dist);
}
#else // __KERNEL_SSE2__
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
- traverse_mask = bvh_node_intersect_robust(kg,
- P,
- dir,
-
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list