[Bf-blender-cvs] [21a31fd] cycles_bvh: Cycles: Optimization for non-hair regular BVH traversal

Sergey Sharybin noreply at git.blender.org
Fri Jun 17 16:21:58 CEST 2016


Commit: 21a31fd1d2791bedfec2f81c0bcee131a90642af
Author: Sergey Sharybin
Date:   Fri Jun 17 14:45:45 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rB21a31fd1d2791bedfec2f81c0bcee131a90642af

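Cycles: Optimization for non-hair regular BVH traversal

When the BVH_HAIR feature is disabled, the traversal templates now call
bvh_aligned_node_intersect (and bvh_aligned_node_intersect_robust) via the
new NODE_INTERSECT / NODE_INTERSECT_ROBUST macros; with BVH_HAIR enabled
they keep calling bvh_node_intersect (and bvh_node_intersect_robust).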

===================================================================

M	intern/cycles/kernel/geom/geom_bvh_shadow.h
M	intern/cycles/kernel/geom/geom_bvh_subsurface.h
M	intern/cycles/kernel/geom/geom_bvh_traversal.h
M	intern/cycles/kernel/geom/geom_bvh_volume.h
M	intern/cycles/kernel/geom/geom_bvh_volume_all.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index cbc92c0..3b374ce 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -21,6 +21,12 @@
 #  include "geom_qbvh_shadow.h"
 #endif
 
+#if BVH_FEATURE(BVH_HAIR)
+#  define NODE_INTERSECT bvh_node_intersect
+#else
+#  define NODE_INTERSECT bvh_aligned_node_intersect
+#endif
+
 /* This is a template BVH traversal function, where various features can be
  * enabled/disabled. This way we can compile optimized versions for each case
  * without new features slowing things down.
@@ -41,7 +47,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	 * - likely and unlikely for if() statements
 	 * - test restrict attribute for pointers
 	 */
-	
+
 	/* traversal stack in CUDA thread-local memory */
 	int traversalStack[BVH_STACK_SIZE];
 	traversalStack[0] = ENTRYPOINT_SENTINEL;
@@ -72,7 +78,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if defined(__KERNEL_SSE2__)
 	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
 	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-	
+
 	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
 	ssef Psplat[3], idirsplat[3];
 	ssef tnear(0.0f), tfar(isect_t);
@@ -100,27 +106,29 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
 
 #if !defined(__KERNEL_SSE2__)
-				traverse_mask = bvh_node_intersect(kg,
-				                                   P,
-				                                   dir,
-				                                   idir,
-				                                   isect_t,
-				                                   PATH_RAY_SHADOW,
-				                                   nodeAddr,
-				                                   dist);
+				traverse_mask = NODE_INTERSECT(kg,
+				                               P,
+				                               dir,
+				                               idir,
+				                               isect_t,
+				                               PATH_RAY_SHADOW,
+				                               nodeAddr,
+				                               dist);
 #else // __KERNEL_SSE2__
-				traverse_mask = bvh_node_intersect(kg,
-				                                   P,
-				                                   dir,
-				                                   tnear,
-				                                   tfar,
-				                                   tsplat,
-				                                   Psplat,
-				                                   idirsplat,
-				                                   shufflexyz,
-				                                   PATH_RAY_SHADOW,
-				                                   nodeAddr,
-				                                   dist);
+				traverse_mask = NODE_INTERSECT(kg,
+				                               P,
+				                               dir,
+#  if BVH_FEATURE(BVH_HAIR)
+				                               tnear,
+				                               tfar,
+#  endif
+				                               tsplat,
+				                               Psplat,
+				                               idirsplat,
+				                               shufflexyz,
+				                               PATH_RAY_SHADOW,
+				                               nodeAddr,
+				                               dist);
 #endif // __KERNEL_SSE2__
 
 				nodeAddr = __float_as_int(cnodes.z);
@@ -193,7 +201,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_HAIR)
 							case PRIMITIVE_CURVE:
 							case PRIMITIVE_MOTION_CURVE: {
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
+								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
 									hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
 								else
 									hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
@@ -367,3 +375,4 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
 
 #undef BVH_FUNCTION_NAME
 #undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index f42cca0..2a9fc11 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -21,6 +21,12 @@
 #  include "geom_qbvh_subsurface.h"
 #endif
 
+#if BVH_FEATURE(BVH_HAIR)
+#  define NODE_INTERSECT bvh_node_intersect
+#else
+#  define NODE_INTERSECT bvh_aligned_node_intersect
+#endif
+
 /* This is a template BVH traversal function for subsurface scattering, where
  * various features can be enabled/disabled. This way we can compile optimized
  * versions for each case without new features slowing things down.
@@ -109,27 +115,29 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
 
 #if !defined(__KERNEL_SSE2__)
-				traverse_mask = bvh_node_intersect(kg,
-				                                   P,
-				                                   dir,
-				                                   idir,
-				                                   isect_t,
-				                                   PATH_RAY_ALL_VISIBILITY,
-				                                   nodeAddr,
-				                                   dist);
+				traverse_mask = NODE_INTERSECT(kg,
+				                               P,
+				                               dir,
+				                               idir,
+				                               isect_t,
+				                               PATH_RAY_ALL_VISIBILITY,
+				                               nodeAddr,
+				                               dist);
 #else // __KERNEL_SSE2__
-				traverse_mask = bvh_node_intersect(kg,
-				                                   P,
-				                                   dir,
-				                                   tnear,
-				                                   tfar,
-				                                   tsplat,
-				                                   Psplat,
-				                                   idirsplat,
-				                                   shufflexyz,
-				                                   PATH_RAY_ALL_VISIBILITY,
-				                                   nodeAddr,
-				                                   dist);
+				traverse_mask = NODE_INTERSECT(kg,
+				                               P,
+				                               dir,
+#  if BVH_FEATURE(BVH_HAIR)
+				                               tnear,
+				                               tfar,
+#  endif
+				                               tsplat,
+				                               Psplat,
+				                               idirsplat,
+				                               shufflexyz,
+				                               PATH_RAY_ALL_VISIBILITY,
+				                               nodeAddr,
+				                               dist);
 #endif // __KERNEL_SSE2__
 
 				nodeAddr = __float_as_int(cnodes.z);
@@ -251,3 +259,4 @@ ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
 
 #undef BVH_FUNCTION_NAME
 #undef BVH_FUNCTION_FEATURES
+#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index c6c41b5..b1e21fa 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -21,6 +21,14 @@
 #  include "geom_qbvh_traversal.h"
 #endif
 
+#if BVH_FEATURE(BVH_HAIR)
+#  define NODE_INTERSECT bvh_node_intersect
+#  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
+#else
+#  define NODE_INTERSECT bvh_aligned_node_intersect
+#  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
+#endif
+
 /* This is a template BVH traversal function, where various features can be
  * enabled/disabled. This way we can compile optimized versions for each case
  * without new features slowing things down.
@@ -109,60 +117,64 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if !defined(__KERNEL_SSE2__)
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
-					traverse_mask = bvh_node_intersect_robust(kg,
-					                                          P,
-					                                          dir,
-					                                          idir,
-					                                          isect->t,
-					                                          difl,
-					                                          visibility,
-					                                          nodeAddr,
-					                                          dist);
+					traverse_mask = NODE_INTERSECT_ROBUST(kg,
+					                                      P,
+					                                      dir,
+					                                      idir,
+					                                      isect->t,
+					                                      difl,
+					                                      visibility,
+					                                      nodeAddr,
+					                                      dist);
 				}
 				else
 #  endif
 				{
-					traverse_mask = bvh_node_intersect(kg,
-					                                   P,
-					                                   dir,
-					                                   idir,
-					                                   isect->t,
-					                                   visibility,
-					                                   nodeAddr,
-					                                   dist);
+					traverse_mask = NODE_INTERSECT(kg,
+					                               P,
+					                               dir,
+					                               idir,
+					                               isect->t,
+					                               visibility,
+					                               nodeAddr,
+					                               dist);
 				}
 #else // __KERNEL_SSE2__
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
-					traverse_mask = bvh_node_intersect_robust(kg,
-					                                          P,
-					                                          dir,
-					                                   

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list