[Bf-blender-cvs] [8ca3cf8] cycles_bvh: Cycles: Hook up proper node intersection function to regular BVH camera rays traversal

Sergey Sharybin noreply at git.blender.org
Fri Jun 17 16:21:50 CEST 2016


Commit: 8ca3cf816627c5361492abb9f700ce547e3f1196
Author: Sergey Sharybin
Date:   Thu Jun 16 11:25:47 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rB8ca3cf816627c5361492abb9f700ce547e3f1196

Cycles: Hook up proper node intersection function to regular BVH camera rays traversal

===================================================================

M	intern/cycles/kernel/geom/geom_bvh_traversal.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 65c4255..add8fce 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -49,7 +49,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	 * - likely and unlikely for if() statements
 	 * - test restrict attribute for pointers
 	 */
-	
+
 	/* traversal stack in CUDA thread-local memory */
 	int traversalStack[BVH_STACK_SIZE];
 	traversalStack[0] = ENTRYPOINT_SENTINEL;
@@ -79,9 +79,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if defined(__KERNEL_SSE2__)
 	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
 	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-	
+
 	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
 	ssef Psplat[3], idirsplat[3];
+	ssef tnear(0.0f), tfar(isect->t);
 	shuffle_swap_t shufflexyz[3];
 
 	Psplat[0] = ssef(P.x);
@@ -101,121 +102,59 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 		do {
 			/* traverse internal nodes */
 			while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
-				bool traverseChild0, traverseChild1;
-				int nodeAddrChild1;
-
-#if !defined(__KERNEL_SSE2__)
-				/* Intersect two child bounding boxes, non-SSE version */
-				float t = isect->t;
-
-				/* fetch node data */
+				int nodeAddrChild1, traverse_mask;
+				float dist[2];
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-				float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
-				float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
-				float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
-
-				/* intersect ray against child nodes */
-				NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
-				NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 
+#if !defined(__KERNEL_SSE2__)
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
-					float hdiff = 1.0f + difl;
-					float ldiff = 1.0f - difl;
-					if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
-						c0min = max(ldiff * c0min, c0min - extmax);
-						c0max = min(hdiff * c0max, c0max + extmax);
-					}
-					if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
-						c1min = max(ldiff * c1min, c1min - extmax);
-						c1max = min(hdiff * c1max, c1max + extmax);
-					}
 				}
+				else
 #  endif
-
-				/* decide which nodes to traverse next */
-#  ifdef __VISIBILITY_FLAG__
-				/* this visibility test gives a 5% performance hit, how to solve? */
-				traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility);
-				traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility);
-#  else
-				traverseChild0 = (c0max >= c0min);
-				traverseChild1 = (c1max >= c1min);
-#  endif
-
+				{
+				}
 #else // __KERNEL_SSE2__
-				/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-
-				/* fetch node data */
-				const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
-				const float4 cnodes = ((float4*)bvh_nodes)[0];
-
-				/* intersect ray against child nodes */
-				const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
-				const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
-				const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
-				/* calculate { c0min, c1min, -c0max, -c1max} */
-				ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
-				const ssef tminmax = minmax ^ pn;
-
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
-					float4 *tminmaxview = (float4*)&tminmax;
-					float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
-					float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
-
-					float hdiff = 1.0f + difl;
-					float ldiff = 1.0f - difl;
-					if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
-						c0min = max(ldiff * c0min, c0min - extmax);
-						c0max = min(hdiff * c0max, c0max + extmax);
-					}
-					if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
-						c1min = max(ldiff * c1min, c1min - extmax);
-						c1max = min(hdiff * c1max, c1max + extmax);
-					}
+					traverse_mask = bvh_node_intersect_robust(kg,
+					                                          P,
+					                                          dir,
+					                                          tnear,
+					                                          tfar,
+					                                          tsplat,
+					                                          Psplat,
+					                                          idirsplat,
+					                                          shufflexyz,
+					                                          difl,
+					                                          visibility,
+					                                          nodeAddr,
+					                                          dist);
 				}
+				else
 #  endif
-
-				const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
-				/* decide which nodes to traverse next */
-#  ifdef __VISIBILITY_FLAG__
-				/* this visibility test gives a 5% performance hit, how to solve? */
-				traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.x) & visibility);
-				traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.y) & visibility);
-#  else
-				traverseChild0 = (movemask(lrhit) & 1);
-				traverseChild1 = (movemask(lrhit) & 2);
-#  endif
+				{
+					traverse_mask = bvh_node_intersect(kg,
+					                                   P,
+					                                   dir,
+					                                   tnear,
+					                                   tfar,
+					                                   tsplat,
+					                                   Psplat,
+					                                   idirsplat,
+					                                   shufflexyz,
+					                                   visibility,
+					                                   nodeAddr,
+					                                   dist);
+				}
 #endif // __KERNEL_SSE2__
 
 				nodeAddr = __float_as_int(cnodes.z);
 				nodeAddrChild1 = __float_as_int(cnodes.w);
 
-				if(traverseChild0 && traverseChild1) {
-					/* both children were intersected, push the farther one */
-#if !defined(__KERNEL_SSE2__)
-					bool closestChild1 = (c1min < c0min);
-#else
-					bool closestChild1 = tminmax[1] < tminmax[0];
-#endif
+				if(traverse_mask == 3) {
+					/* Both children were intersected, push the farther one. */
+					bool closestChild1 = (dist[1] < dist[0]);
 
 					if(closestChild1) {
 						int tmp = nodeAddr;
@@ -228,12 +167,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					traversalStack[stackPtr] = nodeAddrChild1;
 				}
 				else {
-					/* one child was intersected */
-					if(traverseChild1) {
+					/* One child was intersected. */
+					if(traverse_mask == 2) {
 						nodeAddr = nodeAddrChild1;
 					}
-					else if(!traverseChild0) {
-						/* neither child was intersected */
+					else if(traverse_mask == 0) {
+						/* Neither child was intersected. */
 						nodeAddr = traversalStack[stackPtr];
 						--stackPtr;
 					}
@@ -268,6 +207,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									if(visibility == PATH_RAY_SHADOW_OPAQUE)
 										return true;
 									tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+									tfar = ssef(isect->t);
 #else
 									if(visibility == PATH_RAY_SHADOW_OPAQUE)
 										return true;
@@ -287,6 +227,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									if(visibility == PATH_RAY_SHADOW_OPAQUE)
 										return true;
 									tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+									tfar = ssef(isect->t);
 #  else
 									if(visibility == PATH_RAY_SHADOW_OPAQUE)
 										return true;
@@ -313,6 +254,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									if(visibility == PATH_RAY_SHADOW_OPAQUE)
 										return true;
 									tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+									tfar = ssef(isect->t);
 #  else
 									if(visibility == PATH_RAY_SHADOW_OPAQUE)
 										return true;
@@ -342,6 +284,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					Psplat[2] = ssef(P.z);
 
 					tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+					tfar = ssef(isect->t);
 
 					gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
@@ -376,6 +319,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 			Psplat[2] = ssef(P.z);
 
 			tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+			tfar = ssef(isect->t);
 
 			gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif




More information about the Bf-blender-cvs mailing list