[Bf-blender-cvs] [c52b147] cycles_bvh: Cycles: Adapt all QBVH traversal to unaligned nodes

Sergey Sharybin noreply at git.blender.org
Wed Jun 15 11:39:15 CEST 2016


Commit: c52b147e8a8b69389f027f6fc70ee85842223312
Author: Sergey Sharybin
Date:   Wed Jun 15 09:57:36 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rBc52b147e8a8b69389f027f6fc70ee85842223312

Cycles: Adapt all QBVH traversal to unaligned nodes

===================================================================

M	intern/cycles/kernel/geom/geom_qbvh.h
M	intern/cycles/kernel/geom/geom_qbvh_curve.h
M	intern/cycles/kernel/geom/geom_qbvh_shadow.h
M	intern/cycles/kernel/geom/geom_qbvh_subsurface.h
M	intern/cycles/kernel/geom/geom_qbvh_traversal.h
M	intern/cycles/kernel/geom/geom_qbvh_volume.h
M	intern/cycles/kernel/geom/geom_qbvh_volume_all.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
index edffbb6..30ed851 100644
--- a/intern/cycles/kernel/geom/geom_qbvh.h
+++ b/intern/cycles/kernel/geom/geom_qbvh.h
@@ -56,9 +56,9 @@ ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
                                           const ssef& tfar,
 #ifdef __KERNEL_AVX2__
                                           const sse3f& org_idir,
-#endif
+#else
                                           const sse3f& org,
-                                          const sse3f& dir,
+#endif
                                           const sse3f& idir,
                                           const int near_x,
                                           const int near_y,
@@ -70,9 +70,7 @@ ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
                                           ssef *__restrict dist)
 {
 	const int offset = nodeAddr + 1;
-	(void)dir;
 #ifdef __KERNEL_AVX2__
-	(void)org;
 	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
 	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
 	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
@@ -108,9 +106,9 @@ ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
                                                  const ssef& tfar,
 #ifdef __KERNEL_AVX2__
                                                  const sse3f& P_idir,
-#endif
+#else
                                                  const sse3f& P,
-                                                 const sse3f& dir,
+#endif
                                                  const sse3f& idir,
                                                  const int near_x,
                                                  const int near_y,
@@ -123,9 +121,7 @@ ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
                                                  ssef *__restrict dist)
 {
 	const int offset = nodeAddr + 1;
-	(void)dir;
 #ifdef __KERNEL_AVX2__
-	(void)P;
 	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
 	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
 	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
diff --git a/intern/cycles/kernel/geom/geom_qbvh_curve.h b/intern/cycles/kernel/geom/geom_qbvh_curve.h
index 22551d4..757759f 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_curve.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_curve.h
@@ -106,9 +106,9 @@ ccl_device_inline int qbvh_curve_node_intersect(
 		        tfar,
 #ifdef __KERNEL_AVX2__
 		        org_idir,
-#endif
+#else
 		        org,
-		        dir,
+#endif
 		        idir,
 		        near_x, near_y, near_z,
 		        far_x, far_y, far_z,
@@ -208,9 +208,9 @@ ccl_device_inline int qbvh_curve_node_intersect_robust(
 		        tfar,
 #ifdef __KERNEL_AVX2__
 		        P_idir,
-#endif
+#else
 		        P,
-		        dir,
+#endif
 		        idir,
 		        near_x, near_y, near_z,
 		        far_x, far_y, far_z,
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
index a12f3c4..6a6e191 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
@@ -27,6 +27,12 @@
  *
  */
 
+#if BVH_FEATURE(BVH_HAIR)
+#  define NODE_INTERSECT qbvh_curve_node_intersect
+#else
+#  define NODE_INTERSECT qbvh_node_intersect
+#endif
+
 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              const Ray *ray,
                                              Intersection *isect_array,
@@ -72,15 +78,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 #endif
 
 	ssef tnear(0.0f), tfar(tmax);
+#if BVH_FEATURE(BVH_HAIR)
 	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
 	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
 	float3 P_idir = P*idir;
-	sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+	sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+	sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
-
-	sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 
 	/* Offsets to select the side that becomes the lower or upper bound. */
 	int near_x, near_y, near_z;
@@ -111,25 +120,35 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 #endif
 
 				ssef dist;
-				int traverseChild = false;
-				if(false) {
-				/*int */traverseChild = qbvh_node_intersect(kg,
-				                                        tnear,
-				                                        tfar,
+				int traverseChild = NODE_INTERSECT(kg,
+				                                   tnear,
+				                                   tfar,
 #ifdef __KERNEL_AVX2__
-				                                        P_idir4,
+				                                   P_idir4,
 #endif
-				                                        org,
-				                                        dir4,
-				                                        idir4,
-				                                        near_x, near_y, near_z,
-				                                        far_x, far_y, far_z,
-				                                        nodeAddr,
-				                                        &dist);
-				}
+#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+				                                   org4,
+#  endif
+#  if BVH_FEATURE(BVH_HAIR)
+				                                   dir4,
+#  endif
+				                                   idir4,
+				                                   near_x, near_y, near_z,
+				                                   far_x, far_y, far_z,
+				                                   nodeAddr,
+				                                   &dist);
 
 				if(traverseChild != 0) {
-					float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+					float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+						cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
+					}
+					else
+#endif
+					{
+						cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+					}
 
 					/* One child is hit, continue with that child. */
 					int r = __bscf(traverseChild);
@@ -345,12 +364,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 					if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
 					if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
 					tfar = ssef(isect_t);
+#  if BVH_FEATURE(BVH_HAIR)
+					dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#  endif
 					idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
 					P_idir = P*idir;
 					P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
-					org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+					org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#  endif
 
 					triangle_intersect_precalc(dir, &isect_precalc);
 
@@ -399,12 +423,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 			if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
 			if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
 			tfar = ssef(tmax);
+#  if BVH_FEATURE(BVH_HAIR)
+			dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#  endif
 			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
 			P_idir = P*idir;
 			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
-			org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+			org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#  endif
 
 			triangle_intersect_precalc(dir, &isect_precalc);
 
@@ -417,3 +446,5 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 
 	return false;
 }
+
+#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
index abb0487..235b677 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -25,6 +25,12 @@
  *
  */
 
+#if BVH_FEATURE(BVH_HAIR)
+#  define NODE_INTERSECT qbvh_curve_node_intersect
+#else
+#  define NODE_INTERSECT qbvh_node_intersect
+#endif
+
 ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              const Ray *ray,
                                              SubsurfaceIntersection *ss_isect,
@@ -82,14 +88,18 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 #endif
 
 	ssef tnear(0.0f), tfar(isect_t);
+#if BVH_FEATURE(BVH_HAIR)
 	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
 	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
 	float3 P_idir = P*idir;
-	sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+	sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+	sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
-	sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 
 	/* Offsets to select the side that becomes the lower or upper bound. */
 	int near_x, near_y, near_z;
@@ -109,25 +119,35 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 			while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
 				ssef dist;
 
-				int traverseChild = 0;
-				if (false) {
-				/*int*/ traverseChild = qbvh_node_intersect(kg,
-				                                        tnear,
-				                                        tfar,
+				int traverseChild = NODE_INTERSECT(kg,
+				                                   tnear,
+				                                   tfar,
 #ifdef __KERNEL_AVX2__
-				                                        P_idir4,
+				                                   P_idir4,
 #endif
-				                                        org,
-				        

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list