[Bf-blender-cvs] [8aa8d98] cycles_hair_bvh: Cycles: Move all unaligned boundbox to the parent node

Sergey Sharybin noreply at git.blender.org
Thu Apr 28 15:10:34 CEST 2016


Commit: 8aa8d987389ae23ac120172cab13e2775cb975e4
Author: Sergey Sharybin
Date:   Thu Apr 28 15:08:44 2016 +0200
Branches: cycles_hair_bvh
https://developer.blender.org/rB8aa8d987389ae23ac120172cab13e2775cb975e4

Cycles: Move all unaligned boundbox to the parent node

Now the parent node contains the unaligned bounding boxes of its children,
which is more friendly for applying SIMD optimization.

This currently only solves inner nodes being intersected twice;
actual vectorization is coming next.

===================================================================

M	intern/cycles/bvh/bvh.cpp
M	intern/cycles/bvh/bvh.h
M	intern/cycles/kernel/geom/geom.h
M	intern/cycles/kernel/geom/geom_bvh_hair.h
M	intern/cycles/kernel/geom/geom_bvh_traversal_hair.h

===================================================================

diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index b7ba422..2f8ceaa 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -367,7 +367,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 			 */
 			size_t nsize_bbox = (use_qbvh)? 6: 3;
 			if(params.use_unaligned_nodes) {
-				nsize_bbox = 4;
+				nsize_bbox = 8;
 			}
 			int4 *bvh_nodes = &bvh->pack.nodes[0];
 			size_t bvh_nodes_size = bvh->pack.nodes.size(); 
@@ -465,18 +465,7 @@ void RegularBVH::pack_node(int idx,
 void RegularBVH::pack_unaligned_leaf(const BVHStackEntry& e,
                                      const LeafNode *leaf)
 {
-	const Transform& aligned_space = e.node->m_aligned_space;
-	const BoundBox& bounds = e.node->m_bounds;
-	Transform space = BVHUnaligned::compute_node_transform(bounds,
-	                                                       aligned_space);
-	float4 data[BVH_UNALIGNED_NODE_LEAF_SIZE] =
-	{
-		make_float4(0.0f, 0.0f, 0.0f, 0.0f),
-		space.x,
-		space.y,
-		space.z,
-		space.w,
-	};
+	float4 data[BVH_UNALIGNED_NODE_LEAF_SIZE];
 
 	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
 		/* Object. */
@@ -503,32 +492,37 @@ void RegularBVH::pack_unaligned_inner(const BVHStackEntry& e,
                                       const BVHStackEntry& e1)
 {
 	pack_unaligned_node(e.idx,
-	                    e.node->m_aligned_space,
-	                    e.node->m_bounds,
+	                    e0.node->m_aligned_space,
+	                    e1.node->m_aligned_space,
+	                    e0.node->m_bounds,
+	                    e1.node->m_bounds,
 	                    e0.encodeIdx(), e1.encodeIdx(),
 	                    e0.node->m_visibility, e1.node->m_visibility);
 }
 
 void RegularBVH::pack_unaligned_node(int idx,
-                                     const Transform& aligned_space,
-                                     const BoundBox& bounds,
+                                     const Transform& aligned_space0,
+                                     const Transform& aligned_space1,
+                                     const BoundBox& bounds0,
+                                     const BoundBox& bounds1,
                                      int c0, int c1,
                                      uint visibility0, uint visibility1)
 {
-	Transform space = BVHUnaligned::compute_node_transform(bounds,
-	                                                       aligned_space);
-	int4 data[BVH_UNALIGNED_NODE_SIZE] =
+	Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
+	                                                        aligned_space0);
+	Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
+	                                                        aligned_space1);
+	float4 data[BVH_UNALIGNED_NODE_SIZE] =
 	{
-		make_int4(__float_as_int(space.x.x), __float_as_int(space.x.y), __float_as_int(space.x.z), __float_as_int(space.x.w)),
-		make_int4(__float_as_int(space.y.x), __float_as_int(space.y.y), __float_as_int(space.y.z), __float_as_int(space.y.w)),
-		make_int4(__float_as_int(space.z.x), __float_as_int(space.z.y), __float_as_int(space.z.z), __float_as_int(space.z.w)),
-		make_int4(__float_as_int(space.w.x), __float_as_int(space.w.y), __float_as_int(space.w.z), __float_as_int(space.w.w)),
-		make_int4(c0, c1, visibility0, visibility1)
+		space0.x, space0.y, space0.z, space0.w,
+		space1.x, space1.y, space1.z, space1.w,
+		make_float4(__int_as_float(c0), __int_as_float(c1),
+		            __int_as_float(visibility0), __int_as_float(visibility1))
 	};
 
 	memcpy(&pack.nodes[idx * BVH_UNALIGNED_NODE_SIZE],
 	       data,
-	       sizeof(int4)*BVH_UNALIGNED_NODE_SIZE);
+	       sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
 }
 
 void RegularBVH::pack_nodes(const BVHNode *root)
@@ -542,7 +536,7 @@ void RegularBVH::pack_nodes(const BVHNode *root)
 
 	/* for top level BVH, first merge existing BVH's so we know the offsets */
 	const int nsize = params.use_unaligned_nodes? BVH_UNALIGNED_NODE_SIZE: BVH_NODE_SIZE;
-	const int nsize_leaf = params.use_unaligned_nodes? BVH_UNALIGNED_NODE_SIZE: BVH_NODE_LEAF_SIZE;
+	const int nsize_leaf = params.use_unaligned_nodes? BVH_UNALIGNED_NODE_LEAF_SIZE: BVH_NODE_LEAF_SIZE;
 	if(params.top_level) {
 		pack_instances(node_size*nsize, leaf_node_size*nsize_leaf);
 	}
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index b38ab6e..3e670bc 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -39,8 +39,8 @@ class Progress;
 #define BVH_QNODE_LEAF_SIZE	1
 #define BVH_ALIGN		4096
 #define TRI_NODE_SIZE	3
-#define BVH_UNALIGNED_NODE_SIZE 5
-#define BVH_UNALIGNED_NODE_LEAF_SIZE 5
+#define BVH_UNALIGNED_NODE_SIZE 9
+#define BVH_UNALIGNED_NODE_LEAF_SIZE 1
 
 /* Packed BVH
  *
@@ -135,8 +135,10 @@ protected:
 	                          const BVHStackEntry& e0,
 	                          const BVHStackEntry& e1);
 	void pack_unaligned_node(int idx,
-	                         const Transform& aligned_space,
-	                         const BoundBox& bounds,
+	                         const Transform& aligned_space0,
+	                         const Transform& aligned_space1,
+	                         const BoundBox& b0,
+	                         const BoundBox& b1,
 	                         int c0, int c1,
 	                         uint visibility0, uint visibility1);
 
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 9e9acbf..49d26ba 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -26,8 +26,8 @@
 #define BVH_QNODE_SIZE 7
 #define BVH_QNODE_LEAF_SIZE 1
 #define TRI_NODE_SIZE 3
-#define BVH_UNALIGNED_NODE_SIZE 5
-#define BVH_UNALIGNED_NODE_LEAF_SIZE 5
+#define BVH_UNALIGNED_NODE_SIZE 9
+#define BVH_UNALIGNED_NODE_LEAF_SIZE 1
 
 /* silly workaround for float extended precision that happens when compiling
  * without sse support on x86, it results in different results for float ops
diff --git a/intern/cycles/kernel/geom/geom_bvh_hair.h b/intern/cycles/kernel/geom/geom_bvh_hair.h
index aed266a..79cf16d 100644
--- a/intern/cycles/kernel/geom/geom_bvh_hair.h
+++ b/intern/cycles/kernel/geom/geom_bvh_hair.h
@@ -15,35 +15,36 @@
  */
 
 ccl_device_inline Transform bvh_hair_fetch_aligned_space(KernelGlobals *kg,
-                                                         int nodeAddr)
+                                                         int nodeAddr,
+                                                         int child)
 {
 	Transform aligned_space;
-	if(nodeAddr >= 0) {
+	if(child == 0) {
 		aligned_space.x = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+0);
 		aligned_space.y = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+1);
 		aligned_space.z = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+2);
 		aligned_space.w = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+3);
 	}
 	else {
-		int leafAddr = -nodeAddr-1;
-		aligned_space.x = kernel_tex_fetch(__bvh_curve_leaf_nodes, leafAddr*BVH_UNALIGNED_NODE_LEAF_SIZE+1);
-		aligned_space.y = kernel_tex_fetch(__bvh_curve_leaf_nodes, leafAddr*BVH_UNALIGNED_NODE_LEAF_SIZE+2);
-		aligned_space.z = kernel_tex_fetch(__bvh_curve_leaf_nodes, leafAddr*BVH_UNALIGNED_NODE_LEAF_SIZE+3);
-		aligned_space.w = kernel_tex_fetch(__bvh_curve_leaf_nodes, leafAddr*BVH_UNALIGNED_NODE_LEAF_SIZE+4);
+		aligned_space.x = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+4);
+		aligned_space.y = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+5);
+		aligned_space.z = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+6);
+		aligned_space.w = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+7);
 	}
 	return aligned_space;
 }
 
-ccl_device_inline bool bvh_hair_intersect_single_node(KernelGlobals *kg,
-                                                      const float3 P,
-                                                      const float3 dir,
-                                                      const float t,
-                                                      const float difl,
-                                                      const float extmax,
-                                                      int nodeAddr,
-                                                      float *dist)
+ccl_device_inline bool bvh_hair_intersect_child(KernelGlobals *kg,
+                                                const float3 P,
+                                                const float3 dir,
+                                                const float t,
+                                                const float difl,
+                                                const float extmax,
+                                                int nodeAddr,
+                                                int child,
+                                                float *dist)
 {
-	Transform aligned_space  = bvh_hair_fetch_aligned_space(kg, nodeAddr);
+	Transform aligned_space  = bvh_hair_fetch_aligned_space(kg, nodeAddr, child);
 	float3 aligned_dir = transform_direction(&aligned_space, dir);
 	float3 aligned_P = transform_point(&aligned_space, P);
 	float3 nrdir = -1.0f * bvh_inverse_direction(aligned_dir);
@@ -85,20 +86,11 @@ int ccl_device bvh_hair_intersect_node(KernelGlobals *kg,
                                        float dist[2])
 {
 	int mask = 0;
-	if(bvh_hair_intersect_single_node(kg, P, dir, t, difl, extmax, nodeAddr, NULL)) {
-		float4 cnodes = kernel_tex_fetch(__bvh_curve_nodes, nodeAddr*BVH_UNALIGNED_NODE_SIZE+4);
-		int nodeAddrChild0 = __float_as_int(cnodes.x);
-		int nodeAddrChild1 = __float_as_int(cnodes.y);
-		if(bvh_hair_intersect_single_node(kg, P, dir, t, difl, extmax, nodeAddrChild0, &dist[0])) {
-			if(__float_as_uint(cnodes.z) & visibility) {
-				mask |= 1;
-			}
-		}
-		if(bvh_hair_intersect_single_node(kg, P, dir, t, difl, extmax, nodeAddrChild1, &dist[1])) {
-			if(__float_as_uint(cnodes.w) & visibility) {
-				mask |= 2;
-			}
-		}
+	if(bvh_hair_intersect_child(kg, P, dir, t, difl, extmax, nodeAddr, 0, &dist[0])) {
+		mask |= 1;
+	}
+	if(bvh_hair_interse

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list