[Bf-blender-cvs] [770f74e] cycles_hair_bvh: Cycles: Initial implementation of QBVH traversal for unaligned nodes

Sergey Sharybin noreply at git.blender.org
Fri Apr 29 18:38:00 CEST 2016


Commit: 770f74e0c03dff4bafe084441b5019fce51596f9
Author: Sergey Sharybin
Date:   Fri Apr 29 13:59:18 2016 +0200
Branches: cycles_hair_bvh
https://developer.blender.org/rB770f74e0c03dff4bafe084441b5019fce51596f9

Cycles: Initial implementation of QBVH traversal for unaligned nodes

Implements both QBVH packing and traversal on SSE2+ processors.

On a test scene, render time goes from 93sec (in master) down to
55sec. Kind of impressive, let's hope it's not because of some bug and
that we can keep such a nice speedup.

Also finished some non-SIMD binary BVH code. On a test scene this gives
about a 20% speedup compared to 2.77a.

Well, let's verify everything, finish some remaining TODOs and make
the branch ready for master.

===================================================================

M	intern/cycles/bvh/bvh.cpp
M	intern/cycles/bvh/bvh.h
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/geom/geom.h
M	intern/cycles/kernel/geom/geom_bvh.h
A	intern/cycles/kernel/geom/geom_bvh_curve.h
D	intern/cycles/kernel/geom/geom_bvh_hair.h
A	intern/cycles/kernel/geom/geom_bvh_shadow_curve.h
D	intern/cycles/kernel/geom/geom_bvh_shadow_hair.h
A	intern/cycles/kernel/geom/geom_bvh_traversal_curve.h
D	intern/cycles/kernel/geom/geom_bvh_traversal_hair.h
M	intern/cycles/kernel/geom/geom_qbvh.h
A	intern/cycles/kernel/geom/geom_qbvh_curve.h
A	intern/cycles/kernel/geom/geom_qbvh_shadow_curve.h
D	intern/cycles/kernel/geom/geom_qbvh_shadow_hair.h
A	intern/cycles/kernel/geom/geom_qbvh_traversal_curve.h
D	intern/cycles/kernel/geom/geom_qbvh_traversal_hair.h
M	intern/cycles/render/mesh.cpp

===================================================================

diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 0bf746c..fdb4d23 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -193,11 +193,14 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 	 * top level BVH, adjusting indexes and offsets where appropriate.
 	 */
 	const bool use_qbvh = params.use_qbvh;
-	size_t nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
-	size_t nsize_leaf = (use_qbvh)? BVH_QNODE_LEAF_SIZE: BVH_NODE_LEAF_SIZE;
+	size_t nsize, nsize_leaf;
 	if(params.use_unaligned_nodes) {
-		nsize = BVH_UNALIGNED_NODE_SIZE;
-		nsize_leaf = BVH_UNALIGNED_NODE_LEAF_SIZE;
+		nsize = (use_qbvh)? BVH_UNALIGNED_QNODE_SIZE: BVH_UNALIGNED_NODE_SIZE;
+		nsize_leaf = (use_qbvh)? BVH_UNALIGNED_QNODE_LEAF_SIZE: BVH_UNALIGNED_NODE_LEAF_SIZE;
+	}
+	else {
+		nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
+		nsize_leaf = (use_qbvh)? BVH_QNODE_LEAF_SIZE: BVH_NODE_LEAF_SIZE;
 	}
 
 	/* Adjust primitive index to point to the triangle in the global array, for
@@ -365,9 +368,12 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 			/* For QBVH we're packing a child bbox into 6 float4,
 			 * and for regular BVH they're packed into 3 float4.
 			 */
-			size_t nsize_bbox = (use_qbvh)? 6: 3;
+			size_t nsize_bbox;
 			if(params.use_unaligned_nodes) {
-				nsize_bbox = 8;
+				nsize_bbox = (use_qbvh)? 13: 8;
+			}
+			else {
+				nsize_bbox = (use_qbvh)? 6: 3;
 			}
 			int4 *bvh_nodes = &bvh->pack.nodes[0];
 			size_t bvh_nodes_size = bvh->pack.nodes.size(); 
@@ -789,6 +795,122 @@ void QBVH::pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num)
 	memcpy(&pack.nodes[e.idx * BVH_QNODE_SIZE], data, sizeof(float4)*BVH_QNODE_SIZE);
 }
 
+void QBVH::pack_unaligned_leaf(const BVHStackEntry& e, const LeafNode *leaf)
+{
+	float4 data[BVH_UNALIGNED_QNODE_LEAF_SIZE];
+	memset(data, 0, sizeof(data));
+	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
+		/* object */
+		data[0].x = __int_as_float(~(leaf->m_lo));
+		data[0].y = __int_as_float(0);
+	}
+	else {
+		/* triangle */
+		data[0].x = __int_as_float(leaf->m_lo);
+		data[0].y = __int_as_float(leaf->m_hi);
+	}
+	data[0].z = __uint_as_float(leaf->m_visibility);
+	if(leaf->num_triangles() != 0) {
+		data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
+	}
+	memcpy(&pack.leaf_nodes[e.idx * BVH_UNALIGNED_QNODE_LEAF_SIZE],
+	       data,
+	       sizeof(float4)*BVH_UNALIGNED_QNODE_LEAF_SIZE);
+}
+
+void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
+                                const BVHStackEntry *en,
+                                int num)
+{
+	float4 data[BVH_UNALIGNED_QNODE_SIZE];
+	memset(data, 0, sizeof(data));
+
+	bool has_unaligned = false;
+	for(int i = 0; i < num; i++) {
+		if(en[i].node->is_unaligned()) {
+			has_unaligned = true;
+			break;
+		}
+	}
+
+	if(has_unaligned) {
+		for(int i = 0; i < num; i++) {
+			Transform space = BVHUnaligned::compute_node_transform(
+			        en[i].node->m_bounds,
+			        en[i].node->m_aligned_space);
+
+			data[0][i] = 1.0f;
+
+			data[1][i] = space.x.x;
+			data[2][i] = space.x.y;
+			data[3][i] = space.x.z;
+
+			data[4][i] = space.y.x;
+			data[5][i] = space.y.y;
+			data[6][i] = space.y.z;
+
+			data[7][i] = space.z.x;
+			data[8][i] = space.z.y;
+			data[9][i] = space.z.z;
+
+			data[10][i] = space.x.w;
+			data[11][i] = space.y.w;
+			data[12][i] = space.z.w;
+
+			data[13][i] = __int_as_float(en[i].encodeIdx());
+		}
+		for(int i = num; i < 4; i++) {
+			data[0][i] = 1.0f;
+			/* We store BB which would never be recorded as intersection
+			 * so kernel might safely assume there are always 4 child nodes.
+			 */
+			for(int j = 1; j < 13; ++j) {
+				data[j][i] = 0.0f;
+			}
+			data[13][i] = __int_as_float(0);
+		}
+	}
+	else {
+		for(int i = 0; i < num; i++) {
+			float3 bb_min = en[i].node->m_bounds.min;
+			float3 bb_max = en[i].node->m_bounds.max;
+
+			data[0][i] = -1.0f;
+
+			data[1][i] = bb_min.x;
+			data[2][i] = bb_max.x;
+			data[3][i] = bb_min.y;
+			data[4][i] = bb_max.y;
+			data[5][i] = bb_min.z;
+			data[6][i] = bb_max.z;
+
+			data[13][i] = __int_as_float(en[i].encodeIdx());
+		}
+
+		for(int i = num; i < 4; i++) {
+			/* We store BB which would never be recorded as intersection
+			 * so kernel might safely assume there are always 4 child nodes.
+			 */
+			data[0][i] = -1.0f;
+
+			data[1][i] = FLT_MAX;
+			data[2][i] = -FLT_MAX;
+
+			data[3][i] = FLT_MAX;
+			data[4][i] = -FLT_MAX;
+
+			data[5][i] = FLT_MAX;
+			data[6][i] = -FLT_MAX;
+
+			data[13][i] = __int_as_float(0);
+		}
+	}
+
+	memcpy(&pack.nodes[e.idx * BVH_UNALIGNED_QNODE_SIZE],
+	       data,
+	       sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
+}
+
 /* Quad SIMD Nodes */
 
 void QBVH::pack_nodes(const BVHNode *root)
@@ -802,13 +924,14 @@ void QBVH::pack_nodes(const BVHNode *root)
 	pack.leaf_nodes.clear();
 
 	/* for top level BVH, first merge existing BVH's so we know the offsets */
+	const int nsize = params.use_unaligned_nodes? BVH_UNALIGNED_QNODE_SIZE: BVH_QNODE_SIZE;
+	const int nsize_leaf = params.use_unaligned_nodes? BVH_UNALIGNED_QNODE_LEAF_SIZE: BVH_QNODE_LEAF_SIZE;
 	if(params.top_level) {
-		pack_instances(node_size*BVH_QNODE_SIZE,
-		               leaf_node_size*BVH_QNODE_LEAF_SIZE);
+		pack_instances(node_size*nsize, leaf_node_size*nsize_leaf);
 	}
 	else {
-		pack.nodes.resize(node_size*BVH_QNODE_SIZE);
-		pack.leaf_nodes.resize(leaf_node_size*BVH_QNODE_LEAF_SIZE);
+		pack.nodes.resize(node_size*nsize);
+		pack.leaf_nodes.resize(leaf_node_size*nsize_leaf);
 	}
 
 	int nextNodeIdx = 0, nextLeafNodeIdx = 0;
@@ -829,7 +952,12 @@ void QBVH::pack_nodes(const BVHNode *root)
 		if(e.node->is_leaf()) {
 			/* leaf node */
 			const LeafNode* leaf = reinterpret_cast<const LeafNode*>(e.node);
-			pack_leaf(e, leaf);
+			if(params.use_unaligned_nodes) {
+				pack_unaligned_leaf(e, leaf);
+			}
+			else {
+				pack_leaf(e, leaf);
+			}
 		}
 		else {
 			/* inner node */
@@ -870,7 +998,12 @@ void QBVH::pack_nodes(const BVHNode *root)
 			}
 
 			/* set node */
-			pack_inner(e, &stack[stack.size()-numnodes], numnodes);
+			if(params.use_unaligned_nodes) {
+				pack_unaligned_inner(e, &stack[stack.size()-numnodes], numnodes);
+			}
+			else {
+				pack_inner(e, &stack[stack.size()-numnodes], numnodes);
+			}
 		}
 	}
 
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 3099e2c..1eb619f 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -39,8 +39,11 @@ class Progress;
 #define BVH_QNODE_LEAF_SIZE	1
 #define BVH_ALIGN		4096
 #define TRI_NODE_SIZE	3
+
 #define BVH_UNALIGNED_NODE_SIZE 9
 #define BVH_UNALIGNED_NODE_LEAF_SIZE 1
+#define BVH_UNALIGNED_QNODE_SIZE 14
+#define BVH_UNALIGNED_QNODE_LEAF_SIZE 1
 
 /* Packed BVH
  *
@@ -161,9 +164,16 @@ protected:
 
 	/* pack */
 	void pack_nodes(const BVHNode *root);
+
 	void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
 	void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
 
+	void pack_unaligned_leaf(const BVHStackEntry& e,
+	                         const LeafNode *leaf);
+	void pack_unaligned_inner(const BVHStackEntry& e,
+	                          const BVHStackEntry *en,
+	                          int num);
+
 	/* refit */
 	void refit_nodes();
 	void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 2cd7222..be93df2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -137,11 +137,12 @@ set(SRC_GEOM_HEADERS
 	geom/geom.h
 	geom/geom_attribute.h
 	geom/geom_bvh.h
+	geom/geom_bvh_curve.h
 	geom/geom_bvh_shadow.h
-	geom/geom_bvh_shadow_hair.h
+	geom/geom_bvh_shadow_curve.h
 	geom/geom_bvh_subsurface.h
 	geom/geom_bvh_traversal.h
-	geom/geom_bvh_traversal_hair.h
+	geom/geom_bvh_traversal_curve.h
 	geom/geom_bvh_volume.h
 	geom/geom_bvh_volume_all.h
 	geom/geom_curve.h
@@ -150,11 +151,12 @@ set(SRC_GEOM_HEADERS
 	geom/geom_object.h
 	geom/geom_primitive.h
 	geom/geom_qbvh.h
+	geom/geom_qbvh_curve.h
 	geom/geom_qbvh_shadow.h
-	geom/geom_qbvh_shadow_hair.h
+	geom/geom_qbvh_shadow_curve.h
 	geom/geom_qbvh_subsurface.h
 	geom/geom_qbvh_traversal.h
-	geom/geom_qbvh_traversal_hair.h
+	geom/geom_qbvh_traversal_curve.h
 	geom/geom_qbvh_volume.h
 	geom/geom_qbvh_volume_all.h
 	geom/geom_triangle.h
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 49d26ba..2dd1dae 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -26,8 +26,11 @@
 #define BVH_QNODE_SIZE 7
 #define BVH_QNODE_LEAF_SIZE 1
 #define TRI_NODE_SIZE 3
+
 #define BVH_UNALIGNED_NODE_SIZE 9
 #define BVH_UNALIGNED_NODE_LEAF_SIZE 1
+#define BVH_UNALIGNED_QNODE_SIZE 14
+#define BVH_UNALIGNED_QNODE_LEAF_SIZE 1
 
 /* silly workaround for float extended precision that happens when compiling
  * without sse support on x86, it results in different results for float ops
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index beccab6..e9254b3 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -49,12 +49,13 @@ CCL_NAMESPACE_BEGIN
 #define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
 
 /* Common QBVH functions. */
+#include "geom_bvh_curve.h"
+
 #ifdef __QBVH__
 #  include "geom_qbvh.h"
+#  include "geom_qbvh_curve.h"
 #endif
 
-#include "geom_bvh_hair.h"
-
 /* Regular BVH traversal */
 
 #define BVH_FUNCTION_NAME bvh_intersect
@@ -76,15 +77,15 @@ CCL_NAMESPACE_BEGIN
 /* Regular Hair BVH traversal */
 
 #if defined(__HAIR__)
-#  define BVH_FUNCTION_NAME bvh_intersect_hair
+#  define BVH_FUNCTION_NAME bvh_intersect_curve
 #  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-#  include "geom_bvh_traversal_hair.h"
+#  include "geom_bvh_traversal_curve.h"
 #endif
 
 #if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-#  define BVH_FUNCTION_NAME bvh_intersect_hair_motion
+#  define BVH_FUNCTION_NAME bvh_intersect_curve_motion
 #  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-#  include "geom_bvh_traversal_hair.h"
+#  include

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list