[Bf-blender-cvs] [770f74e] cycles_hair_bvh: Cycles: Initial implementation of QBVH traversal for unaligned nodes
Sergey Sharybin
noreply at git.blender.org
Fri Apr 29 18:38:00 CEST 2016
Commit: 770f74e0c03dff4bafe084441b5019fce51596f9
Author: Sergey Sharybin
Date: Fri Apr 29 13:59:18 2016 +0200
Branches: cycles_hair_bvh
https://developer.blender.org/rB770f74e0c03dff4bafe084441b5019fce51596f9
Cycles: Initial implementation of QBVH traversal for unaligned nodes
Implements both QBVH packing and traversal on SSE2+ processors.
With a test scene, render time goes from 93sec (in master) down
to 55sec. Kind of impressive, let's hope it's not because of some bug and
that we can keep such a nice speedup.
Also finished some non-SIMD binary BVH code. On a test scene got about
a 20% speedup compared to 2.77a.
Well, let's verify everything, finish some remaining TODOs and make
the branch ready for master.
===================================================================
M intern/cycles/bvh/bvh.cpp
M intern/cycles/bvh/bvh.h
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/geom/geom.h
M intern/cycles/kernel/geom/geom_bvh.h
A intern/cycles/kernel/geom/geom_bvh_curve.h
D intern/cycles/kernel/geom/geom_bvh_hair.h
A intern/cycles/kernel/geom/geom_bvh_shadow_curve.h
D intern/cycles/kernel/geom/geom_bvh_shadow_hair.h
A intern/cycles/kernel/geom/geom_bvh_traversal_curve.h
D intern/cycles/kernel/geom/geom_bvh_traversal_hair.h
M intern/cycles/kernel/geom/geom_qbvh.h
A intern/cycles/kernel/geom/geom_qbvh_curve.h
A intern/cycles/kernel/geom/geom_qbvh_shadow_curve.h
D intern/cycles/kernel/geom/geom_qbvh_shadow_hair.h
A intern/cycles/kernel/geom/geom_qbvh_traversal_curve.h
D intern/cycles/kernel/geom/geom_qbvh_traversal_hair.h
M intern/cycles/render/mesh.cpp
===================================================================
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 0bf746c..fdb4d23 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -193,11 +193,14 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
* top level BVH, adjusting indexes and offsets where appropriate.
*/
const bool use_qbvh = params.use_qbvh;
- size_t nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
- size_t nsize_leaf = (use_qbvh)? BVH_QNODE_LEAF_SIZE: BVH_NODE_LEAF_SIZE;
+ size_t nsize, nsize_leaf;
if(params.use_unaligned_nodes) {
- nsize = BVH_UNALIGNED_NODE_SIZE;
- nsize_leaf = BVH_UNALIGNED_NODE_LEAF_SIZE;
+ nsize = (use_qbvh)? BVH_UNALIGNED_QNODE_SIZE: BVH_UNALIGNED_NODE_SIZE;
+ nsize_leaf = (use_qbvh)? BVH_UNALIGNED_QNODE_LEAF_SIZE: BVH_UNALIGNED_NODE_LEAF_SIZE;
+ }
+ else {
+ nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
+ nsize_leaf = (use_qbvh)? BVH_QNODE_LEAF_SIZE: BVH_NODE_LEAF_SIZE;
}
/* Adjust primitive index to point to the triangle in the global array, for
@@ -365,9 +368,12 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* For QBVH we're packing a child bbox into 6 float4,
* and for regular BVH they're packed into 3 float4.
*/
- size_t nsize_bbox = (use_qbvh)? 6: 3;
+ size_t nsize_bbox;
if(params.use_unaligned_nodes) {
- nsize_bbox = 8;
+ nsize_bbox = (use_qbvh)? 13: 8;
+ }
+ else {
+ nsize_bbox = (use_qbvh)? 6: 3;
}
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
@@ -789,6 +795,122 @@ void QBVH::pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num)
memcpy(&pack.nodes[e.idx * BVH_QNODE_SIZE], data, sizeof(float4)*BVH_QNODE_SIZE);
}
+void QBVH::pack_unaligned_leaf(const BVHStackEntry& e, const LeafNode *leaf)
+{
+ float4 data[BVH_UNALIGNED_QNODE_LEAF_SIZE];
+ memset(data, 0, sizeof(data));
+ if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
+ /* object */
+ data[0].x = __int_as_float(~(leaf->m_lo));
+ data[0].y = __int_as_float(0);
+ }
+ else {
+ /* triangle */
+ data[0].x = __int_as_float(leaf->m_lo);
+ data[0].y = __int_as_float(leaf->m_hi);
+ }
+ data[0].z = __uint_as_float(leaf->m_visibility);
+ if(leaf->num_triangles() != 0) {
+ data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
+ }
+ memcpy(&pack.leaf_nodes[e.idx * BVH_UNALIGNED_QNODE_LEAF_SIZE],
+ data,
+ sizeof(float4)*BVH_UNALIGNED_QNODE_LEAF_SIZE);
+}
+
+void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num)
+{
+ float4 data[BVH_UNALIGNED_QNODE_SIZE];
+ memset(data, 0, sizeof(data));
+
+ bool has_unaligned = false;
+ for(int i = 0; i < num; i++) {
+ if(en[i].node->is_unaligned()) {
+ has_unaligned = true;
+ break;
+ }
+ }
+
+ if(has_unaligned) {
+ for(int i = 0; i < num; i++) {
+ Transform space = BVHUnaligned::compute_node_transform(
+ en[i].node->m_bounds,
+ en[i].node->m_aligned_space);
+
+ data[0][i] = 1.0f;
+
+ data[1][i] = space.x.x;
+ data[2][i] = space.x.y;
+ data[3][i] = space.x.z;
+
+ data[4][i] = space.y.x;
+ data[5][i] = space.y.y;
+ data[6][i] = space.y.z;
+
+ data[7][i] = space.z.x;
+ data[8][i] = space.z.y;
+ data[9][i] = space.z.z;
+
+ data[10][i] = space.x.w;
+ data[11][i] = space.y.w;
+ data[12][i] = space.z.w;
+
+ data[13][i] = __int_as_float(en[i].encodeIdx());
+ }
+ for(int i = num; i < 4; i++) {
+ data[0][i] = 1.0f;
+ /* We store BB which would never be recorded as intersection
+ * so kernel might safely assume there are always 4 child nodes.
+ */
+ for(int j = 1; j < 13; ++j) {
+ data[j][i] = 0.0f;
+ }
+ data[13][i] = __int_as_float(0);
+ }
+ }
+ else {
+ for(int i = 0; i < num; i++) {
+ float3 bb_min = en[i].node->m_bounds.min;
+ float3 bb_max = en[i].node->m_bounds.max;
+
+ data[0][i] = -1.0f;
+
+ data[1][i] = bb_min.x;
+ data[2][i] = bb_max.x;
+ data[3][i] = bb_min.y;
+ data[4][i] = bb_max.y;
+ data[5][i] = bb_min.z;
+ data[6][i] = bb_max.z;
+
+ data[13][i] = __int_as_float(en[i].encodeIdx());
+ }
+
+ for(int i = num; i < 4; i++) {
+ /* We store BB which would never be recorded as intersection
+ * so kernel might safely assume there are always 4 child nodes.
+ */
+ data[0][i] = -1.0f;
+
+ data[1][i] = FLT_MAX;
+ data[2][i] = -FLT_MAX;
+
+ data[3][i] = FLT_MAX;
+ data[4][i] = -FLT_MAX;
+
+ data[5][i] = FLT_MAX;
+ data[6][i] = -FLT_MAX;
+
+ data[13][i] = __int_as_float(0);
+ }
+ }
+
+ memcpy(&pack.nodes[e.idx * BVH_UNALIGNED_QNODE_SIZE],
+ data,
+ sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
+}
+
/* Quad SIMD Nodes */
void QBVH::pack_nodes(const BVHNode *root)
@@ -802,13 +924,14 @@ void QBVH::pack_nodes(const BVHNode *root)
pack.leaf_nodes.clear();
/* for top level BVH, first merge existing BVH's so we know the offsets */
+ const int nsize = params.use_unaligned_nodes? BVH_UNALIGNED_QNODE_SIZE: BVH_QNODE_SIZE;
+ const int nsize_leaf = params.use_unaligned_nodes? BVH_UNALIGNED_QNODE_LEAF_SIZE: BVH_QNODE_LEAF_SIZE;
if(params.top_level) {
- pack_instances(node_size*BVH_QNODE_SIZE,
- leaf_node_size*BVH_QNODE_LEAF_SIZE);
+ pack_instances(node_size*nsize, leaf_node_size*nsize_leaf);
}
else {
- pack.nodes.resize(node_size*BVH_QNODE_SIZE);
- pack.leaf_nodes.resize(leaf_node_size*BVH_QNODE_LEAF_SIZE);
+ pack.nodes.resize(node_size*nsize);
+ pack.leaf_nodes.resize(leaf_node_size*nsize_leaf);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
@@ -829,7 +952,12 @@ void QBVH::pack_nodes(const BVHNode *root)
if(e.node->is_leaf()) {
/* leaf node */
const LeafNode* leaf = reinterpret_cast<const LeafNode*>(e.node);
- pack_leaf(e, leaf);
+ if(params.use_unaligned_nodes) {
+ pack_unaligned_leaf(e, leaf);
+ }
+ else {
+ pack_leaf(e, leaf);
+ }
}
else {
/* inner node */
@@ -870,7 +998,12 @@ void QBVH::pack_nodes(const BVHNode *root)
}
/* set node */
- pack_inner(e, &stack[stack.size()-numnodes], numnodes);
+ if(params.use_unaligned_nodes) {
+ pack_unaligned_inner(e, &stack[stack.size()-numnodes], numnodes);
+ }
+ else {
+ pack_inner(e, &stack[stack.size()-numnodes], numnodes);
+ }
}
}
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 3099e2c..1eb619f 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -39,8 +39,11 @@ class Progress;
#define BVH_QNODE_LEAF_SIZE 1
#define BVH_ALIGN 4096
#define TRI_NODE_SIZE 3
+
#define BVH_UNALIGNED_NODE_SIZE 9
#define BVH_UNALIGNED_NODE_LEAF_SIZE 1
+#define BVH_UNALIGNED_QNODE_SIZE 14
+#define BVH_UNALIGNED_QNODE_LEAF_SIZE 1
/* Packed BVH
*
@@ -161,9 +164,16 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root);
+
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
+ void pack_unaligned_leaf(const BVHStackEntry& e,
+ const LeafNode *leaf);
+ void pack_unaligned_inner(const BVHStackEntry& e,
+ const BVHStackEntry *en,
+ int num);
+
/* refit */
void refit_nodes();
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 2cd7222..be93df2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -137,11 +137,12 @@ set(SRC_GEOM_HEADERS
geom/geom.h
geom/geom_attribute.h
geom/geom_bvh.h
+ geom/geom_bvh_curve.h
geom/geom_bvh_shadow.h
- geom/geom_bvh_shadow_hair.h
+ geom/geom_bvh_shadow_curve.h
geom/geom_bvh_subsurface.h
geom/geom_bvh_traversal.h
- geom/geom_bvh_traversal_hair.h
+ geom/geom_bvh_traversal_curve.h
geom/geom_bvh_volume.h
geom/geom_bvh_volume_all.h
geom/geom_curve.h
@@ -150,11 +151,12 @@ set(SRC_GEOM_HEADERS
geom/geom_object.h
geom/geom_primitive.h
geom/geom_qbvh.h
+ geom/geom_qbvh_curve.h
geom/geom_qbvh_shadow.h
- geom/geom_qbvh_shadow_hair.h
+ geom/geom_qbvh_shadow_curve.h
geom/geom_qbvh_subsurface.h
geom/geom_qbvh_traversal.h
- geom/geom_qbvh_traversal_hair.h
+ geom/geom_qbvh_traversal_curve.h
geom/geom_qbvh_volume.h
geom/geom_qbvh_volume_all.h
geom/geom_triangle.h
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 49d26ba..2dd1dae 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -26,8 +26,11 @@
#define BVH_QNODE_SIZE 7
#define BVH_QNODE_LEAF_SIZE 1
#define TRI_NODE_SIZE 3
+
#define BVH_UNALIGNED_NODE_SIZE 9
#define BVH_UNALIGNED_NODE_LEAF_SIZE 1
+#define BVH_UNALIGNED_QNODE_SIZE 14
+#define BVH_UNALIGNED_QNODE_LEAF_SIZE 1
/* silly workaround for float extended precision that happens when compiling
* without sse support on x86, it results in different results for float ops
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index beccab6..e9254b3 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -49,12 +49,13 @@ CCL_NAMESPACE_BEGIN
#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
/* Common QBVH functions. */
+#include "geom_bvh_curve.h"
+
#ifdef __QBVH__
# include "geom_qbvh.h"
+# include "geom_qbvh_curve.h"
#endif
-#include "geom_bvh_hair.h"
-
/* Regular BVH traversal */
#define BVH_FUNCTION_NAME bvh_intersect
@@ -76,15 +77,15 @@ CCL_NAMESPACE_BEGIN
/* Regular Hair BVH traversal */
#if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair
+# define BVH_FUNCTION_NAME bvh_intersect_curve
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-# include "geom_bvh_traversal_hair.h"
+# include "geom_bvh_traversal_curve.h"
#endif
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
+# define BVH_FUNCTION_NAME bvh_intersect_curve_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-# include "geom_bvh_traversal_hair.h"
+# include
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list