[Bf-blender-cvs] [ce51c89] cycles_bvh: Cycles: Fixes for regular BVH traversal and unaligned nodes
Sergey Sharybin
noreply at git.blender.org
Fri Jun 17 16:21:53 CEST 2016
Commit: ce51c895af82244ec0bd79cf0ec94dc9aa7c9797
Author: Sergey Sharybin
Date: Fri Jun 17 13:06:59 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rBce51c895af82244ec0bd79cf0ec94dc9aa7c9797
Cycles: Fixes for regular BVH traversal and unaligned nodes
- Support proper packing
- Fix array offsets in intersectors
===================================================================
M intern/cycles/bvh/bvh.cpp
M intern/cycles/bvh/bvh_node.cpp
M intern/cycles/bvh/bvh_node.h
M intern/cycles/kernel/geom/geom_bvh_nodes.h
===================================================================
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 7c144a4..37d72fc 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -410,6 +410,13 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* Regular BVH */
+static bool node_bvh_is_unaligned(const BVHNode *node)
+{
+ const BVHNode *node0 = node->get_child(0),
+ *node1 = node->get_child(1);
+ return node0->is_unaligned() || node1->is_unaligned();
+}
+
RegularBVH::RegularBVH(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
{
@@ -522,22 +529,30 @@ void RegularBVH::pack_unaligned_node(int idx,
void RegularBVH::pack_nodes(const BVHNode *root)
{
- size_t tot_node_size = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
- size_t leaf_node_size = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- size_t node_size = tot_node_size - leaf_node_size;
-
- /* resize arrays */
+ const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
+ const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+ assert(num_leaf_nodes <= num_nodes);
+ const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+ size_t node_size;
+ if(params.use_unaligned_nodes) {
+ const size_t num_unaligned_nodes =
+ root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
+ node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
+ (num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
+ }
+ else {
+ node_size = num_inner_nodes * BVH_NODE_SIZE;
+ }
+ /* Resize arrays */
pack.nodes.clear();
-
- /* for top level BVH, first merge existing BVH's so we know the offsets */
- const int nsize = params.use_unaligned_nodes? BVH_UNALIGNED_NODE_SIZE: BVH_NODE_SIZE;
+ pack.leaf_nodes.clear();
+ /* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
- pack_instances(node_size*nsize,
- leaf_node_size*BVH_NODE_LEAF_SIZE);
+ pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
}
else {
- pack.nodes.resize(node_size*nsize);
- pack.leaf_nodes.resize(leaf_node_size*BVH_NODE_LEAF_SIZE);
+ pack.nodes.resize(node_size);
+ pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
@@ -549,7 +564,9 @@ void RegularBVH::pack_nodes(const BVHNode *root)
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += nsize;
+ nextNodeIdx += node_bvh_is_unaligned(root)
+ ? BVH_UNALIGNED_NODE_SIZE
+ : BVH_NODE_SIZE;
}
while(stack.size()) {
@@ -570,7 +587,9 @@ void RegularBVH::pack_nodes(const BVHNode *root)
}
else {
idx[i] = nextNodeIdx;
- nextNodeIdx += nsize;
+ nextNodeIdx += node_bvh_is_unaligned(e.node->get_child(i))
+ ? BVH_UNALIGNED_NODE_SIZE
+ : BVH_NODE_SIZE;
}
}
@@ -580,7 +599,7 @@ void RegularBVH::pack_nodes(const BVHNode *root)
pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]);
}
}
-
+ assert(node_size == nextNodeIdx);
/* root index to start traversal at, to handle case of single leaf node */
pack.root_index = (root->is_leaf())? -1: 0;
}
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 41e372f..f5cd699 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -71,6 +71,24 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
cnt = 1;
}
break;
+ case BVH_STAT_ALIGNED_INNER_COUNT:
+ if(!is_leaf()) {
+ bool has_unaligned = false;
+ for(int j = 0; j < num_children(); j++) {
+ has_unaligned |= get_child(j)->is_unaligned();
+ }
+ cnt += has_unaligned? 0: 1;
+ }
+ break;
+ case BVH_STAT_UNALIGNED_INNER_COUNT:
+ if(!is_leaf()) {
+ bool has_unaligned = false;
+ for(int j = 0; j < num_children(); j++) {
+ has_unaligned |= get_child(j)->is_unaligned();
+ }
+ cnt += has_unaligned? 1: 0;
+ }
+ break;
case BVH_STAT_ALIGNED_INNER_QNODE_COUNT:
{
bool has_unaligned = false;
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index b078cb9..53f85f7 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -33,6 +33,8 @@ enum BVH_STAT {
BVH_STAT_QNODE_COUNT,
BVH_STAT_ALIGNED_COUNT,
BVH_STAT_UNALIGNED_COUNT,
+ BVH_STAT_ALIGNED_INNER_COUNT,
+ BVH_STAT_UNALIGNED_INNER_COUNT,
BVH_STAT_ALIGNED_INNER_QNODE_COUNT,
BVH_STAT_UNALIGNED_INNER_QNODE_COUNT,
BVH_STAT_ALIGNED_LEAF_COUNT,
diff --git a/intern/cycles/kernel/geom/geom_bvh_nodes.h b/intern/cycles/kernel/geom/geom_bvh_nodes.h
index cb34dd4..f7b9cec 100644
--- a/intern/cycles/kernel/geom/geom_bvh_nodes.h
+++ b/intern/cycles/kernel/geom/geom_bvh_nodes.h
@@ -20,16 +20,16 @@ ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
{
Transform space;
if(child == 0) {
- space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
- space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
- space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+ space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+ space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+ space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+ space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+4);
}
else {
- space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+4);
- space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+5);
- space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+6);
- space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+5);
+ space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+6);
+ space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
}
return space;
}
@@ -45,10 +45,10 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
{
/* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
/* intersect ray against child nodes */
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
@@ -74,8 +74,8 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility))? 2: 0);
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
#else
return ((c0max >= c0min)? 1: 0) |
((c1max >= c1min)? 2: 0);
@@ -93,10 +93,10 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
{
/* fetch node data */
- float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
- float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
- float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
/* intersect ray against child nodes */
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
@@ -135,8 +135,8 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- return (((c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility))? 1: 0) |
- (((c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility))? 2: 0);
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
#else
return ((c0max >= c0min)? 1: 0) |
((c1max >= c1min)? 2: 0);
@@ -222,10 +222,10 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float *dist)
{
int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.z) & visibility))
+ if((__float_as_uint(cnodes.x) & visibility))
#endif
{
mask |= 1;
@@ -233,7 +233,7 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
}
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.w) & visibility))
+ if((__float_as_uint(cnodes.y) & visibility))
#endif
{
mask |= 2;
@@ -253,10 +253,10 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
float *dist)
{
int mask = 0;
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.z) & visibility))
+ if((__float_as_uint(cnodes.x) & visibility))
#endif
{
mask |= 1;
@@ -264,7 +264,7 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
}
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(cnodes.w) & visibility))
+ if((__float_as_uint(cnodes.y) & visibility))
#endif
{
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list