[Bf-blender-cvs] [b0f08e8220b] soc-2019-embree-gpu: Cycle: Add linear boundbox interpolation on CPU
MATILLAT Quentin
noreply at git.blender.org
Sun Aug 11 12:20:15 CEST 2019
Commit: b0f08e8220b7a1cc1ca1f297cd2393c053624abf
Author: MATILLAT Quentin
Date: Sun Aug 11 12:17:17 2019 +0200
Branches: soc-2019-embree-gpu
https://developer.blender.org/rBb0f08e8220b7a1cc1ca1f297cd2393c053624abf
Cycle: Add linear boundbox interpolation on CPU
===================================================================
M intern/cycles/kernel/bvh/bvh_local.h
M intern/cycles/kernel/bvh/bvh_nodes.h
M intern/cycles/kernel/bvh/bvh_shadow_all.h
M intern/cycles/kernel/bvh/bvh_traversal.h
M intern/cycles/kernel/bvh/bvh_volume.h
M intern/cycles/kernel/bvh/bvh_volume_all.h
===================================================================
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 98eb5a89c0e..71a8885f8da 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -143,6 +143,7 @@ ccl_device_inline
shufflexyz,
node_addr,
PATH_RAY_ALL_VISIBILITY,
+ ray->time,
dist);
#endif // __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index da613cc9f78..db9203ce328 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -190,34 +190,59 @@ int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
- const int node_addr,
+ int node_addr,
const uint visibility,
+ const float rayTime,
float dist[2])
{
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
/* fetch node data */
- const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
+ const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data;
+
+ /* intersect ray against child nodes */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr++);
+ ssef x = bvh_nodes[node_addr++];
+ ssef y = bvh_nodes[node_addr++];
+ ssef z = bvh_nodes[node_addr++];
+
+ if (__float_as_uint(cnodes.x) & PATH_RAY_NODE_MB) {
+ const ssef dx = bvh_nodes[node_addr++];
+ const ssef dy = bvh_nodes[node_addr++];
+ const ssef dz = bvh_nodes[node_addr++];
+
+ x += rayTime * dx;
+ y += rayTime * dy;
+ z += rayTime * dz;
+ }
/* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+ const ssef tminmaxx = (shuffle_swap(x, shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(y, shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(z, shufflexyz[2]) - Psplat[2]) * idirsplat[2];
/* calculate { c0min, c1min, -c0max, -c1max} */
ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+ sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
dist[0] = tminmax[0];
dist[1] = tminmax[1];
+ if (__float_as_uint(cnodes.x) & PATH_RAY_NODE_4D) {
+ const ssef timeLimit = bvh_nodes[node_addr++];
+
+ const sseb timeMin = timeLimit < rayTime;
+ const sseb timeMax = timeLimit > rayTime;
+
+ lrhit &= timeMin & shuffle<2, 3, 0, 1>(timeMax);
+ }
+
int mask = movemask(lrhit);
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
return cmask;
@@ -289,6 +314,7 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const shuffle_swap_t shufflexyz[3],
const int node_addr,
const uint visibility,
+ const float rayTime,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
@@ -298,7 +324,7 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
}
else {
return bvh_aligned_node_intersect(
- kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
+ kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, rayTime, dist);
}
}
#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index c008b7cde42..254c9187d96 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -138,6 +138,7 @@ ccl_device_inline
shufflexyz,
node_addr,
visibility,
+ ray->time,
dist);
#endif // __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 7939e6084b7..0e80543fd6a 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -135,6 +135,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
shufflexyz,
node_addr,
visibility,
+ ray->time,
dist);
}
#endif // __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 49839ec83a0..b28b5cee9ec 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -134,6 +134,7 @@ ccl_device_inline
shufflexyz,
node_addr,
visibility,
+ ray->time,
dist);
#endif // __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 8399e92cc92..e8194fab822 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -138,6 +138,7 @@ ccl_device_inline
shufflexyz,
node_addr,
visibility,
+ ray->time,
dist);
#endif // __KERNEL_SSE2__
More information about the Bf-blender-cvs
mailing list