[Bf-blender-cvs] [345ed4d] master: Cycles: Don't do node visibility check in subsurface and volume traversal
Sergey Sharybin
noreply at git.blender.org
Wed Dec 24 22:56:11 CET 2014
Commit: 345ed4dd105aca5dbe6fbc3936ef2af83c16544b
Author: Sergey Sharybin
Date: Tue Dec 16 20:39:31 2014 +0500
Branches: master
https://developer.blender.org/rB345ed4dd105aca5dbe6fbc3936ef2af83c16544b
Cycles: Don't do node visibility check in subsurface and volume traversal
The visibility flags are set to all-visibility anyway, so there was no reason
to perform that test.
TODO: We need to investigate whether having primitive intersection functions
that don't do the visibility check gives any speedup here as well.
===================================================================
M intern/cycles/kernel/geom/geom_bvh_subsurface.h
M intern/cycles/kernel/geom/geom_bvh_volume.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index a8f57cf..5f1bd41 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -54,7 +54,6 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
int object = OBJECT_NONE;
float isect_t = ray->t;
- const uint visibility = PATH_RAY_ALL_VISIBILITY;
uint num_hits = 0;
#if FEATURE(BVH_MOTION)
@@ -118,14 +117,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
/* decide which nodes to traverse next */
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
-#else
traverseChild0 = (c0max >= c0min);
traverseChild1 = (c1max >= c1min);
-#endif
#else // __KERNEL_SSE2__
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
@@ -145,14 +138,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
-#else
traverseChild0 = (movemask(lrhit) & 1);
traverseChild1 = (movemask(lrhit) & 2);
-#endif
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.x);
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
index 16c16be..d9425ca 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -121,14 +121,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
/* decide which nodes to traverse next */
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
-#else
traverseChild0 = (c0max >= c0min);
traverseChild1 = (c1max >= c1min);
-#endif
#else // __KERNEL_SSE2__
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
@@ -149,14 +143,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
-#ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
-#else
traverseChild0 = (movemask(lrhit) & 1);
traverseChild1 = (movemask(lrhit) & 2);
-#endif
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.x);
More information about the Bf-blender-cvs
mailing list