[Bf-blender-cvs] [c52b147] cycles_bvh: Cycles: Adapt all QBVH traversal to unaligned nodes
Sergey Sharybin
noreply at git.blender.org
Wed Jun 15 11:39:15 CEST 2016
Commit: c52b147e8a8b69389f027f6fc70ee85842223312
Author: Sergey Sharybin
Date: Wed Jun 15 09:57:36 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rBc52b147e8a8b69389f027f6fc70ee85842223312
Cycles: Adapt all QBVH traversal to unaligned nodes
===================================================================
M intern/cycles/kernel/geom/geom_qbvh.h
M intern/cycles/kernel/geom/geom_qbvh_curve.h
M intern/cycles/kernel/geom/geom_qbvh_shadow.h
M intern/cycles/kernel/geom/geom_qbvh_subsurface.h
M intern/cycles/kernel/geom/geom_qbvh_traversal.h
M intern/cycles/kernel/geom/geom_qbvh_volume.h
M intern/cycles/kernel/geom/geom_qbvh_volume_all.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
index edffbb6..30ed851 100644
--- a/intern/cycles/kernel/geom/geom_qbvh.h
+++ b/intern/cycles/kernel/geom/geom_qbvh.h
@@ -56,9 +56,9 @@ ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
const sse3f& org_idir,
-#endif
+#else
const sse3f& org,
- const sse3f& dir,
+#endif
const sse3f& idir,
const int near_x,
const int near_y,
@@ -70,9 +70,7 @@ ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
ssef *__restrict dist)
{
const int offset = nodeAddr + 1;
- (void)dir;
#ifdef __KERNEL_AVX2__
- (void)org;
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
@@ -108,9 +106,9 @@ ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
const ssef& tfar,
#ifdef __KERNEL_AVX2__
const sse3f& P_idir,
-#endif
+#else
const sse3f& P,
- const sse3f& dir,
+#endif
const sse3f& idir,
const int near_x,
const int near_y,
@@ -123,9 +121,7 @@ ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
ssef *__restrict dist)
{
const int offset = nodeAddr + 1;
- (void)dir;
#ifdef __KERNEL_AVX2__
- (void)P;
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
diff --git a/intern/cycles/kernel/geom/geom_qbvh_curve.h b/intern/cycles/kernel/geom/geom_qbvh_curve.h
index 22551d4..757759f 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_curve.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_curve.h
@@ -106,9 +106,9 @@ ccl_device_inline int qbvh_curve_node_intersect(
tfar,
#ifdef __KERNEL_AVX2__
org_idir,
-#endif
+#else
org,
- dir,
+#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
@@ -208,9 +208,9 @@ ccl_device_inline int qbvh_curve_node_intersect_robust(
tfar,
#ifdef __KERNEL_AVX2__
P_idir,
-#endif
+#else
P,
- dir,
+#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
index a12f3c4..6a6e191 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
@@ -27,6 +27,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_curve_node_intersect
+#else
+# define NODE_INTERSECT qbvh_node_intersect
+#endif
+
ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
@@ -72,15 +78,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef tnear(0.0f), tfar(tmax);
+#if BVH_FEATURE(BVH_HAIR)
sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
-
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
@@ -111,25 +120,35 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef dist;
- int traverseChild = false;
- if(false) {
- /*int */traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int traverseChild = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
- org,
- dir4,
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
- }
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+# endif
+# if BVH_FEATURE(BVH_HAIR)
+ dir4,
+# endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ }
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
@@ -345,12 +364,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -399,12 +423,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(tmax);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
# endif
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -417,3 +446,5 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
return false;
}
+
+#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
index abb0487..235b677 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -25,6 +25,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_curve_node_intersect
+#else
+# define NODE_INTERSECT qbvh_node_intersect
+#endif
+
ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
SubsurfaceIntersection *ss_isect,
@@ -82,14 +88,18 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef tnear(0.0f), tfar(isect_t);
+#if BVH_FEATURE(BVH_HAIR)
sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
@@ -109,25 +119,35 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
ssef dist;
- int traverseChild = 0;
- if (false) {
- /*int*/ traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int traverseChild = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
+ P_idir4,
#endif
- org,
-
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list