[Bf-blender-cvs] [611d69a] soc-2014-cycles: Cycles: Use new ssef helpers for QBVH Triangle Intersection. No functional changes.
Thomas Dinges
noreply at git.blender.org
Mon Jun 16 00:58:01 CEST 2014
Commit: 611d69adb98b84c9af0d6da97672a853d899fd8c
Author: Thomas Dinges
Date: Mon Jun 16 00:56:51 2014 +0200
https://developer.blender.org/rB611d69adb98b84c9af0d6da97672a853d899fd8c
Cycles: Use new ssef helpers for QBVH Triangle Intersection (the changes are in qbvh_node_intersect in geom_triangle.h). No functional changes.
===================================================================
M intern/cycles/kernel/geom/geom_triangle.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 0b586a8..df6ddce 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -406,73 +406,73 @@ ccl_device_inline void qbvh_node_intersect(KernelGlobals *kg, int *traverseChild
{
#ifdef __KERNEL_AVX2__
/* X axis */
- const __m128 idirx = _mm_set_ps1(idir.x);
- const __m128 mulx = _mm_mul_ps(_mm_set_ps1(P.x), idirx);
- const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
- const __m128 t0x = msub(bminx, idirx, mulx);
- const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
- const __m128 t1x = msub(bmaxx, idirx, mulx);
+ const ssef idirx = ssef(idir.x);
+ const ssef mulx = ssef(P.x) * idirx;
+ const ssef bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
+ const ssef t0x = msub(bminx, idirx, mulx);
+ const ssef bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
+ const ssef t1x = msub(bmaxx, idirx, mulx);
- __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
- __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
+ ssef tmin = max(min(t0x, t1x), _mm_setzero_ps());
+ ssef tmax = min(max(t0x, t1x), ssef(t));
/* Y axis */
- const __m128 idiry = _mm_set_ps1(idir.y);
- const __m128 muly = _mm_mul_ps(_mm_set_ps1(P.y), idiry);
- const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
- const __m128 t0y = msub(bminy, idiry, muly);
- const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
- const __m128 t1y = msub(bmaxy, idiry, muly);
+ const ssef idiry = ssef(idir.y);
+ const ssef muly = ssef(P.y) * idiry;
+ const ssef bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
+ const ssef t0y = msub(bminy, idiry, muly);
+ const ssef bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
+ const ssef t1y = msub(bmaxy, idiry, muly);
- tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
- tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
+ tmin = max(min(t0y, t1y), tmin);
+ tmax = min(max(t0y, t1y), tmax);
/* Z axis */
- const __m128 idirz = _mm_set_ps1(idir.z);
- const __m128 mulz = _mm_mul_ps(_mm_set_ps1(P.z), idirz);
- const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
- const __m128 t0z = msub(bminz, idirz, mulz);
- const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
- const __m128 t1z = msub(bmaxz, idirz, mulz);
-
- tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
- tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
+ const ssef idirz = ssef(idir.z);
+ const ssef mulz = ssef(P.z) * idirz;
+ const ssef bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
+ const ssef t0z = msub(bminz, idirz, mulz);
+ const ssef bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
+ const ssef t1z = msub(bmaxz, idirz, mulz);
+
+ tmin = max(min(t0z, t1z), tmin);
+ tmax = min(max(t0z, t1z), tmax);
#else
- const __m128 Px = _mm_set_ps1(P.x);
- const __m128 idirx = _mm_set_ps1(idir.x);
- const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
- const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, Px), idirx);
- const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
- const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, Px), idirx);
+ const ssef Px = ssef(P.x);
+ const ssef idirx = ssef(idir.x);
+ const ssef bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
+ const ssef t0x = (bminx - Px) * idirx;
+ const ssef bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
+ const ssef t1x = (bmaxx - Px) * idirx;
- __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
- __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
+ ssef tmin = max(min(t0x, t1x), _mm_setzero_ps());
+ ssef tmax = min(max(t0x, t1x), ssef(t));
/* Y axis */
- const __m128 Py = _mm_set_ps1(P.y);
- const __m128 idiry = _mm_set_ps1(idir.y);
- const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
- const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, Py), idiry);
- const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
- const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, Py), idiry);
+ const ssef Py = ssef(P.y);
+ const ssef idiry = ssef(idir.y);
+ const ssef bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
+ const ssef t0y = (bminy - Py) * idiry;
+ const ssef bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
+ const ssef t1y = (bmaxy - Py) * idiry;
- tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
- tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
+ tmin = max(min(t0y, t1y), tmin);
+ tmax = min(max(t0y, t1y), tmax);
/* Z axis */
- const __m128 Pz = _mm_set_ps1(P.z);
- const __m128 idirz = _mm_set_ps1(idir.z);
- const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
- const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, Pz), idirz);
- const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
- const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, Pz), idirz);
-
- tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
- tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
+ const ssef Pz = ssef(P.z);
+ const ssef idirz = ssef(idir.z);
+ const ssef bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
+ const ssef t0z = (bminz - Pz) * idirz;
+ const ssef bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
+ const ssef t1z = (bmaxz - Pz) * idirz;
+
+ tmin = max(min(t0z, t1z), tmin);
+ tmax = min(max(t0z, t1z), tmax);
#endif
/* compare and get mask */
- *traverseChild = _mm_movemask_ps(_mm_cmple_ps(tmin, tmax));
+ *traverseChild = movemask(tmin <= tmax);
/* get node addresses */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+6);
More information about the Bf-blender-cvs
mailing list