[Bf-blender-cvs] [611d69a] soc-2014-cycles: Cycles: Use new ssef helpers for QBVH node intersection (qbvh_node_intersect in geom_triangle.h). No functional changes.

Mon Jun 16 00:58:01 CEST 2014

Commit: 611d69adb98b84c9af0d6da97672a853d899fd8c
Author: Thomas Dinges
Date:   Mon Jun 16 00:56:51 2014 +0200
https://developer.blender.org/rB611d69adb98b84c9af0d6da97672a853d899fd8c

Cycles: Use new ssef helpers for QBVH node intersection (qbvh_node_intersect in geom_triangle.h). No functional changes.

===================================================================

M	intern/cycles/kernel/geom/geom_triangle.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 0b586a8..df6ddce 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -406,73 +406,73 @@ ccl_device_inline void qbvh_node_intersect(KernelGlobals *kg, int *traverseChild
 {
 #ifdef __KERNEL_AVX2__
 	/* X axis */
-	const __m128 idirx = _mm_set_ps1(idir.x);
-	const __m128 mulx = _mm_mul_ps(_mm_set_ps1(P.x), idirx);
-	const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
-	const __m128 t0x = msub(bminx, idirx, mulx);
-	const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
-	const __m128 t1x = msub(bmaxx, idirx, mulx);
+	const ssef idirx = ssef(idir.x);
+	const ssef mulx = ssef(P.x) * idirx;
+	const ssef bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
+	const ssef t0x = msub(bminx, idirx, mulx);
+	const ssef bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
+	const ssef t1x = msub(bmaxx, idirx, mulx);
 
-	__m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
-	__m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
+	ssef tmin = max(min(t0x, t1x), _mm_setzero_ps());
+	ssef tmax = min(max(t0x, t1x), ssef(t));
 
 	/* Y axis */
-	const __m128 idiry = _mm_set_ps1(idir.y);
-	const __m128 muly = _mm_mul_ps(_mm_set_ps1(P.y), idiry);
-	const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
-	const __m128 t0y = msub(bminy, idiry, muly);
-	const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
-	const __m128 t1y = msub(bmaxy, idiry, muly);
+	const ssef idiry = ssef(idir.y);
+	const ssef muly = ssef(P.y) * idiry;
+	const ssef bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
+	const ssef t0y = msub(bminy, idiry, muly);
+	const ssef bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
+	const ssef t1y = msub(bmaxy, idiry, muly);
 
-	tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
-	tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
+	tmin = max(min(t0y, t1y), tmin);
+	tmax = min(max(t0y, t1y), tmax);
 
 	/* Z axis */
-	const __m128 idirz = _mm_set_ps1(idir.z);
-	const __m128 mulz = _mm_mul_ps(_mm_set_ps1(P.z), idirz);
-	const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
-	const __m128 t0z = msub(bminz, idirz, mulz);
-	const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
-	const __m128 t1z = msub(bmaxz, idirz, mulz);
-
-	tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
-	tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
+	const ssef idirz = ssef(idir.z);
+	const ssef mulz = ssef(P.z) * idirz;
+	const ssef bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
+	const ssef t0z = msub(bminz, idirz, mulz);
+	const ssef bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
+	const ssef t1z = msub(bmaxz, idirz, mulz);
+
+	tmin = max(min(t0z, t1z), tmin);
+	tmax = min(max(t0z, t1z), tmax);
 #else
-	const __m128 Px = _mm_set_ps1(P.x);
-	const __m128 idirx = _mm_set_ps1(idir.x);
-	const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
-	const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, Px), idirx);
-	const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
-	const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, Px), idirx);
+	const ssef Px = ssef(P.x);
+	const ssef idirx = ssef(idir.x);
+	const ssef bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
+	const ssef t0x = (bminx - Px) * idirx;
+	const ssef bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
+	const ssef t1x = (bmaxx - Px) * idirx;
 
-	__m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
-	__m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
+	ssef tmin = max(min(t0x, t1x), _mm_setzero_ps());
+	ssef tmax = min(max(t0x, t1x), ssef(t));
 
 	/* Y axis */
-	const __m128 Py = _mm_set_ps1(P.y);
-	const __m128 idiry = _mm_set_ps1(idir.y);
-	const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
-	const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, Py), idiry);
-	const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
-	const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, Py), idiry);
+	const ssef Py = ssef(P.y);
+	const ssef idiry = ssef(idir.y);
+	const ssef bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
+	const ssef t0y = (bminy - Py) * idiry;
+	const ssef bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
+	const ssef t1y = (bmaxy - Py) * idiry;
 
-	tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
-	tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
+	tmin = max(min(t0y, t1y), tmin);
+	tmax = min(max(t0y, t1y), tmax);
 
 	/* Z axis */
-	const __m128 Pz = _mm_set_ps1(P.z);
-	const __m128 idirz = _mm_set_ps1(idir.z);
-	const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
-	const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, Pz), idirz);
-	const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
-	const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, Pz), idirz);
-
-	tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
-	tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
+	const ssef Pz = ssef(P.z);
+	const ssef idirz = ssef(idir.z);
+	const ssef bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
+	const ssef t0z = (bminz - Pz) * idirz;
+	const ssef bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
+	const ssef t1z = (bmaxz - Pz) * idirz;
+
+	tmin = max(min(t0z, t1z), tmin);
+	tmax = min(max(t0z, t1z), tmax);
 #endif
 
 	/* compare and get mask */
-	*traverseChild = _mm_movemask_ps(_mm_cmple_ps(tmin, tmax));
+	*traverseChild = movemask(tmin <= tmax);
 
 	/* get node addresses */
 	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+6);