[Bf-blender-cvs] [741a245] soc-2016-cycles_denoising: Cycles: Add a few SSE utilities

Lukas Stockner noreply at git.blender.org
Wed Aug 10 03:22:22 CEST 2016


Commit: 741a2453bf7ccda2163d7f71c85c7d4a63c67506
Author: Lukas Stockner
Date:   Tue Aug 9 02:27:40 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB741a2453bf7ccda2163d7f71c85c7d4a63c67506

Cycles: Add a few SSE utilities

===================================================================

M	intern/cycles/util/util_simd.h

===================================================================

diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 342e8bb..9f0ff84 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -430,6 +430,37 @@ __forceinline __int64 _mm_extract_epi64( __m128i input, const int index ) {
 
 #  endif
 
+#define _mm_fabs_ps(x) _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))) /* Absolute value per element: ANDing with 0x7fffffff clears the top (sign) bit of each 32-bit element. */
+
+/* Horizontal maximum: returns a __m128 whose four elements all hold the largest element of v. */
+ccl_device_inline __m128 _mm_hmax_ps(__m128 v)
+{
+  /* Compare the upper pair [2, 3, 2, 3] against the lower pair [0, 1, 0, 1], giving [max(0, 2), max(1, 3)] twice. */
+  const __m128 pair_max = _mm_max_ps(_mm_movehl_ps(v, v), _mm_movelh_ps(v, v));
+  /* Compare the odd-element copy (max(1, 3) everywhere) against the even-element copy (max(0, 2) everywhere). */
+  const __m128 all_max = _mm_max_ps(_mm_movehdup_ps(pair_max), _mm_moveldup_ps(pair_max));
+  return all_max;
+}
+
+/* Horizontal add: returns x[0] + x[1] + x[2] + x[3] as a scalar float. */
+ccl_device_inline float _mm_hsum_ss(__m128 x)
+{
+  const __m128 odd = _mm_movehdup_ps(x);   /* [x1, x1, x3, x3] */
+  const __m128 pairs = _mm_add_ps(x, odd); /* element 0 = x0 + x1, element 2 = x2 + x3 */
+  return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(odd, pairs), pairs)); /* (x2 + x3) + (x0 + x1) */
+}
+
+/* Horizontal add: returns a __m128 whose four elements all hold x[0] + x[1] + x[2] + x[3]. */
+ccl_device_inline __m128 _mm_hsum_ps(__m128 x)
+{
+  /* The first pass yields [x0 + x1, x2 + x3, x0 + x1, x2 + x3]; the second adds those pair sums in every element. */
+  const __m128 pair_sums = _mm_hadd_ps(x, x);
+  return _mm_hadd_ps(pair_sums, pair_sums);
+}
+
+/* Keep each element of x whose mask element has its sign bit set and zero the rest (_mm_blendv_ps tests only that bit). */
+#define _mm_mask_ps(x, mask) _mm_blendv_ps(_mm_setzero_ps(), x, mask)
+
 #endif
 
 #else  /* __KERNEL_SSE2__ */




More information about the Bf-blender-cvs mailing list