[Bf-blender-cvs] [1457e5ea738] master: Fix Cycles Windows render errors with BVH2 CPU rendering.

Brecht Van Lommel noreply at git.blender.org
Tue Aug 29 23:59:46 CEST 2017


Commit: 1457e5ea738de1da2ea5a7e6e5bf11b5a4c52d2e
Author: Brecht Van Lommel
Date:   Tue Aug 29 21:11:47 2017 +0200
Branches: master
https://developer.blender.org/rB1457e5ea738de1da2ea5a7e6e5bf11b5a4c52d2e

Fix Cycles Windows render errors with BVH2 CPU rendering.

One problem is that it was always using _mm_blendv_ps emulation even if the
instruction was supported. The other is that the emulation function was wrong.

Thanks a lot to Ray Molenkamp for tracking this one down.

===================================================================

M	intern/cycles/util/util_simd.h

===================================================================

diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 1a26ca697dd..58b3d267266 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -347,7 +347,10 @@ __forceinline size_t __bscf(size_t& v)
 
 #endif /* _WIN32 */
 
-#if !(defined(__SSE4_1__) || defined(__SSE4_2__))
+/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test
+ * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other
+ * platforms when compiling code outside the kernel. */
+#if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__))
 
 /* Emulation of SSE4 functions with SSE2 */
 
@@ -361,7 +364,12 @@ __forceinline size_t __bscf(size_t& v)
 #define _mm_blendv_ps _mm_blendv_ps_emu
 __forceinline __m128 _mm_blendv_ps_emu( __m128 value, __m128 input, __m128 mask)
 {
-    return _mm_or_ps(_mm_and_ps(mask, input), _mm_andnot_ps(mask, value)); 
+    __m128i isignmask = _mm_set1_epi32(0x80000000);
+    __m128 signmask = _mm_castsi128_ps(isignmask);
+    __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask));
+    __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask);
+    __m128 cmpmask = _mm_castsi128_ps(icmpmask);
+    return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value));
 }
 
 #undef _mm_blend_ps
@@ -435,7 +443,7 @@ __forceinline __m128 _mm_round_ps_emu( __m128 value, const int flags)
   return value;
 }
 
-#endif /* !(defined(__SSE4_1__) || defined(__SSE4_2__)) */
+#endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
 
 #else  /* __KERNEL_SSE2__ */



More information about the Bf-blender-cvs mailing list