[Bf-blender-cvs] [cf017e8] soc-2016-cycles_denoising: Cycles: Fix preprocessor directives around SSE3 replacement functions

Lukas Stockner noreply at git.blender.org
Wed Aug 10 03:22:18 CEST 2016


Commit: cf017e8181ba23ced63206147d9df5925e08167f
Author: Lukas Stockner
Date:   Tue Aug 9 02:26:50 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBcf017e8181ba23ced63206147d9df5925e08167f

Cycles: Fix preprocessor directives around SSE3 replacement functions

The code is supposed to implement replacements for a few SSE4.1-specific functions so that they can be used with SSE3 as well.
Therefore, it was enabled when __KERNEL_SSE3__ was set, but __KERNEL_SSE4__ wasn't.

However, __KERNEL_SSE4__ is never set anywhere - the correct macro is __KERNEL_SSE41__.
Because of that, the replacements were also enabled for SSE4.1 and newer instruction sets (such as AVX), where they are not needed and only slow things down.

===================================================================

M	intern/cycles/util/util_simd.h

===================================================================

diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 36da155..342e8bb 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -331,9 +331,9 @@ __forceinline size_t __bscf(size_t& v)
 static const unsigned int BITSCAN_NO_BIT_SET_32 = 32;
 static const size_t       BITSCAN_NO_BIT_SET_64 = 64;
 
+#ifdef __KERNEL_SSE3__
 /* Emulation of SSE4 functions with SSE3 */
-
-#if defined(__KERNEL_SSE3) && !defined(__KERNEL_SSE4__)
+#  ifndef __KERNEL_SSE41__
 
 #define _MM_FROUND_TO_NEAREST_INT    0x00
 #define _MM_FROUND_TO_NEG_INF        0x01
@@ -362,7 +362,7 @@ __forceinline __m128i _mm_mullo_epi32( __m128i value, __m128i input ) {
   char* _r = (char*)(&rvalue + 1);
   char* _v = (char*)(& value + 1);
   char* _i = (char*)(& input + 1);
-  for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32*)(_r + i)) = *((int32*)(_v + i))*  *((int32*)(_i + i));
+  for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32_t*)(_r + i)) = *((int32_t*)(_v + i))*  *((int32_t*)(_i + i));
   return rvalue;
 }
 
@@ -395,7 +395,7 @@ __forceinline __m128i _mm_insert_epi32( __m128i value, int input, const int inde
 
 #define _mm_extract_ps __emu_mm_extract_ps
 __forceinline int _mm_extract_ps( __m128 input, const int index ) {
-  int32* ptr = (int32*)&input; return ptr[index];
+  int32_t* ptr = (int32_t*)&input; return ptr[index];
 }
 
 #define _mm_insert_ps __emu_mm_insert_ps
@@ -415,7 +415,7 @@ __forceinline __m128 _mm_round_ps( __m128 value, const int flags )
   return value;
 }
 
-#ifdef _M_X64
+#    ifdef _M_X64
 #define _mm_insert_epi64 __emu_mm_insert_epi64
 __forceinline __m128i _mm_insert_epi64( __m128i value, __int64 input, const int index ) { 
     assert(size_t(index) < 4); ((__int64*)&value)[index] = input; return value; 
@@ -426,7 +426,9 @@ __forceinline __int64 _mm_extract_epi64( __m128i input, const int index ) {
     assert(size_t(index) < 2); 
     return index == 0 ? _mm_cvtsi128_si64x(input) : _mm_cvtsi128_si64x(_mm_unpackhi_epi64(input, input)); 
 }
-#endif
+#    endif
+
+#  endif
 
 #endif




More information about the Bf-blender-cvs mailing list