[Bf-blender-cvs] [cf017e8] soc-2016-cycles_denoising: Cycles: Fix preprocessor directives around SSE3 replacement functions
Lukas Stockner
noreply at git.blender.org
Wed Aug 10 03:22:18 CEST 2016
Commit: cf017e8181ba23ced63206147d9df5925e08167f
Author: Lukas Stockner
Date: Tue Aug 9 02:26:50 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBcf017e8181ba23ced63206147d9df5925e08167f
Cycles: Fix preprocessor directives around SSE3 replacement functions
The code is supposed to implement replacements for a few SSE4.1-specific functions so that they can be used with SSE3 as well.
Therefore, it was enabled when __KERNEL_SSE3__ was set, but __KERNEL_SSE4__ wasn't.
However, __KERNEL_SSE4__ is never set anywhere - the correct one is __KERNEL_SSE41__.
Because of that, the replacements were enabled for SSE4.1 and better (AVX) as well, where they're not needed, but only slow things down.
===================================================================
M intern/cycles/util/util_simd.h
===================================================================
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 36da155..342e8bb 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -331,9 +331,9 @@ __forceinline size_t __bscf(size_t& v)
static const unsigned int BITSCAN_NO_BIT_SET_32 = 32;
static const size_t BITSCAN_NO_BIT_SET_64 = 64;
+#ifdef __KERNEL_SSE3__
/* Emulation of SSE4 functions with SSE3 */
-
-#if defined(__KERNEL_SSE3) && !defined(__KERNEL_SSE4__)
+# ifndef __KERNEL_SSE41__
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
@@ -362,7 +362,7 @@ __forceinline __m128i _mm_mullo_epi32( __m128i value, __m128i input ) {
char* _r = (char*)(&rvalue + 1);
char* _v = (char*)(& value + 1);
char* _i = (char*)(& input + 1);
- for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32*)(_r + i)) = *((int32*)(_v + i))* *((int32*)(_i + i));
+ for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32_t*)(_r + i)) = *((int32_t*)(_v + i))* *((int32_t*)(_i + i));
return rvalue;
}
@@ -395,7 +395,7 @@ __forceinline __m128i _mm_insert_epi32( __m128i value, int input, const int inde
#define _mm_extract_ps __emu_mm_extract_ps
__forceinline int _mm_extract_ps( __m128 input, const int index ) {
- int32* ptr = (int32*)&input; return ptr[index];
+ int32_t* ptr = (int32_t*)&input; return ptr[index];
}
#define _mm_insert_ps __emu_mm_insert_ps
@@ -415,7 +415,7 @@ __forceinline __m128 _mm_round_ps( __m128 value, const int flags )
return value;
}
-#ifdef _M_X64
+# ifdef _M_X64
#define _mm_insert_epi64 __emu_mm_insert_epi64
__forceinline __m128i _mm_insert_epi64( __m128i value, __int64 input, const int index ) {
assert(size_t(index) < 4); ((__int64*)&value)[index] = input; return value;
@@ -426,7 +426,9 @@ __forceinline __int64 _mm_extract_epi64( __m128i input, const int index ) {
assert(size_t(index) < 2);
return index == 0 ? _mm_cvtsi128_si64x(input) : _mm_cvtsi128_si64x(_mm_unpackhi_epi64(input, input));
}
-#endif
+# endif
+
+# endif
#endif
More information about the Bf-blender-cvs
mailing list