[Bf-blender-cvs] [8af782a] master: Code cleanup: some reshuffling of SIMD defines moving more code to util_optimization.h.

Brecht Van Lommel noreply at git.blender.org
Wed Jan 15 15:29:57 CET 2014


Commit: 8af782ad22c42654d23ca6379f105af8d98956cc
Author: Brecht Van Lommel
Date:   Wed Jan 15 15:11:50 2014 +0100
https://developer.blender.org/rB8af782ad22c42654d23ca6379f105af8d98956cc

Code cleanup: some reshuffling of SIMD defines moving more code to util_optimization.h.

===================================================================

M	intern/cycles/kernel/kernel.h
M	intern/cycles/kernel/kernel_sse2.cpp
M	intern/cycles/kernel/kernel_sse3.cpp
M	intern/cycles/kernel/kernel_sse41.cpp
M	intern/cycles/util/util_optimization.h
M	intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index b6db92f..01bea10 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -20,7 +20,6 @@
 /* CPU Kernel Interface */
 
 #include "util_types.h"
-#include "util_optimization.h"
 
 CCL_NAMESPACE_BEGIN
 
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 6f3f171..6a2a780 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -17,16 +17,16 @@
 /* Optimized CPU kernel entry points. This file is compiled with SSE2
  * optimization flags and nearly all functions inlined, while kernel.cpp
  * is compiled without for other CPU's. */
- 
-#include "util_optimization.h"
- 
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
 
 /* SSE optimization disabled for now on 32 bit, see bug #36316 */
 #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #define __KERNEL_SSE2__
 #endif
 
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+
 #include "kernel.h"
 #include "kernel_compat_cpu.h"
 #include "kernel_math.h"
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index e676098..9d0abb9 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -17,10 +17,6 @@
 /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
  * optimization flags and nearly all functions inlined, while kernel.cpp
  * is compiled without for other CPU's. */
- 
-#include "util_optimization.h"
- 
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
 
 /* SSE optimization disabled for now on 32 bit, see bug #36316 */
 #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -29,6 +25,10 @@
 #define __KERNEL_SSSE3__
 #endif
 
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+
 #include "kernel.h"
 #include "kernel_compat_cpu.h"
 #include "kernel_math.h"
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index fd2198a..bc20de0 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -17,10 +17,6 @@
 /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
  * optimization flags and nearly all functions inlined, while kernel.cpp
  * is compiled without for other CPU's. */
- 
-#include "util_optimization.h"
- 
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
 
 /* SSE optimization disabled for now on 32 bit, see bug #36316 */
 #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -30,6 +26,10 @@
 #define __KERNEL_SSE41__
 #endif
 
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+
 #include "kernel.h"
 #include "kernel_compat_cpu.h"
 #include "kernel_math.h"
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 61a2ad0..b7a2506 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -14,8 +14,31 @@
  * limitations under the License
  */
 
+#ifndef __UTIL_OPTIMIZATION_H__
+#define __UTIL_OPTIMIZATION_H__
+
+#ifndef __KERNEL_GPU__
+
+/* x86
+ *
+ * Compile a regular, SSE2 and SSE3 kernel. */
+
+#if defined(i386) || defined(_M_IX86)
+
+#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+
+#endif
+
+/* x86-64
+ *
+ * Compile a regular (includes SSE2), SSE3 and SSE 4.1 kernel. */
+
 #if defined(__x86_64__) || defined(_M_X64)
 
+/* SSE2 is always available on x86-64 CPUs, so auto enable */
+#define __KERNEL_SSE2__
+
 /* no SSE2 kernel on x86-64, part of regular kernel */
 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -27,9 +50,60 @@
 
 #endif
 
-#if defined(i386) || defined(_M_IX86)
+/* SSE Experiment
+ *
+ * This is disabled code for an experiment to use SSE types globally for types
+ * such as float3 and float4. Currently this gives an overall slowdown. */
 
-#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+#if 0
+#define __KERNEL_SSE__
+#ifndef __KERNEL_SSE2__
+#define __KERNEL_SSE2__
+#endif
+#ifndef __KERNEL_SSE3__
+#define __KERNEL_SSE3__
+#endif
+#ifndef __KERNEL_SSSE3__
+#define __KERNEL_SSSE3__
+#endif
+#ifndef __KERNEL_SSE4__
+#define __KERNEL_SSE4__
+#endif
+#endif
+
+/* SSE Intrinsics includes
+ *
+ * We assume any __KERNEL_SSE2__ .. __KERNEL_SSE41__ flags have been defined by this point. */
+
+/* SSE intrinsics headers */
+#ifndef FREE_WINDOWS64
+
+#ifdef __KERNEL_SSE2__
+#include <xmmintrin.h> /* SSE 1 */
+#include <emmintrin.h> /* SSE 2 */
+#endif
+
+#ifdef __KERNEL_SSE3__
+#include <pmmintrin.h> /* SSE 3 */
+#endif
 
+#ifdef __KERNEL_SSSE3__
+#include <tmmintrin.h> /* SSSE 3 */
 #endif
+
+#ifdef __KERNEL_SSE41__
+#include <smmintrin.h> /* SSE 4.1 */
+#endif
+
+#else
+
+/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
+ * Since we can't avoid including <windows.h>, include only that header. */
+#include <windows.h>
+
+#endif
+
+#endif
+
+#endif /* __UTIL_OPTIMIZATION_H__ */
+
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 2ee2f0f..ebfd8b6 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -57,67 +57,19 @@
 
 #endif
 
-/* SIMD Types */
+/* Standard Integer Types */
 
 #ifndef __KERNEL_GPU__
 
-#define __KERNEL_SSE2__
-
-/* not enabled, globally applying it gives slowdown, only for testing. */
-#if 0
-#define __KERNEL_SSE__
-#ifndef __KERNEL_SSE2__
-#define __KERNEL_SSE2__
-#endif
-#ifndef __KERNEL_SSE3__
-#define __KERNEL_SSE3__
-#endif
-#ifndef __KERNEL_SSSE3__
-#define __KERNEL_SSSE3__
-#endif
-#ifndef __KERNEL_SSE4__
-#define __KERNEL_SSE4__
-#endif
-#endif
-
-/* SSE2 is always available on x86_64 CPUs, so auto enable */
-#if defined(__x86_64__) && !defined(__KERNEL_SSE2__)
-#define __KERNEL_SSE2__
-#endif
-
-/* SSE intrinsics headers */
-#ifndef FREE_WINDOWS64
-
-#ifdef __KERNEL_SSE2__
-#include <xmmintrin.h> /* SSE 1 */
-#include <emmintrin.h> /* SSE 2 */
-#endif
-
-#ifdef __KERNEL_SSE3__
-#include <pmmintrin.h> /* SSE 3 */
-#endif
-
-#ifdef __KERNEL_SSSE3__
-#include <tmmintrin.h> /* SSSE 3 */
-#endif
-
-#ifdef __KERNEL_SSE41__
-#include <smmintrin.h> /* SSE 4.1 */
-#endif
-
-#else
-
-/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
- * Since we can't avoid including <windows.h>, better only include that */
-#include <windows.h>
-
-#endif
-
 /* int8_t, uint16_t, and friends */
 #ifndef _WIN32
 #include <stdint.h>
 #endif
 
+/* SIMD Types */
+
+#include "util_optimization.h"
+
 #endif
 
 CCL_NAMESPACE_BEGIN




More information about the Bf-blender-cvs mailing list