[Bf-blender-cvs] [db28411fd90] master: BLI: use sse2neon to emulate SSE instructions with Arm Neon

Brecht Van Lommel noreply at git.blender.org
Wed Feb 17 16:26:39 CET 2021


Commit: db28411fd90b77035dddc1682bb2786da34f73e9
Author: Brecht Van Lommel
Date:   Sun Feb 14 04:16:39 2021 +0100
Branches: master
https://developer.blender.org/rBdb28411fd90b77035dddc1682bb2786da34f73e9

BLI: use sse2neon to emulate SSE instructions with Arm Neon

* WITH_CPU_SSE was renamed to WITH_CPU_SIMD, and now covers both SSE and Neon.
* On macOS, sse2neon.h is included as part of the precompiled libraries.
* On Linux, it is enabled if the sse2neon.h header file is detected. However,
  this library has no official releases and is not shipped with any Linux
  distribution, so manual installation and configuration are required to get
  it working.

Ref D8237, T78710

===================================================================

M	CMakeLists.txt
M	build_files/cmake/macros.cmake
M	build_files/cmake/platform/platform_apple.cmake
M	build_files/cmake/platform/platform_unix.cmake
M	intern/cycles/CMakeLists.txt
M	source/blender/blenlib/BLI_simd.h

===================================================================

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6fb6dbd9dc..c95b8f0f7af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -370,8 +370,8 @@ if(WITH_PYTHON_INSTALL)
   endif()
 endif()
 
-option(WITH_CPU_SSE              "Enable SIMD instruction if they're detected on the host machine" ON)
-mark_as_advanced(WITH_CPU_SSE)
+option(WITH_CPU_SIMD              "Enable SIMD instruction if they're detected on the host machine" ON)
+mark_as_advanced(WITH_CPU_SIMD)
 
 # Cycles
 option(WITH_CYCLES                  "Enable Cycles Render Engine" ON)
@@ -775,14 +775,6 @@ if(WITH_GHOST_SDL OR WITH_HEADLESS)
   set(WITH_XR_OPENXR     OFF)
 endif()
 
-if(WITH_CPU_SSE)
-  TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
-else()
-  message(STATUS "SSE and SSE2 optimizations are DISABLED!")
-  set(COMPILER_SSE_FLAG)
-  set(COMPILER_SSE2_FLAG)
-endif()
-
 if(WITH_BUILDINFO)
   find_package(Git)
   if(NOT GIT_FOUND)
@@ -962,22 +954,55 @@ if(WITH_INTERNATIONAL)
   endif()
 endif()
 
-# See TEST_SSE_SUPPORT() for how this is defined.
+# See TEST_SSE_SUPPORT() and TEST_NEON_SUPPORT() for how these are defined.
+#
+# This is done globally, so that all modules can use it if available, and
+# because these are used in headers used by many modules.
+if(WITH_CPU_SIMD)
+  set(COMPILER_SSE_FLAG)
+  set(COMPILER_SSE2_FLAG)
 
-# Do it globally, SSE2 is required for quite some time now.
-# Doing it now allows to use SSE/SSE2 in inline headers.
-if(SUPPORT_SSE_BUILD)
-  string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
-  add_definitions(-D__SSE__ -D__MMX__)
-endif()
-if(SUPPORT_SSE2_BUILD)
-  string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
-  add_definitions(-D__SSE2__)
-  if(NOT SUPPORT_SSE_BUILD) # don't double up
-    add_definitions(-D__MMX__)
+  # Test Neon first since macOS Arm can compile and run x86-64 SSE binaries.
+  TEST_NEON_SUPPORT()
+  if(SUPPORT_NEON_BUILD)
+    # Neon
+    if(SSE2NEON_FOUND)
+      blender_include_dirs_sys("${SSE2NEON_INCLUDE_DIRS}")
+      add_definitions(-DWITH_SSE2NEON)
+    endif()
+  else()
+    # SSE
+    TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
+    if(SUPPORT_SSE_BUILD)
+      string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
+      add_definitions(-D__SSE__ -D__MMX__)
+    endif()
+    if(SUPPORT_SSE2_BUILD)
+      string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
+      add_definitions(-D__SSE2__)
+      if(NOT SUPPORT_SSE_BUILD) # don't double up
+        add_definitions(-D__MMX__)
+      endif()
+    endif()
   endif()
-endif()
 
+  # Print instructions used
+  if(SUPPORT_NEON_BUILD)
+    if(SSE2NEON_FOUND)
+      message(STATUS "Neon SIMD instructions enabled")
+    else()
+      message(STATUS "Neon SIMD instructions detected but unused, requires sse2neon")
+    endif()
+  elseif(SUPPORT_SSE2_BUILD)
+    message(STATUS "SSE2 SIMD instructions enabled")
+  elseif(SUPPORT_SSE_BUILD)
+    message(STATUS "SSE SIMD instructions enabled")
+  else()
+    message(STATUS "No SIMD instructions detected")
+  endif()
+else()
+  message(STATUS "SIMD instructions disabled")
+endif()
 
 # set the endian define
 if(MSVC)
diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake
index aebcd25e3b6..b8f92a10761 100644
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -668,12 +668,6 @@ macro(TEST_SSE_SUPPORT
       #include <xmmintrin.h>
       int main(void) { __m128 v = _mm_setzero_ps(); return 0; }"
     SUPPORT_SSE_BUILD)
-
-    if(SUPPORT_SSE_BUILD)
-      message(STATUS "SSE Support: detected.")
-    else()
-      message(STATUS "SSE Support: missing.")
-    endif()
   endif()
 
   if(NOT DEFINED SUPPORT_SSE2_BUILD)
@@ -682,17 +676,19 @@ macro(TEST_SSE_SUPPORT
       #include <emmintrin.h>
       int main(void) { __m128d v = _mm_setzero_pd(); return 0; }"
     SUPPORT_SSE2_BUILD)
-
-    if(SUPPORT_SSE2_BUILD)
-      message(STATUS "SSE2 Support: detected.")
-    else()
-      message(STATUS "SSE2 Support: missing.")
-    endif()
   endif()
 
   unset(CMAKE_REQUIRED_FLAGS)
 endmacro()
 
+macro(TEST_NEON_SUPPORT)
+  include(CheckCXXSourceCompiles)
+  check_cxx_source_compiles(
+    "#include <arm_neon.h>
+     int main() {return vaddvq_s32(vdupq_n_s32(1));}"
+    SUPPORT_NEON_BUILD)
+endmacro()
+
 # Only print message if running CMake first time
 macro(message_first_run)
   if(FIRST_RUN)
diff --git a/build_files/cmake/platform/platform_apple.cmake b/build_files/cmake/platform/platform_apple.cmake
index 5203ba10863..e7b0097a137 100644
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -321,8 +321,11 @@ if(WITH_OPENVDB)
 endif()
 
 if(WITH_NANOVDB)
-  set(NANOVDB ${LIBDIR}/nanovdb)
-  set(NANOVDB_INCLUDE_DIR ${NANOVDB}/include)
+  find_package(NanoVDB)
+endif()
+
+if(WITH_CPU_SIMD)
+  find_package(sse2neon)
 endif()
 
 if(WITH_LLVM)
diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake
index f212741f0b6..5d3f074bdda 100644
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -284,6 +284,10 @@ if(WITH_NANOVDB)
   endif()
 endif()
 
+if(WITH_CPU_SIMD)
+  find_package_wrapper(sse2neon)
+endif()
+
 if(WITH_ALEMBIC)
   find_package_wrapper(Alembic)
 
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 2a28d905144..b01bf1bd1e2 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -64,7 +64,7 @@ if(WITH_CYCLES_NATIVE_ONLY)
     endif()
     set(CYCLES_KERNEL_FLAGS "${MSVC_NATIVE_ARCH_FLAGS}")
   endif()
-elseif(NOT WITH_CPU_SSE)
+elseif(NOT WITH_CPU_SIMD OR (SUPPORT_NEON_BUILD AND SSE2NEON_FOUND))
   set(CXX_HAS_SSE FALSE)
   set(CXX_HAS_AVX FALSE)
   set(CXX_HAS_AVX2 FALSE)
diff --git a/source/blender/blenlib/BLI_simd.h b/source/blender/blenlib/BLI_simd.h
index 1518b6c1de2..2ebbd7a2250 100644
--- a/source/blender/blenlib/BLI_simd.h
+++ b/source/blender/blenlib/BLI_simd.h
@@ -22,7 +22,15 @@
  * SIMD instruction support.
  */
 
-#if defined(__SSE2__)
+#if defined(__ARM_NEON) && defined(WITH_SSE2NEON)
+/* SSE/SSE2 emulation on ARM Neon. Match SSE precision. */
+#  define SSE2NEON_PRECISE_MINMAX 1
+#  define SSE2NEON_PRECISE_DIV 1
+#  define SSE2NEON_PRECISE_SQRT 1
+#  include <sse2neon.h>
+#  define BLI_HAVE_SSE2
+#elif defined(__SSE2__)
+/* Native SSE2 on Intel/AMD. */
 #  include <emmintrin.h>
 #  define BLI_HAVE_SSE2
 #endif



More information about the Bf-blender-cvs mailing list