[Bf-blender-cvs] [e48ecea0237] tmp-macs-arm-cycles: macOS: use sse2neon to emulate SSE instructions with Arm Neon

Brecht Van Lommel noreply at git.blender.org
Mon Feb 15 20:03:59 CET 2021


Commit: e48ecea0237a64f3f030c99e1a7b06056f7795b7
Author: Brecht Van Lommel
Date:   Sun Feb 14 04:16:39 2021 +0100
Branches: tmp-macs-arm-cycles
https://developer.blender.org/rBe48ecea0237a64f3f030c99e1a7b06056f7795b7

macOS: use sse2neon to emulate SSE instructions with Arm Neon

* WITH_CPU_SSE was renamed to WITH_CPU_SIMD, and now covers both SSE and Neon.
* For macOS, sse2neon.h is included as part of the precompiled libraries.
* Adding Linux support should be possible too, but the best way to handle this
  library, which has no official releases and is not available in
  distributions, is still to be decided.

Ref T78710

===================================================================

M	CMakeLists.txt
M	build_files/cmake/macros.cmake
M	build_files/cmake/platform/platform_apple.cmake
M	build_files/cmake/platform/platform_unix.cmake
M	intern/cycles/CMakeLists.txt
M	source/blender/blenlib/BLI_simd.h

===================================================================

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6fb6dbd9dc..5e3578d6632 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -370,8 +370,8 @@ if(WITH_PYTHON_INSTALL)
   endif()
 endif()
 
-option(WITH_CPU_SSE              "Enable SIMD instruction if they're detected on the host machine" ON)
-mark_as_advanced(WITH_CPU_SSE)
+option(WITH_CPU_SIMD              "Enable SIMD instruction if they're detected on the host machine" ON)
+mark_as_advanced(WITH_CPU_SIMD)
 
 # Cycles
 option(WITH_CYCLES                  "Enable Cycles Render Engine" ON)
@@ -775,14 +775,6 @@ if(WITH_GHOST_SDL OR WITH_HEADLESS)
   set(WITH_XR_OPENXR     OFF)
 endif()
 
-if(WITH_CPU_SSE)
-  TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
-else()
-  message(STATUS "SSE and SSE2 optimizations are DISABLED!")
-  set(COMPILER_SSE_FLAG)
-  set(COMPILER_SSE2_FLAG)
-endif()
-
 if(WITH_BUILDINFO)
   find_package(Git)
   if(NOT GIT_FOUND)
@@ -962,23 +954,34 @@ if(WITH_INTERNATIONAL)
   endif()
 endif()
 
-# See TEST_SSE_SUPPORT() for how this is defined.
+# See TEST_SSE_SUPPORT() and TEST_NEON_SUPPORT() for how these are defined.
+#
+# This is done globally, so that all modules can use it if available, and
+# because these are used in headers used by many modules.
+if(WITH_CPU_SIMD)
+  set(COMPILER_SSE_FLAG)
+  set(COMPILER_SSE2_FLAG)
 
-# Do it globally, SSE2 is required for quite some time now.
-# Doing it now allows to use SSE/SSE2 in inline headers.
-if(SUPPORT_SSE_BUILD)
-  string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
-  add_definitions(-D__SSE__ -D__MMX__)
-endif()
-if(SUPPORT_SSE2_BUILD)
-  string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
-  add_definitions(-D__SSE2__)
-  if(NOT SUPPORT_SSE_BUILD) # don't double up
-    add_definitions(-D__MMX__)
+  # Probe Neon first; the SSE checks below only run when Neon is unavailable.
+  TEST_NEON_SUPPORT()
+  if(SUPPORT_NEON_BUILD)
+    # SSE is emulated on Neon through sse2neon.h. WITH_SSE2NEON makes
+    # BLI_simd.h include that header, so only define it when the sse2neon
+    # package was actually found; otherwise build without SIMD instead of
+    # failing on a missing header.
+    if(SSE2NEON_FOUND)
+      blender_include_dirs_sys("${SSE2NEON_INCLUDE_DIRS}")
+      add_definitions(-DWITH_SSE2NEON)
+    endif()
+  else()
+    # Native SSE/SSE2: add the detected compiler flags to PLATFORM_CFLAGS and
+    # define the matching feature macros for all modules.
+    TEST_SSE_SUPPORT(COMPILER_SSE_FLAG COMPILER_SSE2_FLAG)
+    if(SUPPORT_SSE_BUILD)
+      string(PREPEND PLATFORM_CFLAGS "${COMPILER_SSE_FLAG} ")
+      add_definitions(-D__SSE__ -D__MMX__)
+    endif()
+    if(SUPPORT_SSE2_BUILD)
+      string(APPEND PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG}")
+      add_definitions(-D__SSE2__)
+      if(NOT SUPPORT_SSE_BUILD) # don't double up
+        add_definitions(-D__MMX__)
+      endif()
+    endif()
   endif()
 endif()
 
-
 # set the endian define
 if(MSVC)
   # for some reason this fails on msvc
diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake
index aebcd25e3b6..bbbac659d1a 100644
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -670,9 +670,9 @@ macro(TEST_SSE_SUPPORT
     SUPPORT_SSE_BUILD)
 
     if(SUPPORT_SSE_BUILD)
-      message(STATUS "SSE Support: detected.")
+      message(STATUS "SSE Instructions: detected.")
     else()
-      message(STATUS "SSE Support: missing.")
+      message(STATUS "SSE Instructions: not supported.")
     endif()
   endif()
 
@@ -684,15 +684,29 @@ macro(TEST_SSE_SUPPORT
     SUPPORT_SSE2_BUILD)
 
     if(SUPPORT_SSE2_BUILD)
-      message(STATUS "SSE2 Support: detected.")
+      message(STATUS "SSE2 Instructions: detected.")
     else()
-      message(STATUS "SSE2 Support: missing.")
+      message(STATUS "SSE2 Instructions: not supported.")
     endif()
   endif()
 
   unset(CMAKE_REQUIRED_FLAGS)
 endmacro()
 
+# Check whether the C++ compiler can build code that uses Arm Neon intrinsics.
+#
+# Takes no arguments. Compiles a small program that includes <arm_neon.h> and
+# calls Neon intrinsics, stores the outcome in SUPPORT_NEON_BUILD, and reports
+# it with a STATUS message.
+macro(TEST_NEON_SUPPORT)
+  include(CheckCXXSourceCompiles)
+  check_cxx_source_compiles(
+    "#include <arm_neon.h>
+   int main() {return vaddvq_s32(vdupq_n_s32(1));}"
+    SUPPORT_NEON_BUILD)
+
+  if(SUPPORT_NEON_BUILD)
+    message(STATUS "Neon Instructions: detected.")
+  else()
+    message(STATUS "Neon Instructions: not supported.")
+  endif()
+endmacro()
+
 # Only print message if running CMake first time
 macro(message_first_run)
   if(FIRST_RUN)
diff --git a/build_files/cmake/platform/platform_apple.cmake b/build_files/cmake/platform/platform_apple.cmake
index 5203ba10863..e7b0097a137 100644
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -321,8 +321,11 @@ if(WITH_OPENVDB)
 endif()
 
 if(WITH_NANOVDB)
-  set(NANOVDB ${LIBDIR}/nanovdb)
-  set(NANOVDB_INCLUDE_DIR ${NANOVDB}/include)
+  find_package(NanoVDB)
+endif()
+
+if(WITH_CPU_SIMD)
+  find_package(sse2neon)
 endif()
 
 if(WITH_LLVM)
diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake
index f212741f0b6..5d3f074bdda 100644
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -284,6 +284,10 @@ if(WITH_NANOVDB)
   endif()
 endif()
 
+if(WITH_CPU_SIMD)
+  find_package_wrapper(sse2neon)
+endif()
+
 if(WITH_ALEMBIC)
   find_package_wrapper(Alembic)
 
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 2a28d905144..b01bf1bd1e2 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -64,7 +64,7 @@ if(WITH_CYCLES_NATIVE_ONLY)
     endif()
     set(CYCLES_KERNEL_FLAGS "${MSVC_NATIVE_ARCH_FLAGS}")
   endif()
-elseif(NOT WITH_CPU_SSE)
+elseif(NOT WITH_CPU_SIMD OR (SUPPORT_NEON_BUILD AND SSE2NEON_FOUND))
   set(CXX_HAS_SSE FALSE)
   set(CXX_HAS_AVX FALSE)
   set(CXX_HAS_AVX2 FALSE)
diff --git a/source/blender/blenlib/BLI_simd.h b/source/blender/blenlib/BLI_simd.h
index 1518b6c1de2..2ebbd7a2250 100644
--- a/source/blender/blenlib/BLI_simd.h
+++ b/source/blender/blenlib/BLI_simd.h
@@ -22,7 +22,15 @@
  * SIMD instruction support.
  */
 
-#if defined(__SSE2__)
+#if defined(__ARM_NEON) && defined(WITH_SSE2NEON)
+/* SSE/SSE2 emulation on ARM Neon. Match SSE precision. */
+#  define SSE2NEON_PRECISE_MINMAX 1
+#  define SSE2NEON_PRECISE_DIV 1
+#  define SSE2NEON_PRECISE_SQRT 1
+#  include <sse2neon.h>
+#  define BLI_HAVE_SSE2
+#elif defined(__SSE2__)
+/* Native SSE2 on Intel/AMD. */
 #  include <emmintrin.h>
 #  define BLI_HAVE_SSE2
 #endif



More information about the Bf-blender-cvs mailing list