[Bf-blender-cvs] [a84a8a528da] master: Cycles: remove SSE3 and AVX kernel optimization levels

Brecht Van Lommel noreply at git.blender.org
Mon Jan 16 17:54:56 CET 2023


Commit: a84a8a528da89677dc16551dbf545eb1891c4c40
Author: Brecht Van Lommel
Date:   Wed Jan 11 16:16:21 2023 +0100
Branches: master
https://developer.blender.org/rBa84a8a528da89677dc16551dbf545eb1891c4c40

Cycles: remove SSE3 and AVX kernel optimization levels

The SSE2, SSE4.1 and AVX2 kernels are kept. This does not affect hardware
support; it only slightly reduces performance on some older CPUs.

This is done to reduce maintenance cost and improve compile times.

Differential Revision: https://developer.blender.org/D16978

===================================================================

M	intern/cycles/CMakeLists.txt
M	intern/cycles/blender/addon/properties.py
M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/python.cpp
M	intern/cycles/device/cpu/device.cpp
M	intern/cycles/device/cpu/kernel.cpp
M	intern/cycles/device/cpu/kernel_function.h
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/device/cpu/kernel.h
D	intern/cycles/kernel/device/cpu/kernel_avx.cpp
D	intern/cycles/kernel/device/cpu/kernel_sse3.cpp
M	intern/cycles/test/CMakeLists.txt
M	intern/cycles/util/debug.cpp
M	intern/cycles/util/debug.h
M	intern/cycles/util/optimization.h
M	intern/cycles/util/system.h

===================================================================

diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 53e87fc5c3a..366d38cc94c 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -85,15 +85,11 @@ elseif(WIN32 AND MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
   # there is no /arch:SSE3, but intrinsics are available anyway
   if(CMAKE_CL_64)
     set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
-    set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
     set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
-    set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
     set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
   else()
     set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
-    set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
     set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
-    set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
     set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
   endif()
 
@@ -126,11 +122,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
     endif()
 
     set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -msse -msse2")
-    set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3")
-    set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS} -msse4.1")
-    if(CXX_HAS_AVX)
-      set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx")
-    endif()
+    set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3 -msse4.1")
     if(CXX_HAS_AVX2)
       set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
     endif()
@@ -144,13 +136,8 @@ elseif(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Intel")
 
   if(CXX_HAS_SSE)
     set(CYCLES_SSE2_KERNEL_FLAGS "/QxSSE2")
-    set(CYCLES_SSE3_KERNEL_FLAGS "/QxSSSE3")
     set(CYCLES_SSE41_KERNEL_FLAGS "/QxSSE4.1")
 
-    if(CXX_HAS_AVX)
-      set(CYCLES_AVX_KERNEL_FLAGS "/arch:AVX")
-    endif()
-
     if(CXX_HAS_AVX2)
       set(CYCLES_AVX2_KERNEL_FLAGS "/QxCORE-AVX2")
     endif()
@@ -174,13 +161,8 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
       set(CYCLES_SSE2_KERNEL_FLAGS "-xsse2")
     endif()
 
-    set(CYCLES_SSE3_KERNEL_FLAGS "-xssse3")
     set(CYCLES_SSE41_KERNEL_FLAGS "-xsse4.1")
 
-    if(CXX_HAS_AVX)
-      set(CYCLES_AVX_KERNEL_FLAGS "-xavx")
-    endif()
-
     if(CXX_HAS_AVX2)
       set(CYCLES_AVX2_KERNEL_FLAGS "-xcore-avx2")
     endif()
@@ -190,15 +172,10 @@ endif()
 if(CXX_HAS_SSE)
   add_definitions(
     -DWITH_KERNEL_SSE2
-    -DWITH_KERNEL_SSE3
     -DWITH_KERNEL_SSE41
   )
 endif()
 
-if(CXX_HAS_AVX)
-  add_definitions(-DWITH_KERNEL_AVX)
-endif()
-
 if(CXX_HAS_AVX2)
   add_definitions(-DWITH_KERNEL_AVX2)
 endif()
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 9ec663eb258..9c1cb0a1b4a 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -951,9 +951,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         return _cycles.debug_flags_update(scene)
 
     debug_use_cpu_avx2: BoolProperty(name="AVX2", default=True)
-    debug_use_cpu_avx: BoolProperty(name="AVX", default=True)
     debug_use_cpu_sse41: BoolProperty(name="SSE41", default=True)
-    debug_use_cpu_sse3: BoolProperty(name="SSE3", default=True)
     debug_use_cpu_sse2: BoolProperty(name="SSE2", default=True)
     debug_bvh_layout: EnumProperty(
         name="BVH Layout",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 102e014297f..81f940529d1 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -2112,9 +2112,7 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
 
         row = col.row(align=True)
         row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
-        row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
         row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
-        row.prop(cscene, "debug_use_cpu_avx", toggle=True)
         row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
         col.prop(cscene, "debug_bvh_layout", text="BVH")
 
diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp
index 96cb204be4b..ebbdc8abf7f 100644
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -63,9 +63,7 @@ static void debug_flags_sync_from_scene(BL::Scene b_scene)
   PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
   /* Synchronize CPU flags. */
   flags.cpu.avx2 = get_boolean(cscene, "debug_use_cpu_avx2");
-  flags.cpu.avx = get_boolean(cscene, "debug_use_cpu_avx");
   flags.cpu.sse41 = get_boolean(cscene, "debug_use_cpu_sse41");
-  flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
   flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
   flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout");
   /* Synchronize CUDA flags. */
diff --git a/intern/cycles/device/cpu/device.cpp b/intern/cycles/device/cpu/device.cpp
index 9b249063aec..580f70b25d7 100644
--- a/intern/cycles/device/cpu/device.cpp
+++ b/intern/cycles/device/cpu/device.cpp
@@ -45,9 +45,7 @@ string device_cpu_capabilities()
 {
   string capabilities = "";
   capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
-  capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
   capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
-  capabilities += system_cpu_support_avx() ? "AVX " : "";
   capabilities += system_cpu_support_avx2() ? "AVX2" : "";
   if (capabilities[capabilities.size() - 1] == ' ')
     capabilities.resize(capabilities.size() - 1);
diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp
index 3e078129bca..4ca68e875a3 100644
--- a/intern/cycles/device/cpu/kernel.cpp
+++ b/intern/cycles/device/cpu/kernel.cpp
@@ -9,8 +9,7 @@ CCL_NAMESPACE_BEGIN
 
 #define KERNEL_FUNCTIONS(name) \
   KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
-      KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
-      KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
+      KERNEL_NAME_EVAL(cpu_sse41, name), KERNEL_NAME_EVAL(cpu_avx2, name)
 
 #define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
 #define REGISTER_KERNEL_FILM_CONVERT(name) \
diff --git a/intern/cycles/device/cpu/kernel_function.h b/intern/cycles/device/cpu/kernel_function.h
index 6171f582518..4875f66f8a8 100644
--- a/intern/cycles/device/cpu/kernel_function.h
+++ b/intern/cycles/device/cpu/kernel_function.h
@@ -17,13 +17,10 @@ template<typename FunctionType> class CPUKernelFunction {
  public:
   CPUKernelFunction(FunctionType kernel_default,
                     FunctionType kernel_sse2,
-                    FunctionType kernel_sse3,
                     FunctionType kernel_sse41,
-                    FunctionType kernel_avx,
                     FunctionType kernel_avx2)
   {
-    kernel_info_ = get_best_kernel_info(
-        kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
+    kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse2, kernel_sse41, kernel_avx2);
   }
 
   template<typename... Args> inline auto operator()(Args... args) const
@@ -60,16 +57,12 @@ template<typename FunctionType> class CPUKernelFunction {
 
   KernelInfo get_best_kernel_info(FunctionType kernel_default,
                                   FunctionType kernel_sse2,
-                                  FunctionType kernel_sse3,
                                   FunctionType kernel_sse41,
-                                  FunctionType kernel_avx,
                                   FunctionType kernel_avx2)
   {
     /* Silence warnings about unused variables when compiling without some architectures. */
     (void)kernel_sse2;
-    (void)kernel_sse3;
     (void)kernel_sse41;
-    (void)kernel_avx;
     (void)kernel_avx2;
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@@ -78,24 +71,12 @@ template<typename FunctionType> class CPUKernelFunction {
     }
 #endif
 
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-    if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
-      return KernelInfo("AVX", kernel_avx);
-    }
-#endif
-
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
     if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
       return KernelInfo("SSE4.1", kernel_sse41);
     }
 #endif
 
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-    if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
-      return KernelInfo("SSE3", kernel_sse3);
-    }
-#endif
-
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
     if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
       return KernelInfo("SSE2", kernel_sse2);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 5ba1b683d6b..3ae468efd1f 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -14,9 +14,7 @@ set(INC_SYS
 set(SRC_KERNEL_DEVICE_CPU
   device/cpu/kernel.cpp
   device/cpu/kernel_sse2.cpp
-  device/cpu/kernel_sse3.cpp
   device/cpu/kernel_sse41.cpp
-  device/cpu/kernel_avx.cpp
   device/cpu/kernel_avx2.cpp
 )
 
@@ -940,14 +938,9 @@ set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CY
 
 if(CXX_HAS_SSE)
   set_source_files_properties(device/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
-  set_source_files_properties(device/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
   set_source_files_properties(device/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
 endif()
 
-if(CXX_HAS_AVX)
-  set_source_files_properties(device/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
-endif()
-
 if(CXX_HAS_AVX2)
   set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
 endif()
diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h
index 647b405140a..e43d7375e

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list