[Bf-blender-cvs] [08aaa07adbd] blender-v2.83-release: Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found

Patrick Mours noreply at git.blender.org
Mon Feb 8 16:50:13 CET 2021


Commit: 08aaa07adbd46e27f4226f29559be156f14a524b
Author: Patrick Mours
Date:   Fri Jul 17 15:06:55 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rB08aaa07adbd46e27f4226f29559be156f14a524b

Cycles: Use pre-compiled PTX kernel for older generation when no matching one is found

This patch changes the discovery of pre-compiled kernels to look for any PTX, even if
it does not match the current architecture version exactly. This works because the driver can
JIT-compile PTX generated for architectures older than or equal to the current one.
For example, this makes it possible to render on a new GPU architecture even if no
pre-compiled binary kernel for it was distributed as part of the Blender installation.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D8332

===================================================================

M	CMakeLists.txt
M	build_files/cmake/config/blender_release.cmake
M	intern/cycles/device/cuda/device_cuda_impl.cpp
M	intern/cycles/kernel/CMakeLists.txt

===================================================================

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 83f547eb593..6f705ffbe44 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -369,7 +369,7 @@ option(WITH_CYCLES_CUDA_BINARIES    "Build Cycles CUDA binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER   "Build cubins with nvrtc based compiler instead of nvcc" OFF)
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
 mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 compute_75 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING  "Build Cycles with logging support" ON)
diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake
index 01a59e451aa..2d52fb22c86 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,7 +52,7 @@ set(WITH_USD                 ON  CACHE BOOL "" FORCE)
 set(WITH_MEM_JEMALLOC          ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUDA_BINARIES  ON  CACHE BOOL "" FORCE)
 set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE)
+set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;compute_75 CACHE STRING "" FORCE)
 set(WITH_CYCLES_DEVICE_OPTIX   ON CACHE BOOL "" FORCE)
 
 # platform dependent options
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index ba5d479e0e7..870f9f9ecf9 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -352,11 +352,24 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
       }
     }
 
-    const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
-    VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
-    if (path_exists(ptx)) {
-      VLOG(1) << "Using precompiled kernel.";
-      return ptx;
+    /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
+    int ptx_major = major, ptx_minor = minor;
+    while (ptx_major >= 3) {
+      const string ptx = path_get(
+          string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
+      VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+      if (path_exists(ptx)) {
+        VLOG(1) << "Using precompiled kernel.";
+        return ptx;
+      }
+
+      if (ptx_minor > 0) {
+        ptx_minor--;
+      }
+      else {
+        ptx_major--;
+        ptx_minor = 9;
+      }
     }
   }
 
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 2e839a616e9..6ab0b9d39d2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -549,7 +549,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
           ${SRC_UTIL_HEADERS}
         COMMAND ${CUBIN_CC_ENV}
             "$<TARGET_FILE:cycles_cubin_cc>"
-            -target 30
+            -target 52
             -ptx
             -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
             ${cuda_flags}
@@ -573,7 +573,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
         COMMAND
           ${CUDA_NVCC_EXECUTABLE}
           --ptx
-          -arch=sm_30
+          -arch=sm_52
           ${cuda_flags}
           ${input}
         WORKING_DIRECTORY



More information about the Bf-blender-cvs mailing list