[Bf-blender-cvs] [741f17f] master: Cycles CUDA: make CUDA toolkit 6.0 the official supported version.

Brecht Van Lommel noreply at git.blender.org
Wed Apr 30 16:08:40 CEST 2014


Commit: 741f17f05b8272abfaf000a403e44b73692ba4c7
Author: Brecht Van Lommel
Date:   Wed Apr 30 10:54:17 2014 +0200
https://developer.blender.org/rB741f17f05b8272abfaf000a403e44b73692ba4c7

Cycles CUDA: make CUDA toolkit 6.0 the official supported version.

This also updates the configurations to build kernels for compute capability
5.0 cards, when using and older CUDA toolkit version this will be skipped.

Also includes tweaks to improve performance with this version:
* Increase max registers on sm_30, sm_35 and sm_50
* No longer use texture storage on sm_30

===================================================================

M	CMakeLists.txt
M	build_files/buildbot/config/user-config-cuda-glibc211-i686.py
M	build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py
M	build_files/scons/config/darwin-config.py
M	build_files/scons/config/linux-config.py
M	build_files/scons/config/win32-mingw-config.py
M	build_files/scons/config/win32-vc-config.py
M	build_files/scons/config/win64-mingw-config.py
M	build_files/scons/config/win64-vc-config.py
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/SConscript
M	intern/cycles/kernel/kernel.cu
M	intern/cycles/kernel/kernel_compat_cuda.h

===================================================================

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cb10414..53dcb0c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -264,7 +264,7 @@ option(WITH_CYCLES_STANDALONE		"Build cycles standalone application" OFF)
 option(WITH_CYCLES_STANDALONE_GUI	"Build cycles standalone with GUI" OFF)
 option(WITH_CYCLES_OSL				"Build Cycles with OSL support" OFF)
 option(WITH_CYCLES_CUDA_BINARIES	"Build cycles CUDA binaries" OFF)
-set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 sm_50 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 
diff --git a/build_files/buildbot/config/user-config-cuda-glibc211-i686.py b/build_files/buildbot/config/user-config-cuda-glibc211-i686.py
index 69053d7..854f535 100644
--- a/build_files/buildbot/config/user-config-cuda-glibc211-i686.py
+++ b/build_files/buildbot/config/user-config-cuda-glibc211-i686.py
@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-i686'
 BF_INSTALLDIR = '../blender-install/linux-glibc211-i686'
 BF_NUMJOBS = 1
 
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
diff --git a/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py b/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py
index c9b765f..7e92894 100644
--- a/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py
+++ b/build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py
@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-x86_64'
 BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64'
 BF_NUMJOBS = 1
 
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
diff --git a/build_files/scons/config/darwin-config.py b/build_files/scons/config/darwin-config.py
index 2f77c6b..aac7ed4 100644
--- a/build_files/scons/config/darwin-config.py
+++ b/build_files/scons/config/darwin-config.py
@@ -199,7 +199,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 #Freestyle
 WITH_BF_FREESTYLE = True
diff --git a/build_files/scons/config/linux-config.py b/build_files/scons/config/linux-config.py
index ce2d07f..8f2c5ca 100644
--- a/build_files/scons/config/linux-config.py
+++ b/build_files/scons/config/linux-config.py
@@ -206,7 +206,7 @@ WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST
 
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 WITH_BF_OPENMP = True
 
diff --git a/build_files/scons/config/win32-mingw-config.py b/build_files/scons/config/win32-mingw-config.py
index 3a5a02f..a6d1a7d 100644
--- a/build_files/scons/config/win32-mingw-config.py
+++ b/build_files/scons/config/win32-mingw-config.py
@@ -145,7 +145,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
 WITH_BF_CYCLES = True
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 WITH_BF_OIIO = True
 BF_OIIO = LIBDIR + '/openimageio'
@@ -175,7 +175,7 @@ WITH_BF_OPENMP = True
 #CUDA
 WITH_BF_CYCLES_CUDA_BINARIES = False
 #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 #Freestyle
 WITH_BF_FREESTYLE = True
diff --git a/build_files/scons/config/win32-vc-config.py b/build_files/scons/config/win32-vc-config.py
index bb9bcc3..16b105d 100644
--- a/build_files/scons/config/win32-vc-config.py
+++ b/build_files/scons/config/win32-vc-config.py
@@ -226,7 +226,7 @@ WITH_BF_CYCLES_CUDA_BINARIES = False
 if VC_VERSION == '11.0':
 	BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
 else:
-	BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+	BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 #Ray trace optimization
 WITH_BF_RAYOPTIMIZATION = True
diff --git a/build_files/scons/config/win64-mingw-config.py b/build_files/scons/config/win64-mingw-config.py
index 6efbf5b..dcdea65 100644
--- a/build_files/scons/config/win64-mingw-config.py
+++ b/build_files/scons/config/win64-mingw-config.py
@@ -144,7 +144,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
 WITH_BF_CYCLES = True
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 WITH_BF_OIIO = True
 BF_OIIO = LIBDIR + '/openimageio'
diff --git a/build_files/scons/config/win64-vc-config.py b/build_files/scons/config/win64-vc-config.py
index 9c32972..3ec284e 100644
--- a/build_files/scons/config/win64-vc-config.py
+++ b/build_files/scons/config/win64-vc-config.py
@@ -224,7 +224,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 #CUDA
 WITH_BF_CYCLES_CUDA_BINARIES = False
 #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35', 'sm_50']
 
 #Ray trace optimization
 WITH_BF_RAYOPTIMIZATION = True
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0d2f6cd..72453e8 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -222,7 +222,7 @@ public:
 		/* In order to use full 6GB of memory on Titan cards, use arrays instead
 		 * of textures. On earlier cards this seems slower, but on Titan it is
 		 * actually slightly faster in tests. */
-		use_texture_storage = (cuDevArchitecture < 350);
+		use_texture_storage = (cuDevArchitecture < 300);
 
 		cuda_pop_context();
 	}
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 1527d15..d18f4fa 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -146,11 +146,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
 	set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
 
 	# warn for other versions
-	if(CUDA_VERSION MATCHES "50")
+	if(CUDA_VERSION MATCHES "60")
 	else()
 		message(WARNING
 			"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
-			"build may succeed but only CUDA 5.0 is officially supported")
+			"build may succeed but only CUDA 6.0 is officially supported")
 	endif()
 
 	# build for each arch
@@ -162,8 +162,10 @@ if(WITH_CYCLES_CUDA_BINARIES)
 
 		set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
 		set(cuda_math_flags "--use_fast_math")
-		
-		if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
+
+		if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50")
+			message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping")
+		elseif(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
 			message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
 		else()
 			add_custom_command(
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index 5316ec9..04e1bad 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -69,8 +69,8 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
     cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0]
     cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1])
 
-    if cuda_version != 50:
-        print("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported." % (cuda_version/10, cuda_version%10))
+    if cuda_version != 60:
+        print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10))
 
     # nvcc flags
     nvcc_flags = "-m%s" % (bits)
@@ -85,6 +85,10 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
 
     # add command for each cuda architecture
     for arch in cuda_archs:
+        if cuda_version < 60 and arch == "sm_50":
+            print("Can't build kernel for CUDA sm_50 architecture, skipping")
+            continue
+
         cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
 
         if env['BF_CYCLES_CUDA_ENV']:
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index d91c6d9..636e48b 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -49,8 +49,8 @@
 
 /* tunable parameters */
 #define CUDA_THREADS_BLOCK_WIDTH 16
-#define CUDA_KERNEL_MAX_REGISTERS 32
-#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40
+#define CUDA_KERNEL_MAX_REGISTERS 63
+#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
 
 /* 5.0 */
 #elif __CUDA_ARCH__ == 500
@@ -61,8 +61,8 @@
 
 /* tunable parameters */
 #define CUDA_THREADS_BLOCK_WIDTH 16
-#define CUDA_KERNEL_MAX_REGISTERS 32
-#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40
+#define CUDA_KERNEL_MAX_REGISTERS 63
+#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
 
 /* unknown architecture */
 #else
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 15e7353..e4c20d2 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -60,7 +60,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
 /* In order to use full 6GB of memory on Titan cards, use arrays instead
  * of textures. On earlier cards this seems slower, but on Titan it is
  * actually slightly faster in tests. */
-#if __CUDA_ARCH__ < 350
+#if __CUDA_ARCH__ < 300
 #define __KERNEL_CUDA_TEX_STORAGE__
 #endif




More information about the Bf-blender-cvs mailing list