[Bf-blender-cvs] [2278aa0da9d] master: Cycles: Add support for adaptive kernel compilation to OptiX device

Patrick Mours noreply at git.blender.org
Mon Feb 17 14:40:16 CET 2020


Commit: 2278aa0da9d6a046ff014fab4b0cc6156394b0d1
Author: Patrick Mours
Date:   Mon Feb 17 13:35:31 2020 +0100
Branches: master
https://developer.blender.org/rB2278aa0da9d6a046ff014fab4b0cc6156394b0d1

Cycles: Add support for adaptive kernel compilation to OptiX device

This slightly modifies the common CUDA implementation for adaptive kernel compilation to support both CUBIN and PTX output (the latter of which is then used by the OptiX device). It also fixes adaptive kernel compilation on Windows.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D6851

===================================================================

M	extern/cuew/src/cuew.c
M	intern/cycles/CMakeLists.txt
M	intern/cycles/device/cuda/device_cuda.h
M	intern/cycles/device/cuda/device_cuda_impl.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_optix.cpp

===================================================================

diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index a0146741494..f477ec48a18 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -683,23 +683,23 @@ static int cuewNvrtcInit(void) {
 
 
 int cuewInit(cuuint32_t flags) {
-	int result = CUEW_SUCCESS;
-
-	if (flags & CUEW_INIT_CUDA) {
-		result = cuewCudaInit();
-		if (result != CUEW_SUCCESS) {
-			return result;
-		}
-	}
-
-	if (flags & CUEW_INIT_NVRTC) {
-		result = cuewNvrtcInit();
-		if (result != CUEW_SUCCESS) {
-			return result;
-		}
-	}
-
-	return result;
+  int result = CUEW_SUCCESS;
+
+  if (flags & CUEW_INIT_CUDA) {
+    result = cuewCudaInit();
+    if (result != CUEW_SUCCESS) {
+      return result;
+    }
+  }
+
+  if (flags & CUEW_INIT_NVRTC) {
+    result = cuewNvrtcInit();
+    if (result != CUEW_SUCCESS) {
+      return result;
+    }
+  }
+
+  return result;
 }
 
 
@@ -798,7 +798,10 @@ static int path_exists(const char *path) {
 
 const char *cuewCompilerPath(void) {
 #ifdef _WIN32
-  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+  const char *defaultpaths[] = {
+    "C:/CUDA/bin",
+    "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin",
+    NULL};
   const char *executable = "nvcc.exe";
 #else
   const char *defaultpaths[] = {
@@ -832,9 +835,12 @@ const char *cuewCompilerPath(void) {
     }
   }
 
-#ifndef _WIN32
   {
+#ifdef _WIN32
+    FILE *handle = popen("where nvcc", "r");
+#else
     FILE *handle = popen("which nvcc", "r");
+#endif
     if (handle) {
       char buffer[4096] = {0};
       int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
@@ -845,7 +851,6 @@ const char *cuewCompilerPath(void) {
       }
     }
   }
-#endif
 
   return NULL;
 }
@@ -859,23 +864,6 @@ int cuewNvrtcVersion(void) {
   return 0;
 }
 
-static size_t safe_strnlen(const char *s, size_t maxlen) {
-  size_t length;
-  for (length = 0; length < maxlen; s++, length++) {
-    if (*s == '\0') {
-      break;
-    }
-  }
-  return length;
-}
-
-static char *safe_strncpy(char *dest, const char *src, size_t n) {
-  const size_t src_len = safe_strnlen(src, n - 1);
-  memcpy(dest, src, src_len);
-  dest[src_len] = '\0';
-  return dest;
-}
-
 int cuewCompilerVersion(void) {
   const char *path = cuewCompilerPath();
   const char *marker = "Cuda compilation tools, release ";
@@ -891,8 +879,9 @@ int cuewCompilerVersion(void) {
   }
 
   /* get --version output */
-  safe_strncpy(command, path, sizeof(command));
-  strncat(command, " --version", sizeof(command) - strlen(path));
+  strncat(command, "\"", 1);
+  strncat(command, path, sizeof(command) - 1);
+  strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
   pipe = popen(command, "r");
   if (!pipe) {
     fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
@@ -922,4 +911,3 @@ int cuewCompilerVersion(void) {
 
   return 10 * major + minor;
 }
-
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 6f6bd7ec2cc..1014831c403 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -228,11 +228,8 @@ if(WITH_CYCLES_DEVICE_OPTIX)
       SYSTEM
       ${OPTIX_INCLUDE_DIR}
       )
-
-    # Need pre-compiled CUDA binaries in the OptiX device
-    set(WITH_CYCLES_CUDA_BINARIES ON)
   else()
-    message(STATUS "Optix not found, disabling it from Cycles")
+    message(STATUS "OptiX not found, disabling it from Cycles")
     set(WITH_CYCLES_DEVICE_OPTIX OFF)
   endif()
 endif()
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 0f4543e6007..5820b525fd6 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -109,15 +109,13 @@ class CUDADevice : public Device {
 
   bool use_split_kernel();
 
-  string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
-                                          bool filter = false,
-                                          bool split = false);
-
-  bool compile_check_compiler();
+  virtual string compile_kernel_get_common_cflags(
+      const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
 
   string compile_kernel(const DeviceRequestedFeatures &requested_features,
-                        bool filter = false,
-                        bool split = false);
+                        const char *name,
+                        const char *base = "cuda",
+                        bool force_ptx = false);
 
   virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);
 
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index a4e1c026263..cd37c4dd407 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -329,70 +329,27 @@ string CUDADevice::compile_kernel_get_common_cflags(
   return cflags;
 }
 
-bool CUDADevice::compile_check_compiler()
-{
-  const char *nvcc = cuewCompilerPath();
-  if (nvcc == NULL) {
-    cuda_error_message(
-        "CUDA nvcc compiler not found. "
-        "Install CUDA toolkit in default location.");
-    return false;
-  }
-  const int cuda_version = cuewCompilerVersion();
-  VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
-  const int major = cuda_version / 10, minor = cuda_version % 10;
-  if (cuda_version == 0) {
-    cuda_error_message("CUDA nvcc compiler version could not be parsed.");
-    return false;
-  }
-  if (cuda_version < 80) {
-    printf(
-        "Unsupported CUDA version %d.%d detected, "
-        "you need CUDA 8.0 or newer.\n",
-        major,
-        minor);
-    return false;
-  }
-  else if (cuda_version != 101) {
-    printf(
-        "CUDA version %d.%d detected, build may succeed but only "
-        "CUDA 10.1 is officially supported.\n",
-        major,
-        minor);
-  }
-  return true;
-}
-
 string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features,
-                                  bool filter,
-                                  bool split)
+                                  const char *name,
+                                  const char *base,
+                                  bool force_ptx)
 {
-  const char *name, *source;
-  if (filter) {
-    name = "filter";
-    source = "filter.cu";
-  }
-  else if (split) {
-    name = "kernel_split";
-    source = "kernel_split.cu";
-  }
-  else {
-    name = "kernel";
-    source = "kernel.cu";
-  }
-  /* Compute cubin name. */
+  /* Compute kernel name. */
   int major, minor;
   cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
   cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
 
   /* Attempt to use kernel provided with Blender. */
   if (!use_adaptive_compilation()) {
-    const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
-    VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
-    if (path_exists(cubin)) {
-      VLOG(1) << "Using precompiled kernel.";
-      return cubin;
+    if (!force_ptx) {
+      const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+      VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+      if (path_exists(cubin)) {
+        VLOG(1) << "Using precompiled kernel.";
+        return cubin;
+      }
     }
+
     const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
     VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
     if (path_exists(ptx)) {
@@ -401,19 +358,21 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
     }
   }
 
-  const string common_cflags = compile_kernel_get_common_cflags(requested_features, filter, split);
-
   /* Try to use locally compiled kernel. */
-  const string source_path = path_get("source");
-  const string kernel_md5 = path_files_md5_hash(source_path);
+  string source_path = path_get("source");
+  const string source_md5 = path_files_md5_hash(source_path);
 
   /* We include cflags into md5 so changing cuda toolkit or changing other
    * compiler command line arguments makes sure cubin gets re-built.
    */
-  const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
+  string common_cflags = compile_kernel_get_common_cflags(
+      requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL);
+  const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
 
+  const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
+  const char *const kernel_arch = force_ptx ? "compute" : "sm";
   const string cubin_file = string_printf(
-      "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
+      "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
   const string cubin = path_cache_get(path_join("kernels", cubin_file));
   VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
   if (path_exists(cubin)) {
@@ -422,7 +381,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
   }
 
 #  ifdef _WIN32
-  if (have_precompiled_kernels()) {
+  if (!use_adaptive_compilation() && have_precompiled_kernels()) {
     if (major < 3) {
       cuda_error_message(
           string_printf("CUDA device requires compute capability 3.0 or up, "
@@ -437,42 +396,69 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
                         major,
                         minor));
     }
-    return "";
+    return string();
   }
 #  endif
 
   /* Compile. */
-  if (!compile_check_compiler()) {
-    return "";
+  const char *const nvcc = cuewCompilerPath();
+  if (nvcc == NULL) {
+    cuda_error_message(
+        "CUDA nvcc compiler not found. "
+        "Install CUDA toolkit in default location.");
+    return string();
   }
-  const char *nvcc = cuewCompilerPath();
-  const string kernel = path_join(path_join(source_path, "kernel"),
-                                  path_join("kernels", path_join("cuda", source)));
+
+  const int nvcc_cuda_version = cuewCompilerVersion();
+  VLOG(1) 

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list