[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [40062] branches/cycles/intern/cycles: Cycles:

Brecht Van Lommel brechtvanlommel at pandora.be
Fri Sep 9 14:04:39 CEST 2011


Revision: 40062
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=40062
Author:   blendix
Date:     2011-09-09 12:04:39 +0000 (Fri, 09 Sep 2011)
Log Message:
-----------
Cycles:
* OpenCL now only uses GPU/Accelerator devices; using the CPU device is only
  confusing, and it is easy to re-enable in the code for debugging.
* OpenCL kernel binaries are now cached for faster startup after the first
  compilation.
* CUDA kernels can now be compiled and cached at runtime if the CUDA toolkit
  is installed. This means that even if the build does not have CUDA enabled,
  it's still possible to use it as long as you install the toolkit.

Modified Paths:
--------------
    branches/cycles/intern/cycles/CMakeLists.txt
    branches/cycles/intern/cycles/blender/addon/engine.py
    branches/cycles/intern/cycles/blender/blender_python.cpp
    branches/cycles/intern/cycles/device/device_cuda.cpp
    branches/cycles/intern/cycles/device/device_opencl.cpp
    branches/cycles/intern/cycles/kernel/CMakeLists.txt
    branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h
    branches/cycles/intern/cycles/render/session.cpp
    branches/cycles/intern/cycles/util/util_cuda.cpp
    branches/cycles/intern/cycles/util/util_path.cpp
    branches/cycles/intern/cycles/util/util_path.h

Modified: branches/cycles/intern/cycles/CMakeLists.txt
===================================================================
--- branches/cycles/intern/cycles/CMakeLists.txt	2011-09-09 11:55:38 UTC (rev 40061)
+++ branches/cycles/intern/cycles/CMakeLists.txt	2011-09-09 12:04:39 UTC (rev 40062)
@@ -46,7 +46,7 @@
 endif()
 
 if(WITH_CYCLES_CUDA)
-  add_definitions(-DWITH_CUDA)
+  add_definitions(-DWITH_CUDA_BINARIES)
 endif()
 
 if(WITH_CYCLES_OSL)
@@ -58,6 +58,7 @@
 endif()
 
 add_definitions(-DWITH_OPENCL)
+add_definitions(-DWITH_CUDA)
 
 include_directories(
 	${BOOST_INCLUDE_DIR}

Modified: branches/cycles/intern/cycles/blender/addon/engine.py
===================================================================
--- branches/cycles/intern/cycles/blender/addon/engine.py	2011-09-09 11:55:38 UTC (rev 40061)
+++ branches/cycles/intern/cycles/blender/addon/engine.py	2011-09-09 12:04:39 UTC (rev 40062)
@@ -21,8 +21,12 @@
 def init():
     import libcycles_blender as lib
     import os.path
-    lib.init(os.path.dirname(__file__))
 
+    path = os.path.dirname(__file__)
+    user_path = os.path.dirname(os.path.abspath(bpy.utils.user_resource('CONFIG', '')))
+
+    lib.init(path, user_path)
+
 def create(engine, data, scene, region = 0, v3d = 0, rv3d = 0):
     import libcycles_blender as lib
 

Modified: branches/cycles/intern/cycles/blender/blender_python.cpp
===================================================================
--- branches/cycles/intern/cycles/blender/blender_python.cpp	2011-09-09 11:55:38 UTC (rev 40061)
+++ branches/cycles/intern/cycles/blender/blender_python.cpp	2011-09-09 12:04:39 UTC (rev 40062)
@@ -28,12 +28,12 @@
 
 static PyObject *init_func(PyObject *self, PyObject *args)
 {
-	const char *path;
+	const char *path, *user_path;
 
-	if(!PyArg_ParseTuple(args, "s", &path))
+	if(!PyArg_ParseTuple(args, "ss", &path, &user_path))
 		return NULL;
 	
-	path_init(path);
+	path_init(path, user_path);
 
 	Py_INCREF(Py_None);
 	return Py_None;

Modified: branches/cycles/intern/cycles/device/device_cuda.cpp
===================================================================
--- branches/cycles/intern/cycles/device/device_cuda.cpp	2011-09-09 11:55:38 UTC (rev 40061)
+++ branches/cycles/intern/cycles/device/device_cuda.cpp	2011-09-09 12:04:39 UTC (rev 40062)
@@ -28,7 +28,9 @@
 #include "util_map.h"
 #include "util_opengl.h"
 #include "util_path.h"
+#include "util_system.h"
 #include "util_types.h"
+#include "util_time.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -125,6 +127,15 @@
 		} \
 	}
 
+	bool cuda_error(CUresult result)
+	{
+		if(result == CUDA_SUCCESS)
+			return false;
+
+		fprintf(stderr, "CUDA error: %s\n", cuda_error_string(result));
+		return true;
+	}
+
 	void cuda_push_context()
 	{
 		cuda_assert(cuCtxSetCurrent(cuContext))
@@ -140,18 +151,27 @@
 		background = background_;
 
 		cuDevId = 0;
+		cuDevice = 0;
+		cuContext = 0;
 
 		/* intialize */
-		cuda_assert(cuInit(0))
+		if(cuda_error(cuInit(0)))
+			return;
 
 		/* setup device and context */
-		cuda_assert(cuDeviceGet(&cuDevice, cuDevId))
+		if(cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
+			return;
 
+		CUresult result;
+
 		if(background)
-			cuda_assert(cuCtxCreate(&cuContext, 0, cuDevice))
+			result = cuCtxCreate(&cuContext, 0, cuDevice);
 		else
-			cuda_assert(cuGLCtxCreate(&cuContext, 0, cuDevice))
+			result = cuGLCtxCreate(&cuContext, 0, cuDevice);
 
+		if(cuda_error(result))
+			return;
+
 		cuda_pop_context();
 	}
 
@@ -173,21 +193,80 @@
 		return string("CUDA ") + deviceName;
 	}
 
+	string compile_kernel()
+	{
+		/* compute cubin name */
+		int major, minor;
+		cuDeviceComputeCapability(&major, &minor, cuDevId);
 
+		/* attempt to use kernel provided with blender */
+		string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
+		if(path_exists(cubin))
+			return cubin;
+
+		/* not found, try to use locally compiled kernel */
+		string kernel_path = path_get("kernel");
+		string md5 = path_files_md5_hash(kernel_path);
+
+		cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());;
+		cubin = path_user_get(path_join("cache", cubin));
+
+		/* if exists already, use it */
+		if(path_exists(cubin))
+			return cubin;
+
+		/* if not, find CUDA compiler */
+		string nvcc = cuCompilerPath();
+
+		if(nvcc == "") {
+			fprintf(stderr, "CUDA nvcc compiler not found. Install CUDA toolkit in default location.\n");
+			return "";
+		}
+
+		/* compile */
+		string kernel = path_join(kernel_path, "kernel.cu");
+		string include = kernel_path;
+		const int machine = system_cpu_bits();
+		const int maxreg = 24;
+
+		double starttime = time_dt();
+		printf("Compiling CUDA kernel ...\n");
+
+		string command = string_printf("%s -arch=sm_%d%d -m%d --cubin \"%s\" --use_fast_math "
+			"-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC",
+			nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str());
+
+		system(command.c_str());
+
+		/* verify if compilation succeeded */
+		if(!path_exists(cubin)) {
+			fprintf(stderr, "CUDA kernel compilation failed.\n");
+			return "";
+		}
+
+		printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
+
+		return cubin;
+	}
+
 	bool load_kernels()
 	{
-		CUresult result;
-		int major, minor;
+		/* check if cuda init succeeded */
+		if(cuContext == 0)
+			return false;
 
-		cuda_push_context();
+		/* get kernel */
+		string cubin = compile_kernel();
 
+		if(cubin == "")
+			return false;
+
 		/* open module */
-		cuDeviceComputeCapability(&major, &minor, cuDevId);
-		string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
+		cuda_push_context();
 
-		result = cuModuleLoad(&cuModule, cubin.c_str());
-		if(result != CUDA_SUCCESS)
-			fprintf(stderr, "Failed loading CUDA kernel %s (%s).\n", cubin.c_str(), cuda_error_string(result));
+		CUresult result = cuModuleLoad(&cuModule, cubin.c_str());
+		if(cuda_error(result))
+			fprintf(stderr, "Failed loading CUDA kernel %s.\n", cubin.c_str());
 
 		cuda_pop_context();
 

Modified: branches/cycles/intern/cycles/device/device_opencl.cpp
===================================================================
--- branches/cycles/intern/cycles/device/device_opencl.cpp	2011-09-09 11:55:38 UTC (rev 40061)
+++ branches/cycles/intern/cycles/device/device_opencl.cpp	2011-09-09 12:04:39 UTC (rev 40062)
@@ -27,6 +27,7 @@
 
 #include "util_map.h"
 #include "util_math.h"
+#include "util_md5.h"
 #include "util_opencl.h"
 #include "util_opengl.h"
 #include "util_path.h"
@@ -118,7 +119,7 @@
 	void opencl_assert(cl_int err)
 	{
 		if(err != CL_SUCCESS) {
-			printf("error (%d): %s\n", err, opencl_error_string(err));
+			fprintf(stderr, "OpenCL error (%d): %s\n", err, opencl_error_string(err));
 #ifndef NDEBUG
 			abort();
 #endif
@@ -157,7 +158,7 @@
 
 		cpPlatform = platform_ids[0]; /* todo: pick specified platform && device */
 
-		ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, 1, &cdDevice, NULL);
+		ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 1, &cdDevice, NULL);
 		if(opencl_error(ciErr))
 			return;
 
@@ -208,38 +209,67 @@
 		return true;
 	}
 
-	bool load_kernels()
+	bool load_binary(const string& kernel_path, const string& clbin)
 	{
-		/* verify if device was initialized */
-		if(!device_initialized)
+		/* read binary into memory */
+		vector<uint8_t> binary;
+
+		if(!path_read_binary(clbin, binary)) {
+			fprintf(stderr, "OpenCL failed to read cached binary %s.\n", clbin.c_str());
 			return false;
+		}
 
-		/* verify we have right opencl version */
-		if(!opencl_version_check())
+		/* create program */
+		cl_int status;
+		size_t size = binary.size();
+		const uint8_t *bytes = &binary[0];
+
+		cpProgram = clCreateProgramWithBinary(cxContext, 1, &cdDevice,
+			&size, &bytes, &status, &ciErr);
+
+		if(opencl_error(status) || opencl_error(ciErr)) {
+			fprintf(stderr, "OpenCL failed create program from cached binary %s.\n", clbin.c_str());
 			return false;
+		}
 
-		/* we compile kernels consisting of many files. unfortunately opencl
-		   kernel caches do not seem to recognize changes in included files.
-		   so we force recompile on changes by adding the md5 hash of all files */
-		string kernel_path = path_get("kernel");
-		string kernel_md5 = path_files_md5_hash(kernel_path);
+		if(!build_kernel(kernel_path))
+			return false;
 
-		string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
-		size_t source_len = source.size();
-		const char *source_str = source.c_str();
+		return true;
+	}
 
+	bool save_binary(const string& clbin)
+	{
+		size_t size = 0;
+		clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+
+		if(!size)
+			return false;
+
+		vector<uint8_t> binary(size);
+		uint8_t *bytes = &binary[0];
+
+		clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+
+		if(!path_write_binary(clbin, binary)) {
+			fprintf(stderr, "OpenCL failed to write cached binary %s.\n", clbin.c_str());
+			return false;
+		}
+
+		return true;
+	}
+
+	bool build_kernel(const string& kernel_path)
+	{
 		string build_options = "";
 
 		build_options += "-I " + kernel_path + ""; /* todo: escape path */
 		build_options += " -cl-fast-relaxed-math -cl-strict-aliasing";
 
-		cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr);
-		if(opencl_error(ciErr))
-			return false;
-
 		ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
 
 		if(ciErr != CL_SUCCESS) {
+			/* show build errors */
 			char *build_log;
 			size_t ret_val_size;
 
@@ -256,6 +286,87 @@
 			return false;
 		}
 
+		return true;
+	}
+
+	bool compile_kernel(const string& kernel_path, const string& kernel_md5)
+	{
+		/* we compile kernels consisting of many files. unfortunately opencl
+		   kernel caches do not seem to recognize changes in included files.
+		   so we force recompile on changes by adding the md5 hash of all files */
+		string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
+		size_t source_len = source.size();
+		const char *source_str = source.c_str();
+
+		cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr);
+
+		if(opencl_error(ciErr))
+			return false;
+
+		double starttime = time_dt();
+		printf("Compiling OpenCL kernel ...\n");
+
+		if(!build_kernel(kernel_path))
+			return false;
+
+		printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
+
+		return true;
+	}
+
+	string device_md5_hash()
+	{
+		MD5Hash md5;

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list