[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [57054] trunk/blender/intern/cycles: Cycles OpenCL: patch #35514 by Doug Gale

Brecht Van Lommel brechtvanlommel at pandora.be
Mon May 27 18:21:07 CEST 2013


Revision: 57054
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=57054
Author:   blendix
Date:     2013-05-27 16:21:07 +0000 (Mon, 27 May 2013)
Log Message:
-----------
Cycles OpenCL: patch #35514 by Doug Gale

* Support using devices from all OpenCL platforms, so that you can use e.g. both
  Intel and NVidia OpenCL implementations if you have them installed.
* Fix compile error due to missing fmodf after recent math node change.
* Enable advanced shading for Intel OpenCL.
* Add CYCLES_OPENCL_DEBUG environment variable for generating debug symbols so
  you can debug with gdb. This currently crashes the compiler with Intel OpenCL
  on Linux, though. To make debugging work, the preprocessed kernel source code
  is written out, as gdb needs it.
* Show OpenCL compiler warnings even if the build succeeded.
* Some small fixes to initialize cdDevice to NULL, add missing NULL check when
  creating buffer and add missing space at end of build options for Apple OpenCL.
* Fix crash with multi device + opencl, now e.g. CPU + GPU render should work.

I did a few tweaks to the code and also:

* Fix viewport render failing sometimes with Apple CPU OpenCL; it was not taking
  workgroup size limits into account properly.
* Add compile error when the advanced shading settings in the Blender binary and
  the OpenCL kernel are not in sync.

Modified Paths:
--------------
    trunk/blender/intern/cycles/device/device_opencl.cpp
    trunk/blender/intern/cycles/kernel/kernel_compat_opencl.h
    trunk/blender/intern/cycles/kernel/kernel_types.h
    trunk/blender/intern/cycles/util/util_path.cpp
    trunk/blender/intern/cycles/util/util_path.h

Modified: trunk/blender/intern/cycles/device/device_opencl.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_opencl.cpp	2013-05-27 16:12:06 UTC (rev 57053)
+++ trunk/blender/intern/cycles/device/device_opencl.cpp	2013-05-27 16:21:07 UTC (rev 57054)
@@ -38,7 +38,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
+#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
 
 static cl_device_type opencl_device_type()
 {
@@ -57,9 +57,59 @@
 			return CL_DEVICE_TYPE_ACCELERATOR;
 	}
 
-	return CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+	return CL_DEVICE_TYPE_ALL;
 }
 
+static bool opencl_kernel_use_debug()
+{
+	return (getenv("CYCLES_OPENCL_DEBUG") != NULL);
+}
+
+static bool opencl_kernel_use_advanced_shading(const string& platform)
+{
+	/* keep this in sync with kernel_types.h! */
+	if(platform == "NVIDIA CUDA")
+		return false;
+	else if(platform == "Apple")
+		return false;
+	else if(platform == "AMD Accelerated Parallel Processing")
+		return false;
+	else if(platform == "Intel(R) OpenCL")
+		return true;
+
+	return false;
+}
+
+static string opencl_kernel_build_options(const string& platform, const string *debug_src = NULL)
+{
+	string build_options = " -cl-fast-relaxed-math ";
+
+	if(platform == "NVIDIA CUDA")
+		build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+
+	else if(platform == "Apple")
+		build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes ";
+
+	else if(platform == "AMD Accelerated Parallel Processing")
+		build_options += "-D__KERNEL_OPENCL_AMD__ ";
+
+	else if(platform == "Intel(R) OpenCL") {
+		build_options += "-D__KERNEL_OPENCL_INTEL_CPU__";
+
+		/* options for gdb source level kernel debugging. this segfaults on linux currently */
+		if(opencl_kernel_use_debug() && debug_src)
+			build_options += "-g -s \"" + *debug_src + "\"";
+	}
+
+	if(opencl_kernel_use_debug())
+		build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
+
+	if (opencl_kernel_use_advanced_shading(platform))
+		build_options += "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ ";
+	
+	return build_options;
+}
+
 class OpenCLDevice : public Device
 {
 public:
@@ -72,9 +122,14 @@
 	cl_kernel ckPathTraceKernel;
 	cl_kernel ckFilmConvertKernel;
 	cl_int ciErr;
-	map<string, device_vector<uchar>*> const_mem_map;
-	map<string, device_memory*> mem_map;
+
+	typedef map<string, device_vector<uchar>*> ConstMemMap;
+	typedef map<string, device_ptr> MemMap;
+
+	ConstMemMap const_mem_map;
+	MemMap mem_map;
 	device_ptr null_mem;
+
 	bool device_initialized;
 	string platform_name;
 
@@ -169,6 +224,7 @@
 	{
 		background = background_;
 		cpPlatform = NULL;
+		cdDevice = NULL;
 		cxContext = NULL;
 		cqCommandQueue = NULL;
 		cpProgram = NULL;
@@ -189,38 +245,64 @@
 			return;
 		}
 
-		ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
+		vector<cl_platform_id> platforms(num_platforms, NULL);
+
+		ciErr = clGetPlatformIDs(num_platforms, &platforms[0], NULL);
 		if(opencl_error(ciErr))
 			return;
 
-		char name[256];
-		clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
-		platform_name = name;
+		int num_base = 0;
+		int total_devices = 0;
 
-		/* get devices */
-		vector<cl_device_id> device_ids;
-		cl_uint num_devices;
+		for (int platform = 0; platform < num_platforms; platform++) {
+			cl_uint num_devices;
 
-		if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices)))
-			return;
+			if(opencl_error(clGetDeviceIDs(platforms[platform], opencl_device_type(), 0, NULL, &num_devices)))
+				return;
 
-		if(info.num > num_devices) {
-			if(num_devices == 0)
-				opencl_error("OpenCL: no devices found.");
-			else
-				opencl_error("OpenCL: specified device not found.");
-			return;
+			total_devices += num_devices;
+
+			if(info.num - num_base >= num_devices) {
+				/* num doesn't refer to a device in this platform */
+				num_base += num_devices;
+				continue;
+			}
+
+			/* device is in this platform */
+			cpPlatform = platforms[platform];
+
+			/* get devices */
+			vector<cl_device_id> device_ids(num_devices, NULL);
+
+			if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+				return;
+
+			cdDevice = device_ids[info.num - num_base];
+
+			char name[256];
+			clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+			platform_name = name;
+
+			break;
 		}
 
-		device_ids.resize(num_devices);
-		
-		if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+		if(total_devices == 0) {
+			opencl_error("OpenCL: no devices found.");
 			return;
+		}
+		else if (!cdDevice) {
+			opencl_error("OpenCL: specified device not found.");
+			return;
+		}
 
-		cdDevice = device_ids[info.num];
+		/* Create context properties array to specify platform */
+		const cl_context_properties context_props[] = {
+			CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
+			0, 0
+		};
 
 		/* create context */
-		cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
+		cxContext = clCreateContext(context_props, 1, &cdDevice, NULL, NULL, &ciErr);
 		if(opencl_error(ciErr))
 			return;
 
@@ -229,6 +311,9 @@
 			return;
 
 		null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
+		if(opencl_error(ciErr))
+			return;
+
 		device_initialized = true;
 	}
 
@@ -265,7 +350,7 @@
 		return true;
 	}
 
-	bool load_binary(const string& kernel_path, const string& clbin)
+	bool load_binary(const string& kernel_path, const string& clbin, const string *debug_src = NULL)
 	{
 		/* read binary into memory */
 		vector<uint8_t> binary;
@@ -288,7 +373,7 @@
 			return false;
 		}
 
-		if(!build_kernel(kernel_path))
+		if(!build_kernel(kernel_path, debug_src))
 			return false;
 
 		return true;
@@ -315,51 +400,35 @@
 		return true;
 	}
 
-	string kernel_build_options()
+	bool build_kernel(const string& kernel_path, const string *debug_src = NULL)
 	{
-		string build_options = " -cl-fast-relaxed-math ";
-		
-		if(platform_name == "NVIDIA CUDA")
-			build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
-
-		else if(platform_name == "Apple")
-			build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes";
-
-		else if(platform_name == "AMD Accelerated Parallel Processing")
-			build_options += "-D__KERNEL_OPENCL_AMD__ ";
-
-		return build_options;
-	}
-
-	bool build_kernel(const string& kernel_path)
-	{
-		string build_options = kernel_build_options();
+		string build_options = opencl_kernel_build_options(platform_name, debug_src);
 	
 		ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
 
-		if(ciErr != CL_SUCCESS) {
-			/* show build errors */
-			char *build_log;
-			size_t ret_val_size;
+		/* show warnings even if build is successful */
+		size_t ret_val_size = 0;
 
-			clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
+		clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
 
-			build_log = new char[ret_val_size+1];
-			clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
+		if(ret_val_size > 1) {
+			vector<char> build_log(ret_val_size+1);
+			clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
 
 			build_log[ret_val_size] = '\0';
+			fprintf(stderr, "OpenCL kernel build output:\n");
+			fprintf(stderr, "%s\n", &build_log[0]);
+		}
+
+		if(ciErr != CL_SUCCESS) {
 			opencl_error("OpenCL build failed: errors in console");
-			fprintf(stderr, "%s\n", build_log);
-
-			delete[] build_log;
-
 			return false;
 		}
 
 		return true;
 	}
 
-	bool compile_kernel(const string& kernel_path, const string& kernel_md5)
+	bool compile_kernel(const string& kernel_path, const string& kernel_md5, const string *debug_src = NULL)
 	{
 		/* we compile kernels consisting of many files. unfortunately opencl
 		 * kernel caches do not seem to recognize changes in included files.
@@ -367,6 +436,9 @@
 		string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
 		source = path_source_replace_includes(source, kernel_path);
 
+		if (debug_src)
+			path_write_text(*debug_src, source);
+
 		size_t source_len = source.size();
 		const char *source_str = source.c_str();
 
@@ -378,7 +450,7 @@
 		double starttime = time_dt();
 		printf("Compiling OpenCL kernel ...\n");
 
-		if(!build_kernel(kernel_path))
+		if(!build_kernel(kernel_path, debug_src))
 			return false;
 
 		printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
@@ -401,7 +473,7 @@
 		md5.append((uint8_t*)name, strlen(name));
 		md5.append((uint8_t*)driver, strlen(driver));
 
-		string options = kernel_build_options();
+		string options = opencl_kernel_build_options(platform_name);
 		md5.append((uint8_t*)options.c_str(), options.size());
 
 		return md5.get_hex();
@@ -424,18 +496,26 @@
 		string kernel_md5 = path_files_md5_hash(kernel_path);
 		string device_md5 = device_md5_hash();
 
-		/* try to use cache binary */
+		/* path to cached binary */
 		string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
 		clbin = path_user_get(path_join("cache", clbin));
 
-		if(path_exists(clbin)) {
-			/* if exists already, try use it */
-			if(!load_binary(kernel_path, clbin))
-				return false;
+		/* path to preprocessed source for debugging */
+		string clsrc, *debug_src = NULL;
+		
+		if (opencl_kernel_use_debug()) {
+			clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str());
+			clsrc = path_user_get(path_join("cache", clsrc));
+			debug_src = &clsrc;
 		}
+
+		/* if exists already, try use it */
+		if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) {
+			/* kernel loaded from binary */
+		}
 		else {
-			/* compile kernel */
-			if(!compile_kernel(kernel_path, kernel_md5))
+			/* if does not exist or loading binary failed, compile kernel */
+			if(!compile_kernel(kernel_path, kernel_md5, debug_src))
 				return false;
 
 			/* save binary for reuse */
@@ -461,7 +541,7 @@
 		if(null_mem)
 			clReleaseMemObject(CL_MEM_PTR(null_mem));
 
-		map<string, device_vector<uchar>*>::iterator mt;
+		ConstMemMap::iterator mt;
 		for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
 			mem_free(*(mt->second));
 			delete mt->second;
@@ -533,26 +613,29 @@
 
 	void const_copy_to(const char *name, void *host, size_t size)
 	{

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list