[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [57160] trunk/blender/intern/cycles: Cycles OpenCL: keep the opencl context and program around for quicker rendering

Brecht Van Lommel brechtvanlommel at pandora.be
Fri May 31 18:19:03 CEST 2013


Revision: 57160
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=57160
Author:   blendix
Date:     2013-05-31 16:19:03 +0000 (Fri, 31 May 2013)
Log Message:
-----------
Cycles OpenCL: keep the opencl context and program around for quicker rendering
the second time, as for example Intel CPU startup time is 9 seconds.

* Adds a cache for contexts and programs for each platform and device pair,
  which also ensures that no two threads try to compile and write the binary
  cache file at the same time.
* Change clFinish to clFlush so we don't block until the result is done;
  instead it will block at the moment we copy back memory.
* Fix error in Cycles time_sleep implementation; this does not affect any
  active code, though.
* Adds some (disabled) debugging code in the task scheduler.

Patch #35559 by Doug Gale.

Modified Paths:
--------------
    trunk/blender/intern/cycles/device/device_opencl.cpp
    trunk/blender/intern/cycles/util/util_opencl.cpp
    trunk/blender/intern/cycles/util/util_task.cpp
    trunk/blender/intern/cycles/util/util_time.cpp

Modified: trunk/blender/intern/cycles/device/device_opencl.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_opencl.cpp	2013-05-31 15:48:42 UTC (rev 57159)
+++ trunk/blender/intern/cycles/device/device_opencl.cpp	2013-05-31 16:19:03 UTC (rev 57160)
@@ -104,12 +104,194 @@
 	if(opencl_kernel_use_debug())
 		build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
 
-	if (opencl_kernel_use_advanced_shading(platform))
+	if(opencl_kernel_use_advanced_shading(platform))
 		build_options += "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ ";
 	
 	return build_options;
 }
 
+/* cache of OpenCL contexts and programs, one slot per platform/device pair, guarded by a cache mutex plus one mutex per slot */
+class OpenCLCache
+{
+	struct Slot
+	{
+		thread_mutex *mutex;
+		cl_context context;
+		cl_program program;
+
+		Slot() : mutex(NULL), context(NULL), program(NULL) {}
+
+		Slot(const Slot &rhs)
+			: mutex(rhs.mutex)
+			, context(rhs.context)
+			, program(rhs.program)
+		{
+			/* copies are only expected during map insert of a fresh Slot; a copied mutex pointer would be deleted twice by ~Slot, so assert there is none */
+			assert(mutex == NULL);
+		}
+
+		~Slot()
+		{
+			delete mutex;
+			mutex = NULL;
+		}
+	};
+
+	/* cache key: the (platform ID, device ID) pair */
+	typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
+
+	/* one Slot per platform/device pair */
+	typedef map<PlatformDevicePair, Slot> CacheMap;
+	CacheMap cache;
+
+	thread_mutex cache_lock;
+
+	/* singleton accessor: the instance is a function-local static, constructed on first call */
+	static OpenCLCache &global_instance()
+	{
+		static OpenCLCache instance;
+		return instance;
+	}
+
+	OpenCLCache()
+	{
+	}
+
+	~OpenCLCache()
+	{
+		/* The flush() call below is intentionally disabled: an Intel OpenCL
+		 * bug raises SIGABRT due to a pure virtual call. It's not necessary
+		 * to free objects at process exit anyway.
+		 * http://software.intel.com/en-us/forums/topic/370083#comments */
+
+		//flush();
+	}
+
+	/* look up the object selected by `member` for this platform/device pair, creating an empty slot if needed.
+	 * On a hit the object is retained with retain_func and returned with slot_locker unlocked. If this returns NULL,
+	 * slot_locker is left holding the slot's mutex (not the cache mutex). slot_locker should refer to a default constructed thread_scoped_lock */
+	template<typename T>
+	static T get_something(cl_platform_id platform, cl_device_id device,
+		T Slot::*member, cl_int (*retain_func)(T), thread_scoped_lock &slot_locker)
+	{
+		assert(platform != NULL);
+
+		OpenCLCache &self = global_instance();
+
+		thread_scoped_lock cache_lock(self.cache_lock);
+
+		pair<CacheMap::iterator,bool> ins = self.cache.insert(
+			CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
+
+		Slot &slot = ins.first->second;
+
+		/* create the slot mutex only while holding the cache lock, so it is allocated once per slot */
+		if(!slot.mutex)
+			slot.mutex = new thread_mutex;
+
+		/* release the cache lock before blocking on the slot lock: the thread holding the slot lock needs the cache lock in store_something to complete */
+		cache_lock.unlock();
+
+		/* lock the slot; this blocks while another thread holds this slot's mutex */
+		slot_locker = thread_scoped_lock(*slot.mutex);
+
+		/* cache miss: nothing stored in this member yet */
+		if(slot.*member == NULL) {
+			/* return with the caller's lock holder still holding the slot lock */
+			return NULL;
+		}
+
+		/* the item was already cached, release the slot lock */
+		slot_locker.unlock();
+
+		/* caller is going to release it when done with it, so retain it. NOTE(review): slot.*member is read here after the slot lock was released */
+		cl_int ciErr = retain_func(slot.*member);
+		assert(ciErr == CL_SUCCESS);
+		(void)ciErr;
+
+		return slot.*member;
+	}
+
+	/* store something in the cache. you MUST have tried to get the item before storing to it, so the slot exists and slot_locker holds the slot lock; slot_locker is unlocked here */
+	template<typename T>
+	static void store_something(cl_platform_id platform, cl_device_id device, T thing,
+		T Slot::*member, cl_int (*retain_func)(T), thread_scoped_lock &slot_locker)
+	{
+		assert(platform != NULL);
+		assert(device != NULL);
+		assert(thing != NULL);
+
+		OpenCLCache &self = global_instance();
+
+		thread_scoped_lock cache_lock(self.cache_lock);
+		CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
+		cache_lock.unlock();
+
+		Slot &slot = i->second;
+
+		/* sanity check. NOTE(review): the iterator is already dereferenced above, before the end() assert runs */
+		assert(i != self.cache.end());
+		assert(slot.*member == NULL);
+
+		slot.*member = thing;
+
+		/* unlock the slot so that threads waiting in get_something can proceed */
+		slot_locker.unlock();
+
+		/* increment reference count in OpenCL, so the pointer kept in the slot has its own reference.
+		 * The caller is going to release the object when done with it. */
+		cl_int ciErr = retain_func(thing);
+		assert(ciErr == CL_SUCCESS);
+		(void)ciErr;
+	}
+
+public:
+	/* cached context (retained with clRetainContext), or NULL with slot_locker holding the slot lock; see get_something comment */
+	static cl_context get_context(cl_platform_id platform, cl_device_id device,
+		thread_scoped_lock &slot_locker)
+	{
+		return get_something(platform, device, &Slot::context, clRetainContext, slot_locker);
+	}
+
+	/* cached program (retained with clRetainProgram), or NULL with slot_locker holding the slot lock; see get_something comment */
+	static cl_program get_program(cl_platform_id platform, cl_device_id device,
+		thread_scoped_lock &slot_locker)
+	{
+		return get_something(platform, device, &Slot::program, clRetainProgram, slot_locker);
+	}
+
+	/* store a newly created context after get_context returned NULL; see store_something comment */
+	static void store_context(cl_platform_id platform, cl_device_id device, cl_context context,
+		thread_scoped_lock &slot_locker)
+	{
+		store_something(platform, device, context, &Slot::context, clRetainContext, slot_locker);
+	}
+
+	/* store a newly built program after get_program returned NULL; see store_something comment */
+	static void store_program(cl_platform_id platform, cl_device_id device, cl_program program,
+		thread_scoped_lock &slot_locker)
+	{
+		store_something(platform, device, program, &Slot::program, clRetainProgram, slot_locker);
+	}
+
+	/* discard all cached contexts and programs: release each non-NULL one and clear the map, under the cache lock.
+	 * the call from the destructor is disabled as a temporary workaround. See OpenCLCache::~OpenCLCache */
+	static void flush()
+	{
+		OpenCLCache &self = global_instance();
+		thread_scoped_lock cache_lock(self.cache_lock);
+
+		foreach(CacheMap::value_type &item, self.cache) {
+			if(item.second.program != NULL)
+				clReleaseProgram(item.second.program);
+			if(item.second.context != NULL)
+				clReleaseContext(item.second.context);
+		}
+
+		self.cache.clear();
+	}
+};
+
 class OpenCLDevice : public Device
 {
 public:
@@ -290,22 +472,35 @@
 			opencl_error("OpenCL: no devices found.");
 			return;
 		}
-		else if (!cdDevice) {
+		else if(!cdDevice) {
 			opencl_error("OpenCL: specified device not found.");
 			return;
 		}
 
-		/* Create context properties array to specify platform */
-		const cl_context_properties context_props[] = {
-			CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
-			0, 0
-		};
+		{
+			/* try to use cached context */
+			thread_scoped_lock cache_locker;
+			cxContext = OpenCLCache::get_context(cpPlatform, cdDevice, cache_locker);
 
-		/* create context */
-		cxContext = clCreateContext(context_props, 1, &cdDevice, NULL, NULL, &ciErr);
-		if(opencl_error(ciErr))
-			return;
+			if(cxContext == NULL) {
+				/* create context properties array to specify platform */
+				const cl_context_properties context_props[] = {
+					CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
+					0, 0
+				};
 
+				/* create context */
+				cxContext = clCreateContext(context_props, 1, &cdDevice,
+					context_notify_callback, cdDevice, &ciErr);
+
+				if(opencl_error(ciErr))
+					return;
+
+				/* cache it */
+				OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker);
+			}
+		}
+
 		cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
 		if(opencl_error(ciErr))
 			return;
@@ -317,6 +512,15 @@
 		device_initialized = true;
 	}
 
+	static void context_notify_callback(const char *err_info,
+		const void *private_info, size_t cb, void *user_data)
+	{
+		char name[256]; /* NOTE(review): stays uninitialized if the query below fails; its return value is not checked */
+		clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL); /* user_data is the cl_device_id passed to clCreateContext */
+		/* print the error text to stderr, tagged with the device name; private_info and cb are unused */
+		fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info);
+	}
+
 	bool opencl_version_check()
 	{
 		char version[256];
@@ -436,7 +640,7 @@
 		string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
 		source = path_source_replace_includes(source, kernel_path);
 
-		if (debug_src)
+		if(debug_src)
 			path_write_text(*debug_src, source);
 
 		size_t source_len = source.size();
@@ -487,39 +691,49 @@
 			return false;
 		}
 
-		/* verify we have right opencl version */
-		if(!opencl_version_check())
-			return false;
+		/* try to use cached kernel */
+		thread_scoped_lock cache_locker;
+		cpProgram = OpenCLCache::get_program(cpPlatform, cdDevice, cache_locker);
 
-		/* md5 hash to detect changes */
-		string kernel_path = path_get("kernel");
-		string kernel_md5 = path_files_md5_hash(kernel_path);
-		string device_md5 = device_md5_hash();
+		if(!cpProgram) {
+			/* verify we have right opencl version */
+			if(!opencl_version_check())
+				return false;
 
-		/* path to cached binary */
-		string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
-		clbin = path_user_get(path_join("cache", clbin));
+			/* md5 hash to detect changes */
+			string kernel_path = path_get("kernel");
+			string kernel_md5 = path_files_md5_hash(kernel_path);
+			string device_md5 = device_md5_hash();
 
-		/* path to preprocessed source for debugging */
-		string clsrc, *debug_src = NULL;
-		
-		if (opencl_kernel_use_debug()) {
-			clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str());
-			clsrc = path_user_get(path_join("cache", clsrc));
-			debug_src = &clsrc;
-		}
+			/* path to cached binary */
+			string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
+			clbin = path_user_get(path_join("cache", clbin));
 
-		/* if exists already, try use it */
-		if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) {
-			/* kernel loaded from binary */
-		}
-		else {
-			/* if does not exist or loading binary failed, compile kernel */
-			if(!compile_kernel(kernel_path, kernel_md5, debug_src))
-				return false;
+			/* path to preprocessed source for debugging */
+			string clsrc, *debug_src = NULL;
 
-			/* save binary for reuse */
-			save_binary(clbin);
+			if(opencl_kernel_use_debug()) {
+				clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str());
+				clsrc = path_user_get(path_join("cache", clsrc));
+				debug_src = &clsrc;
+			}
+
+			/* if exists already, try use it */
+			if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) {
+				/* kernel loaded from binary */
+			}
+			else {
+				/* if does not exist or loading binary failed, compile kernel */
+				if(!compile_kernel(kernel_path, kernel_md5, debug_src))
+					return false;
+
+				/* save binary for reuse */
+				if(!save_binary(clbin))
+					return false;
+			}
+
+			/* cache the program */
+			OpenCLCache::store_program(cpPlatform, cdDevice, cpProgram, cache_locker);
 		}
 
 		/* find kernels */
@@ -563,13 +777,18 @@
 	{
 		size_t size = mem.memory_size();
 
+		cl_mem_flags mem_flag;
+		void *mem_ptr = NULL;
+
 		if(type == MEM_READ_ONLY)

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list