[Bf-blender-cvs] [43e8439] cycles_kernel_split: Use OpenCLCache for megakernel

varunsundar08 noreply at git.blender.org
Thu Apr 30 23:25:27 CEST 2015

Commit: 43e843966b0f4174105e10eaa1ea3c0d919c31d6
Author: varunsundar08
Date:   Thu Apr 30 14:38:41 2015 +0530
Branches: cycles_kernel_split

Use OpenCLCache for megakernel


M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/kernel/kernel.cl


diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index b1392fa..6bb862e 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -123,11 +123,11 @@ static string opencl_kernel_build_options(const string& platform, const string *
 		build_options += "-D__KERNEL_OPENCL_AMD__ ";
 	else if(platform == "Intel(R) OpenCL") {
-		build_options += "-D__KERNEL_OPENCL_INTEL_CPU__";
+		build_options += "-D__KERNEL_OPENCL_INTEL_CPU__ ";
 		/* options for gdb source level kernel debugging. this segfaults on linux currently */
 		if(opencl_kernel_use_debug() && debug_src)
-			build_options += "-g -s \"" + *debug_src + "\"";
+			build_options += "-g -s \"" + *debug_src + "\" ";
@@ -147,14 +147,18 @@ class OpenCLCache
 		thread_mutex *mutex;
 		cl_context context;
-		cl_program program;
+		/* cl_program for shader, bake, film_convert kernels (used in OpenCLDeviceBase) */
+		cl_program ocl_dev_base_program;
+		/* cl_program for megakernel (used in OpenCLDeviceMegaKernel) */
+		cl_program ocl_dev_megakernel_program;
-		Slot() : mutex(NULL), context(NULL), program(NULL) {}
+		Slot() : mutex(NULL), context(NULL), ocl_dev_base_program(NULL), ocl_dev_megakernel_program(NULL) {}
 		Slot(const Slot &rhs)
 			: mutex(rhs.mutex)
 			, context(rhs.context)
-			, program(rhs.program)
+			, ocl_dev_base_program(rhs.ocl_dev_base_program)
+			, ocl_dev_megakernel_program(rhs.ocl_dev_megakernel_program)
 			/* copy can only happen in map insert, assert that */
 			assert(mutex == NULL);
@@ -283,10 +287,21 @@ public:
 	/* see get_something comment */
-	static cl_program get_program(cl_platform_id platform, cl_device_id device,
+	static cl_program get_program(cl_platform_id platform, cl_device_id device, string program_name,
 		thread_scoped_lock &slot_locker)
-		cl_program program = get_something<cl_program>(platform, device, &Slot::program, slot_locker);
+		cl_program program = NULL;
+		if (program_name == "ocl_dev_base_program") {
+			/* Get program related to OpenCLDeviceBase */
+			program = get_something<cl_program>(platform, device, &Slot::ocl_dev_base_program, slot_locker);
+		}
+		else if (program_name == "ocl_dev_megakernel_program") {
+			/* Get program related to megakernel */
+			program = get_something<cl_program>(platform, device, &Slot::ocl_dev_megakernel_program, slot_locker);
+		} else {
+			fprintf(stderr, "Invalid program name in OpenCLCache \n");
+		}
 			return NULL;
@@ -313,10 +328,18 @@ public:
 	/* see store_something comment */
-	static void store_program(cl_platform_id platform, cl_device_id device, cl_program program,
+	static void store_program(cl_platform_id platform, cl_device_id device, cl_program program, string program_name,
 		thread_scoped_lock &slot_locker)
-		store_something<cl_program>(platform, device, program, &Slot::program, slot_locker);
+		if (program_name == "ocl_dev_base_program") {
+			store_something<cl_program>(platform, device, program, &Slot::ocl_dev_base_program, slot_locker);
+		}
+		else if(program_name == "ocl_dev_megakernel_program") {
+			store_something<cl_program>(platform, device, program, &Slot::ocl_dev_megakernel_program, slot_locker);
+		} else {
+			fprintf(stderr, "Invalid program name in OpenCLCache \n");
+			return;
+		}
 		/* increment reference count in OpenCL.
 		 * The caller is going to release the object when done with it. */
@@ -333,9 +356,11 @@ public:
 		thread_scoped_lock cache_lock(self.cache_lock);
 		foreach(CacheMap::value_type &item, self.cache) {
-			if(item.second.program != NULL)
-				clReleaseProgram(item.second.program);
-			if(item.second.context != NULL)
+			if (item.second.ocl_dev_base_program != NULL)
+				clReleaseProgram(item.second.ocl_dev_base_program);
+			if (item.second.ocl_dev_megakernel_program != NULL)
+				clReleaseProgram(item.second.ocl_dev_megakernel_program);
+			if (item.second.context != NULL)
@@ -733,6 +758,199 @@ public:
+	/* Create *program from the cached binary file `clbin` and build it.
+	 *
+	 * program:                     receives the new program handle. It is released and
+	 *                              reset to NULL if creation or the build fails, so the
+	 *                              caller can fall back to compiling from source without
+	 *                              leaking the handle.
+	 * kernel_path:                 forwarded to build_kernel().
+	 * clbin:                       path of the cached binary to read.
+	 * custom_kernel_build_options: extra build options forwarded to build_kernel().
+	 * debug_src:                   optional debug source path forwarded to build_kernel().
+	 *
+	 * Returns true when the program was created and built, false otherwise. */
+	bool load_binary(cl_program *program, const string& kernel_path, const string& clbin, string custom_kernel_build_options, const string *debug_src = NULL)
+	{
+		/* read binary into memory; an empty file cannot hold a program and
+		 * taking &binary[0] of an empty vector would be undefined behaviour */
+		vector<uint8_t> binary;
+		if (!path_read_binary(clbin, binary) || binary.empty()) {
+			opencl_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
+			return false;
+		}
+		/* create program; status is pre-set because the per-binary status is
+		 * not guaranteed to be written when the call fails early */
+		cl_int status = CL_SUCCESS;
+		size_t size = binary.size();
+		const uint8_t *bytes = &binary[0];
+		*program = clCreateProgramWithBinary(cxContext, 1, &cdDevice,
+			&size, &bytes, &status, &ciErr);
+		bool loaded = true;
+		if (opencl_error(status) || opencl_error(ciErr)) {
+			opencl_error(string_printf("OpenCL failed create program from cached binary %s.", clbin.c_str()));
+			loaded = false;
+		}
+		else if (!build_kernel(kernel_path, program, custom_kernel_build_options, debug_src)) {
+			loaded = false;
+		}
+		/* do not hand a half-initialized program back to the caller */
+		if (!loaded && *program != NULL) {
+			clReleaseProgram(*program);
+			*program = NULL;
+		}
+		return loaded;
+	}
+	/* Write the device binary of *program to the file `clbin` so that a later
+	 * run can load it instead of recompiling.
+	 *
+	 * Both OpenCL queries are checked: a failed query previously left the
+	 * buffer zero-filled and that content was written into the cache as if it
+	 * were a valid binary.
+	 *
+	 * Returns true when the binary was queried and written, false otherwise. */
+	bool save_binary(cl_program *program, const string& clbin) {
+		/* query the size of the binary */
+		size_t size = 0;
+		ciErr = clGetProgramInfo(*program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+		if (opencl_error(ciErr))
+			return false;
+		if (!size)
+			return false;
+		/* fetch the binary itself */
+		vector<uint8_t> binary(size);
+		uint8_t *bytes = &binary[0];
+		ciErr = clGetProgramInfo(*program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+		if (opencl_error(ciErr))
+			return false;
+		if (!path_write_binary(clbin, binary)) {
+			opencl_error(string_printf("OpenCL failed to write cached binary %s.", clbin.c_str()));
+			return false;
+		}
+		return true;
+	}
+	/* Build *kernel_program and print whatever the compiler reported.
+	 *
+	 * The option string is the result of opencl_kernel_build_options() for
+	 * this platform followed by custom_kernel_build_options. The first
+	 * parameter (kernel path) is unused.
+	 *
+	 * Returns true when clBuildProgram reported CL_SUCCESS; otherwise an
+	 * error is recorded through opencl_error() and false is returned. */
+	bool build_kernel(const string& /*kernel_path*/,
+		cl_program *kernel_program,
+		string custom_kernel_build_options,
+		const string *debug_src = NULL)
+	{
+		const string options = opencl_kernel_build_options(platform_name, debug_src) + custom_kernel_build_options;
+		ciErr = clBuildProgram(*kernel_program, 0, NULL, options.c_str(), NULL, NULL);
+		/* the log is fetched regardless of the build result, so warnings of a
+		 * successful build are shown as well */
+		size_t log_size = 0;
+		clGetProgramBuildInfo(*kernel_program, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+		const bool has_log = (log_size > 1);
+		if (has_log) {
+			/* one extra byte so the text is terminated no matter what the driver wrote */
+			vector<char> log_text(log_size + 1);
+			clGetProgramBuildInfo(*kernel_program, cdDevice, CL_PROGRAM_BUILD_LOG, log_size, &log_text[0], NULL);
+			log_text[log_size] = '\0';
+			fprintf(stderr, "OpenCL kernel build output:\n");
+			fprintf(stderr, "%s\n", &log_text[0]);
+		}
+		if (ciErr == CL_SUCCESS)
+			return true;
+		opencl_error("OpenCL build failed: errors in console");
+		return false;
+	}
+	/* Create *kernel_program from OpenCL source text and build it.
+	 *
+	 * kernel_path:                 passed to path_source_replace_includes() and build_kernel().
+	 * (kernel_name):               unused.
+	 * source:                      program text; it is run through
+	 *                              path_source_replace_includes() before compiling.
+	 * kernel_program:              receives the created program handle.
+	 * custom_kernel_build_options: extra options forwarded to build_kernel().
+	 * debug_src:                   when non-NULL, the processed source is also written
+	 *                              to this path.
+	 *
+	 * OpenCL kernel caches do not seem to notice changes in included files, so
+	 * callers force a recompile by placing an md5 hash of the kernel files in
+	 * `source` (load_kernels appends it as a trailing comment).
+	 *
+	 * Returns true when the program was created and built, false otherwise. */
+	bool compile_kernel(const string& kernel_path,
+		const string& /*kernel_name*/,
+		string source,
+		cl_program *kernel_program,
+		string custom_kernel_build_options,
+		const string *debug_src = NULL)
+	{
+		source = path_source_replace_includes(source, kernel_path);
+		if (debug_src != NULL)
+			path_write_text(*debug_src, source);
+		const char *text = source.c_str();
+		size_t text_len = source.size();
+		*kernel_program = clCreateProgramWithSource(cxContext, 1, &text, &text_len, &ciErr);
+		if (opencl_error(ciErr))
+			return false;
+		/* time the build step only */
+		const double build_start = time_dt();
+		printf("Compiling OpenCL kernel ...\n");
+		const bool built = build_kernel(kernel_path, kernel_program, custom_kernel_build_options, debug_src);
+		if (!built)
+			return false;
+		printf("Kernel compilation finished in %.2lfs.\n", time_dt() - build_start);
+		return true;
+	}
+	bool load_kernels(bool /*experimental*/)
+	{
+		/* verify if device was initialized */
+		if (!device_initialized) {
+			fprintf(stderr, "OpenCL: failed to initialize device.\n");
+			return false;
+		}
+		/* try to use cached kernel */
+		thread_scoped_lock cache_locker;
+		cpProgram = OpenCLCache::get_program(cpPlatform, cdDevice, "ocl_dev_base_program", cache_locker);
+		if (!cpProgram) {
+			/* verify we have right opencl version */
+			if (!opencl_version_check())
+				return false;
+			/* md5 hash to detect changes */
+			string kernel_path = path_get("kernel");
+			string kernel_md5 = path_files_md5_hash(kernel_path);
+			string custom_kernel_build_options = "";
+			string device_md5 = device_md5_hash(custom_kernel_build_options);
+			/* path to cached binary */
+			string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
+			clbin = path_user_get(path_join("cache", clbin));
+			/* path to preprocessed source for debugging */
+			string clsrc, *debug_src = NULL;
+			if (opencl_kernel_use_debug()) {
+				clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str());
+				clsrc = path_user_get(path_join("cache", clsrc));
+				debug_src = &clsrc;
+			}
+			/* if exists already, try use it */
+			if (path_exists(clbin) && load_binary(&cpProgram, kernel_path, clbin, custom_kernel_build_options, debug_src)) {
+				/* kernel loaded from binary */
+			}
+			else {
+				string init_kernel_source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
+				/* if does not exist or loading binary failed, compile kernel */
+				if (!compile_kernel(kernel_path, "", init_kernel_source, &cpProgram, custom_kernel_build_options, debug_src))
+					return false;
+				/* save binary for reuse */
+				if (!save_binary(&cpProgram, clbin))
+					return false;
+			}
+			/* cache the program */
+			OpenCLCache::store_program(cpPlatform, cdDevice, cpProgram, "ocl_dev_base_program", cache_locker);
+		}
+		/* find kernels */
+		ckShaderKernel = clCreateKernel(cpProgram, "kernel_ocl_shader", &ciErr);
+		if (opencl_error(ciErr))
+			return false;
+		ckBakeKernel = clCreateKernel(cpProgram, "kernel_ocl_bake", &ciErr);
+		if (opencl_error(ciErr))
+			return false;
+		ckFilmConvertByteKernel = clCreateKernel(cpProgram, "kernel_ocl_convert_to_byte", &ciErr);
+		if (opencl_error(ciErr))
+			return false;
+		ckFilmConvertHalfFloatKernel = clCreateKernel(cpProgram, "kernel_ocl_convert_to_half_float", &ciErr);
+		if (opencl_error(ciErr))
+			return false;
+		retu

@@ Diff output truncated at 10240 characters. @@

More information about the Bf-blender-cvs mailing list