[Bf-blender-cvs] [5a39aaf] cycles_kernel_split: Cycles kernel split: Move device requested features to own struct

Thu May 7 15:54:03 CEST 2015

Commit: 5a39aaf64b4d3e54b048ef4c2bace751046391b8
Author: Sergey Sharybin
Date:   Thu May 7 18:15:33 2015 +0500
Branches: cycles_kernel_split
https://developer.blender.org/rB5a39aaf64b4d3e54b048ef4c2bace751046391b8

Cycles kernel split: Move device requested features to own struct

Previously it was all stored in the device itself, which is not really good
practice. Now all parameters which determine exactly which kernel, with
which feature set, to load are wrapped into their own struct, which is passed
to the load_kernels() function.

The network render part is not really tested and is likely broken.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_multi.cpp
M	intern/cycles/device/device_network.cpp
M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/render/session.cpp
M	intern/cycles/render/session.h

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 3bd867b..2702b79 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -72,6 +72,43 @@ public:
 	}
 };
 
+class DeviceRequestedFeatures {
+public:
+	/* Use experimental feature set. */
+	bool experimental;
+
+	/* Maximum number of closures in shader trees. */
+	int max_closure;
+
+	/* Selective nodes compilation. */
+
+	/* Identifier of a node group up to which all the nodes need to be
+	 * compiled in. Nodes from higher group indices will be ignored.
+	 */
+	int max_nodes_group;
+
+	/* Features bitfield indicating which features from the requested group
+	 * will be compiled in. Nodes which correspond to features which are not
+	 * in this bitfield will be ignored even if they're in the requested group.
+	 */
+	int nodes_features;
+
+	DeviceRequestedFeatures()
+	{
+		/* TODO(sergey): Find more meaningful defaults. */
+		max_closure = 0;
+		max_nodes_group = 0;
+		nodes_features = 0;
+	}
+
+	bool modified(const DeviceRequestedFeatures& requested_features)
+	{
+		return !(max_closure == requested_features.max_closure &&
+		         max_nodes_group == requested_features.max_nodes_group &&
+		         nodes_features == requested_features.nodes_features);
+	}
+};
+
 /* Device */
 
 struct DeviceDrawParams {
@@ -97,15 +134,6 @@ public:
 	/* statistics */
 	Stats &stats;
 
-	/* TODO(sergey): Move this to RequestedFeatureset argument of
-	 * load_kernels method.
-	 */
-	/* variables/functions used exclusively for split kernel */
-	/* Maximum closure count */
-	int clos_max;
-	int nodes_max_group;
-	int nodes_features;
-
 	/* regular memory */
 	virtual void mem_alloc(device_memory& mem, MemoryType type) = 0;
 	virtual void mem_copy_to(device_memory& mem) = 0;
@@ -137,7 +165,9 @@ public:
 	virtual void *osl_memory() { return NULL; }
 
 	/* load/compile kernels, must be called before adding tasks */ 
-	virtual bool load_kernels(bool /*experimental*/) { return true; }
+	virtual bool load_kernels(
+	        const DeviceRequestedFeatures& /*requested_features*/)
+	{ return true; }
 
 	/* tasks */
 	virtual int get_split_task_count(DeviceTask& task) = 0;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 9d3b16b..04319ba 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -309,18 +309,18 @@ public:
 		return cubin;
 	}
 
-	bool load_kernels(bool experimental)
+	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
 		/* check if cuda init succeeded */
 		if(cuContext == 0)
 			return false;
 		
 		/* check if GPU is supported */
-		if(!support_device(experimental))
+		if(!support_device(requested_features.experimental))
 			return false;
 
 		/* get kernel */
-		string cubin = compile_kernel(experimental);
+		string cubin = compile_kernel(requested_features.experimental);
 
 		if(cubin == "")
 			return false;
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 3fb3b1f..b41ec99 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -89,19 +89,11 @@ public:
 		return error_msg;
 	}
 
-	bool load_kernels(bool experimental)
+	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
-		foreach(SubDevice& sub, devices) {
-
-			/* Update devic's clos_max; used in split kernel */
-			/* TODO(sergey): Get rid of this. */
-			sub.device->clos_max = clos_max;
-			sub.device->nodes_max_group = nodes_max_group;
-			sub.device->nodes_features = nodes_features;
-
-			if(!sub.device->load_kernels(experimental))
+		foreach(SubDevice& sub, devices)
+			if(!sub.device->load_kernels(requested_features))
 				return false;
-		}
 		return true;
 	}
 
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 90cd6a7..ca6d668 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -196,7 +196,7 @@ public:
 		}
 	}
 
-	bool load_kernels(bool experimental)
+	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
 		if(error_func.have_error())
 			return false;
@@ -204,7 +204,10 @@ public:
 		thread_scoped_lock lock(rpc_lock);
 
 		RPCSend snd(socket, &error_func, "load_kernels");
-		snd.add(experimental);
+		snd.add(requested_features.experimental);
+		snd.add(requested_features.max_closure);
+		snd.add(requested_features.max_nodes_group);
+		snd.add(requested_features.nodes_features);
 		snd.write();
 
 		bool result;
@@ -607,11 +610,14 @@ protected:
 			device->tex_free(mem);
 		}
 		else if(rcv.name == "load_kernels") {
-			bool experimental;
-			rcv.read(experimental);
+			DeviceRequestedFeatures requested_features;
+			rcv.read(requested_features.experimental);
+			rcv.read(requested_features.max_closure);
+			rcv.read(requested_features.max_nodes_group);
+			rcv.read(requested_features.nodes_features);
 
 			bool result;
-			result = device->load_kernels(experimental);
+			result = device->load_kernels(requested_features);
 			RPCSend snd(socket, &error_func, "load_kernels");
 			snd.add(result);
 			snd.write();
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index ec271d5..3e89192 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -883,7 +883,7 @@ public:
 	}
 
 
-	bool load_kernels(bool /*experimental*/)
+	bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/)
 	{
 		/* verify if device was initialized */
 		if(!device_initialized) {
@@ -1121,7 +1121,7 @@ public:
 		path_trace_program = NULL;
 	}
 
-	bool load_kernels(bool /*experimental*/)
+	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
 		/* verify if device was initialized */
 		if(!device_initialized) {
@@ -1130,7 +1130,7 @@ public:
 		}
 
 		/* Get Shader, bake and film convert kernels */
-		if(!OpenCLDeviceBase::load_kernels(false)) {
+		if(!OpenCLDeviceBase::load_kernels(requested_features)) {
 			return false;
 		}
 
@@ -1777,7 +1777,7 @@ public:
 		return shader_soa_size;
 	}
 
-	bool load_kernels(bool /*experimental*/)
+	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
 		/* verify if device was initialized */
 		if(!device_initialized) {
@@ -1788,20 +1788,22 @@ public:
 		/* if it is an interactive render; we ceil clos_max value to a multiple of 5 in order
 		* to limit re-compilations
 		*/
+		/* TODO(sergey): Decision about this should be done on higher levels. */
+		int max_closure = requested_features.max_closure;
 		if(!background) {
-			assert((clos_max != 0) && "clos_max value is 0" );
-			clos_max = (((clos_max - 1) / 5) + 1) * 5;
+			assert((max_closure != 0) && "clos_max value is 0" );
+			max_closure = (((max_closure - 1) / 5) + 1) * 5;
 			/* clos_max value shouldn't be greater than MAX_CLOSURE */
-			clos_max = (clos_max > MAX_CLOSURE) ? MAX_CLOSURE : clos_max;
+			max_closure = (max_closure > MAX_CLOSURE) ? MAX_CLOSURE : max_closure;
 
-			if(current_clos_max == clos_max) {
+			if(current_clos_max == max_closure) {
 				/* present kernels have been created with the same closure count build option */
 				return true;
 			}
 		}
 
 		/* Get Shader, bake and film_convert kernels */
-		if(!OpenCLDeviceBase::load_kernels(false)) {
+		if(!OpenCLDeviceBase::load_kernels(requested_features)) {
 			return false;
 		}
 
@@ -1810,11 +1812,11 @@ public:
 		string compute_device_type_build_option = "";
 
 		/* Set svm_build_options */
-		svm_build_options += " -D__NODES_MAX_GROUP__=" + string_printf("%d", nodes_max_group);
-		svm_build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);
+		svm_build_options += " -D__NODES_MAX_GROUP__=" + string_printf("%d", requested_features.max_nodes_group);
+		svm_build_options += " -D__NODES_FEATURES__=" + string_printf("%d", requested_features.nodes_features);
 		/* Set max closure build option */
 #ifdef __MULTI_CLOSURE__
-		max_closure_build_option += string_printf("-DMAX_CLOSURE=%d ", clos_max);
+		max_closure_build_option += string_printf("-DMAX_CLOSURE=%d ", max_closure);
 #endif
 
 		/* Set compute device build option */
@@ -1915,7 +1917,7 @@ public:
 		if(!load_split_kernel(kernel_path, kernel_init_source, clbin, custom_kernel_build_options, &sumAllRadiance_program))
 			return false;
 
-		current_clos_max = clos_max;
+		current_clos_max = max_closure;
 
 		/* find kernels */
 		ckPathTraceKernel_DataInit = clCreateKernel(dataInit_program, "kernel_ocl_path_trace_data_initialization_SPLIT_KERNEL", &ciErr);
@@ -2338,7 +2340,7 @@ public:
 			size_t num_global_elements = max_render_feasible_tile_size.x * max_render_feasible_tile_size.y;
 
 #ifdef __MULTI_CLOSURE__
-			size_t ShaderClosure_size = get_shader_closure_size(clos_max);
+			size_t ShaderClosure_size = get_shader_closure_size(current_clos_max);
 #else
 			size_t ShaderClosure_size = get_shader_closure_size(MAX_CLOSURE);
 #endif
@@ -2915,7 +2917,7 @@ public:
 		size_t shaderdata_volume = 0;
 
 #ifdef __MULTI_CLOSURE__
-		shader_closure_size = get_shader_closure_size(clos_max);
+		shader_closure_size = get_shader_closure_size(current_clos_max);
 #else
 		shader_closure_size = get_shader_closure_size(MAX_CLOSURE);
 #endif
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index b5c988e..6d22deb 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -598,6 +598,25 @@ void Session::run_cpu()
 		update_progressive_refine(true);
 }
 
+DeviceRequestedFeatures Session::get_requested_device_features()
+{
+	DeviceRequestedFeatures requested_features;
+	requested_features.experimental = params.experimental;
+	if(!params.background) {
+		requested_features.max_closure = 64;
+		requested_features.max_nodes_group = NODE_GROUP_LEVEL_2;
+		requested_features.nodes_features = NODE_FEATURE_ALL;
+	}
+	else {
+		requested_features.max_closure = get_max_closure_count();
+		scene->shader_manager->get_requested_features(
+		        scene,
+		        requested_features.max_nodes_group,
+		        requested_features.nodes_features);
+	}
+	return requested_features;
+}
+


@@ Diff output truncated at 10240 characters. @@