[Bf-blender-cvs] [ef863d6] cycles_kernel_split: Add OpenCLDeviceSplitKernel class

varunsundar08 noreply at git.blender.org
Thu Apr 30 23:24:59 CEST 2015


Commit: ef863d644ff3fcafb614d351723bc2d997dbf068
Author: varunsundar08
Date:   Mon Apr 27 19:10:49 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rBef863d644ff3fcafb614d351723bc2d997dbf068

Add OpenCLDeviceSplitKernel class

===================================================================

M	intern/cycles/device/device_opencl.cpp

===================================================================

diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index cc184c6..605644d 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1678,6 +1678,7 @@ public:
 
 		current_clos_max = clos_max;
 
+		/* TODO : Add macros to kernel_ocl_path_trace in kernel.cl to avoid megakernel build */
 		kernel_init_source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
 		device_md5 = device_md5_hash("");
 		clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
@@ -3518,6 +3519,2946 @@ The current tile of dimensions %dx%d is split into tiles of dimension %dx%d for
 	}
 };
 
+/* OpenCLDeviceSplitKernel's declaration/definition */
+class OpenCLDeviceSplitKernel : public Device
+{
+public:
+	DedicatedTaskPool task_pool;
+	cl_context cxContext;
+	cl_command_queue cqCommandQueue;
+	cl_platform_id cpPlatform;
+	cl_device_id cdDevice;
+	cl_int ciErr;
+
+	/* Kernel declaration */
+	cl_kernel ckPathTraceKernel_DataInit_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_LampEmission_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_QueueEnqueue_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_Holdout_Emission_Blurring_Pathtermination_AO_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_Subsurface_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_DirectLighting_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_ShadowBlocked_DirectLighting_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_SetUpNextIteration_SPLIT_KERNEL;
+	cl_kernel ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL;
+	cl_kernel ckShaderKernel;
+	cl_kernel ckBakeKernel;
+	cl_kernel ckFilmConvertByteKernel;
+	cl_kernel ckFilmConvertHalfFloatKernel;
+
+	/* cl_program declaration */
+	cl_program dataInit_program;
+	cl_program sceneIntersect_program;
+	cl_program lampEmission_program;
+	cl_program QueueEnqueue_program;
+	cl_program background_BufferUpdate_program;
+	cl_program shaderEval_program;
+	cl_program holdout_emission_blurring_termination_ao_program;
+	cl_program subsurface_program;
+	cl_program directLighting_program;
+	cl_program shadowBlocked_program;
+	cl_program nextIterationSetUp_program;
+	cl_program sumAllRadiance_program;
+	cl_program cpProgram;
+
+	/* Global memory variables [porting]; This memory is used for
+	* co-operation between different kernels; Data written by one
+	* kernel will be available to another kernel via this global
+	* memory
+	*/
+	cl_mem rng_coop;
+	cl_mem throughput_coop;
+	cl_mem L_transparent_coop;
+	cl_mem PathRadiance_coop;
+	cl_mem Ray_coop;
+	cl_mem PathState_coop;
+	cl_mem Intersection_coop;
+	cl_mem kgbuffer; /* KernelGlobals buffer */
+
+	/* global buffers for ShaderData */
+	cl_mem sd;                      /* ShaderData used in the main path-iteration loop */
+	cl_mem sd_DL_shadow;            /* ShaderData used in Direct Lighting and ShadowBlocked kernel */
+
+	/* global buffers of each member of ShaderData */
+	cl_mem P_sd;
+	cl_mem P_sd_DL_shadow;
+	cl_mem N_sd;
+	cl_mem N_sd_DL_shadow;
+	cl_mem Ng_sd;
+	cl_mem Ng_sd_DL_shadow;
+	cl_mem I_sd;
+	cl_mem I_sd_DL_shadow;
+	cl_mem shader_sd;
+	cl_mem shader_sd_DL_shadow;
+	cl_mem flag_sd;
+	cl_mem flag_sd_DL_shadow;
+	cl_mem prim_sd;
+	cl_mem prim_sd_DL_shadow;
+	cl_mem type_sd;
+	cl_mem type_sd_DL_shadow;
+	cl_mem u_sd;
+	cl_mem u_sd_DL_shadow;
+	cl_mem v_sd;
+	cl_mem v_sd_DL_shadow;
+	cl_mem object_sd;
+	cl_mem object_sd_DL_shadow;
+	cl_mem time_sd;
+	cl_mem time_sd_DL_shadow;
+	cl_mem ray_length_sd;
+	cl_mem ray_length_sd_DL_shadow;
+	cl_mem ray_depth_sd;
+	cl_mem ray_depth_sd_DL_shadow;
+	cl_mem transparent_depth_sd;
+	cl_mem transparent_depth_sd_DL_shadow;
+#ifdef __RAY_DIFFERENTIALS__
+	cl_mem dP_sd, dI_sd;
+	cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow;
+	cl_mem du_sd, dv_sd;
+	cl_mem du_sd_DL_shadow, dv_sd_DL_shadow;
+#endif
+#ifdef __DPDU__
+	cl_mem dPdu_sd, dPdv_sd;
+	cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow;
+#endif
+	cl_mem closure_sd;
+	cl_mem closure_sd_DL_shadow;
+	cl_mem num_closure_sd;
+	cl_mem num_closure_sd_DL_shadow;
+	cl_mem randb_closure_sd;
+	cl_mem randb_closure_sd_DL_shadow;
+	cl_mem ray_P_sd;
+	cl_mem ray_P_sd_DL_shadow;
+	cl_mem ray_dP_sd;
+	cl_mem ray_dP_sd_DL_shadow;
+
+	/* Global memory required for shadow blocked and accum_radiance */
+	cl_mem BSDFEval_coop;
+	cl_mem ISLamp_coop;
+	cl_mem LightRay_coop;
+	cl_mem AOAlpha_coop;
+	cl_mem AOBSDF_coop;
+	cl_mem AOLightRay_coop;
+	cl_mem Intersection_coop_AO;
+	cl_mem Intersection_coop_DL;
+
+	/* Global state array that tracks ray state */
+	cl_mem ray_state;
+
+	/* per sample buffers */
+	cl_mem per_sample_output_buffers;
+
+	/* Denotes which sample each ray is being processed for */
+	cl_mem work_array;
+
+	/* Queue */
+	cl_mem Queue_data;  /* Array of size queuesize * num_queues * sizeof(int) */
+	cl_mem Queue_index; /* Array of size num_queues * sizeof(int); Tracks the size of each queue */
+
+	/* Flag to make sceneintersect and lampemission kernel use queues */
+	cl_mem use_queues_flag;
+
+	/* Required-memory size */
+	size_t rng_size;
+	size_t throughput_size;
+	size_t L_transparent_size;
+	size_t rayState_size;
+	size_t hostRayState_size;
+	size_t work_element_size;
+	size_t ISLamp_size;
+
+	/* size of structures declared in kernel_types.h */
+	size_t PathRadiance_size;
+	size_t Ray_size;
+	size_t PathState_size;
+	size_t Intersection_size;
+
+	/* Sizes of memory required for shadow blocked function */
+	size_t AOAlpha_size;
+	size_t AOBSDF_size;
+	size_t AOLightRay_size;
+	size_t LightRay_size;
+	size_t BSDFEval_size;
+	size_t Intersection_coop_AO_size;
+	size_t Intersection_coop_DL_size;
+
+	/* Amount of memory in output buffer associated with one pixel/thread */
+	size_t per_thread_output_buffer_size;
+
+	/* Total allocatable available device memory */
+	size_t total_allocatable_memory;
+
+	/* host version of ray_state; Used in checking host path-iteration termination */
+	char *hostRayStateArray;
+
+	/* Number of path-iterations to be done in one shot */
+	unsigned int PathIteration_times;
+
+	/* Denotes if the render is background or foreground */
+	bool background;
+
+#ifdef __WORK_STEALING__
+	/* Work pool with respect to each work group */
+	cl_mem work_pool_wgs;
+
+	/* Denotes the maximum work groups possible w.r.t. current tile size */
+	unsigned int max_work_groups;
+#endif
+
+	/* clos_max value for which the kernels have been loaded currently */
+	int current_clos_max;
+
+	/* Set to true in the constructor and set to false at the end of path_trace() */
+	bool first_tile;
+
+	typedef map<string, device_vector<uchar>*> ConstMemMap;
+	typedef map<string, device_ptr> MemMap;
+
+	ConstMemMap const_mem_map;
+	MemMap mem_map;
+	device_ptr null_mem;
+
+	bool device_initialized;
+	string platform_name;
+
+	bool opencl_error(cl_int err)
+	{
+		if (err != CL_SUCCESS) {
+			string message = string_printf("OpenCL error (%d): %s", err, clewErrorString(err));
+			if (error_msg == "")
+				error_msg = message;
+			fprintf(stderr, "%s\n", message.c_str());
+			return true;
+		}
+
+		return false;
+	}
+
+	void opencl_error(const string& message)
+	{
+		if (error_msg == "")
+			error_msg = message;
+		fprintf(stderr, "%s\n", message.c_str());
+	}
+
+#define opencl_assert(stmt) \
+	{ \
+	cl_int err = stmt; \
+	\
+	if (err != CL_SUCCESS) { \
+	string message = string_printf("OpenCL error: %s in %s", clewErrorString(err), #stmt); \
+	if (error_msg == "") \
+	error_msg = message; \
+	fprintf(stderr, "%s\n", message.c_str()); \
+	} \
+	} (void)0
+
+	void opencl_assert_err(cl_int err, const char* where)
+	{
+		if (err != CL_SUCCESS) {
+			string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
+			if (error_msg == "")
+				error_msg = message;
+			fprintf(stderr, "%s\n", message.c_str());
+#ifndef NDEBUG
+			abort();
+#endif
+		}
+	}
+
+	OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_)
+		: Device(info, stats, background_)
+	{
+		cpPlatform = NULL;
+		cdDevice = NULL;
+		cxContext = NULL;
+		cqCommandQueue = NULL;
+		null_mem = 0;
+		device_initialized = false;
+
+		use_split_kernel = true;
+		background = background_;
+
+		/* Initialize kernels */
+		ckPathTraceKernel_DataInit_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_LampEmission_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_Holdout_Emission_Blurring_Pathtermination_AO_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_Subsurface_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_DirectLighting_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_ShadowBlocked_DirectLighting_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_SetUpNextIteration_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL = NULL;
+		ckPathTraceKernel_QueueEnqueue_SPLIT_KERNEL = NULL;
+		ckShaderKernel = NULL;
+		ckBakeKernel = NULL;
+		ckFilmConvertByteKernel = NULL;
+		ckFilmConvertHalfFloatKernel = NULL;
+
+		/* Initialize program */
+		dataInit_program = NULL;
+		sceneIntersect_program = NULL;
+		lampEmission_program = NULL;
+		QueueEnqueue_program = NULL;
+		background_BufferUpdate_program = NULL;
+		shaderEval_program = NULL;
+		holdout_emission_blurring_termination_ao_program = NULL;
+		subsurface_program = NULL;
+		directLighting_program = NULL;
+		shadowBlocked_program = NULL;
+		nextIterationSetUp_program = NULL;
+		sumAllRadiance_program = NULL;
+		cpProgram = NULL;
+
+		/* Initialize cl_mem variables */
+		kgbuffer = NULL;
+		sd = NULL;
+		sd_DL_shadow = NULL;
+
+		P_sd = NULL;
+		P_sd_DL_shadow = NULL;
+		N_sd = NULL;
+		N_sd_DL_shadow = NULL;
+		Ng_sd = NULL;
+		Ng_sd_DL_shadow = NULL;
+		I_sd = NULL;
+		I_sd_DL_shadow = NULL;
+		shader_sd = NULL;
+		shader_sd_DL_shadow = NULL;
+		flag_sd = NULL;
+		flag_sd_DL_shadow = NULL;
+		prim_sd = NULL;
+		prim_sd_DL_shadow = NULL;
+		type_sd = NULL;
+		type_sd_DL_shadow = NULL;
+		u_sd = NULL;
+		u_sd_DL_shadow = NULL;
+		v_sd = NULL;
+		v_sd_DL_shadow = NULL;
+		object_sd = NULL;
+		object_sd_DL_shadow = NULL

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list