[Bf-blender-cvs] [ef863d6] cycles_kernel_split: Add OpenCLDeviceSplitKernel class
varunsundar08
noreply at git.blender.org
Thu Apr 30 23:24:59 CEST 2015
Commit: ef863d644ff3fcafb614d351723bc2d997dbf068
Author: varunsundar08
Date: Mon Apr 27 19:10:49 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rBef863d644ff3fcafb614d351723bc2d997dbf068
Add OpenCLDeviceSplitKernel class
===================================================================
M intern/cycles/device/device_opencl.cpp
===================================================================
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index cc184c6..605644d 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1678,6 +1678,7 @@ public:
current_clos_max = clos_max;
+ /* TODO : Add macros to kernel_ocl_path_trace in kernel.cl to avoid megakernel build */
kernel_init_source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
device_md5 = device_md5_hash("");
clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
@@ -3518,6 +3519,2946 @@ The current tile of dimensions %dx%d is split into tiles of dimension %dx%d for
}
};
+/* OpenCLDeviceSplitKernel's declaration/definition */
+class OpenCLDeviceSplitKernel : public Device
+{
+public:
+ DedicatedTaskPool task_pool;
+ cl_context cxContext;
+ cl_command_queue cqCommandQueue;
+ cl_platform_id cpPlatform;
+ cl_device_id cdDevice;
+ cl_int ciErr;
+
+ /* Kernel declaration */
+ cl_kernel ckPathTraceKernel_DataInit_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_LampEmission_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_QueueEnqueue_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_Holdout_Emission_Blurring_Pathtermination_AO_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_Subsurface_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_DirectLighting_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_ShadowBlocked_DirectLighting_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_SetUpNextIteration_SPLIT_KERNEL;
+ cl_kernel ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL;
+ cl_kernel ckShaderKernel;
+ cl_kernel ckBakeKernel;
+ cl_kernel ckFilmConvertByteKernel;
+ cl_kernel ckFilmConvertHalfFloatKernel;
+
+ /* cl_program declaration */
+ cl_program dataInit_program;
+ cl_program sceneIntersect_program;
+ cl_program lampEmission_program;
+ cl_program QueueEnqueue_program;
+ cl_program background_BufferUpdate_program;
+ cl_program shaderEval_program;
+ cl_program holdout_emission_blurring_termination_ao_program;
+ cl_program subsurface_program;
+ cl_program directLighting_program;
+ cl_program shadowBlocked_program;
+ cl_program nextIterationSetUp_program;
+ cl_program sumAllRadiance_program;
+ cl_program cpProgram;
+
+ /* Global memory variables [porting]; This memory is used for
+ * co-operation between different kernels; Data written by one
+ * kernel will be available to another kernel via this global
+ * memory
+ */
+ cl_mem rng_coop;
+ cl_mem throughput_coop;
+ cl_mem L_transparent_coop;
+ cl_mem PathRadiance_coop;
+ cl_mem Ray_coop;
+ cl_mem PathState_coop;
+ cl_mem Intersection_coop;
+ cl_mem kgbuffer; /* KernelGlobals buffer */
+
+ /* global buffers for ShaderData */
+ cl_mem sd; /* ShaderData used in the main path-iteration loop */
+ cl_mem sd_DL_shadow; /* ShaderData used in Direct Lighting and ShadowBlocked kernel */
+
+ /* global buffers of each member of ShaderData */
+ cl_mem P_sd;
+ cl_mem P_sd_DL_shadow;
+ cl_mem N_sd;
+ cl_mem N_sd_DL_shadow;
+ cl_mem Ng_sd;
+ cl_mem Ng_sd_DL_shadow;
+ cl_mem I_sd;
+ cl_mem I_sd_DL_shadow;
+ cl_mem shader_sd;
+ cl_mem shader_sd_DL_shadow;
+ cl_mem flag_sd;
+ cl_mem flag_sd_DL_shadow;
+ cl_mem prim_sd;
+ cl_mem prim_sd_DL_shadow;
+ cl_mem type_sd;
+ cl_mem type_sd_DL_shadow;
+ cl_mem u_sd;
+ cl_mem u_sd_DL_shadow;
+ cl_mem v_sd;
+ cl_mem v_sd_DL_shadow;
+ cl_mem object_sd;
+ cl_mem object_sd_DL_shadow;
+ cl_mem time_sd;
+ cl_mem time_sd_DL_shadow;
+ cl_mem ray_length_sd;
+ cl_mem ray_length_sd_DL_shadow;
+ cl_mem ray_depth_sd;
+ cl_mem ray_depth_sd_DL_shadow;
+ cl_mem transparent_depth_sd;
+ cl_mem transparent_depth_sd_DL_shadow;
+#ifdef __RAY_DIFFERENTIALS__
+ cl_mem dP_sd, dI_sd;
+ cl_mem dP_sd_DL_shadow, dI_sd_DL_shadow;
+ cl_mem du_sd, dv_sd;
+ cl_mem du_sd_DL_shadow, dv_sd_DL_shadow;
+#endif
+#ifdef __DPDU__
+ cl_mem dPdu_sd, dPdv_sd;
+ cl_mem dPdu_sd_DL_shadow, dPdv_sd_DL_shadow;
+#endif
+ cl_mem closure_sd;
+ cl_mem closure_sd_DL_shadow;
+ cl_mem num_closure_sd;
+ cl_mem num_closure_sd_DL_shadow;
+ cl_mem randb_closure_sd;
+ cl_mem randb_closure_sd_DL_shadow;
+ cl_mem ray_P_sd;
+ cl_mem ray_P_sd_DL_shadow;
+ cl_mem ray_dP_sd;
+ cl_mem ray_dP_sd_DL_shadow;
+
+ /* Global memory required for shadow blocked and accum_radiance */
+ cl_mem BSDFEval_coop;
+ cl_mem ISLamp_coop;
+ cl_mem LightRay_coop;
+ cl_mem AOAlpha_coop;
+ cl_mem AOBSDF_coop;
+ cl_mem AOLightRay_coop;
+ cl_mem Intersection_coop_AO;
+ cl_mem Intersection_coop_DL;
+
+ /* Global state array that tracks ray state */
+ cl_mem ray_state;
+
+ /* per sample buffers */
+ cl_mem per_sample_output_buffers;
+
+ /* Denotes which sample each ray is being processed for */
+ cl_mem work_array;
+
+ /* Queue */
+ cl_mem Queue_data; /* Array of size queuesize * num_queues * sizeof(int) */
+ cl_mem Queue_index; /* Array of size num_queues * sizeof(int); Tracks the size of each queue */
+
+ /* Flag to make sceneintersect and lampemission kernel use queues */
+ cl_mem use_queues_flag;
+
+ /* Required-memory size */
+ size_t rng_size;
+ size_t throughput_size;
+ size_t L_transparent_size;
+ size_t rayState_size;
+ size_t hostRayState_size;
+ size_t work_element_size;
+ size_t ISLamp_size;
+
+ /* size of structures declared in kernel_types.h */
+ size_t PathRadiance_size;
+ size_t Ray_size;
+ size_t PathState_size;
+ size_t Intersection_size;
+
+ /* Sizes of memory required for shadow blocked function */
+ size_t AOAlpha_size;
+ size_t AOBSDF_size;
+ size_t AOLightRay_size;
+ size_t LightRay_size;
+ size_t BSDFEval_size;
+ size_t Intersection_coop_AO_size;
+ size_t Intersection_coop_DL_size;
+
+ /* Amount of memory in output buffer associated with one pixel/thread */
+ size_t per_thread_output_buffer_size;
+
+ /* Total allocatable available device memory */
+ size_t total_allocatable_memory;
+
+ /* host version of ray_state; Used in checking host path-iteration termination */
+ char *hostRayStateArray;
+
+ /* Number of path-iterations to be done in one shot */
+ unsigned int PathIteration_times;
+
+ /* Denotes if the render is background or foreground */
+ bool background;
+
+#ifdef __WORK_STEALING__
+ /* Work pool with respect to each work group */
+ cl_mem work_pool_wgs;
+
+ /* Denotes the maximum work groups possible w.r.t. current tile size */
+ unsigned int max_work_groups;
+#endif
+
+ /* clos_max value for which the kernels have been loaded currently */
+ int current_clos_max;
+
+ /* Marked True in constructor and marked false at the end of path_trace() */
+ bool first_tile;
+
+ typedef map<string, device_vector<uchar>*> ConstMemMap;
+ typedef map<string, device_ptr> MemMap;
+
+ ConstMemMap const_mem_map;
+ MemMap mem_map;
+ device_ptr null_mem;
+
+ bool device_initialized;
+ string platform_name;
+
+ /* Report an OpenCL status code.
+  *
+  * Returns false when err is CL_SUCCESS. Otherwise formats a message from
+  * the numeric code and clewErrorString(err), stores it in error_msg if
+  * error_msg is still empty, prints it to stderr and returns true.
+  */
+ bool opencl_error(cl_int err)
+ {
+     if (err == CL_SUCCESS) {
+         return false;
+     }
+
+     const string message = string_printf("OpenCL error (%d): %s", err, clewErrorString(err));
+
+     /* Only the first error is kept; later ones are still printed. */
+     if (error_msg.empty()) {
+         error_msg = message;
+     }
+     fprintf(stderr, "%s\n", message.c_str());
+
+     return true;
+ }
+
+ void opencl_error(const string& message)
+ {
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ }
+
+#define opencl_assert(stmt) \
+ { \
+ cl_int err = stmt; \
+ \
+ if (err != CL_SUCCESS) { \
+ string message = string_printf("OpenCL error: %s in %s", clewErrorString(err), #stmt); \
+ if (error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ } \
+ } (void)0
+
+ /* Check an OpenCL status code obtained at location `where`.
+  *
+  * Does nothing when err is CL_SUCCESS. Otherwise builds a message from the
+  * numeric code, clewErrorString(err) and `where`, stores it in error_msg if
+  * error_msg is still empty and prints it to stderr. Unless NDEBUG is
+  * defined, abort() is then called.
+  */
+ void opencl_assert_err(cl_int err, const char* where)
+ {
+     if (err == CL_SUCCESS) {
+         return;
+     }
+
+     const string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
+
+     /* Only the first error is kept; later ones are still printed. */
+     if (error_msg.empty()) {
+         error_msg = message;
+     }
+     fprintf(stderr, "%s\n", message.c_str());
+
+#ifndef NDEBUG
+     abort();
+#endif
+ }
+
+ OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_)
+ : Device(info, stats, background_)
+ {
+ cpPlatform = NULL;
+ cdDevice = NULL;
+ cxContext = NULL;
+ cqCommandQueue = NULL;
+ null_mem = 0;
+ device_initialized = false;
+
+ use_split_kernel = true;
+ background = background_;
+
+ /* Initialize kernels */
+ ckPathTraceKernel_DataInit_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_SceneIntersect_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_LampEmission_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_BG_BufferUpdate_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_Shader_Lighting_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_Holdout_Emission_Blurring_Pathtermination_AO_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_Subsurface_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_DirectLighting_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_ShadowBlocked_DirectLighting_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_SetUpNextIteration_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_SumAllRadiance_SPLIT_KERNEL = NULL;
+ ckPathTraceKernel_QueueEnqueue_SPLIT_KERNEL = NULL;
+ ckShaderKernel = NULL;
+ ckBakeKernel = NULL;
+ ckFilmConvertByteKernel = NULL;
+ ckFilmConvertHalfFloatKernel = NULL;
+
+ /* Initialize program */
+ dataInit_program = NULL;
+ sceneIntersect_program = NULL;
+ lampEmission_program = NULL;
+ QueueEnqueue_program = NULL;
+ background_BufferUpdate_program = NULL;
+ shaderEval_program = NULL;
+ holdout_emission_blurring_termination_ao_program = NULL;
+ subsurface_program = NULL;
+ directLighting_program = NULL;
+ shadowBlocked_program = NULL;
+ nextIterationSetUp_program = NULL;
+ sumAllRadiance_program = NULL;
+ cpProgram = NULL;
+
+ /* Initialize cl_mem variables */
+ kgbuffer = NULL;
+ sd = NULL;
+ sd_DL_shadow = NULL;
+
+ P_sd = NULL;
+ P_sd_DL_shadow = NULL;
+ N_sd = NULL;
+ N_sd_DL_shadow = NULL;
+ Ng_sd = NULL;
+ Ng_sd_DL_shadow = NULL;
+ I_sd = NULL;
+ I_sd_DL_shadow = NULL;
+ shader_sd = NULL;
+ shader_sd_DL_shadow = NULL;
+ flag_sd = NULL;
+ flag_sd_DL_shadow = NULL;
+ prim_sd = NULL;
+ prim_sd_DL_shadow = NULL;
+ type_sd = NULL;
+ type_sd_DL_shadow = NULL;
+ u_sd = NULL;
+ u_sd_DL_shadow = NULL;
+ v_sd = NULL;
+ v_sd_DL_shadow = NULL;
+ object_sd = NULL;
+ object_sd_DL_shadow = NULL
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list