[Bf-blender-cvs] [a4e08e7] cycles_kernel_split: Cycles kernel split: Use more friendly way of setting kernel args
Sergey Sharybin
noreply at git.blender.org
Fri May 8 21:56:30 CEST 2015
Commit: a4e08e7187a15a732dabe07cdeea1c83f1e17dbd
Author: Sergey Sharybin
Date: Sat May 9 00:54:33 2015 +0500
Branches: cycles_kernel_split
https://developer.blender.org/rBa4e08e7187a15a732dabe07cdeea1c83f1e17dbd
Cycles kernel split: Use more friendly way of setting kernel args
It is now possible to set kernel arguments in a CUDA-like way using the
utility function kernel_set_args(), which accepts a reasonably large number
of arguments. It is also possible to pass fewer arguments, though.
For now it is only used in the split kernel code, but it is to be used more widely.
===================================================================
M intern/cycles/device/device_opencl.cpp
===================================================================
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 88c2fbc..3113086 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1084,6 +1084,104 @@ public:
task.update_progress(NULL);
}
}
+
+protected:
+ class ArgumentWrapper { /* Size/address pair describing one kernel argument. */
+ public:
+ ArgumentWrapper() : size(0), pointer(NULL) {} /* Empty wrapper: zero size, NULL pointer. */
+ template <typename T>
+ ArgumentWrapper(T& argument) : size(sizeof(argument)), /* Non-explicit, so any lvalue converts implicitly. */
+ pointer(&argument) { } /* Stores the address only; the referenced object is not copied. */
+ size_t size; /* sizeof() of the wrapped object. */
+ void *pointer; /* Address of the wrapped object, or NULL for an empty wrapper. */
+ };
+
+ int kernel_set_args(cl_kernel kernel, /* Sets consecutive arguments of `kernel`; returns how many were set. */
+ int start_argument_index, /* Kernel argument index that arg1 is bound to. */
+ const ArgumentWrapper& arg1 = ArgumentWrapper(), /* Omitted arguments default to an empty wrapper. */
+ const ArgumentWrapper& arg2 = ArgumentWrapper(),
+ const ArgumentWrapper& arg3 = ArgumentWrapper(),
+ const ArgumentWrapper& arg4 = ArgumentWrapper(),
+ const ArgumentWrapper& arg5 = ArgumentWrapper(),
+ const ArgumentWrapper& arg6 = ArgumentWrapper(),
+ const ArgumentWrapper& arg7 = ArgumentWrapper(),
+ const ArgumentWrapper& arg8 = ArgumentWrapper(),
+ const ArgumentWrapper& arg9 = ArgumentWrapper(),
+ const ArgumentWrapper& arg10 = ArgumentWrapper(),
+ const ArgumentWrapper& arg11 = ArgumentWrapper(),
+ const ArgumentWrapper& arg12 = ArgumentWrapper(),
+ const ArgumentWrapper& arg13 = ArgumentWrapper(),
+ const ArgumentWrapper& arg14 = ArgumentWrapper(),
+ const ArgumentWrapper& arg15 = ArgumentWrapper(),
+ const ArgumentWrapper& arg16 = ArgumentWrapper(),
+ const ArgumentWrapper& arg17 = ArgumentWrapper(),
+ const ArgumentWrapper& arg18 = ArgumentWrapper(),
+ const ArgumentWrapper& arg19 = ArgumentWrapper(),
+ const ArgumentWrapper& arg20 = ArgumentWrapper(),
+ const ArgumentWrapper& arg21 = ArgumentWrapper(),
+ const ArgumentWrapper& arg22 = ArgumentWrapper(),
+ const ArgumentWrapper& arg23 = ArgumentWrapper(),
+ const ArgumentWrapper& arg24 = ArgumentWrapper(),
+ const ArgumentWrapper& arg25 = ArgumentWrapper(),
+ const ArgumentWrapper& arg26 = ArgumentWrapper(),
+ const ArgumentWrapper& arg27 = ArgumentWrapper(),
+ const ArgumentWrapper& arg28 = ArgumentWrapper(),
+ const ArgumentWrapper& arg29 = ArgumentWrapper(),
+ const ArgumentWrapper& arg30 = ArgumentWrapper(),
+ const ArgumentWrapper& arg31 = ArgumentWrapper(),
+ const ArgumentWrapper& arg32 = ArgumentWrapper(),
+ const ArgumentWrapper& arg33 = ArgumentWrapper())
+ {
+ int current_arg_index = 0; /* Number of arguments set so far; also the offset from start_argument_index. */
+#define FAKE_VARARG_HANDLE_ARG(arg) /* Set one argument, or return the count at the first empty wrapper. */ \
+ do { \
+ if(arg.pointer != NULL) { \
+ opencl_assert(clSetKernelArg( \
+ kernel, \
+ start_argument_index + current_arg_index, \
+ arg.size, arg.pointer)); \
+ ++current_arg_index; \
+ } \
+ else { \
+ return current_arg_index; \
+ } \
+ } while(false)
+ FAKE_VARARG_HANDLE_ARG(arg1);
+ FAKE_VARARG_HANDLE_ARG(arg2);
+ FAKE_VARARG_HANDLE_ARG(arg3);
+ FAKE_VARARG_HANDLE_ARG(arg4);
+ FAKE_VARARG_HANDLE_ARG(arg5);
+ FAKE_VARARG_HANDLE_ARG(arg6);
+ FAKE_VARARG_HANDLE_ARG(arg7);
+ FAKE_VARARG_HANDLE_ARG(arg8);
+ FAKE_VARARG_HANDLE_ARG(arg9);
+ FAKE_VARARG_HANDLE_ARG(arg10);
+ FAKE_VARARG_HANDLE_ARG(arg11);
+ FAKE_VARARG_HANDLE_ARG(arg12);
+ FAKE_VARARG_HANDLE_ARG(arg13);
+ FAKE_VARARG_HANDLE_ARG(arg14);
+ FAKE_VARARG_HANDLE_ARG(arg15);
+ FAKE_VARARG_HANDLE_ARG(arg16);
+ FAKE_VARARG_HANDLE_ARG(arg17);
+ FAKE_VARARG_HANDLE_ARG(arg18);
+ FAKE_VARARG_HANDLE_ARG(arg19);
+ FAKE_VARARG_HANDLE_ARG(arg20);
+ FAKE_VARARG_HANDLE_ARG(arg21);
+ FAKE_VARARG_HANDLE_ARG(arg22);
+ FAKE_VARARG_HANDLE_ARG(arg23);
+ FAKE_VARARG_HANDLE_ARG(arg24);
+ FAKE_VARARG_HANDLE_ARG(arg25);
+ FAKE_VARARG_HANDLE_ARG(arg26);
+ FAKE_VARARG_HANDLE_ARG(arg27);
+ FAKE_VARARG_HANDLE_ARG(arg28);
+ FAKE_VARARG_HANDLE_ARG(arg29);
+ FAKE_VARARG_HANDLE_ARG(arg30);
+ FAKE_VARARG_HANDLE_ARG(arg31);
+ FAKE_VARARG_HANDLE_ARG(arg32);
+ FAKE_VARARG_HANDLE_ARG(arg33);
+#undef FAKE_VARARG_HANDLE_ARG
+ return current_arg_index; /* Reached only when all 33 wrappers were non-empty. */
+ }
};
class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
@@ -2335,301 +2433,310 @@ public:
cl_int total_num_rays = global_size[0] * global_size[1];
/* Set arguments for ckPathTraceKernel_DataInit kernel */
- cl_uint narg = 0;
-
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, kgbuffer);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, P_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, P_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, N_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, N_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, Ng_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, Ng_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, I_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, I_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, shader_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, shader_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, flag_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, flag_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, prim_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, prim_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, type_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, type_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, u_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, u_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, v_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, v_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, object_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, object_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, time_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, time_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_length_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_length_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_depth_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_depth_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, transparent_depth_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, transparent_depth_sd_DL_shadow);
+ cl_uint start_arg_index =
+ kernel_set_args(ckPathTraceKernel_DataInit,
+ 0,
+ kgbuffer,
+ sd,
+ sd_DL_shadow,
+ P_sd,
+ P_sd_DL_shadow,
+ N_sd,
+ N_sd_DL_shadow,
+ Ng_sd,
+ Ng_sd_DL_shadow,
+ I_sd,
+ I_sd_DL_shadow,
+ shader_sd,
+ shader_sd_DL_shadow,
+ flag_sd,
+ flag_sd_DL_shadow,
+ prim_sd,
+ prim_sd_DL_shadow,
+ type_sd,
+ type_sd_DL_shadow,
+ u_sd,
+ u_sd_DL_shadow,
+ v_sd,
+ v_sd_DL_shadow,
+ object_sd,
+ object_sd_DL_shadow,
+ time_sd,
+ time_sd_DL_shadow,
+ ray_length_sd,
+ ray_length_sd_DL_shadow,
+ ray_depth_sd,
+ ray_depth_sd_DL_shadow,
+ transparent_depth_sd,
+ transparent_depth_sd_DL_shadow);
+
+ start_arg_index +=
+ kernel_set_args(ckPathTraceKernel_DataInit,
#ifdef __RAY_DIFFERENTIALS__
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dP_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dP_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dI_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dI_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, du_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, du_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dv_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dv_sd_DL_shadow);
+ start_arg_index,
+ dP_sd,
+ dP_sd_DL_shadow,
+ dI_sd,
+ dI_sd_DL_shadow,
+ du_sd,
+ du_sd_DL_shadow,
+ dv_sd,
+ dv_sd_DL_shadow,
#endif
#ifdef __DPDU__
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dPdu_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dPdu_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dPdv_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, dPdv_sd_DL_shadow);
+ dPdu_sd,
+ dPdu_sd_DL_shadow,
+ dPdv_sd,
+ dPdv_sd_DL_shadow,
#endif
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, closure_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, closure_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, num_closure_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, num_closure_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, randb_closure_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, randb_closure_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_P_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_P_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_dP_sd);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, ray_dP_sd_DL_shadow);
- KERNEL_APPEND_ARG(ckPathTraceKernel_DataInit, d_data);
- KERNEL_APPEND_ARG(ckPathTrace
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list