[Bf-blender-cvs] [de06b1c371c] split-kernel-faster-building: Cycles: Minor reworking of shader eval functions
Mai Lavelle
noreply at git.blender.org
Sat Nov 11 09:57:54 CET 2017
Commit: de06b1c371c9445001bb77e8f653896713117181
Author: Mai Lavelle
Date: Sat Nov 11 02:16:39 2017 -0500
Branches: split-kernel-faster-building
https://developer.blender.org/rBde06b1c371c9445001bb77e8f653896713117181
Cycles: Minor reworking of shader eval functions
Merges `shader_eval_surface` and `shader_eval_background` into the new
`shader_eval`. Also makes `kernel_shader_eval` of the split kernel
more generic. This will be used to split `svm_eval_nodes` out of
as many functions as possible so that there is only one call to this
function (or as close as we can get to that).
===================================================================
M intern/cycles/kernel/kernel_shader.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_shader_eval.h
M intern/cycles/kernel/split/kernel_shader_setup.h
M intern/cycles/kernel/split/kernel_shader_sort.h
M intern/cycles/kernel/split/kernel_split_data_types.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 239c6b12bdf..82456f7263c 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -952,34 +952,75 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
return weight;
}
-/* Surface Evaluation */
-
-ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
- ccl_addr_space PathState *state, int path_flag, int max_closure)
+ccl_device float3 shader_eval(KernelGlobals *kg, ShaderData *sd,
+ ccl_addr_space PathState *state, int path_flag, ShaderEvalIntent intent, int max_closure)
{
sd->num_closure = 0;
sd->num_closure_left = max_closure;
+ /* constant shader value */
+ if(intent == SHADER_EVAL_INTENT_CONSTANT) {
+ float3 eval;
+ eval.x = __int_as_float(kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 2));
+ eval.y = __int_as_float(kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 3));
+ eval.z = __int_as_float(kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 4));
+ return eval;
+ }
+
#ifdef __OSL__
- if(kg->osl)
- OSLShader::eval_surface(kg, sd, state, path_flag);
+ /* OSL */
+ if(kg->osl) {
+ if(intent == SHADER_EVAL_INTENT_SURFACE) {
+ OSLShader::eval_surface(kg, sd, state, path_flag);
+ }
+ else if(intent == SHADER_EVAL_INTENT_BACKGROUND) {
+ OSLShader::eval_background(kg, sd, state, path_flag);
+ }
+ }
else
-#endif
+#endif /* __OSL__ */
{
#ifdef __SVM__
+ /* eval nodes */
svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
#else
- DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
- sizeof(DiffuseBsdf),
- make_float3(0.8f, 0.8f, 0.8f));
- bsdf->N = sd->N;
- sd->flag |= bsdf_diffuse_setup(bsdf);
+ /* defaults when svm not built in */
+ if(intent == SHADER_EVAL_INTENT_SURFACE) {
+ DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
+ sizeof(DiffuseBsdf),
+ make_float3(0.8f, 0.8f, 0.8f));
+ bsdf->N = sd->N;
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+ }
+ else if(intent == SHADER_EVAL_INTENT_BACKGROUND) {
+ return make_float3(0.8f, 0.8f, 0.8f);
+ }
#endif
}
- if(sd->flag & SD_BSDF_NEEDS_LCG) {
- sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
+ /* finalization */
+ if(intent == SHADER_EVAL_INTENT_SURFACE) {
+ if(sd->flag & SD_BSDF_NEEDS_LCG) {
+ sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
+ }
+ }
+ else if(intent == SHADER_EVAL_INTENT_BACKGROUND) {
+ if(sd->flag & SD_EMISSION) {
+ return sd->closure_emission_background;
+ }
+ else {
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
+
+ return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+/* Surface Evaluation */
+
+ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
+ ccl_addr_space PathState *state, int path_flag, int max_closure)
+{
+ shader_eval(kg, sd, state, path_flag, SHADER_EVAL_INTENT_SURFACE, max_closure);
}
/* Background Evaluation */
@@ -987,29 +1028,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
ccl_addr_space PathState *state, int path_flag)
{
- sd->num_closure = 0;
- sd->num_closure_left = 0;
-
-#ifdef __SVM__
-# ifdef __OSL__
- if(kg->osl) {
- OSLShader::eval_background(kg, sd, state, path_flag);
- }
- else
-# endif /* __OSL__ */
- {
- svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
- }
-
- if(sd->flag & SD_EMISSION) {
- return sd->closure_emission_background;
- }
- else {
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-#else /* __SVM__ */
- return make_float3(0.8f, 0.8f, 0.8f);
-#endif /* __SVM__ */
+ return shader_eval(kg, sd, state, path_flag, SHADER_EVAL_INTENT_BACKGROUND, 0);
}
/* Volume */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 919dafbc780..7be5b6ee607 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -929,6 +929,12 @@ enum ShaderDataObjectFlag {
SD_OBJECT_SHADOW_CATCHER)
};
+typedef enum ShaderEvalIntent {
+ SHADER_EVAL_INTENT_SURFACE,
+ SHADER_EVAL_INTENT_BACKGROUND,
+ SHADER_EVAL_INTENT_CONSTANT,
+} ShaderEvalIntent;
+
typedef ccl_addr_space struct ShaderData {
/* position */
float3 P;
@@ -1385,7 +1391,7 @@ static_assert_align(KernelData, 16);
*/
/* Queue names */
-enum QueueNumber {
+typedef enum QueueNumber {
/* All active rays and regenerated rays are enqueued here. */
QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,
@@ -1426,7 +1432,7 @@ enum QueueNumber {
#endif /* __BRANCHED_PATH__ */
NUM_QUEUES
-};
+} QueueNumber;
/* We use RAY_STATE_MASK to get ray_state */
#define RAY_STATE_MASK 0x0F
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 88919f47c7a..f638a230e12 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -107,6 +107,16 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
throughput = kernel_split_state.throughput[ray_index];
state = &kernel_split_state.path_state[ray_index];
+#ifdef __BRANCHED_PATH__
+ if(kernel_data.integrator.branched) {
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
+ }
+ else
+#endif
+ {
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
+ }
+
if(!kernel_path_shader_apply(kg,
sd,
state,
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2409d1ba28b..00dc3c49560 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -21,23 +21,16 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_shader_eval(KernelGlobals *kg)
{
-
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- /* Sorting on cuda split is not implemented */
-#ifdef __KERNEL_CUDA__
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
-#else
- int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
-#endif
+
+ int queue = kernel_split_params.shader_eval_queue;
+ int queue_index = kernel_split_params.queue_index[queue];
+
if(ray_index >= queue_index) {
return;
}
ray_index = get_ray_index(kg, ray_index,
-#ifdef __KERNEL_CUDA__
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-#else
- QUEUE_SHADER_SORTED_RAYS,
-#endif
+ queue,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
0);
@@ -46,20 +39,12 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
return;
}
- ccl_global char *ray_state = kernel_split_state.ray_state;
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+ ShaderData *sd = (ShaderData*)(((ccl_global char*)&kernel_split_state) + eval_task->sd_offset);
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag, kernel_data.integrator.max_closures);
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- shader_merge_closures(kernel_split_sd(sd, ray_index));
- }
- else
-#endif
- {
- shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
- }
+ eval_task->eval_result = shader_eval(kg, sd, state, state->flag, eval_task->intent, eval_task->max_closure);
}
}
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index 9d428ee8139..a7ebdefff95 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -64,6 +64,12 @@ ccl_device void kernel_shader_setup(KernelGlobals *kg,
kernel_split_sd(sd, ray_index),
&isect,
&ray);
+
+ ShaderEvalTask *eval_task = &kernel_split_state.shader_eval_task[ray_index];
+
+ eval_task->sd_offset = ((ccl_global char*)kernel_split_sd(sd, ray_index)) - ((ccl_global char*)&kernel_split_state);
+ eval_task->intent = SHADER_EVAL_INTENT_SURFACE;
+ eval_task->max_closure = kernel_data.integrator.max_closures;
}
}
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 2132c42220f..7e3e3ca92b4 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -20,11 +20,17 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_sort(KernelGlobals *kg,
ccl_local_param ShaderSortLocals *locals)
{
-#ifndef __KERNEL_CUDA__
int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+#ifdef __KERNEL_CUDA__
+ /* Sorting on cuda split is not implemented */
+ if(tid == 0) {
+ kernel_split_params.shader_eval_queue = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ }
+#else
uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
if(tid == 0) {
kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
+ kernel_split_params.shader_eval_queue = QUEUE_SHADER_SORTED_RAYS;
}
uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE;
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 9ac3f904819..5ffeb3e6965 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -31,6 +31,9 @@ typedef struct SplitPar
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list