[Bf-blender-cvs] [f854920] compositor-2016: Cycles CUDA: reduce stack memory by reusing ShaderData.
Brecht Van Lommel
noreply at git.blender.org
Wed Jun 8 21:49:42 CEST 2016
Commit: f8549205ad7d21b391d9da2986c1fbfa0e2f699e
Author: Brecht Van Lommel
Date: Sun May 22 22:35:47 2016 +0200
Branches: compositor-2016
https://developer.blender.org/rBf8549205ad7d21b391d9da2986c1fbfa0e2f699e
Cycles CUDA: reduce stack memory by reusing ShaderData.
57% less for path and 48% less for branched path.
===================================================================
M intern/cycles/kernel/kernel_bake.h
M intern/cycles/kernel/kernel_emission.h
M intern/cycles/kernel/kernel_path.h
M intern/cycles/kernel/kernel_path_branched.h
M intern/cycles/kernel/kernel_path_state.h
M intern/cycles/kernel/kernel_path_surface.h
M intern/cycles/kernel/kernel_path_volume.h
M intern/cycles/kernel/kernel_shadow.h
M intern/cycles/kernel/kernel_volume.h
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_data_init.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_shadow_blocked.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 392cff9..77982ee 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ /* emission shader data memory used by various functions */
+ ShaderData emission_sd;
+
ray.P = sd->P + sd->Ng;
ray.D = -sd->Ng;
ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
/* init path state */
- path_state_init(kg, &state, &rng, sample, NULL);
+ path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
/* evaluate surface shader */
float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
- kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
}
/* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
kernel_path_subsurface_init_indirect(&ss_indirect);
if(kernel_path_subsurface_scatter(kg,
sd,
+ &emission_sd,
&L_sample,
&state,
&rng,
@@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&L_sample,
&throughput);
kernel_path_indirect(kg,
+ &emission_sd,
&rng,
&ray,
throughput,
@@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample light and BSDF */
if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
- kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
+ kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);
if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__
state.ray_t = 0.0f;
#endif
/* compute indirect light */
- kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
+ kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
/* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample);
@@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
- kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
}
/* sample emission */
@@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample subsurface scattering */
if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
+ kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
}
#endif
@@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, &rng,
- sd, &state, throughput, 1.0f, &L_sample, all);
+ sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, &rng,
- sd, throughput, 1.0f, &state, &L_sample);
+ sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
}
}
#endif
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 5cf52f9..4de8e0f 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */
ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+ ShaderData *emission_sd,
LightSample *ls,
ccl_addr_space PathState *state,
float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
float time)
{
/* setup shading at emitter */
-#ifdef __SPLIT_KERNEL__
- ShaderData *sd = kg->sd_input;
-#else
- ShaderData sd_object;
- ShaderData *sd = &sd_object;
-#endif
float3 eval;
#ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.dP = differential3_zero();
ray.dD = dI;
- shader_setup_from_background(kg, sd, &ray);
+ shader_setup_from_background(kg, emission_sd, &ray);
path_state_modify_bounce(state, true);
- eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION);
+ eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
}
else
#endif
{
- shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
+ shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
- ls->Ng = ccl_fetch(sd, Ng);
+ ls->Ng = ccl_fetch(emission_sd, Ng);
/* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
+ shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
/* evaluate emissive closure */
- if(ccl_fetch(sd, flag) & SD_EMISSION)
- eval = shader_emissive_eval(kg, sd);
+ if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
+ eval = shader_emissive_eval(kg, emission_sd);
else
eval = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ccl_device_noinline bool direct_emission(KernelGlobals *kg,
ShaderData *sd,
+ ShaderData *emission_sd,
LightSample *ls,
ccl_addr_space PathState *state,
Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
/* evaluate closure */
float3 light_eval = direct_emissive_eval(kg,
+ emission_sd,
ls,
state,
-ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */
ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+ ShaderData *emission_sd,
ccl_addr_space PathState *state,
Ray *ray,
float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
#endif
float3 L = direct_emissive_eval(kg,
+ emission_sd,
&ls,
state,
-ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
Ray volume_ray = *ray;
volume_ray.t = ls.t;
float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
- kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+ kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
L *= volume_tp;
}
#endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
/* Indirect Background */
ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+ ShaderData *emission_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray)
{
@@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
/* evaluate background closure */
# ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray;
- shader_setup_from_background(kg, kg->sd_input, &priv_ray);
-
- path_state_modify_bounce(state, true);
- float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
- path_state_modify_bounce(state, false);
+ shader_setup_from_background(kg, emission_sd, &priv_ray);
# else
- ShaderData sd;
- shader_setup_from_background(kg, &sd, ray);
+ shader_setup_from_background(kg, emission_sd, ray);
+# endif
path_state_modify_bounce(state, true);
- float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
+ float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
-# endif
#ifdef __BA
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list