[Bf-blender-cvs] [8a72be7697f] master: Cycles: reduce closure memory usage for emission/shadow shader data.
Brecht Van Lommel
noreply at git.blender.org
Sun Nov 5 20:49:10 CET 2017
Commit: 8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6
Author: Brecht Van Lommel
Date: Wed Nov 1 21:02:28 2017 +0100
Branches: master
https://developer.blender.org/rB8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6
Cycles: reduce closure memory usage for emission/shadow shader data.
On a Titan Xp, this reduces path tracing local memory from 1092MB to 840MB.
Benchmark performance stayed within 1% on both the RX 480 and the Titan Xp.
The original patch was implemented by Sergey.
Differential Revision: https://developer.blender.org/D2249
===================================================================
M intern/cycles/kernel/closure/alloc.h
M intern/cycles/kernel/kernel_bake.h
M intern/cycles/kernel/kernel_emission.h
M intern/cycles/kernel/kernel_path.h
M intern/cycles/kernel/kernel_path_branched.h
M intern/cycles/kernel/kernel_shader.h
M intern/cycles/kernel/kernel_shadow.h
M intern/cycles/kernel/kernel_subsurface.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernel_volume.h
M intern/cycles/kernel/split/kernel_buffer_update.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_do_volume.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_path_init.h
M intern/cycles/kernel/split/kernel_shader_eval.h
M intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
M intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
M intern/cycles/kernel/split/kernel_split_data_types.h
M intern/cycles/kernel/split/kernel_subsurface_scatter.h
===================================================================
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index e799855a65e..48a60405b5a 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -20,17 +20,16 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
{
kernel_assert(size <= sizeof(ShaderClosure));
- int num_closure = sd->num_closure;
- int num_closure_extra = sd->num_closure_extra;
- if(num_closure + num_closure_extra >= MAX_CLOSURE)
+ if(sd->num_closure_left == 0)
return NULL;
- ShaderClosure *sc = &sd->closure[num_closure];
+ ShaderClosure *sc = &sd->closure[sd->num_closure];
sc->type = type;
sc->weight = weight;
sd->num_closure++;
+ sd->num_closure_left--;
return sc;
}
@@ -44,18 +43,16 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
* This lets us keep the same fast array iteration over closures, as we
* found linked list iteration and iteration with skipping to be slower. */
int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
- int num_closure = sd->num_closure;
- int num_closure_extra = sd->num_closure_extra + num_extra;
- if(num_closure + num_closure_extra > MAX_CLOSURE) {
+ if(num_extra > sd->num_closure_left) {
/* Remove previous closure. */
sd->num_closure--;
- sd->num_closure_extra++;
+ sd->num_closure_left++;
return NULL;
}
- sd->num_closure_extra = num_closure_extra;
- return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra);
+ sd->num_closure_left -= num_extra;
+ return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left);
}
ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 84d8d84d486..9ce10358b81 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
/* evaluate surface shader */
- shader_eval_surface(kg, sd, &state, state.flag);
+ shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE);
/* TODO, disable more closures we don't need besides transparent */
shader_bsdf_disable_transparency(kg, sd);
@@ -239,12 +239,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
}
else {
/* surface color of the pass only */
- shader_eval_surface(kg, sd, state, 0);
+ shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
return kernel_bake_shader_bsdf(kg, sd, type);
}
}
else {
- shader_eval_surface(kg, sd, state, 0);
+ shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
color = kernel_bake_shader_bsdf(kg, sd, type);
}
@@ -337,7 +337,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
{
float3 N = sd.N;
if((sd.flag & SD_HAS_BUMP)) {
- shader_eval_surface(kg, &sd, &state, 0);
+ shader_eval_surface(kg, &sd, &state, 0, MAX_CLOSURE);
N = shader_bsdf_average_normal(kg, &sd);
}
@@ -352,7 +352,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
}
case SHADER_EVAL_EMISSION:
{
- shader_eval_surface(kg, &sd, &state, 0);
+ shader_eval_surface(kg, &sd, &state, 0, 0);
out = shader_emissive_eval(kg, &sd);
break;
}
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 45b8c6311e1..94b0a37ce62 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -70,14 +70,11 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
/* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */
path_state_modify_bounce(state, true);
- shader_eval_surface(kg, emission_sd, state, 0);
+ shader_eval_surface(kg, emission_sd, state, 0, 0);
path_state_modify_bounce(state, false);
/* evaluate emissive closure */
- if(emission_sd->flag & SD_EMISSION)
- eval = shader_emissive_eval(kg, emission_sd);
- else
- eval = make_float3(0.0f, 0.0f, 0.0f);
+ eval = shader_emissive_eval(kg, emission_sd);
}
eval *= ls->eval_fac;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1099064038b..8519e0682e1 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
sd,
&isect,
ray);
- shader_eval_surface(kg, sd, state, state->flag);
+ shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE);
shader_prepare_closures(sd, state);
/* Apply shadow catcher, holdout, emission. */
@@ -561,7 +561,7 @@ ccl_device_forceinline void kernel_path_integrate(
bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
/* Find intersection with lamps and compute emission for MIS. */
- kernel_path_lamp_emission(kg, state, ray, throughput, &isect, emission_sd, L);
+ kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
#ifdef __VOLUME__
/* Volume integration. */
@@ -585,7 +585,7 @@ ccl_device_forceinline void kernel_path_integrate(
/* Shade background. */
if(!hit) {
- kernel_path_background(kg, state, ray, throughput, emission_sd, L);
+ kernel_path_background(kg, state, ray, throughput, &sd, L);
break;
}
else if(path_state_ao_bounce(kg, state)) {
@@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate(
/* Setup and evaluate shader. */
shader_setup_from_ray(kg, &sd, &isect, ray);
- shader_eval_surface(kg, &sd, state, state->flag);
+ shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE);
shader_prepare_closures(&sd, state);
/* Apply shadow catcher, holdout, emission. */
@@ -706,9 +706,11 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
PathRadiance L;
path_radiance_init(&L, kernel_data.film.use_light_pass);
- ShaderData emission_sd;
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
PathState state;
- path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
/* Integrate. */
kernel_path_integrate(kg,
@@ -717,7 +719,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
&ray,
&L,
buffer,
- &emission_sd);
+ emission_sd);
kernel_write_result(kg, buffer, sample, &L);
}
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 3877e4f0058..f93366eade1 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -436,10 +436,12 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* shader data used by emission, shadows, volume stacks, indirect path */
- ShaderData emission_sd, indirect_sd;
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ShaderData indirect_sd;
PathState state;
- path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
/* Main Loop
* Here we only handle transparency intersections from the camera ray.
@@ -460,7 +462,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
&isect,
hit,
&indirect_sd,
- &emission_sd,
+ emission_sd,
L);
#endif /* __VOLUME__ */
@@ -472,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* Setup and evaluate shader. */
shader_setup_from_ray(kg, &sd, &isect, &ray);
- shader_eval_surface(kg, &sd, &state, state.flag);
+ shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
shader_merge_closures(&sd);
/* Apply shadow catcher, holdout, emission. */
@@ -481,7 +483,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
&state,
&ray,
throughput,
- &emission_sd,
+ emission_sd,
L,
buffer))
{
@@ -513,14 +515,14 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, throughput);
+ kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
}
#endif /* __AO__ */
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */
if(sd.flag & SD_BSSRDF) {
- kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
+ kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
L, &state, &ray, throughput);
}
#endif /* __SUBSURFACE__ */
@@ -534,13 +536,13 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
int all = (kernel_data.integrator.sample_all_lights_direct) ||
(state.flag & PATH_RAY_SHADOW_CATCHER);
kernel_branched_path_surface_connect_light(kg,
- &sd, &emission_sd, &hit_state, throughput, 1.0f, L, all);
+ &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
}
#endif /* __EMISSION__ */
/* indirect light */
kernel_branched_path_surface_indirect_light(kg,
- &sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, L);
+ &sd, &indirect_sd, emissio
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list