[Bf-blender-cvs] [8a72be7697f] master: Cycles: reduce closure memory usage for emission/shadow shader data.

Brecht Van Lommel noreply at git.blender.org
Sun Nov 5 20:49:10 CET 2017


Commit: 8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6
Author: Brecht Van Lommel
Date:   Wed Nov 1 21:02:28 2017 +0100
Branches: master
https://developer.blender.org/rB8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6

Cycles: reduce closure memory usage for emission/shadow shader data.

On a Titan Xp, this reduces path tracing local memory from 1092MB to 840MB.
Benchmark performance stayed within 1% of the pre-change results on both the
RX 480 and the Titan Xp.

Original patch was implemented by Sergey.

Differential Revision: https://developer.blender.org/D2249

===================================================================

M	intern/cycles/kernel/closure/alloc.h
M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_emission.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_shader.h
M	intern/cycles/kernel/kernel_shadow.h
M	intern/cycles/kernel/kernel_subsurface.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernel_volume.h
M	intern/cycles/kernel/split/kernel_buffer_update.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_do_volume.h
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_path_init.h
M	intern/cycles/kernel/split/kernel_shader_eval.h
M	intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
M	intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
M	intern/cycles/kernel/split/kernel_split_data_types.h
M	intern/cycles/kernel/split/kernel_subsurface_scatter.h

===================================================================

diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index e799855a65e..48a60405b5a 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -20,17 +20,16 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
 {
 	kernel_assert(size <= sizeof(ShaderClosure));
 
-	int num_closure = sd->num_closure;
-	int num_closure_extra = sd->num_closure_extra;
-	if(num_closure + num_closure_extra >= MAX_CLOSURE)
+	if(sd->num_closure_left == 0)
 		return NULL;
 
-	ShaderClosure *sc = &sd->closure[num_closure];
+	ShaderClosure *sc = &sd->closure[sd->num_closure];
 
 	sc->type = type;
 	sc->weight = weight;
 
 	sd->num_closure++;
+	sd->num_closure_left--;
 
 	return sc;
 }
@@ -44,18 +43,16 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
 	 * This lets us keep the same fast array iteration over closures, as we
 	 * found linked list iteration and iteration with skipping to be slower. */
 	int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
-	int num_closure = sd->num_closure;
-	int num_closure_extra = sd->num_closure_extra + num_extra;
 
-	if(num_closure + num_closure_extra > MAX_CLOSURE) {
+	if(num_extra > sd->num_closure_left) {
 		/* Remove previous closure. */
 		sd->num_closure--;
-		sd->num_closure_extra++;
+		sd->num_closure_left++;
 		return NULL;
 	}
 
-	sd->num_closure_extra = num_closure_extra;
-	return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra);
+	sd->num_closure_left -= num_extra;
+	return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left);
 }
 
 ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 84d8d84d486..9ce10358b81 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
 	path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
 
 	/* evaluate surface shader */
-	shader_eval_surface(kg, sd, &state, state.flag);
+	shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE);
 
 	/* TODO, disable more closures we don't need besides transparent */
 	shader_bsdf_disable_transparency(kg, sd);
@@ -239,12 +239,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
 		}
 		else {
 			/* surface color of the pass only */
-			shader_eval_surface(kg, sd, state, 0);
+			shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
 			return kernel_bake_shader_bsdf(kg, sd, type);
 		}
 	}
 	else {
-		shader_eval_surface(kg, sd, state, 0);
+		shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
 		color = kernel_bake_shader_bsdf(kg, sd, type);
 	}
 
@@ -337,7 +337,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 		{
 			float3 N = sd.N;
 			if((sd.flag & SD_HAS_BUMP)) {
-				shader_eval_surface(kg, &sd, &state, 0);
+				shader_eval_surface(kg, &sd, &state, 0, MAX_CLOSURE);
 				N = shader_bsdf_average_normal(kg, &sd);
 			}
 
@@ -352,7 +352,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 		}
 		case SHADER_EVAL_EMISSION:
 		{
-			shader_eval_surface(kg, &sd, &state, 0);
+			shader_eval_surface(kg, &sd, &state, 0, 0);
 			out = shader_emissive_eval(kg, &sd);
 			break;
 		}
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 45b8c6311e1..94b0a37ce62 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -70,14 +70,11 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 		/* no path flag, we're evaluating this for all closures. that's weak but
 		 * we'd have to do multiple evaluations otherwise */
 		path_state_modify_bounce(state, true);
-		shader_eval_surface(kg, emission_sd, state, 0);
+		shader_eval_surface(kg, emission_sd, state, 0, 0);
 		path_state_modify_bounce(state, false);
 
 		/* evaluate emissive closure */
-		if(emission_sd->flag & SD_EMISSION)
-			eval = shader_emissive_eval(kg, emission_sd);
-		else
-			eval = make_float3(0.0f, 0.0f, 0.0f);
+		eval = shader_emissive_eval(kg, emission_sd);
 	}
 	
 	eval *= ls->eval_fac;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1099064038b..8519e0682e1 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 		                      sd,
 		                      &isect,
 		                      ray);
-		shader_eval_surface(kg, sd, state, state->flag);
+		shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE);
 		shader_prepare_closures(sd, state);
 
 		/* Apply shadow catcher, holdout, emission. */
@@ -561,7 +561,7 @@ ccl_device_forceinline void kernel_path_integrate(
 		bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
 
 		/* Find intersection with lamps and compute emission for MIS. */
-		kernel_path_lamp_emission(kg, state, ray, throughput, &isect, emission_sd, L);
+		kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
 
 #ifdef __VOLUME__
 		/* Volume integration. */
@@ -585,7 +585,7 @@ ccl_device_forceinline void kernel_path_integrate(
 
 		/* Shade background. */
 		if(!hit) {
-			kernel_path_background(kg, state, ray, throughput, emission_sd, L);
+			kernel_path_background(kg, state, ray, throughput, &sd, L);
 			break;
 		}
 		else if(path_state_ao_bounce(kg, state)) {
@@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate(
 
 		/* Setup and evaluate shader. */
 		shader_setup_from_ray(kg, &sd, &isect, ray);
-		shader_eval_surface(kg, &sd, state, state->flag);
+		shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE);
 		shader_prepare_closures(&sd, state);
 
 		/* Apply shadow catcher, holdout, emission. */
@@ -706,9 +706,11 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
 	PathRadiance L;
 	path_radiance_init(&L, kernel_data.film.use_light_pass);
 
-	ShaderData emission_sd;
+	ShaderDataTinyStorage emission_sd_storage;
+	ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
 	PathState state;
-	path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+	path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
 	/* Integrate. */
 	kernel_path_integrate(kg,
@@ -717,7 +719,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
 	                      &ray,
 	                      &L,
 	                      buffer,
-	                      &emission_sd);
+	                      emission_sd);
 
 	kernel_write_result(kg, buffer, sample, &L);
 }
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 3877e4f0058..f93366eade1 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -436,10 +436,12 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 	/* shader data memory used for both volumes and surfaces, saves stack space */
 	ShaderData sd;
 	/* shader data used by emission, shadows, volume stacks, indirect path */
-	ShaderData emission_sd, indirect_sd;
+	ShaderDataTinyStorage emission_sd_storage;
+	ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+	ShaderData indirect_sd;
 
 	PathState state;
-	path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+	path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
 	/* Main Loop
 	 * Here we only handle transparency intersections from the camera ray.
@@ -460,7 +462,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 		                            &isect,
 		                            hit,
 		                            &indirect_sd,
-		                            &emission_sd,
+		                            emission_sd,
 		                            L);
 #endif  /* __VOLUME__ */
 
@@ -472,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 
 		/* Setup and evaluate shader. */
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
-		shader_eval_surface(kg, &sd, &state, state.flag);
+		shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
 		shader_merge_closures(&sd);
 
 		/* Apply shadow catcher, holdout, emission. */
@@ -481,7 +483,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 		                             &state,
 		                             &ray,
 		                             throughput,
-		                             &emission_sd,
+		                             emission_sd,
 		                             L,
 		                             buffer))
 		{
@@ -513,14 +515,14 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 #ifdef __AO__
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-			kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, throughput);
+			kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
 		}
 #endif  /* __AO__ */
 
 #ifdef __SUBSURFACE__
 		/* bssrdf scatter to a different location on the same object */
 		if(sd.flag & SD_BSSRDF) {
-			kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
+			kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
 			                                        L, &state, &ray, throughput);
 		}
 #endif  /* __SUBSURFACE__ */
@@ -534,13 +536,13 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 				int all = (kernel_data.integrator.sample_all_lights_direct) ||
 				          (state.flag & PATH_RAY_SHADOW_CATCHER);
 				kernel_branched_path_surface_connect_light(kg,
-					&sd, &emission_sd, &hit_state, throughput, 1.0f, L, all);
+					&sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
 			}
 #endif  /* __EMISSION__ */
 
 			/* indirect light */
 			kernel_branched_path_surface_indirect_light(kg,
-				&sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, L);
+				&sd, &indirect_sd, emissio

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list