[Bf-blender-cvs] [f854920] compositor-2016: Cycles CUDA: reduce stack memory by reusing ShaderData.

Brecht Van Lommel noreply at git.blender.org
Wed Jun 8 21:49:42 CEST 2016


Commit: f8549205ad7d21b391d9da2986c1fbfa0e2f699e
Author: Brecht Van Lommel
Date:   Sun May 22 22:35:47 2016 +0200
Branches: compositor-2016
https://developer.blender.org/rBf8549205ad7d21b391d9da2986c1fbfa0e2f699e

Cycles CUDA: reduce stack memory by reusing ShaderData.

Stack memory usage is 57% lower for the path kernel and 48% lower for the branched path kernel.

===================================================================

M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_emission.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_path_state.h
M	intern/cycles/kernel/kernel_path_surface.h
M	intern/cycles/kernel/kernel_path_volume.h
M	intern/cycles/kernel/kernel_shadow.h
M	intern/cycles/kernel/kernel_volume.h
M	intern/cycles/kernel/split/kernel_background_buffer_update.h
M	intern/cycles/kernel/split/kernel_data_init.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_lamp_emission.h
M	intern/cycles/kernel/split/kernel_shadow_blocked.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 392cff9..77982ee 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 	Ray ray;
 	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 
+	/* emission shader data memory used by various functions */
+	ShaderData emission_sd;
+
 	ray.P = sd->P + sd->Ng;
 	ray.D = -sd->Ng;
 	ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 	path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
 
 	/* init path state */
-	path_state_init(kg, &state, &rng, sample, NULL);
+	path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
 
 	/* evaluate surface shader */
 	float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 
 		/* sample ambient occlusion */
 		if(pass_filter & BAKE_FILTER_AO) {
-			kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+			kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
 		}
 
 		/* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 			kernel_path_subsurface_init_indirect(&ss_indirect);
 			if(kernel_path_subsurface_scatter(kg,
 			                                  sd,
+			                                  &emission_sd,
 			                                  &L_sample,
 			                                  &state,
 			                                  &rng,
@@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 					                                      &L_sample,
 					                                      &throughput);
 					kernel_path_indirect(kg,
+					                     &emission_sd,
 					                     &rng,
 					                     &ray,
 					                     throughput,
@@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 
 		/* sample light and BSDF */
 		if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
-			kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
+			kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);
 
 			if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
 #ifdef __LAMP_MIS__
 				state.ray_t = 0.0f;
 #endif
 				/* compute indirect light */
-				kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
+				kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
 
 				/* sum and reset indirect light pass variables for the next samples */
 				path_radiance_sum_indirect(&L_sample);
@@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 
 		/* sample ambient occlusion */
 		if(pass_filter & BAKE_FILTER_AO) {
-			kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+			kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
 		}
 
 		/* sample emission */
@@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 		/* sample subsurface scattering */
 		if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
 			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-			kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
+			kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
 		}
 #endif
 
@@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 			if(kernel_data.integrator.use_direct_light) {
 				int all = kernel_data.integrator.sample_all_lights_direct;
 				kernel_branched_path_surface_connect_light(kg, &rng,
-					sd, &state, throughput, 1.0f, &L_sample, all);
+					sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
 			}
 #endif
 
 			/* indirect light */
 			kernel_branched_path_surface_indirect_light(kg, &rng,
-				sd, throughput, 1.0f, &state, &L_sample);
+				sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
 		}
 	}
 #endif
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 5cf52f9..4de8e0f 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Direction Emission */
 ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                 LightSample *ls,
                                                 ccl_addr_space PathState *state,
                                                 float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
                                                 float time)
 {
 	/* setup shading at emitter */
-#ifdef __SPLIT_KERNEL__
-	ShaderData *sd = kg->sd_input;
-#else
-	ShaderData sd_object;
-	ShaderData *sd = &sd_object;
-#endif
 	float3 eval;
 
 #ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 		ray.dP = differential3_zero();
 		ray.dD = dI;
 
-		shader_setup_from_background(kg, sd, &ray);
+		shader_setup_from_background(kg, emission_sd, &ray);
 
 		path_state_modify_bounce(state, true);
-		eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION);
+		eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
 		path_state_modify_bounce(state, false);
 	}
 	else
 #endif
 	{
-		shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
+		shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
 
-		ls->Ng = ccl_fetch(sd, Ng);
+		ls->Ng = ccl_fetch(emission_sd, Ng);
 
 		/* no path flag, we're evaluating this for all closures. that's weak but
 		 * we'd have to do multiple evaluations otherwise */
 		path_state_modify_bounce(state, true);
-		shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
+		shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
 		path_state_modify_bounce(state, false);
 
 		/* evaluate emissive closure */
-		if(ccl_fetch(sd, flag) & SD_EMISSION)
-			eval = shader_emissive_eval(kg, sd);
+		if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
+			eval = shader_emissive_eval(kg, emission_sd);
 		else
 			eval = make_float3(0.0f, 0.0f, 0.0f);
 	}
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 
 ccl_device_noinline bool direct_emission(KernelGlobals *kg,
                                          ShaderData *sd,
+                                         ShaderData *emission_sd,
                                          LightSample *ls,
                                          ccl_addr_space PathState *state,
                                          Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
 	/* evaluate closure */
 
 	float3 light_eval = direct_emissive_eval(kg,
+	                                         emission_sd,
 	                                         ls,
 	                                         state,
 	                                         -ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
 /* Indirect Lamp Emission */
 
 ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                 ccl_addr_space PathState *state,
                                                 Ray *ray,
                                                 float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 #endif
 
 		float3 L = direct_emissive_eval(kg,
+		                                emission_sd,
 		                                &ls,
 		                                state,
 		                                -ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 			Ray volume_ray = *ray;
 			volume_ray.t = ls.t;
 			float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
-			kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+			kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
 			L *= volume_tp;
 		}
 #endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 /* Indirect Background */
 
 ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+                                               ShaderData *emission_sd,
                                                ccl_addr_space PathState *state,
                                                ccl_addr_space Ray *ray)
 {
@@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
 	/* evaluate background closure */
 #  ifdef __SPLIT_KERNEL__
 	Ray priv_ray = *ray;
-	shader_setup_from_background(kg, kg->sd_input, &priv_ray);
-
-	path_state_modify_bounce(state, true);
-	float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
-	path_state_modify_bounce(state, false);
+	shader_setup_from_background(kg, emission_sd, &priv_ray);
 #  else
-	ShaderData sd;
-	shader_setup_from_background(kg, &sd, ray);
+	shader_setup_from_background(kg, emission_sd, ray);
+#  endif
 
 	path_state_modify_bounce(state, true);
-	float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
+	float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
 	path_state_modify_bounce(state, false);
-#  endif
 
 #ifdef __BA

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list