[Bf-blender-cvs] [2a5c1fc] master: Cycles: Delay shooting SSS indirect rays

Sergey Sharybin noreply at git.blender.org
Wed Nov 25 09:01:30 CET 2015


Commit: 2a5c1fc9ccbabfaef4eeaf90093dfb2ac0acfc90
Author: Sergey Sharybin
Date:   Sun Nov 22 15:48:33 2015 +0500
Branches: master
https://developer.blender.org/rB2a5c1fc9ccbabfaef4eeaf90093dfb2ac0acfc90

Cycles: Delay shooting SSS indirect rays

The idea is to delay shooting indirect rays for SSS sampling and
trace them after the main integration loop has finished.

This reduces GPU stack usage even further and brings it down to around
652MB (compared to 722MB before the change and 946MB with the previous
stable release).

This also solves the speed regression that happened in the previous commit,
and now a simple SSS scene (SSS Suzanne on the floor) renders in 0:50
(compared to 1:16 with the previous commit and 1:03 with the official release).

===================================================================

M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_types.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 0cb5646..0f572b3 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -64,8 +64,19 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 		/* sample subsurface scattering */
 		if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
 			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-			if(kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+			SubsurfaceIndirectRays ss_indirect;
+			if(kernel_path_subsurface_scatter(kg,
+			                                  sd,
+			                                  &L_sample,
+			                                  &state,
+			                                  &rng,
+			                                  &ray,
+			                                  &throughput,
+			                                  &ss_indirect))
+			{
+				kernel_path_subsurface_scatter_indirect(kg, &L_sample, &state, &rng, &ray, &ss_indirect);
 				is_sss_sample = true;
+			}
 		}
 #endif
 
@@ -84,7 +95,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 				state.ray_t = 0.0f;
 #endif
 				/* compute indirect light */
-				kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
+				kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
 
 				/* sum and reset indirect light pass variables for the next samples */
 				path_radiance_sum_indirect(&L_sample);
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 87d36ef..1f385b8 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -52,47 +52,64 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
-	float3 throughput, int num_samples, PathState state, PathRadiance *L)
+ccl_device void kernel_path_indirect(KernelGlobals *kg,
+                                     RNG *rng,
+                                     Ray *ray,
+                                     float3 throughput,
+                                     int num_samples,
+                                     PathState *state,
+                                     PathRadiance *L)
 {
 	/* path iteration */
 	for(;;) {
 		/* intersect scene */
 		Intersection isect;
-		uint visibility = path_state_ray_visibility(kg, &state);
-		bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
+		uint visibility = path_state_ray_visibility(kg, state);
+		bool hit = scene_intersect(kg,
+		                           ray,
+		                           visibility,
+		                           &isect,
+		                           NULL,
+		                           0.0f, 0.0f);
 
 #ifdef __LAMP_MIS__
-		if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
+		if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
 			/* ray starting from previous non-transparent bounce */
 			Ray light_ray;
 
-			light_ray.P = ray.P - state.ray_t*ray.D;
-			state.ray_t += isect.t;
-			light_ray.D = ray.D;
-			light_ray.t = state.ray_t;
-			light_ray.time = ray.time;
-			light_ray.dD = ray.dD;
-			light_ray.dP = ray.dP;
+			light_ray.P = ray->P - state->ray_t*ray->D;
+			state->ray_t += isect.t;
+			light_ray.D = ray->D;
+			light_ray.t = state->ray_t;
+			light_ray.time = ray->time;
+			light_ray.dD = ray->dD;
+			light_ray.dP = ray->dP;
 
 			/* intersect with lamp */
 			float3 emission;
-
-			if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
-				path_radiance_accum_emission(L, throughput, emission, state.bounce);
+			if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
+				path_radiance_accum_emission(L,
+				                             throughput,
+				                             emission,
+				                             state->bounce);
+			}
 		}
 #endif
 
 #ifdef __VOLUME__
 		/* volume attenuation, emission, scatter */
-		if(state.volume_stack[0].shader != SHADER_NONE) {
-			Ray volume_ray = ray;
+		if(state->volume_stack[0].shader != SHADER_NONE) {
+			Ray volume_ray = *ray;
 			volume_ray.t = (hit)? isect.t: FLT_MAX;
 
-			bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+			bool heterogeneous =
+			        volume_stack_is_heterogeneous(kg,
+			                                      state->volume_stack);
 
 #ifdef __VOLUME_DECOUPLED__
-			int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
+			int sampling_method =
+			        volume_stack_sampling_method(kg,
+			                                     state->volume_stack);
 			bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
 
 			if(decoupled) {
@@ -100,15 +117,27 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
 				VolumeSegment volume_segment;
 				ShaderData volume_sd;
 
-				shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
-				kernel_volume_decoupled_record(kg, &state,
-					&volume_ray, &volume_sd, &volume_segment, heterogeneous);
-				
+				shader_setup_from_volume(kg,
+				                         &volume_sd,
+				                         &volume_ray,
+				                         state->bounce,
+				                         state->transparent_bounce);
+				kernel_volume_decoupled_record(kg,
+				                               state,
+				                               &volume_ray,
+				                               &volume_sd,
+				                               &volume_segment,
+				                               heterogeneous);
+
 				volume_segment.sampling_method = sampling_method;
 
 				/* emission */
-				if(volume_segment.closure_flag & SD_EMISSION)
-					path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state.bounce);
+				if(volume_segment.closure_flag & SD_EMISSION) {
+					path_radiance_accum_emission(L,
+					                             throughput,
+					                             volume_segment.accum_emission,
+					                             state->bounce);
+				}
 
 				/* scattering */
 				VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
@@ -117,28 +146,51 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
 					bool all = kernel_data.integrator.sample_all_lights_indirect;
 
 					/* direct light sampling */
-					kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
-						throughput, &state, L, all, &volume_ray, &volume_segment);
+					kernel_branched_path_volume_connect_light(kg,
+					                                          rng,
+					                                          &volume_sd,
+					                                          throughput,
+					                                          state,
+					                                          L,
+					                                          all,
+					                                          &volume_ray,
+					                                          &volume_segment);
 
 					/* indirect sample. if we use distance sampling and take just
 					 * one sample for direct and indirect light, we could share
 					 * this computation, but makes code a bit complex */
-					float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
-					float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
+					float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
+					float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
 
 					result = kernel_volume_decoupled_scatter(kg,
-						&state, &volume_ray, &volume_sd, &throughput,
-						rphase, rscatter, &volume_segment, NULL, true);
+					                                         state,
+					                                         &volume_ray,
+					                                         &volume_sd,
+					                                         &throughput,
+					                                         rphase,
+					                                         rscatter,
+					                                         &volume_segment,
+					                                         NULL,
+					                                         true);
 				}
 
 				/* free cached steps */
 				kernel_volume_decoupled_free(kg, &volume_segment);
 
 				if(result == VOLUME_PATH_SCATTERED) {
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
+					if(kernel_path_volume_bounce(kg,
+					                             rng,
+					                             &volume_sd,
+					                             &throughput,
+					                             state,
+					                             L,
+					                             ray))
+					{
 						continue;
-					else
+					}
+					else {
 						break;
+					}
 				}
 				else {
 					throughput *= volume_segment.accum_transmittance;
@@ -150,18 +202,32 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
 				/* integrate along volume segment with distance sampling */
 				ShaderData volume_sd;
 				VolumeIntegrateResult result = kernel_volume_integrate(
-					kg, &state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
+					kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
 
 #ifdef __VOLUME_SCATTER__
 				if(result == VOLUME_PATH_SCATTERED) {
 					/* direct lighting */
-					kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L);
+					kernel_path_volume_connect_light(kg,
+					                                 rng,
+					                                 &volume_sd,
+					                                 throughput,
+					                                 state,
+					                                 L);
 
 					/* indirect light bounce */
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
+					if(kernel_path_volume_bounce(kg,
+					                             rng,
+					                             &volume_sd,
+					                          

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list