[Bf-blender-cvs] [400e6f37b80] master: Cycles: reduce subsurface stack memory usage.

Brecht Van Lommel noreply at git.blender.org
Thu Sep 28 15:19:50 CEST 2017


Commit: 400e6f37b80dde3910b8d7a9d5e619b778a1c1ff
Author: Brecht Van Lommel
Date:   Wed Aug 23 03:57:27 2017 +0200
Branches: master
https://developer.blender.org/rB400e6f37b80dde3910b8d7a9d5e619b778a1c1ff

Cycles: reduce subsurface stack memory usage.

This is done by storing only a subset of PathRadiance, and by storing
direct light immediately in the main PathRadiance. This saves about 10% of
CUDA stack memory and simplifies the subsurface indirect ray code.

===================================================================

M	intern/cycles/kernel/kernel_accumulate.h
M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_path_subsurface.h
M	intern/cycles/kernel/kernel_path_surface.h
M	intern/cycles/kernel/kernel_path_volume.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/split/kernel_branched.h
M	intern/cycles/kernel/split/kernel_do_volume.h
M	intern/cycles/kernel/split/kernel_indirect_subsurface.h
M	intern/cycles/kernel/split/kernel_next_iteration_setup.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 5e604586557..ae5f6e5e070 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -181,7 +181,6 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
 
 	if(use_light_pass) {
 		L->indirect = make_float3(0.0f, 0.0f, 0.0f);
-		L->direct_throughput = make_float3(0.0f, 0.0f, 0.0f);
 		L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
 
 		L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
@@ -202,18 +201,19 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
 		L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
 		L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
 
-		L->path_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
 		L->transparent = 0.0f;
 		L->emission = make_float3(0.0f, 0.0f, 0.0f);
 		L->background = make_float3(0.0f, 0.0f, 0.0f);
 		L->ao = make_float3(0.0f, 0.0f, 0.0f);
 		L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 		L->mist = 0.0f;
+
+		L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
 	}
 	else
 #endif
@@ -245,26 +245,34 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
 #endif
 }
 
-ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput,
-	BsdfEval *bsdf_eval, float bsdf_pdf, int bounce, int bsdf_label)
+ccl_device_inline void path_radiance_bsdf_bounce(
+	KernelGlobals *kg,
+	PathRadianceState *L_state,
+	ccl_addr_space float3 *throughput,
+	BsdfEval *bsdf_eval,
+	float bsdf_pdf, int bounce, int bsdf_label)
 {
 	float inverse_pdf = 1.0f/bsdf_pdf;
 
 #ifdef __PASSES__
-	if(L->use_light_pass) {
+	if(kernel_data.film.use_light_pass) {
 		if(bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
 			/* first on directly visible surface */
 			float3 value = *throughput*inverse_pdf;
 
-			L->path_diffuse = bsdf_eval->diffuse*value;
-			L->path_glossy = bsdf_eval->glossy*value;
-			L->path_transmission = bsdf_eval->transmission*value;
-			L->path_subsurface = bsdf_eval->subsurface*value;
-			L->path_scatter = bsdf_eval->scatter*value;
-
-			*throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface + L->path_scatter;
+			L_state->diffuse = bsdf_eval->diffuse*value;
+			L_state->glossy = bsdf_eval->glossy*value;
+			L_state->transmission = bsdf_eval->transmission*value;
+			L_state->subsurface = bsdf_eval->subsurface*value;
+			L_state->scatter = bsdf_eval->scatter*value;
+
+			*throughput = L_state->diffuse +
+			              L_state->glossy +
+			              L_state->transmission +
+			              L_state->subsurface +
+			              L_state->scatter;
 			
-			L->direct_throughput = *throughput;
+			L_state->direct = *throughput;
 		}
 		else {
 			/* transparent bounce before first hit, or indirectly visible through BSDF */
@@ -493,19 +501,19 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
 	 * only a single throughput further along the path, here we recover just
 	 * the indirect path that is not influenced by any particular BSDF type */
 	if(L->use_light_pass) {
-		L->direct_emission = safe_divide_color(L->direct_emission, L->direct_throughput);
-		L->direct_diffuse += L->path_diffuse*L->direct_emission;
-		L->direct_glossy += L->path_glossy*L->direct_emission;
-		L->direct_transmission += L->path_transmission*L->direct_emission;
-		L->direct_subsurface += L->path_subsurface*L->direct_emission;
-		L->direct_scatter += L->path_scatter*L->direct_emission;
+		L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
+		L->direct_diffuse += L->state.diffuse*L->direct_emission;
+		L->direct_glossy += L->state.glossy*L->direct_emission;
+		L->direct_transmission += L->state.transmission*L->direct_emission;
+		L->direct_subsurface += L->state.subsurface*L->direct_emission;
+		L->direct_scatter += L->state.scatter*L->direct_emission;
 
-		L->indirect = safe_divide_color(L->indirect, L->direct_throughput);
-		L->indirect_diffuse += L->path_diffuse*L->indirect;
-		L->indirect_glossy += L->path_glossy*L->indirect;
-		L->indirect_transmission += L->path_transmission*L->indirect;
-		L->indirect_subsurface += L->path_subsurface*L->indirect;
-		L->indirect_scatter += L->path_scatter*L->indirect;
+		L->indirect = safe_divide_color(L->indirect, L->state.direct);
+		L->indirect_diffuse += L->state.diffuse*L->indirect;
+		L->indirect_glossy += L->state.glossy*L->indirect;
+		L->indirect_transmission += L->state.transmission*L->indirect;
+		L->indirect_subsurface += L->state.subsurface*L->indirect;
+		L->indirect_scatter += L->state.scatter*L->indirect;
 	}
 #endif
 }
@@ -514,11 +522,11 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
 {
 #ifdef __PASSES__
 	if(L->use_light_pass) {
-		L->path_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
+		L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
 
 		L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
 		L->indirect = make_float3(0.0f, 0.0f, 0.0f);
@@ -531,11 +539,7 @@ ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L,
 {
 #ifdef __PASSES__
 	if(L->use_light_pass) {
-		L->path_diffuse = L_src->path_diffuse;
-		L->path_glossy = L_src->path_glossy;
-		L->path_transmission = L_src->path_transmission;
-		L->path_subsurface = L_src->path_subsurface;
-		L->path_scatter = L_src->path_scatter;
+		L->state = L_src->state;
 
 		L->direct_emission = L_src->direct_emission;
 		L->indirect = L_src->indirect;
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index f06005c5072..4d89839c46c 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -103,7 +103,6 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
 					                     throughput,
 					                     &state,
 					                     &L_sample);
-					kernel_path_subsurface_accum_indirect(&ss_indirect, &L_sample);
 				}
 				is_sss_sample = true;
 			}
@@ -114,7 +113,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
 		if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
 			kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
 
-			if(kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample, &ray)) {
+			if(kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
 #ifdef __LAMP_MIS__
 				state.ray_t = 0.0f;
 #endif
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index d43d6374c13..793fede0deb 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -222,7 +222,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
 			kernel_volume_decoupled_free(kg, &volume_segment);
 
 			if(result == VOLUME_PATH_SCATTERED) {
-				if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray))
+				if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
 					return VOLUME_PATH_SCATTERED;
 				else
 					return VOLUME_PATH_MISSED;
@@ -244,7 +244,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
 				kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
 
 				/* indirect light bounce */
-				if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray))
+				if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
 					return VOLUME_PATH_SCATTERED;
 				else
 					return VOLUME_PATH_MISSED;
@@ -519,7 +519,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 		}
 #endif  /* defined(__EMISSION__) */
 
-		if(!kernel_path_surface_bounce(kg, sd, &throughput, state, L, ray))
+		if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
 			break;
 	}
 }
@@ -648,13 +648,11 @@ ccl_device_forceinline void kernel_path_integrate(
 		kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
 
 		/* compute direct lighting and next bounce */
-		if(!kernel_path_surface_bounce(kg, &sd, &throughput, state, L, ray))
+		if(!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray))
 			break;
 	}
 
 #ifdef __SUBSURFACE__
-		kernel_path_subsurface_accum_indirect(&ss_indirect, L);
-
 		/* Trace indirect subsurface rays by restarting the loop. this uses less
 		 * stack memory than invoking kernel_path_indirect.
 		 */
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 010988d2a02..6e0ec22d581 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -128,7 +128,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
 			                                        num_samples,
 			                                        &tp,
 			                                        &ps,
-			                                        L,
+			                                        &L->state,
 			                                        &bsdf_ray,
 			                                        sum_sample_weight))
 			{
@@ -350,7 +350,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 					                             &sd,
 	

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list