[Bf-blender-cvs] [1e43f0d] master: Cycles: Set of fixes for delayed SSS ray tracing

Sergey Sharybin noreply at git.blender.org
Sat Nov 28 16:13:15 CET 2015


Commit: 1e43f0d74216cc936e6a708be321ba2c05b66ca1
Author: Sergey Sharybin
Date:   Sat Nov 28 19:30:35 2015 +0500
Branches: master
https://developer.blender.org/rB1e43f0d74216cc936e6a708be321ba2c05b66ca1

Cycles: Set of fixes for delayed SSS ray tracing

There were multiple issues which are solved now:

- It was possible that a ray wouldn't be bounced off the BSSRDF, for example
  when the PDF or shader eval is zero. In this case PathState might have been
  left in the pre-bounce state, which would have given incorrect shading
  results.

  This is solved by having separate PathState for each of the hits.

- Path radiance summing wasn't happening correctly either: indirect rays
  were using the wrong path radiance when more than one hit was
  recorded.

  This now uses a slightly trickier state machine which calculates path
  radiance for just SSS (both direct and indirect) and then sums it back
  into the final radiance.

- The previous commit wasn't totally correct either; the bug it dealt with was
  induced by the wrong path state left over from the "un-happened" ray bounce.

  No special case is needed here: BSSRDFs will be replaced with diffuse
  BSDFs due to the PATH_RAY_DIFFUSE_ANCESTOR flag.

- Merged the code paths for "delayed" and "immediate" indirect SSS ray
  tracing back together, hopefully making the codebase easier to maintain.

Admittedly, this change brings memory usage back up by about 4-5%, but overall
it's still about a 2x memory reduction for the experimental kernel here.

Thanks Brecht for the review!

===================================================================

M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_types.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 57cbf0b6..a04e759 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -65,6 +65,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 		if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
 			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
 			SubsurfaceIndirectRays ss_indirect;
+			ss_indirect.tracing = false;
 			ss_indirect.num_rays = 0;
 			if(kernel_path_subsurface_scatter(kg,
 			                                  sd,
@@ -75,14 +76,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 			                                  &throughput,
 			                                  &ss_indirect))
 			{
-#  ifdef __SUBSURFACE_DELAYED_INDIRECT__
 				while(ss_indirect.num_rays) {
 					kernel_path_subsurface_setup_indirect(kg,
 					                                      &ss_indirect,
-					                                      &L_sample,
-					                                      &state,
 					                                      &ray,
+					                                      &state,
 					                                      &ray,
+					                                      &L_sample,
 					                                      &throughput);
 					kernel_path_indirect(kg,
 					                     &rng,
@@ -91,8 +91,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 					                     state.num_samples,
 					                     &state,
 					                     &L_sample);
+					kernel_path_subsurface_accum_indirect(&ss_indirect, &L_sample);
 				}
-#  endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 				is_sss_sample = true;
 			}
 		}
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 850bfb2..721e0fc 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -448,21 +448,13 @@ ccl_device bool kernel_path_subsurface_scatter(
 
 	/* do bssrdf scatter step if we picked a bssrdf closure */
 	if(sc) {
-		uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
+		/* We should never have two consecutive BSSRDF bounces,
+		 * the second one should be converted to a diffuse BSDF to
+		 * avoid this.
+		 */
+		kernel_assert(!ss_indirect->tracing);
 
-		/* If indirect ray hits BSSRDF we replace it with diffuse BSDF. */
-		if(ss_indirect->num_rays) {
-			float bssrdf_u, bssrdf_v;
-			path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
-			subsurface_scatter_step(kg,
-			                        sd,
-			                        state->flag,
-			                        sc,
-			                        &lcg_state,
-			                        bssrdf_u, bssrdf_v,
-			                        false);
-			return false;
-		}
+		uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
 
 		SubsurfaceIntersection ss_isect;
 		float bssrdf_u, bssrdf_v;
@@ -493,9 +485,10 @@ ccl_device bool kernel_path_subsurface_scatter(
 			                               sc,
 			                               false);
 
-			PathState *hit_state = &ss_indirect->state;
+			PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
 			Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
 			float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
+			PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
 
 			*hit_state = *state;
 			*hit_ray = *ray;
@@ -503,51 +496,25 @@ ccl_device bool kernel_path_subsurface_scatter(
 
 			hit_state->rng_offset += PRNG_BOUNCE_NUM;
 
-			kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, L);
+			path_radiance_init(hit_L, kernel_data.film.use_light_pass);
+			kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
 
 			if(kernel_path_surface_bounce(kg,
 			                              rng,
 			                              sd,
 			                              hit_tp,
 			                              hit_state,
-			                              L,
+			                              hit_L,
 			                              hit_ray))
 			{
 #ifdef __LAMP_MIS__
 				hit_state->ray_t = 0.0f;
 #endif
 
-#ifdef __SUBSURFACE_DELAYED_INDIRECT__
 				ss_indirect->num_rays++;
-#else
-#  ifdef __VOLUME__
-				if(ss_indirect->need_update_volume_stack) {
-					Ray volume_ray = *ray;
-
-					/* Setup ray from previous surface point to the new one. */
-					volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
-					                             &volume_ray.t);
-
-					kernel_volume_stack_update_for_subsurface(kg,
-					                                          &volume_ray,
-					                                          hit_state->volume_stack);
-				}
-#  endif  /* __VOLUME__ */
-
-				kernel_path_indirect(kg,
-				                     rng,
-				                     hit_ray,
-				                     *hit_tp,
-				                     hit_state->num_samples,
-				                     hit_state,
-				                     L);
-
-				/* For render passes, sum and reset indirect light pass variables
-				 * for the next samples.
-				 */
-				path_radiance_sum_indirect(L);
-				path_radiance_reset_indirect(L);
-#endif
+			}
+			else {
+				path_radiance_accum_sample(L, hit_L, 1);
 			}
 		}
 		return true;
@@ -555,23 +522,38 @@ ccl_device bool kernel_path_subsurface_scatter(
 	return false;
 }
 
-#ifdef __SUBSURFACE_DELAYED_INDIRECT__
+ccl_device void kernel_path_subsurface_accum_indirect(
+        SubsurfaceIndirectRays *ss_indirect,
+        PathRadiance *L)
+{
+	if(ss_indirect->tracing) {
+		path_radiance_sum_indirect(L);
+		path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
+		if(ss_indirect->num_rays == 0) {
+			*L = ss_indirect->direct_L;
+		}
+	}
+}
+
 ccl_device void kernel_path_subsurface_setup_indirect(
         KernelGlobals *kg,
         SubsurfaceIndirectRays *ss_indirect,
-        PathRadiance *L,
+        const Ray *orig_ray,
         PathState *state,
-        Ray *orig_ray,
         Ray *ray,
+        PathRadiance *L,
         float3 *throughput)
 {
+	if(!ss_indirect->tracing) {
+		ss_indirect->direct_L = *L;
+	}
+	ss_indirect->tracing = true;
+
 	/* Setup state, ray and throughput for indirect SSS rays. */
 	ss_indirect->num_rays--;
 
 	Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
-
-	*state = ss_indirect->state;
-	*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
+	PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
 
 #ifdef __VOLUME__
 	if(ss_indirect->need_update_volume_stack) {
@@ -587,17 +569,15 @@ ccl_device void kernel_path_subsurface_setup_indirect(
 	}
 #endif  /* __VOLUME__ */
 
+	*state = ss_indirect->state[ss_indirect->num_rays];
 	*ray = *indirect_ray;
+	*L = *indirect_L;
+	*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
 
-	/* For render passes, sum and reset indirect light pass variables
-	 * for the next samples.
-	 */
-	path_radiance_sum_indirect(L);
-	path_radiance_reset_indirect(L);
+	state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
 }
-#endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 
-#endif
+#endif  /* __SUBSURFACE__ */
 
 ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
 {
@@ -618,9 +598,9 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 
 #ifdef __SUBSURFACE__
 	SubsurfaceIndirectRays ss_indirect;
+	ss_indirect.tracing = false;
 	ss_indirect.num_rays = 0;
 
-#  ifdef __SUBSURFACE_DELAYED_INDIRECT__
 	/* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter
 	 * ray by storing an updated version of state in the ss_indirect which will
 	 * be updated to the new volume stack.
@@ -628,7 +608,6 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 	Ray ss_orig_ray;
 
 	for(;;) {
-#  endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 #endif
 
 	/* path iteration */
@@ -877,9 +856,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 			                                  &throughput,
 			                                  &ss_indirect))
 			{
-#  ifdef __SUBSURFACE_DELAYED_INDIRECT__
 				ss_orig_ray = ray;
-#  endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 				break;
 			}
 		}
@@ -893,24 +870,26 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 			break;
 	}
 
-#ifdef __SUBSURFACE_DELAYED_INDIRECT__
+#ifdef __SUBSURFACE__
+		kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
+
 		/* Trace indirect subsurface rays by restarting the loop. this uses less
 		 * stack memory than invoking kernel_path_indirect.
 		 */
 		if(ss_indirect.num_rays) {
 			kernel_path_subsurface_setup_indirect(kg,
 			                                      &ss_indirect,
-			                                      &L,
-			                                      &state,
 			                                      &ss_orig_ray,
+			                                      &state,
 			                                      &ray,
+			                                      &L,
 			                                      &throughput);
 		}
 		else {
 			break;
 		}
 	}
-#endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
+#endif  /* __SUBSURFACE__ */
 
 	float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
 
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index b986988..017126d 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -87,7 +87,6 @@ CCL_NAMESPACE_BEGIN
 /* Experimental on GPU */
 #ifdef __KERNEL_EXPERIMENTAL__
 #define __SUBSURFACE__
-#define __SUBSURFACE_DELAYED_INDIRECT__
 #define __CMJ__
 #endif
 
@@ -770,11 +769,14 @@ struct SubsurfaceIntersection
 struct SubsurfaceIndirectRays
 {
 	bool need_update_volume_stack;
-	PathState state;
+	bool tracing;
+	PathState state[BSSRDF_MAX_HITS];
+	PathRadiance direct_L;
 
 	int num_rays;
 	Ray rays[BSSRDF_MAX_HITS];
 	float3 throughputs[BSSRDF_MAX_HITS];
+	PathRadiance L[BSSRDF_MAX_HITS];
 };
 
 /* Constant Kernel Dat

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list