[Bf-blender-cvs] [3847d0c] master: Cycles code internals: add initial implementation of decoupled ray marching.

Sat Mar 29 15:33:23 CET 2014

Commit: 3847d0c0df53d9a5a90e039404ecf6e5fa153877
Author: Brecht Van Lommel
Date:   Sat Mar 29 13:03:50 2014 +0100
https://developer.blender.org/rB3847d0c0df53d9a5a90e039404ecf6e5fa153877

Cycles code internals: add initial implementation of decoupled ray marching.

This basically records all volumes steps, which can then later be used multiple
time to take scattering samples, without having to step through the volume
again. From the paper:

"Importance Sampling Techniques for Path Tracing in Participating Media"

This works only on the CPU, due to usage of malloc/free.

===================================================================

M	intern/cycles/kernel/kernel_volume.h
M	intern/cycles/util/util_math.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index e1682c9..eb32f0b 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -606,6 +606,272 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 	return VOLUME_PATH_ATTENUATED;
 }
 
+/* Decoupled Volume Sampling
+ *
+ * VolumeSegment is list of coefficients and transmittance stored at all steps
+ * through a volume. This can then latter be used for decoupled sampling as in:
+ * "Importance Sampling Techniques for Path Tracing in Participating Media" */
+
+/* CPU only because of malloc/free */
+#ifdef __KERNEL_CPU__
+
+typedef struct VolumeStep {
+	float3 sigma_s;				/* scatter coefficient */
+	float3 sigma_t;				/* extinction coefficient */
+	float3 accum_transmittance;	/* accumulated transmittance including this step */
+	float3 cdf_distance;		/* cumulative density function for distance sampling */
+	float t;					/* distance at end of this step */
+	float shade_t;				/* jittered distance where shading was done in step */
+	int closure_flag;			/* shader evaluation closure flags */
+} VolumeStep;
+
+typedef struct VolumeSegment {
+	VolumeStep *steps;			/* recorded steps */
+	int numsteps;				/* number of steps */
+	int closure_flag;			/* accumulated closure flags from all steps */
+
+	float3 accum_emission;		/* accumulated emission at end of segment */
+	float3 accum_transmittance;	/* accumulated transmittance at end of segment */
+} VolumeSegment;
+
+/* record volume steps to the end of the volume.
+ *
+ * it would be nice if we could only record up to the point that we need to scatter,
+ * but the entire segment is needed to do always scattering, rather than probalistically
+ * hitting or missing the volume. if we don't know the transmittance at the end of the
+ * volume we can't generate stratitied distance samples up to that transmittance */
+ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
+	Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
+{
+	/* prepare for volume stepping */
+	int max_steps;
+	float step_size, random_jitter_offset;
+
+	if(heterogeneous) {
+		max_steps = kernel_data.integrator.volume_max_steps;
+		step_size = kernel_data.integrator.volume_step_size;
+		random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
+
+		/* compute exact steps in advance for malloc */
+		max_steps = max((int)ceilf(ray->t/step_size), 1);
+	}
+	else {
+		max_steps = 1;
+		step_size = ray->t;
+		random_jitter_offset = 0.0f;
+	}
+	
+	/* init accumulation variables */
+	float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
+	float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+	float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+	float t = 0.0f;
+
+	segment->closure_flag = 0;
+	segment->numsteps = 0;
+	segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+
+	VolumeStep *step = segment->steps;
+
+	for(int i = 0; i < max_steps; i++, step++) {
+		/* advance to new position */
+		float new_t = min(ray->t, (i+1) * step_size);
+		float dt = new_t - t;
+
+		/* use random position inside this segment to sample shader */
+		if(heterogeneous && new_t == ray->t)
+			random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
+
+		float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
+		VolumeShaderCoefficients coeff;
+
+		/* compute segment */
+		if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+			int closure_flag = sd->flag;
+			float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
+
+			/* compute accumulated transmittance */
+			float3 transmittance = volume_color_transmittance(sigma_t, dt);
+			accum_transmittance *= transmittance;
+
+			/* compute pdf for distance sampling */
+			float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
+			cdf_distance = cdf_distance + pdf_distance;
+
+			/* write step data */
+			step->sigma_t = sigma_t;
+			step->sigma_s = coeff.sigma_s;
+			step->closure_flag = closure_flag;
+
+			segment->closure_flag |= closure_flag;
+
+			/* compute emission attenuated by absorption */
+			if(closure_flag & SD_EMISSION) {
+				float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
+				accum_emission += accum_transmittance * emission;
+			}
+		}
+		else {
+			/* store empty step (todo: skip consecutive empty steps) */
+			step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
+			step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+			step->closure_flag = 0;
+		}
+
+		step->accum_transmittance = accum_transmittance;
+		step->cdf_distance = cdf_distance;
+		step->t = new_t;
+		step->shade_t = t + random_jitter_offset;
+
+		segment->numsteps++;
+
+		/* stop if at the end of the volume */
+		t = new_t;
+		if(t == ray->t)
+			break;
+	}
+
+	/* store total emission and transmittance */
+	segment->accum_emission = accum_emission;
+	segment->accum_transmittance = accum_transmittance;
+
+	/* normalize cumulative density function for distance sampling */
+	VolumeStep *last_step = segment->steps + segment->numsteps - 1;
+
+	if(!is_zero(last_step->cdf_distance)) {
+		VolumeStep *step = &segment->steps[0];
+		int numsteps = segment->numsteps;
+		float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
+
+		for(int i = 0; i < numsteps; i++, step++)
+			step->cdf_distance *= inv_cdf_distance_sum;
+	}
+}
+
+ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
+{
+	free(segment->steps);
+}
+
+/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
+ * marching. unlike the non-decoupled functions, these do not do probalistic
+ * scattering, they always scatter if there is any non-zero scattering
+ * coefficient.
+ *
+ * these also do not do emission or modify throughput. */
+ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
+	KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
+	float3 *throughput, RNG *rng, VolumeSegment *segment)
+{
+	int closure_flag = segment->closure_flag;
+
+	if(!(closure_flag & SD_SCATTER))
+		return VOLUME_PATH_MISSED;
+
+	/* pick random color channel, we use the Veach one-sample
+	 * model with balance heuristic for the channels */
+	float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
+	int channel = (int)(rphase*3.0f);
+	sd->randb_closure = rphase*3.0f - channel;
+
+	float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
+
+	VolumeStep *step;
+	float3 transmittance;
+	float pdf, sample_t;
+
+	/* distance sampling */
+	if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { 
+		/* find step in cdf */
+		step = segment->steps;
+
+		float prev_t = 0.0f;
+		float3 step_pdf = make_float3(1.0f, 1.0f, 1.0f);
+
+		if(segment->numsteps > 1) {
+			float prev_cdf = 0.0f;
+			float step_cdf = 1.0f;
+			float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+			for(int i = 0; ; i++, step++) {
+				/* todo: optimize using binary search */
+				step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
+
+				if(xi < step_cdf || i == segment->numsteps-1)
+					break;
+
+				prev_cdf = step_cdf;
+				prev_t = step->t;
+				prev_cdf_distance = step->cdf_distance;
+			}
+
+			/* remap xi so we can reuse it */
+			xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
+
+			/* pdf for picking step */
+			step_pdf = step->cdf_distance - prev_cdf_distance;
+		}
+
+		/* determine range in which we will sample */
+		float step_t = step->t - prev_t;
+
+		/* sample distance and compute transmittance */
+		float3 distance_pdf;
+		sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
+		pdf = dot(distance_pdf, step_pdf) * (1.0f/3.0f);
+	}
+	/* equi-angular sampling */
+	else {
+		/* pick position on light */
+		float3 light_P;
+		if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
+			return VOLUME_PATH_MISSED;
+
+		/* sample distance */
+		sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &pdf);
+
+		/* find step in which sampled distance is located */
+		step = segment->steps;
+
+		float prev_t = 0.0f;
+
+		if(segment->numsteps > 1) {
+			/* todo: optimize using binary search */
+			for(int i = 0; i < segment->numsteps-1; i++, step++) {
+				if(sample_t < step->t)
+					break;
+
+				prev_t = step->t;
+			}
+		}
+		
+		/* compute transmittance */
+		transmittance = volume_color_transmittance(step->sigma_t, sample_t - prev_t);
+	}
+
+	/* compute transmittance up to this step */
+	if(step != segment->steps)
+		transmittance *= (step-1)->accum_transmittance;
+
+	/* modify throughput */
+	*throughput *= step->sigma_s * transmittance / pdf;
+
+	/* evaluate shader to create closures at shading point */
+	if(segment->numsteps > 1) {
+		sd->P = ray->P + step->shade_t*ray->D;
+
+		VolumeShaderCoefficients coeff;
+		volume_shader_sample(kg, sd, state, sd->P, &coeff);
+	}
+
+	/* move to new position */
+	sd->P = ray->P + sample_t*ray->D;
+
+	return VOLUME_PATH_SCATTERED;
+}
+
+#endif
+
 /* get the volume attenuation and emission over line segment defined by
  * ray, with the assumption that there are no surfaces blocking light
  * between the endpoints */
@@ -619,12 +885,26 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
 	RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
 	bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
 
+#if 0
+	/* debugging code to compare decoupled ray marching */
+	VolumeSegment segment;
+
+	shader_setup_from_volume(kg, sd, ray, state->bounce);
+	kernel_volume_decoupled_record(kg, state, ray, sd, &segment, heterogeneous);
+
+	VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, state, ray, sd, throughput, &tmp_rng, &segment);
+
+	kernel_volume_decoupled_free(kg, &segment);
+
+	return result;
+#else
 	shader_setup_from_volume(kg, sd, ray, state->bounce);
 
 	if(heterogeneous)
 		return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
 	else
 		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, branched);
+#endi

@@ Diff output truncated at 10240 characters. @@