[Bf-blender-cvs] [f77cdd1d59f] master: Code cleanup: deduplicate some branched and split kernel code.

Brecht Van Lommel noreply at git.blender.org
Wed Sep 13 15:30:46 CEST 2017


Commit: f77cdd1d59f6e895b567c4d5fdcc6f2440e03307
Author: Brecht Van Lommel
Date:   Wed Sep 13 02:10:24 2017 +0200
Branches: master
https://developer.blender.org/rBf77cdd1d59f6e895b567c4d5fdcc6f2440e03307

Code cleanup: deduplicate some branched and split kernel code.

Benchmark performance on GTX 1080 and RX 480 on Linux is the same for
bmw27, classroom, pabellon, and about 2% faster on fishy_cat and koro.

===================================================================

M	intern/cycles/kernel/kernel_emission.h
M	intern/cycles/kernel/kernel_passes.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_path_volume.h
M	intern/cycles/kernel/kernel_shader.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_indirect_background.h
M	intern/cycles/kernel/split/kernel_lamp_emission.h
M	intern/cycles/kernel/split/kernel_scene_intersect.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 48a8e53be33..13d4759a9ec 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -37,9 +37,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 		ray.D = ls->D;
 		ray.P = ls->P;
 		ray.t = 1.0f;
-#  ifdef __OBJECT_MOTION__
 		ray.time = time;
-#  endif
 		ray.dP = differential3_zero();
 		ray.dD = dI;
 
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index d454cce6e30..06510442cd1 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -225,7 +225,7 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
 #endif /* __KERNEL_DEBUG__ */
 
 ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
-	ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput)
+	ShaderData *sd, ccl_addr_space PathState *state, float3 throughput)
 {
 #ifdef __PASSES__
 	int path_flag = state->flag;
@@ -243,6 +243,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
 		   kernel_data.film.pass_alpha_threshold == 0.0f ||
 		   average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
 		{
+			int sample = state->sample;
 
 			if(sample == 0) {
 				if(flag & PASS_DEPTH) {
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index bfde96ec270..afaa47c768c 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -50,6 +50,294 @@
 
 CCL_NAMESPACE_BEGIN
 
+ccl_device_forceinline bool kernel_path_scene_intersect(
+	KernelGlobals *kg,
+	ccl_addr_space PathState *state,
+	Ray *ray,
+	Intersection *isect,
+	PathRadiance *L)
+{
+	uint visibility = path_state_ray_visibility(kg, state);
+
+#ifdef __HAIR__
+	float difl = 0.0f, extmax = 0.0f;
+	uint lcg_state = 0;
+
+	if(kernel_data.bvh.have_curves) {
+		if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
+			float3 pixdiff = ray->dD.dx + ray->dD.dy;
+			/*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
+			difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
+		}
+
+		extmax = kernel_data.curve.maximum_width;
+		lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
+	}
+
+	if(path_state_ao_bounce(kg, state)) {
+		visibility = PATH_RAY_SHADOW;
+		ray->t = kernel_data.background.ao_distance;
+	}
+
+	bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
+#else
+	bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
+#endif  /* __HAIR__ */
+
+#ifdef __KERNEL_DEBUG__
+	if(state->flag & PATH_RAY_CAMERA) {
+		L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
+		L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
+		L->debug_data.num_bvh_intersections += isect->num_intersections;
+	}
+	L->debug_data.num_ray_bounces++;
+#endif  /* __KERNEL_DEBUG__ */
+
+	return hit;
+}
+
+ccl_device_forceinline void kernel_path_lamp_emission(
+	KernelGlobals *kg,
+	ccl_addr_space PathState *state,
+	Ray *ray,
+	float3 throughput,
+	ccl_addr_space Intersection *isect,
+	ShaderData *emission_sd,
+	PathRadiance *L)
+{
+#ifdef __LAMP_MIS__
+	if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
+		/* ray starting from previous non-transparent bounce */
+		Ray light_ray;
+
+		light_ray.P = ray->P - state->ray_t*ray->D;
+		state->ray_t += isect->t;
+		light_ray.D = ray->D;
+		light_ray.t = state->ray_t;
+		light_ray.time = ray->time;
+		light_ray.dD = ray->dD;
+		light_ray.dP = ray->dP;
+
+		/* intersect with lamp */
+		float3 emission;
+
+		if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
+			path_radiance_accum_emission(L, throughput, emission, state->bounce);
+	}
+#endif  /* __LAMP_MIS__ */
+}
+
+ccl_device_forceinline void kernel_path_background(
+	KernelGlobals *kg,
+	ccl_addr_space PathState *state,
+	ccl_addr_space Ray *ray,
+	float3 throughput,
+	ShaderData *emission_sd,
+	PathRadiance *L)
+{
+	/* eval background shader if nothing hit */
+	if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
+		L->transparent += average(throughput);
+
+#ifdef __PASSES__
+		if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
+#endif  /* __PASSES__ */
+			return;
+	}
+
+#ifdef __BACKGROUND__
+	/* sample background shader */
+	float3 L_background = indirect_background(kg, emission_sd, state, ray);
+	path_radiance_accum_background(L, state, throughput, L_background);
+#endif  /* __BACKGROUND__ */
+}
+
+#ifndef __SPLIT_KERNEL__
+
+ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
+	KernelGlobals *kg,
+	ShaderData *sd,
+	PathState *state,
+	Ray *ray,
+	float3 *throughput,
+	ccl_addr_space Intersection *isect,
+	bool hit,
+	ShaderData *emission_sd,
+	PathRadiance *L)
+{
+#ifdef __VOLUME__
+	/* Sanitize volume stack. */
+	if(!hit) {
+		kernel_volume_clean_stack(kg, state->volume_stack);
+	}
+	/* volume attenuation, emission, scatter */
+	if(state->volume_stack[0].shader != SHADER_NONE) {
+		Ray volume_ray = *ray;
+		volume_ray.t = (hit)? isect->t: FLT_MAX;
+
+		bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+#  ifdef __VOLUME_DECOUPLED__
+		int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+		bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
+		bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+
+		if(decoupled) {
+			/* cache steps along volume for repeated sampling */
+			VolumeSegment volume_segment;
+
+			shader_setup_from_volume(kg, sd, &volume_ray);
+			kernel_volume_decoupled_record(kg, state,
+				&volume_ray, sd, &volume_segment, heterogeneous);
+
+			volume_segment.sampling_method = sampling_method;
+
+			/* emission */
+			if(volume_segment.closure_flag & SD_EMISSION)
+				path_radiance_accum_emission(L, *throughput, volume_segment.accum_emission, state->bounce);
+
+			/* scattering */
+			VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+			if(volume_segment.closure_flag & SD_SCATTER) {
+				int all = kernel_data.integrator.sample_all_lights_indirect;
+
+				/* direct light sampling */
+				kernel_branched_path_volume_connect_light(kg, sd,
+					emission_sd, *throughput, state, L, all,
+					&volume_ray, &volume_segment);
+
+				/* indirect sample. if we use distance sampling and take just
+				 * one sample for direct and indirect light, we could share
+				 * this computation, but makes code a bit complex */
+				float rphase = path_state_rng_1D_for_decision(kg, state, PRNG_PHASE);
+				float rscatter = path_state_rng_1D_for_decision(kg, state, PRNG_SCATTER_DISTANCE);
+
+				result = kernel_volume_decoupled_scatter(kg,
+					state, &volume_ray, sd, throughput,
+					rphase, rscatter, &volume_segment, NULL, true);
+			}
+
+			/* free cached steps */
+			kernel_volume_decoupled_free(kg, &volume_segment);
+
+			if(result == VOLUME_PATH_SCATTERED) {
+				if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray))
+					return VOLUME_PATH_SCATTERED;
+				else
+					return VOLUME_PATH_MISSED;
+			}
+			else {
+				*throughput *= volume_segment.accum_transmittance;
+			}
+		}
+		else
+#  endif  /* __VOLUME_DECOUPLED__ */
+		{
+			/* integrate along volume segment with distance sampling */
+			VolumeIntegrateResult result = kernel_volume_integrate(
+				kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+
+#  ifdef __VOLUME_SCATTER__
+			if(result == VOLUME_PATH_SCATTERED) {
+				/* direct lighting */
+				kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+				/* indirect light bounce */
+				if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray))
+					return VOLUME_PATH_SCATTERED;
+				else
+					return VOLUME_PATH_MISSED;
+			}
+#  endif  /* __VOLUME_SCATTER__ */
+		}
+	}
+#endif  /* __VOLUME__ */
+
+	return VOLUME_PATH_ATTENUATED;
+}
+
+#endif /* __SPLIT_KERNEL__ */
+
+ccl_device_forceinline bool kernel_path_shader_apply(
+	KernelGlobals *kg,
+	ShaderData *sd,
+	ccl_addr_space PathState *state,
+	ccl_addr_space Ray *ray,
+	float3 throughput,
+	ShaderData *emission_sd,
+	PathRadiance *L,
+	ccl_global float *buffer)
+{
+#ifdef __SHADOW_TRICKS__
+	if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
+		if(state->flag & PATH_RAY_CAMERA) {
+			state->flag |= (PATH_RAY_SHADOW_CATCHER |
+						   PATH_RAY_STORE_SHADOW_INFO);
+			if(!kernel_data.background.transparent) {
+				L->shadow_background_color =
+						indirect_background(kg, emission_sd, state, ray);
+			}
+			L->shadow_radiance_sum = path_radiance_clamp_and_sum(kg, L);
+			L->shadow_throughput = average(throughput);
+		}
+	}
+	else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
+		/* Only update transparency after shadow catcher bounce. */
+		L->shadow_transparency *=
+				average(shader_bsdf_transparency(kg, sd));
+	}
+#endif  /* __SHADOW_TRICKS__ */
+
+	/* holdout */
+#ifdef __HOLDOUT__
+	if(((sd->flag & SD_HOLDOUT) ||
+		(sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
+	   (state->flag & PATH_RAY_CAMERA))
+	{
+		if(kernel_data.background.transparent) {
+			float3 holdout_weight;
+			if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+				holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
+			}
+			else {
+				holdout_weight = shader_holdout_eval(kg, sd);
+			}
+			/* any throughput is ok, should all be identical here */
+			L->transparent += average(holdout_weight*throughput);
+		}
+
+		if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+			return false;
+		}
+	}
+#endif  /* __HOLDOUT__ */
+
+	/* holdout mask objects do not write data passes */
+	kernel_write_data_passes(kg, buffer, L, sd, state, throughput);
+
+	/* blurring of bsdf after bounces, for rays that have a small likelihood
+	 * of following this particular path (diffuse, rough glossy) */
+	if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+		float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
+
+		if(blur_pdf < 1.0f) {
+			float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
+			shader_bsdf_blur(kg, sd, blur_roughness);
+		}
+	}
+

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list