[Bf-blender-cvs] [b745195785] id_override_static: Cycles: Split shadow functions to avoid some duplicated calculations

Thu Feb 9 14:52:33 CET 2017

Commit: b745195785c92a58d6c992a00869883bfb3d6dd9
Author: Sergey Sharybin
Date:   Mon Jan 30 12:08:20 2017 +0100
Branches: id_override_static
https://developer.blender.org/rBb745195785c92a58d6c992a00869883bfb3d6dd9

Cycles: Split shadow functions to avoid some duplicated calculations

===================================================================

M	intern/cycles/kernel/kernel_shadow.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index e69eac6ab8..95b34b5b2d 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -19,12 +19,13 @@ CCL_NAMESPACE_BEGIN
 /* Attenuate throughput accordingly to the given intersection event.
  * Returns true if the throughput is zero and traversal can be aborted.
  */
-ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg,
-                                                       ShaderData *shadow_sd,
-                                                       PathState *state,
-                                                       Intersection *isect,
-                                                       Ray *ray,
-                                                       float3 *throughput)
+ccl_device_forceinline bool shadow_handle_transparent_isect(
+        KernelGlobals *kg,
+        ShaderData *shadow_sd,
+        PathState *state,
+        Intersection *isect,
+        Ray *ray,
+        float3 *throughput)
 {
 #ifdef __VOLUME__
 	/* Attenuation between last surface and next surface. */
@@ -60,7 +61,31 @@ ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg,
 	return false;
 }
 
-#ifdef __SHADOW_RECORD_ALL__
+/* Special version which only handles opaque shadows. */
+ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
+                                      ShaderData *shadow_sd,
+                                      PathState *state,
+                                      Ray *ray,
+                                      Intersection *isect,
+                                      float3 *shadow)
+{
+	const bool blocked = scene_intersect(kg,
+	                                     *ray,
+	                                     PATH_RAY_SHADOW_OPAQUE,
+	                                     isect,
+	                                     NULL,
+	                                     0.0f, 0.0f);
+#ifdef __VOLUME__
+	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+		/* Apply attenuation from current volume shader. */
+		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+	}
+#endif
+	return blocked;
+}
+
+#ifdef __TRANSPARENT_SHADOWS__
+#  ifdef __SHADOW_RECORD_ALL__
 /* Shadow function to compute how much light is blocked,
  *
  * We trace a single ray. If it hits any opaque surface, or more than a given
@@ -86,121 +111,134 @@ ccl_device_inline bool shadow_handle_transparent_isect(KernelGlobals *kg,
  * is something to be kept an eye on.
  */
 
-#define SHADOW_STACK_MAX_HITS 64
+#    define SHADOW_STACK_MAX_HITS 64
 
-ccl_device_inline bool shadow_blocked_all(KernelGlobals *kg,
-                                          ShaderData *shadow_sd,
-                                          PathState *state,
-                                          Ray *ray,
-                                          float3 *shadow)
+/* Actual logic with traversal loop implementation which is free from device
+ * specific tweaks.
+ *
+ * Note that hits array should be as big as max_hits+1.
+ */
+ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
+                                                    ShaderData *shadow_sd,
+                                                    PathState *state,
+                                                    Ray *ray,
+                                                    Intersection *hits,
+                                                    uint max_hits,
+                                                    float3 *shadow)
 {
-	*shadow = make_float3(1.0f, 1.0f, 1.0f);
-	if(ray->t == 0.0f) {
-		return false;
-	}
-	bool blocked;
-	if(kernel_data.integrator.transparent_shadows) {
-		/* Check transparent bounces here, for volume scatter which can do
-		 * lighting before surface path termination is checked.
-		 */
-		if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
-			return true;
-		}
-		/* Intersect to find an opaque surface, or record all transparent
-		 * surface hits.
-		 */
-#ifdef __KERNEL_CUDA__
-		Intersection *hits = kg->hits_stack;
-#else
-		Intersection hits_stack[SHADOW_STACK_MAX_HITS];
-		Intersection *hits = hits_stack;
-#endif
-		const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
-		uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
-#ifndef __KERNEL_GPU__
-		/* Prefer to use stack but use dynamic allocation if too deep max hits
-		 * we need max_hits + 1 storage space due to the logic in
-		 * scene_intersect_shadow_all which will first store and then check if
-		 * the limit is exceeded.
-		 *
-		 * Ignore this on GPU because of slow/unavailable malloc().
-		 */
-		if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
-			if(kg->transparent_shadow_intersections == NULL) {
-				kg->transparent_shadow_intersections =
-				    (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
+	/* Intersect to find an opaque surface, or record all transparent
+	 * surface hits.
+	 */
+	uint num_hits;
+	const bool blocked = scene_intersect_shadow_all(kg,
+	                                                ray,
+	                                                hits,
+	                                                max_hits,
+	                                                &num_hits);
+	/* If no opaque surface found but we did find transparent hits,
+	 * shade them.
+	 */
+	if(!blocked && num_hits > 0) {
+		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+		float3 Pend = ray->P + ray->D*ray->t;
+		float last_t = 0.0f;
+		int bounce = state->transparent_bounce;
+		Intersection *isect = hits;
+#    ifdef __VOLUME__
+		PathState ps = *state;
+#    endif
+		sort_intersections(hits, num_hits);
+		for(int hit = 0; hit < num_hits; hit++, isect++) {
+			/* Adjust intersection distance for moving ray forward. */
+			float new_t = isect->t;
+			isect->t -= last_t;
+			/* Skip hit if we did not move forward, step by step raytracing
+			 * would have skipped it as well then.
+			 */
+			if(last_t == new_t) {
+				continue;
 			}
-			hits = kg->transparent_shadow_intersections;
-		}
-#endif  /* __KERNEL_GPU__ */
-		uint num_hits;
-		blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits);
-		/* If no opaque surface found but we did find transparent hits,
-		 * shade them.
-		 */
-		if(!blocked && num_hits > 0) {
-			float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-			float3 Pend = ray->P + ray->D*ray->t;
-			float last_t = 0.0f;
-			int bounce = state->transparent_bounce;
-			Intersection *isect = hits;
-#ifdef __VOLUME__
-			PathState ps = *state;
-#endif
-			sort_intersections(hits, num_hits);
-			for(int hit = 0; hit < num_hits; hit++, isect++) {
-				/* Adjust intersection distance for moving ray forward. */
-				float new_t = isect->t;
-				isect->t -= last_t;
-				/* Skip hit if we did not move forward, step by step raytracing
-				 * would have skipped it as well then.
-				 */
-				if(last_t == new_t) {
-					continue;
-				}
-				last_t = new_t;
-				/* Attenuate the throughput. */
-				if(shadow_handle_transparent_isect(kg,
-				                                   shadow_sd,
-				                                   &ps,
-				                                   isect,
-				                                   ray,
-				                                   &throughput))
-				{
-					return true;
-				}
-				/* Move ray forward. */
-				ray->P = shadow_sd->P;
-				if(ray->t != FLT_MAX) {
-					ray->D = normalize_len(Pend - ray->P, &ray->t);
-				}
-				bounce++;
+			last_t = new_t;
+			/* Attenuate the throughput. */
+			if(shadow_handle_transparent_isect(kg,
+			                                   shadow_sd,
+			                                   &ps,
+			                                   isect,
+			                                   ray,
+			                                   &throughput))
+			{
+				return true;
 			}
-#ifdef __VOLUME__
-			/* Attenuation for last line segment towards light. */
-			if(ps.volume_stack[0].shader != SHADER_NONE) {
-				kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
+			/* Move ray forward. */
+			ray->P = shadow_sd->P;
+			if(ray->t != FLT_MAX) {
+				ray->D = normalize_len(Pend - ray->P, &ray->t);
 			}
-#endif
-			*shadow = throughput;
-			return is_zero(throughput);
+			bounce++;
 		}
+#    ifdef __VOLUME__
+		/* Attenuation for last line segment towards light. */
+		if(ps.volume_stack[0].shader != SHADER_NONE) {
+			kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
+		}
+#    endif
+		*shadow = throughput;
+		return is_zero(throughput);
 	}
-	else {
-		Intersection isect;
-		blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
-	}
-#ifdef __VOLUME__
+#    ifdef __VOLUME__
 	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
 		/* Apply attenuation from current volume shader/ */
 		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
 	}
-#endif
+#    endif
 	return blocked;
 }
-#endif  /* __SHADOW_RECORD_ALL__ */
 
-#ifndef __KERNEL_CPU__
+/* Here we do all device specific trickery before invoking actual traversal
+ * loop to help readability of the actual logic.
+ */
+ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
+                                               ShaderData *shadow_sd,
+                                               PathState *state,
+                                               Ray *ray,
+                                               uint max_hits,
+                                               float3 *shadow)
+{
+#    ifdef __KERNEL_CUDA__
+	Intersection *hits = kg->hits_stack;
+#    else
+	Intersection hits_stack[SHADOW_STACK_MAX_HITS];
+	Intersection *hits = hits_stack;
+#    endif
+#    ifndef __KERNEL_GPU__
+	/* Prefer to use stack but use dynamic allocation if too deep max hits
+	 * we need max_hits + 1 storage space due to the logic in
+	 * scene_intersect_shadow_all which will first store and then check if
+	 * the limit is exceeded.
+	 *
+	 * Ignore this on GPU because of slow/unavailable malloc().
+	 */
+	if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
+		if(kg->transparent_s

@@ Diff output truncated at 10240 characters. @@