[Bf-blender-cvs] [9ab259f] master: Cycles: shadow function optimization for transparent shadows (CPU only).

Brecht Van Lommel noreply at git.blender.org
Mon Apr 21 19:35:11 CEST 2014


Commit: 9ab259f55b67c6e3b8028d95b23e708c3bc0c6fd
Author: Brecht Van Lommel
Date:   Sat Apr 19 17:02:30 2014 +0200
https://developer.blender.org/rB9ab259f55b67c6e3b8028d95b23e708c3bc0c6fd

Cycles: shadow function optimization for transparent shadows (CPU only).

Old algorithm:

Raytrace from one transparent surface to the next step by step. To minimize
overhead in cases where we don't need transparent shadows, we first trace a
regular shadow ray. We check if the hit primitive was potentially transparent,
and only in that case start marching. this gives extra ray cast for the cases
were we do want transparency.

New algorithm:

We trace a single ray. If it hits any opaque surface, or more than a given
number of transparent surfaces is hit, then we consider the geometry to be
entirely blocked. If not, all transparent surfaces will be recorded and we
will shade them one by one to determine how much light is blocked. This all
happens in one scene intersection function.

Recording all hits works well in some cases but may be slower in others. If
we have many semi-transparent hairs, one intersection may be faster because
you'd be reinteresecting the same hairs a lot with each step otherwise. If
however there is mostly binary transparency then we may be recording many
unnecessary intersections when one of the first surfaces blocks all light.

We found that this helps quite nicely in some scenes, on koro.blend this can
give a 50% reduction in render time, on the pabellon barcelona scene and a
forest scene with transparent leaves it was 30%. Some other files rendered
maybe 1% or 2% slower, but this seems a reasonable tradeoff.

Differential Revision: https://developer.blender.org/D473

===================================================================

M	intern/cycles/kernel/geom/geom_bvh.h
A	intern/cycles/kernel/geom/geom_bvh_shadow.h
M	intern/cycles/kernel/geom/geom_bvh_subsurface.h
M	intern/cycles/kernel/geom/geom_bvh_traversal.h
M	intern/cycles/kernel/geom/geom_object.h
M	intern/cycles/kernel/kernel_shadow.h
M	intern/cycles/kernel/kernel_types.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 12ebc64..dd7c25d 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -93,6 +93,36 @@ CCL_NAMESPACE_BEGIN
 #include "geom_bvh_subsurface.h"
 #endif
 
+#if defined(__SHADOW_RECORD_ALL__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
+#include "geom_bvh_shadow.h"
+#endif
+
 /* to work around titan bug when using arrays instead of textures */
 #if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
 ccl_device_inline
@@ -185,6 +215,52 @@ uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection
 }
 #endif
 
+/* to work around titan bug when using arrays instead of textures */
+#ifdef __SHADOW_RECORD_ALL__
+#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
+{
+#ifdef __OBJECT_MOTION__
+	if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+		if(kernel_data.bvh.have_curves)
+			return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits);
+#endif /* __HAIR__ */
+
+		return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits);
+	}
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__ 
+	if(kernel_data.bvh.have_curves)
+		return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits);
+#endif /* __HAIR__ */
+
+#ifdef __KERNEL_CPU__
+
+#ifdef __INSTANCING__
+	if(kernel_data.bvh.have_instancing)
+		return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
+#endif /* __INSTANCING__ */
+
+	return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
+#else /* __KERNEL_CPU__ */
+
+#ifdef __INSTANCING__
+	return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
+#else
+	return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
+#endif /* __INSTANCING__ */
+
+#endif /* __KERNEL_CPU__ */
+}
+#endif
+
+
 /* Ray offset to avoid self intersection.
  *
  * This function should be used to compute a modified ray start position for
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
similarity index 74%
copy from intern/cycles/kernel/geom/geom_bvh_traversal.h
copy to intern/cycles/kernel/geom/geom_bvh_shadow.h
index 566aa42..6e981fb 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -23,7 +23,6 @@
  *
  * BVH_INSTANCING: object instancing
  * BVH_HAIR: hair curve rendering
- * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
  * BVH_MOTION: motion blur rendering
  *
  */
@@ -31,15 +30,9 @@
 #define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
 
 ccl_device bool BVH_FUNCTION_NAME
-(KernelGlobals *kg, const Ray *ray, Intersection *isect, const uint visibility
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-, uint *lcg_state, float difl, float extmax
-#endif
-)
+(KernelGlobals *kg, const Ray *ray, Intersection *isect_array, const uint max_hits, uint *num_hits)
 {
 	/* todo:
-	 * - test if pushing distance on the stack helps (for non shadow rays)
-	 * - separate version for shadow rays
 	 * - likely and unlikely for if() statements
 	 * - test restrict attribute for pointers
 	 */
@@ -58,16 +51,18 @@ ccl_device bool BVH_FUNCTION_NAME
 	float3 dir = bvh_clamp_direction(ray->D);
 	float3 idir = bvh_inverse_direction(dir);
 	int object = OBJECT_NONE;
+	float isect_t = tmax;
 
 #if FEATURE(BVH_MOTION)
 	Transform ob_tfm;
 #endif
 
-	isect->t = tmax;
-	isect->object = OBJECT_NONE;
-	isect->prim = PRIM_NONE;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
+#if FEATURE(BVH_INSTANCING)
+	int num_hits_in_instance = 0;
+#endif
+
+	*num_hits = 0;
+	isect_array->t = tmax;
 
 #if defined(__KERNEL_SSE2__)
 	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
@@ -81,7 +76,7 @@ ccl_device bool BVH_FUNCTION_NAME
 	Psplat[1] = _mm_set_ps1(P.y);
 	Psplat[2] = _mm_set_ps1(P.z);
 
-	__m128 tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+	__m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
 
 	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
@@ -98,7 +93,7 @@ ccl_device bool BVH_FUNCTION_NAME
 
 #if !defined(__KERNEL_SSE2__)
 				/* Intersect two child bounding boxes, non-SSE version */
-				float t = isect->t;
+				float t = isect_t;
 
 				/* fetch node data */
 				float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
@@ -125,26 +120,11 @@ ccl_device bool BVH_FUNCTION_NAME
 				NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
 				NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-				if(difl != 0.0f) {
-					float hdiff = 1.0f + difl;
-					float ldiff = 1.0f - difl;
-					if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
-						c0min = max(ldiff * c0min, c0min - extmax);
-						c0max = min(hdiff * c0max, c0max + extmax);
-					}
-					if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
-						c1min = max(ldiff * c1min, c1min - extmax);
-						c1max = min(hdiff * c1max, c1max + extmax);
-					}
-				}
-#endif
-
 				/* decide which nodes to traverse next */
 #ifdef __VISIBILITY_FLAG__
 				/* this visibility test gives a 5% performance hit, how to solve? */
-				traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
-				traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
+				traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
+				traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
 #else
 				traverseChild0 = (c0max >= c0min);
 				traverseChild1 = (c1max >= c1min);
@@ -165,33 +145,13 @@ ccl_device bool BVH_FUNCTION_NAME
 				/* calculate { c0min, c1min, -c0max, -c1max} */
 				__m128 minmax = _mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat));
 				const __m128 tminmax = _mm_xor_ps(minmax, pn);
-
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-				if(difl != 0.0f) {
-					float4 *tminmaxview = (float4*)&tminmax;
-					float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
-					float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
-
-					float hdiff = 1.0f + difl;
-					float ldiff = 1.0f - difl;
-					if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
-						c0min = max(ldiff * c0min, c0min - extmax);
-						c0max = min(hdiff * c0max, c0max + extmax);
-					}
-					if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
-						c1min = max(ldiff * c1min, c1min - extmax);
-						c1max = min(hdiff * c1max, c1max + extmax);
-					}
-				}
-#endif
-
 				const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax));
 
 				/* decide which nodes to traverse next */
 #ifdef __VISIBILITY_FLAG__
 				/* this visibility test gives a 5% performance hit, how to solve? */
-				traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
-				traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+				traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
+				traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
 #else
 				traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
 				traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
@@ -252,30 +212,26 @@ ccl_device bool BVH_FUNCTION_NAME
 						bool hit;
 						uint type = kernel_tex_fetch(__prim_type, primAddr);
 
+						/* todo: specialized intersect functions which don't fill in
+						 * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+						 * might give a few % performance improvement */
+
 						switch(type & PRIMITIVE_ALL) {
 							case PRIMITIVE_TRIANGLE: {
-								hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
+								hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
 								break;
 							}
 							case PRIMITIVE_MOTION_TRIANGLE: {
-								hit = motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+								hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
 								break;
 							}
 #if FEATURE(BVH_HAIR)
 							case PRIMITIVE_CURVE:
 							case PRIMITIVE_MOTION_CURVE: {
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
-									hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+									hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
 								else
-									hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
-#else
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
-									hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type);
-								else
-									hit = bvh_curve_intersect(kg, isect, P

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list