[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [22269] branches/soc-2009-jaguarandi/ source/blender/render/intern/raytrace: *Process leafs as soon as found instead of pushing them on stack for later evaluation (leads to early exits)

André Pinto andresusanopinto at gmail.com
Thu Aug 6 19:45:51 CEST 2009


Revision: 22269
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=22269
Author:   jaguarandi
Date:     2009-08-06 19:45:51 +0200 (Thu, 06 Aug 2009)

Log Message:
-----------
*Process leafs as soon as found instead of pushing them on stack for later evaluation (leads to early exits)
(this is mixed with some simd code commit, althouth no simd is being used up to the moment)

Modified Paths:
--------------
    branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/bvh.h
    branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/rayobject_vbvh.cpp

Modified: branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/bvh.h
===================================================================
--- branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/bvh.h	2009-08-06 17:00:37 UTC (rev 22268)
+++ branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/bvh.h	2009-08-06 17:45:51 UTC (rev 22269)
@@ -26,48 +26,25 @@
  *
  * ***** END GPL LICENSE BLOCK *****
  */
-/* 
-template<int SIZE>
-struct BBGroup
-{
-	float bb[6][SIZE];
-};
+#include <xmmintrin.h>
 
-
-static inline int test_bb_group(BBGroup<4> *bb_group, Isect *isec)
+inline int test_bb_group4(__m128 *bb_group, Isect *isec)
 {
+/*
 	const float *bb = _bb;
 	__m128 tmin={0}, tmax = {isec->labda};
 
-	tmin = _mm_max_ps(tmin, _mm_mul_ps( _mm_sub_ps( group->bb[isec->bv_index[0]], isec->sse_start[0] ), isec->sse_idot_axis[0]) );
-	tmax = _mm_min_ps(tmax, _mm_mul_ps( _mm_sub_ps( group->bb[isec->bv_index[1]], isec->sse_start[0] ), isec->sse_idot_axis[0]) );	
-	tmin = _mm_max_ps(tmin, _mm_mul_ps( _mm_sub_ps( group->bb[isec->bv_index[2]], isec->sse_start[1] ), isec->sse_idot_axis[1]) );
-	tmax = _mm_min_ps(tmax, _mm_mul_ps( _mm_sub_ps( group->bb[isec->bv_index[3]], isec->sse_start[1] ), isec->sse_idot_axis[1]) );
-	tmin = _mm_max_ps(tmin, _mm_mul_ps( _mm_sub_ps( group->bb[isec->bv_index[4]], isec->sse_start[2] ), isec->sse_idot_axis[2]) );
-	tmax = _mm_min_ps(tmax, _mm_mul_ps( _mm_sub_ps( group->bb[isec->bv_index[5]], isec->sse_start[2] ), isec->sse_idot_axis[2]) );
-	
-	return _mm_movemask_ps(_mm_cmpge_ps(tmax, tmin));
+	tmin = _mm_max_ps(tmin, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[0]], isec->sse_start[0] ), isec->sse_idot_axis[0]) );
+	tmax = _mm_min_ps(tmax, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[1]], isec->sse_start[0] ), isec->sse_idot_axis[0]) );	
+	tmin = _mm_max_ps(tmin, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[2]], isec->sse_start[1] ), isec->sse_idot_axis[1]) );
+	tmax = _mm_min_ps(tmax, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[3]], isec->sse_start[1] ), isec->sse_idot_axis[1]) );
+	tmin = _mm_max_ps(tmin, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[4]], isec->sse_start[2] ), isec->sse_idot_axis[2]) );
+	tmax = _mm_min_ps(tmax, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[5]], isec->sse_start[2] ), isec->sse_idot_axis[2]) );
+	*/
+	return 4; //_mm_movemask_ps(_mm_cmpge_ps(tmax, tmin));
 }
 
-static inline int test_bb_group(BBGroup<1> *bb_group, Isect *isec)
-{
-	float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
-	float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
-	float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
-	float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
-	float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
-	float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
 
-	RE_RC_COUNT(isec->raycounter->bb.test);
-	if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
-	if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0;
-	if(t1x > isec->labda || t1y > isec->labda || t1z > isec->labda) return 0;
-
-	RE_RC_COUNT(isec->raycounter->bb.hit);
-	return 1;
-}
-*/
-
 /* bvh tree generics */
 template<class Tree> static int bvh_intersect(Tree *obj, Isect *isec);
 
@@ -163,9 +140,108 @@
 		}
 	}
 	return hit;
+}
 
+
+/*
+ * Generic SIMD bvh recursion
+ * this was created to be able to use any simd (with the cost of some memmoves)
+ * it can take advantage of any SIMD width and doens't needs any special tree care
+ */
+template<class Node,int MAX_STACK_SIZE,bool TEST_ROOT>
+static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
+{
+	Node *stack[MAX_STACK_SIZE];
+	__m128 t_bb[6];
+	Node * t_node[4];
+
+	int hit = 0, stack_pos = 0;
+		
+	if(!TEST_ROOT)
+	{
+		if(RayObject_isAligned(root))
+		{
+			if(RayObject_isAligned(root->child))
+				bvh_node_push_childs(root, isec, stack, stack_pos);
+			else
+				return RE_rayobject_intersect( (RayObject*)root->child, isec);
+		}
+		else
+			return RE_rayobject_intersect( (RayObject*)root, isec);
+	}
+	else
+	{
+		if(RayObject_isAligned(root))
+			stack[stack_pos++] = root;
+		else
+			return RE_rayobject_intersect( (RayObject*)root, isec);
+	}
+
+	while(true)
+	{
+		//Use SIMD 4
+		if(0 && stack_pos >= 4)
+		{
+			stack_pos -= 4;
+			for(int i=0; i<4; i++)
+			{
+				Node *t = stack[stack_pos+i];
+				assert(RayObject_isAligned(t));
+				
+				float *bb = ((float*)t_bb)+i;
+				bb[4*0] = t->bb[0];
+				bb[4*1] = t->bb[1];
+				bb[4*2] = t->bb[2];
+				bb[4*3] = t->bb[3];
+				bb[4*4] = t->bb[4];
+				bb[4*5] = t->bb[5];
+				t_node[i] = t->child;
+			}
+			int res = test_bb_group4( t_bb, isec );
+
+			for(int i=0; i<4; i++)
+			if(res & (1<<i))
+			{
+				if(RayObject_isAligned(t_node[i]))
+				{
+					for(Node *t=t_node[i]; t; t=t->sibling)
+					{
+						assert(stack_pos < MAX_STACK_SIZE);
+						stack[stack_pos++] = t;
+					}
+				}
+				else
+				{
+					hit |= RE_rayobject_intersect( (RayObject*)t_node[i], isec);
+					if(hit && isec->mode == RE_RAY_SHADOW) return hit;				
+				}	
+			}
+		}
+		else if(stack_pos > 0)
+		{	
+			Node *node = stack[--stack_pos];
+			assert(RayObject_isAligned(node));
+			
+			if(bvh_node_hit_test(node,isec))
+			{
+				if(RayObject_isAligned(node->child))
+				{
+					bvh_node_push_childs(node, isec, stack, stack_pos);
+					assert(stack_pos <= MAX_STACK_SIZE);
+				}
+				else
+				{
+					hit |= RE_rayobject_intersect( (RayObject*)node->child, isec);
+					if(hit && isec->mode == RE_RAY_SHADOW) return hit;
+				}
+			}
+		}
+		else break;
+	}
+	return hit;
 }
 
+
 /*
  * recursively transverse a BVH looking for a rayhit using system stack
  */

Modified: branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/rayobject_vbvh.cpp
===================================================================
--- branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/rayobject_vbvh.cpp	2009-08-06 17:00:37 UTC (rev 22268)
+++ branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/rayobject_vbvh.cpp	2009-08-06 17:45:51 UTC (rev 22269)
@@ -48,7 +48,7 @@
 
 #define RAY_BB_TEST_COST (0.2f)
 #define DFS_STACK_SIZE	256
-#define DYNAMIC_ALLOC
+//#define DYNAMIC_ALLOC_BB
 
 //#define rtbuild_split	rtbuild_mean_split_largest_axis		/* objects mean split on the longest axis, childs BB are allowed to overlap */
 //#define rtbuild_split	rtbuild_median_split_largest_axis	/* space median split on the longest axis, childs BB are allowed to overlap */
@@ -59,7 +59,11 @@
 	BVHNode *child;
 	BVHNode *sibling;
 
+#ifdef DYNAMIC_ALLOC_BB
+	float *bb;
+#else
 	float	bb[6];
+#endif
 };
 
 struct BVHTree
@@ -92,9 +96,11 @@
 		while(child)
 		{
 			//Skips BB tests on primitives
+/*
 			if(!RayObject_isAligned(child->child))
 				stack[stack_pos++] = child->child;
 			else
+*/
 				stack[stack_pos++] = child;
 				
 			child = child->sibling;
@@ -111,6 +117,9 @@
 	node->sibling = NULL;
 	node->child   = NULL;
 
+#ifdef DYNAMIC_ALLOC_BB
+	node->bb = (float*)BLI_memarena_alloc(tree->node_arena, 6*sizeof(float));
+#endif
 	assert(RayObject_isAligned(node));
 	return node;
 }
@@ -275,6 +284,28 @@
 	}
 }
 
+template<class Node>
+float bvh_refit(Node *node)
+{
+	if(RayObject_isAligned(node)) return 0;	
+	if(RayObject_isAligned(node->child)) return 0;
+	
+	float total = 0;
+	
+	for(Node *child = node->child; RayObject_isAligned(child) && child; child = child->sibling)
+		total += bvh_refit(child);
+		
+	float old_area = bb_area(node->bb, node->bb+3);
+	INIT_MINMAX(node->bb, node->bb+3);
+	for(Node *child = node->child; RayObject_isAligned(child) && child; child = child->sibling)
+	{
+		DO_MIN(child->bb, node->bb);
+		DO_MIN(child->bb+3, node->bb+3);
+	}
+	total += old_area - bb_area(node->bb, node->bb+3);
+	return total;
+}
+
 template<>
 void bvh_done<BVHTree>(BVHTree *obj)
 {
@@ -292,6 +323,7 @@
 	reorganize(obj->root);
 	remove_useless(obj->root, &obj->root);
 	pushup(obj->root);
+	printf("refit: %f\n", bvh_refit(obj->root) );
 	pushdown(obj->root);
 //	obj->root = memory_rearrange(obj->root);
 	obj->cost = 1.0;
@@ -313,7 +345,7 @@
 		{
 			BVHNode *node = (BVHNode*)lcts->stack[i];
 			if(RayObject_isAligned(node))
-				hit |= bvh_node_stack_raycast<BVHNode,StackSize,true>(node, isec);
+				hit |= bvh_node_stack_raycast_simd<BVHNode,StackSize,true>(node, isec);
 			else
 				hit |= RE_rayobject_intersect( (RayObject*)node, isec );
 			
@@ -326,7 +358,7 @@
 	else
 	{
 		if(RayObject_isAligned(obj->root))
-			return bvh_node_stack_raycast<BVHNode,StackSize,false>(obj->root, isec);
+			return bvh_node_stack_raycast_simd<BVHNode,StackSize,false>(obj->root, isec);
 		else
 			return RE_rayobject_intersect( (RayObject*) obj->root, isec );
 	}





More information about the Bf-blender-cvs mailing list