[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [22374] branches/soc-2009-jaguarandi/ source/blender/render/intern/raytrace/svbvh.h: fix instance support when using SIMD

Andre Susano Pinto andresusanopinto at gmail.com
Tue Aug 11 19:28:58 CEST 2009


Revision: 22374
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=22374
Author:   jaguarandi
Date:     2009-08-11 19:28:58 +0200 (Tue, 11 Aug 2009)

Log Message:
-----------
fix instance support when using SIMD

Modified Paths:
--------------
    branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/svbvh.h

Modified: branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/svbvh.h
===================================================================
--- branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/svbvh.h	2009-08-11 16:51:17 UTC (rev 22373)
+++ branches/soc-2009-jaguarandi/source/blender/render/intern/raytrace/svbvh.h	2009-08-11 17:28:58 UTC (rev 22374)
@@ -39,7 +39,7 @@
 	int nchilds;
 
 	//Array of bb, array of childs
-	float *bb;
+	float *child_bb;
 	SVBVHNode **child;
 };
 
@@ -57,7 +57,7 @@
 		int i=0;
 		while(i+4 <= node->nchilds)
 		{
-			int res = test_bb_group4( (__m128*) (node->bb+6*i), isec );
+			int res = test_bb_group4( (__m128*) (node->child_bb+6*i), isec );
 			RE_RC_COUNT(isec->raycounter->bb.test);
 			RE_RC_COUNT(isec->raycounter->bb.test);
 			RE_RC_COUNT(isec->raycounter->bb.test);
@@ -72,7 +72,7 @@
 		}
 		while(i < node->nchilds)
 		{
-			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
+			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i))
 				stack[stack_pos++] = node->child[i];
 			i++;
 		}
@@ -81,12 +81,51 @@
 	{
 		for(int i=0; i<node->nchilds; i++)
 		{
-			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
+			if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i))
 				stack[stack_pos++] = node->child[i];
 		}
 	}
 }
 
+template<>
+void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max)
+{
+	if(is_leaf(node))
+	{
+		RE_rayobject_merge_bb( (RayObject*)node, min, max);
+	}
+	else
+	{
+		int i=0;
+		while(SVBVH_SIMD && i+4 <= node->nchilds)
+		{
+			float *res = node->child_bb + 6*i;
+			for(int j=0; j<3; j++)
+			{
+				min[j] = MIN2(min[j], res[4*j+0]);
+				min[j] = MIN2(min[j], res[4*j+1]);
+				min[j] = MIN2(min[j], res[4*j+2]);
+				min[j] = MIN2(min[j], res[4*j+3]);
+			}
+			for(int j=0; j<3; j++)
+			{
+				max[j] = MAX2(max[j], res[4*(j+3)+0]);
+				max[j] = MAX2(max[j], res[4*(j+3)+1]);
+				max[j] = MAX2(max[j], res[4*(j+3)+2]);
+				max[j] = MAX2(max[j], res[4*(j+3)+3]);
+			}
+			
+			i += 4;
+		}
+
+		for(; i<node->nchilds; i++)
+		{
+			DO_MIN(node->child_bb+6*i  , min);
+			DO_MAX(node->child_bb+3+6*i, max);
+		}
+	}
+}
+
 struct SVBVHTree
 {
 	RayObject rayobj;
@@ -131,7 +170,7 @@
 	{
 		SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode));
 		node->nchilds = nchilds;
-		node->bb   = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
+		node->child_bb   = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
 		node->child= (SVBVHNode**)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode*)*nchilds);
 
 		return node;
@@ -148,8 +187,8 @@
 		while(i+4 <= node->nchilds)
 		{
 			float vec_tmp[4*6];
-			float *res = node->bb+6*i;
-			std::copy( node->bb+6*i, node->bb+6*(i+4), vec_tmp);
+			float *res = node->child_bb+6*i;
+			std::copy( res, res+6*4, vec_tmp);
 			
 			for(int j=0; j<6; j++)
 			{
@@ -167,18 +206,18 @@
 			//memmoves could be memory alligned
 			const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
 			const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
-			_mm_store_ps( node->bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
-			_mm_store_ps( node->bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
+			_mm_store_ps( node->child_bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
+			_mm_store_ps( node->child_bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
 
 			const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
 			const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
-			_mm_store_ps( node->bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
-			_mm_store_ps( node->bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
+			_mm_store_ps( node->child_bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
+			_mm_store_ps( node->child_bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
 
 			const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
 			const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
-			_mm_store_ps( node->bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
-			_mm_store_ps( node->bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
+			_mm_store_ps( node->child_bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
+			_mm_store_ps( node->child_bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
  */
 			
 			i += 4;
@@ -210,12 +249,12 @@
 				float bb[6];
 				INIT_MINMAX(bb, bb+3);
 				RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3);
-				copy_bb(node->bb+i*6, bb);
+				copy_bb(node->child_bb+i*6, bb);
 				break;
 			}
 			else
 			{
-				copy_bb(node->bb+i*6, o_child->bb);
+				copy_bb(node->child_bb+i*6, o_child->bb);
 			}
 		}
 		assert( i == 0 );





More information about the Bf-blender-cvs mailing list