[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [29529] branches/render25/source/blender/ render/intern/raytrace: Render Branch: some optimization tweaks that need more testing on more

Brecht Van Lommel brecht at blender.org
Thu Jun 17 19:04:42 CEST 2010


Revision: 29529
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=29529
Author:   blendix
Date:     2010-06-17 19:04:42 +0200 (Thu, 17 Jun 2010)

Log Message:
-----------
Render Branch: some optimization tweaks that need more testing on more
scenes, but seem to give a 15% performance improvement in tests here.

* leave out logf() factor in SAH; this makes the tree build quicker, with no
  noticeable influence on raytracing performance?
* set max children to 4; this simplifies the traversal code a bit, and also
  seems to help slightly in general.

Modified Paths:
--------------
    branches/render25/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp
    branches/render25/source/blender/render/intern/raytrace/reorganize.h
    branches/render25/source/blender/render/intern/raytrace/svbvh.h

Modified: branches/render25/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp
===================================================================
--- branches/render25/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp	2010-06-17 15:59:02 UTC (rev 29528)
+++ branches/render25/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp	2010-06-17 17:04:42 UTC (rev 29529)
@@ -371,8 +371,12 @@
 				//Worst case heuristic (cost of each child is linear)
 				float hcost, left_side, right_side;
 				
-				left_side = bb_area(sweep_left.bb, sweep_left.bb+3)*(sweep_left.cost+logf((float)i));
-				right_side= bb_area(sweep[i].bb, sweep[i].bb+3)*(sweep[i].cost+logf((float)size-i));
+				// not using log seems to have no impact on raytracing perf, but
+				// makes tree construction quicker, left out for now to test (brecht)
+				// left_side = bb_area(sweep_left.bb, sweep_left.bb+3)*(sweep_left.cost+logf((float)i));
+				// right_side= bb_area(sweep[i].bb, sweep[i].bb+3)*(sweep[i].cost+logf((float)size-i));
+				left_side = bb_area(sweep_left.bb, sweep_left.bb+3)*(sweep_left.cost);
+				right_side= bb_area(sweep[i].bb, sweep[i].bb+3)*(sweep[i].cost);
 				hcost = left_side+right_side;
 
 				assert(left_side >= 0);

Modified: branches/render25/source/blender/render/intern/raytrace/reorganize.h
===================================================================
--- branches/render25/source/blender/render/intern/raytrace/reorganize.h	2010-06-17 15:59:02 UTC (rev 29528)
+++ branches/render25/source/blender/render/intern/raytrace/reorganize.h	2010-06-17 17:04:42 UTC (rev 29529)
@@ -299,7 +299,7 @@
  * with the purpose to reduce the expected cost (eg.: number of BB tests).
  */
 #include <vector>
-#define MAX_CUT_SIZE	16
+#define MAX_CUT_SIZE		4				/* svbvh assumes max 4 children! */
 #define MAX_OPTIMIZE_CHILDS	MAX_CUT_SIZE
 
 struct OVBVHNode

Modified: branches/render25/source/blender/render/intern/raytrace/svbvh.h
===================================================================
--- branches/render25/source/blender/render/intern/raytrace/svbvh.h	2010-06-17 15:59:02 UTC (rev 29528)
+++ branches/render25/source/blender/render/intern/raytrace/svbvh.h	2010-06-17 17:04:42 UTC (rev 29529)
@@ -102,7 +102,7 @@
 	return 1;
 }
 
-static bool svbvh_node_is_leaf(SVBVHNode *node)
+static bool svbvh_node_is_leaf(const SVBVHNode *node)
 {
 	return !RE_rayobject_isAligned(node);
 }
@@ -110,7 +110,7 @@
 template<int MAX_STACK_SIZE, bool SHADOW>
 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec)
 {
-	SVBVHNode *stack[MAX_STACK_SIZE], *node;
+	const SVBVHNode *stack[MAX_STACK_SIZE], *node;
 	int hit = 0, stack_pos = 0;
 
 	stack[stack_pos++] = root;
@@ -121,26 +121,29 @@
 
 		if(!svbvh_node_is_leaf(node))
 		{
-			float *child_bb= node->child_bb;
-			SVBVHNode **child= node->child;
-			int i=0, nchilds= node->nchilds;
+			int nchilds= node->nchilds;
 
-			while(i+4 <= nchilds) {
-				int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb + i*6)));
+			if(nchilds == 4) {
+				const float *child_bb= node->child_bb;
+				int res = svbvh_bb_intersect_test_simd4(isec, ((const __m128*) (child_bb)));
+				SVBVHNode **child= node->child;
 
 				RE_RC_COUNT(isec->raycounter->simd_bb.test);
 
-				if(res & 1) { stack[stack_pos++] = child[i+0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
-				if(res & 2) { stack[stack_pos++] = child[i+1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
-				if(res & 4) { stack[stack_pos++] = child[i+2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
-				if(res & 8) { stack[stack_pos++] = child[i+3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+			}
+			else {
+				const float *child_bb= node->child_bb;
+				SVBVHNode **child= node->child;
+				int i;
 
-				i += 4;
+				for(i=0; i<nchilds; i++)
+					if(svbvh_bb_intersect_test(isec, (const float*)child_bb+6*i))
+						stack[stack_pos++] = child[i];
 			}
-
-			for(; i<nchilds; i++)
-				if(svbvh_bb_intersect_test(isec, (float*)child_bb+6*i))
-					stack[stack_pos++] = child[i];
 		}
 		else
 		{





More information about the Bf-blender-cvs mailing list