[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [18017] branches/sim_physics/source/blender/render/intern: Volume rendering:

Mon Dec 22 21:28:02 CET 2008

Revision: 18017
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=18017
Author:   broken
Date:     2008-12-22 21:28:02 +0100 (Mon, 22 Dec 2008)

Log Message:
-----------
Volume rendering:

* Multithreaded volume light cache

While the render process itself is multithreaded, the light cache pre-process
previously wasn't (I painfully noticed this the other week while rendering on
some borrowed eight-core nodes!). This commit adds threading, similar to the
tiled render: it divides the light cache's voxel grid into 3D parts and renders
them with the available threads.

This makes the most significant difference on shots where the light cache
pre-process is the bottleneck, i.e. shots with several lights, a high-resolution
light cache, or both. On this file (3 lights, light cache resolution 120), it now
renders in 27 seconds on my Core 2 Duo, compared to 49 seconds previously.

http://mke3.net/blender/devel/rendering/volumetrics/threaded_cache.jpg

Modified Paths:
--------------
    branches/sim_physics/source/blender/render/intern/include/render_types.h
    branches/sim_physics/source/blender/render/intern/source/volume_precache.c

Modified: branches/sim_physics/source/blender/render/intern/include/render_types.h
===================================================================

--- branches/sim_physics/source/blender/render/intern/include/render_types.h	2008-12-22 19:31:23 UTC (rev 18016)
+++ branches/sim_physics/source/blender/render/intern/include/render_types.h	2008-12-22 20:28:02 UTC (rev 18017)
@@ -202,9 +202,9 @@
 
 	struct Object *excludeob;
 	
-	ListBase vol_precache_obs;
 	ListBase render_volumes_inside;
 	ListBase volumes;
+	ListBase volume_precache_parts;
 
 	/* arena for allocating data for use during render, for
 		* example dynamic TFaces to go in the VlakRen structure.
@@ -404,13 +404,6 @@
 
 /* ------------------------------------------------------------------------- */
 
-typedef struct VolPrecache
-{
-	struct VolPrecache *next, *prev;
-	struct Material *ma;
-	struct ObjectRen *obr;
-} VolPrecache;
-
 typedef struct VolumeOb
 {
 	struct VolumeOb *next, *prev;
@@ -423,6 +416,23 @@
 	struct Material *ma;
 } MatInside;
 
+typedef struct VolPrecachePart
+{
+	struct VolPrecachePart *next, *prev;
+	struct RayTree *tree;
+	struct ShadeInput *shi;
+	struct ObjectInstanceRen *obi;
+	int num;
+	int minx, maxx;
+	int miny, maxy;
+	int minz, maxz;
+	int res;
+	float bbmin[3];
+	float voxel[3];
+	int working, done;
+} VolPrecachePart;
+
+
 /* ------------------------------------------------------------------------- */
 
 struct LampRen;

Modified: branches/sim_physics/source/blender/render/intern/source/volume_precache.c
===================================================================
--- branches/sim_physics/source/blender/render/intern/source/volume_precache.c	2008-12-22 19:31:23 UTC (rev 18016)
+++ branches/sim_physics/source/blender/render/intern/source/volume_precache.c	2008-12-22 20:28:02 UTC (rev 18017)
@@ -35,6 +35,7 @@
 
 #include "BLI_blenlib.h"
 #include "BLI_arithb.h"
+#include "BLI_threads.h"
 
 #include "PIL_time.h"
 
@@ -47,6 +48,9 @@
 #include "renderdatabase.h"
 #include "volumetric.h"
 
+
+#include "BKE_global.h"
+
 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
 /* only to be used here in this file, it's for speed */
@@ -207,11 +211,7 @@
 	}
 }
 
-/* Precache a volume into a 3D voxel grid.
- * The voxel grid is stored in the ObjectInstanceRen, 
- * in camera space, aligned with the ObjectRen's bounding box.
- * Resolution is defined by the user.
- */
+
 void vol_precache_objectinstance(Render *re, ObjectInstanceRen *obi, Material *ma, float *bbmin, float *bbmax)
 {
 	int x, y, z;
@@ -325,29 +325,20 @@
 
 }
 
-#if 0
-typedef struct VolPrecachePart {
-	struct VolPrecachePart *next, *prev;
-	int num;
-	int minx, maxx;
-	int miny, maxy;
-	int minz, maxz;
-	int res;
-	float bbmin[3], voxel[3];
-	struct RayTree *tree;
-	struct ShadeInput *shi;
-	struct ObjectInstanceRen *obi;
-	int done;
-} VolPrecachePart;
-
+#if 0 // debug stuff
 static void *vol_precache_part_test(void *data)
 {
-	VolPrecachePart *vpt =  (VolPrecachePart *)data;
+	VolPrecachePart *pa = data;
 
-	printf("part number: %d \n", vpt->num);
+	printf("part number: %d \n", pa->num);
+	printf("done: %d \n", pa->done);
+	printf("x min: %d   x max: %d \n", pa->minx, pa->maxx);
+	printf("y min: %d   y max: %d \n", pa->miny, pa->maxy);
+	printf("z min: %d   z max: %d \n", pa->minz, pa->maxz);
 
-	return 0;
+	return NULL;
 }
+#endif
 
 /* Iterate over the 3d voxel grid, and fill the voxels with scattering information
  *
@@ -357,28 +348,24 @@
  */
 static void *vol_precache_part(void *data)
 {
-	VolPrecachePart *vpt =  (VolPrecachePart *)data;
-	ObjectInstanceRen *obi = vpt->obi;
-	RayTree *tree = vpt->tree;
-	ShadeInput *shi = vpt->shi;
-	float scatter_col[3] = {0.f, 0.f, 0.f};
+	VolPrecachePart *pa =  (VolPrecachePart *)data;
+	ObjectInstanceRen *obi = pa->obi;
+	RayTree *tree = pa->tree;
+	ShadeInput *shi = pa->shi;
+	float density, scatter_col[3] = {0.f, 0.f, 0.f};
 	float co[3];
 	int x, y, z;
-	const int res=vpt->res, res_2=vpt->res*vpt->res, res_3=vpt->res*vpt->res*vpt->res;
+	const int res=pa->res, res_2=pa->res*pa->res, res_3=pa->res*pa->res*pa->res;
 	const float stepsize = vol_get_stepsize(shi, STEPSIZE_VIEW);
-	
-	res = vpt->res;
-	res_2 = res*res;
-	res_3 = res*res*res;
-	
-	for (x= vpt->minx; x < vpt->maxx; x++) {
-		co[0] = vpt->bbmin[0] + (vpt->voxel[0] * x);
+
+	for (x= pa->minx; x < pa->maxx; x++) {
+		co[0] = pa->bbmin[0] + (pa->voxel[0] * x);
 		
-		for (y= vpt->miny; y < vpt->maxy; y++) {
-			co[1] = vpt->bbmin[1] + (vpt->voxel[1] * y);
+		for (y= pa->miny; y < pa->maxy; y++) {
+			co[1] = pa->bbmin[1] + (pa->voxel[1] * y);
 			
-			for (z=vpt->minz; z < vpt->maxz; z++) {
-				co[2] = vpt->bbmin[2] + (vpt->voxel[2] * z);
+			for (z=pa->minz; z < pa->maxz; z++) {
+				co[2] = pa->bbmin[2] + (pa->voxel[2] * z);
 			
 				// don't bother if the point is not inside the volume mesh
 				if (!point_inside_obi(tree, obi, co)) {
@@ -397,14 +384,17 @@
 		}
 	}
 	
+	pa->done = 1;
+	
 	return 0;
 }
 
+
 static void precache_setup_shadeinput(Render *re, ObjectInstanceRen *obi, Material *ma, ShadeInput *shi)
 {
 	float view[3] = {0.0,0.0,-1.0};
 	
-	memset(&shi, 0, sizeof(ShadeInput)); 
+	memset(shi, 0, sizeof(ShadeInput)); 
 	shi->depth= 1;
 	shi->mask= 1;
 	shi->mat = ma;
@@ -417,61 +407,102 @@
 	VECCOPY(shi->view, view);
 }
 
-static void precache_init_parts(ListBase *precache_parts, RayTree *tree, ShadeInput *shi, ObjectInstanceRen *obi, float *bbmin, float *bbmax, int res)
+static void precache_init_parts(Render *re, RayTree *tree, ShadeInput *shi, ObjectInstanceRen *obi, float *bbmin, float *bbmax, int res, int totthread, int *parts)
 {
-	int i;
+	int i=0, x, y, z;
 	float voxel[3];
-
+	int sizex, sizey, sizez;
+	int minx, maxx;
+	int miny, maxy;
+	int minz, maxz;
+	
+	BLI_freelistN(&re->volume_precache_parts);
+	
+	/* currently we just subdivide the box, number of threads per side */
+	parts[0] = parts[1] = parts[2] = totthread;
+	
 	VecSubf(voxel, bbmax, bbmin);
 	if ((voxel[0] < FLT_EPSILON) || (voxel[1] < FLT_EPSILON) || (voxel[2] < FLT_EPSILON))
 		return;
 	VecMulf(voxel, 1.0f/res);
 
-	for(i=0; i < totparts; i++) {
-		VolPrecachePart *pa= MEM_callocN(sizeof(VolPrecachePart), "new precache part");
-	
-		pa->done = 0;
-		pa->num = i;
+	for (x=0; x < parts[0]; x++) {
+		sizex = ceil(res / (float)parts[0]);
+		minx = x * sizex;
+		maxx = minx + sizex;
+		maxx = (maxx>res)?res:maxx;
 		
-		pa->res = res;
-		VECCOPY(pa->bbmin, bbmin);
-		VECCOPY(precache_parts[j].voxel, voxel);
-		precache_parts[j].tree = tree;
-		precache_parts[j].shi = shi;
-		precache_parts[j].obi = obi;
-		
-		BLI_addtail(precache_parts, pa);
+		for (y=0; y < parts[1]; y++) {
+			sizey = ceil(res / (float)parts[1]);
+			miny = y * sizey;
+			maxy = miny + sizey;
+			maxy = (maxy>res)?res:maxy;
+			
+			for (z=0; z < parts[2]; z++) {
+				VolPrecachePart *pa= MEM_callocN(sizeof(VolPrecachePart), "new precache part");
+				
+				sizez = ceil(res / (float)parts[2]);
+				minz = z * sizez;
+				maxz = minz + sizez;
+				maxz = (maxz>res)?res:maxz;
+						
+				pa->done = 0;
+				pa->working = 0;
+				
+				pa->num = i;
+				pa->tree = tree;
+				pa->shi = shi;
+				pa->obi = obi;
+				VECCOPY(pa->bbmin, bbmin);
+				VECCOPY(pa->voxel, voxel);
+				pa->res = res;
+				
+				pa->minx = minx; pa->maxx = maxx;
+				pa->miny = miny; pa->maxy = maxy;
+				pa->minz = minz; pa->maxz = maxz;
+				
+				
+				BLI_addtail(&re->volume_precache_parts, pa);
+				
+				i++;
+			}
+		}
 	}
+}
+
+static VolPrecachePart *precache_get_new_part(Render *re)
+{
+	VolPrecachePart *pa, *nextpa=NULL;
 	
+	for (pa = re->volume_precache_parts.first; pa; pa=pa->next)
+	{
+		if (pa->done==0 && pa->working==0) {
+			nextpa = pa;
+			break;
+		}
+	}
+
+	return nextpa;
 }
 
+/* Precache a volume into a 3D voxel grid.
+ * The voxel grid is stored in the ObjectInstanceRen, 
+ * in camera space, aligned with the ObjectRen's bounding box.
+ * Resolution is defined by the user.
+ */
 void vol_precache_objectinstance_threads(Render *re, ObjectInstanceRen *obi, Material *ma, float *bbmin, float *bbmax)
 {
-	int x, y, z;
-
-	float co[3], voxel[3], scatter_col[3];
+	VolPrecachePart *nextpa, *pa;
+	RayTree *tree;
 	ShadeInput shi;
+	ListBase threads;
+	const int res = ma->vol_precache_resolution;
+	int parts[3], totparts;
 	
-	float density;
-	float stepsize;
-	
-	float resf, res_3f;
-	int res_2, res_3;
-	
-	int edgeparts=2;
-	ListBase threads, precache_parts;
-	int cont= 1;
-	int xparts, yparts, zparts;
-	float part[3];
+	int caching=1, counter=0;
 	int totthread = re->r.threads;
-	int totparts = edgeparts*edgeparts*edgeparts;
-	VolPrecachePart *nextpa;
-	int j;
 	
-	float i = 1.0f;
 	double time, lasttime= PIL_check_seconds_timer();
-	const int res = ma->vol_precache_resolution;
-	RayTree *tree;
 	
 	R = *re;
 
@@ -480,119 +511,62 @@
 	tree = create_raytree_obi(obi, bbmin, bbmax);
 	if (!tree) return;
 	
-	obi->volume_precache = MEM_callocN(sizeof(float)*res_3*3, "volume light cache");
+	obi->volume_precache = MEM_callocN(sizeof(float)*res*res*res*3, "volume light cache");
 
 	/* Need a shadeinput to calculate scattering */
 	precache_setup_shadeinput(re, obi, ma, &shi);
-	precache_init_parts(&precache_parts, tree, shi, obi, bbmin, bbmax, res);
-
-	BLI_init_threads(&threads, vol_precache_part, totthread);
 	
-	nextpa = precache_get_new_part(precache_threads);
+	precache_init_parts(re, tree, &shi, obi, bbmin, bbmax, res, totthread, parts);
+	totparts = parts[0] * parts[1] * parts[2];
 	
-	while(cont) {
+	BLI_init_threads(&threads, vol_precache_part, totthread);
+	
+	while(caching) {
 
 		if(BLI_available_threads(&threads) && !(re->test_break())) {
-			
-			precache_get_new_part(
-			// get new job (data pointer)
-			for(j=0; j < totparts; j++) {
-				if (!precache_threads[j].done) {
-					// tag job 'processed
-					precache_threads[j].done = 1;
-				}
+			nextpa = precache_get_new_part(re);
+			if (nextpa) {
+				nextpa->working = 1;
+				BLI_insert_thread(&threads, nextpa);
 			}
-		
-			BLI_insert_thread(&threads, precache_get_new_part(precache_threads));
 		}
 		else PIL_sleep_ms(50);
 
-		// find if a job is ready, this the do_something_func() should write in job somewhere
-		cont= 0;
-		for(go over all jobs)
-			if(job is ready) {
-				if(job was not removed) {
-					BLI_remove_thread(&lb, job);
-				}
-			}
-			else cont= 1;
+		caching=0;
+		counter=0;
+		for(pa= re->volume_precache_parts.first; pa; pa= pa->next) {
+			
+			if(pa->done) {
+				counter++;
+				BLI_remove_thread(&threads, pa);
+			} else
+				caching = 1;

@@ Diff output truncated at 10240 characters. @@