[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [13525] branches/soc-2007-joeedh: DSM now has its own multithreading.

Joseph Eagar joeedh at gmail.com
Sat Feb 2 23:37:52 CET 2008


Revision: 13525
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=13525
Author:   joeedh
Date:     2008-02-02 23:37:52 +0100 (Sat, 02 Feb 2008)

Log Message:
-----------
DSM now has its own multithreading.  Rather than using the
generic shadow buffer threading (which simply renders multiple
shadow buffers at once), DSM now renders one DSM map at a time,
and within that map it will use multiple threads to render the
tiles.

Note that the per-thread overhead of DSM can be significant,
especially if you're rendering a lot of hair/fur.

Modified Paths:
--------------
    branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h
    branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c
    branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c
    branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h
    branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c
    branches/soc-2007-joeedh/source/blender/render/intern/source/shadbuf.c
    branches/soc-2007-joeedh/source/blender/render/intern/source/zbuf.c
    branches/soc-2007-joeedh/tools/Blender.py
    branches/soc-2007-joeedh/tools/btools.py

Modified: branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h
===================================================================
--- branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h	2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h	2008-02-02 22:37:52 UTC (rev 13525)
@@ -1,6 +1,7 @@
 #ifndef BKE_DSM_H
 #define BKE_DSM_H
 
+#include "BLI_threads.h"
 #include "BKE_tile.h"
 
 struct MemArena;
@@ -92,10 +93,11 @@
 	struct MemArena *bucketarena;
 	
 	/*needed for zbuffering*/
-	void **lastbuf, **lastbufstrand;
+	void **lastbuf[BLENDER_MAX_THREADS], **lastbufstrand[BLENDER_MAX_THREADS];
 	ListBase *buckets;
 
-	void *s2, *s3;
+	void *s1[BLENDER_MAX_THREADS], *s2[BLENDER_MAX_THREADS];
+	void *s3[BLENDER_MAX_THREADS], *s4[BLENDER_MAX_THREADS];
 } DSMBuffer;
 
 //32 16 8 4 2 1

Modified: branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c
===================================================================
--- branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c	2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c	2008-02-02 22:37:52 UTC (rev 13525)
@@ -59,7 +59,7 @@
 	pool->maxmem = max_mem_bytes;
 	
 	/*this is to group cache runs so they process no less then 5 megs at a time.*/
-	pool->grace_period = 1024*1024*2; 
+	pool->grace_period = 1024*1024*5; 
 	
 	return pool;
 }

Modified: branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c
===================================================================
--- branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c	2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c	2008-02-02 22:37:52 UTC (rev 13525)
@@ -83,6 +83,103 @@
 	BLI_end_threads(&lb);
 
  ************************************************ */
+ 
+#ifdef BF_PROFILE_GNU
+/*********************begin gnu profiling fix file****************/
+
+/* gprof-helper.c -- preload library to profile pthread-enabled programs
+ *
+ * Authors: Sam Hocevar <sam at zoy dot org>
+ *          Daniel Jönsson <danieljo at fagotten dot org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the Do What The Fuck You Want To
+ *  Public License as published by Banlu Kemiyatorn. See
+ *  http://sam.zoy.org/projects/COPYING.WTFPL for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <sys/time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <pthread.h>
+
+static void * wrapper_routine(void *);
+
+/* Our data structure passed to the wrapper */
+typedef struct wrapper_s
+{
+    void * (*start_routine)(void *);
+    void * arg;
+
+    pthread_mutex_t lock;
+    pthread_cond_t  wait;
+
+    struct itimerval itimer;
+
+} wrapper_t;
+
+/* The wrapper function in charge for setting the itimer value */
+static void * wrapper_routine(void * data)
+{
+    /* Put user data in thread-local variables */
+    void * (*start_routine)(void *) = ((wrapper_t*)data)->start_routine;
+    void * arg = ((wrapper_t*)data)->arg;
+
+    /* Set the profile timer value */
+    setitimer(ITIMER_PROF, &((wrapper_t*)data)->itimer, NULL);
+
+    /* Tell the calling thread that we don't need its data anymore */
+    pthread_mutex_lock(&((wrapper_t*)data)->lock);
+    pthread_cond_signal(&((wrapper_t*)data)->wait);
+    pthread_mutex_unlock(&((wrapper_t*)data)->lock);
+
+    /* Call the real function */
+    return start_routine(arg);
+}
+
+/* Our wrapper function for the real pthread_create() */
+int pthread_create_with_profiling(pthread_t *__restrict thread,
+                   __const pthread_attr_t *__restrict attr,
+                   void * (*start_routine)(void *),
+                   void *__restrict arg)
+{
+    wrapper_t wrapper_data;
+    int i_return;
+
+    /* Initialize the wrapper structure */
+    wrapper_data.start_routine = start_routine;
+    wrapper_data.arg = arg;
+    getitimer(ITIMER_PROF, &wrapper_data.itimer);
+    pthread_cond_init(&wrapper_data.wait, NULL);
+    pthread_mutex_init(&wrapper_data.lock, NULL);
+    pthread_mutex_lock(&wrapper_data.lock);
+
+    /* The real pthread_create call */
+    i_return = pthread_create(thread,
+                                   attr,
+                                   &wrapper_routine,
+                                   &wrapper_data);
+
+    /* If the thread was successfully spawned, wait for the data
+     * to be released */
+    if(i_return == 0)
+    {
+        pthread_cond_wait(&wrapper_data.wait, &wrapper_data.lock);
+    }
+
+    pthread_mutex_unlock(&wrapper_data.lock);
+    pthread_mutex_destroy(&wrapper_data.lock);
+    pthread_cond_destroy(&wrapper_data.wait);
+
+    return i_return;
+} 
+/***********end profiling fix file****************/
+#endif /* BF_PROFILE_GNU */
+
+
 static pthread_mutex_t _malloc_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _custom1_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -171,7 +268,11 @@
 		if(tslot->avail) {
 			tslot->avail= 0;
 			tslot->callerdata= callerdata;
+			#ifdef BF_PROFILE_GNU
+			pthread_create_with_profiling(&tslot->pthread, NULL, tslot->do_thread, tslot->callerdata);
+			#else
 			pthread_create(&tslot->pthread, NULL, tslot->do_thread, tslot->callerdata);
+			#endif
 			return;
 		}
 	}

Modified: branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h
===================================================================
--- branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h	2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h	2008-02-02 22:37:52 UTC (rev 13525)
@@ -48,6 +48,7 @@
 void freeshadowbuf(struct LampRen *lar);
 
 void threaded_makeshadowbufs(struct Render *re);
+void DSM_threaded_makebuffer(struct Render *re, float *projmat, struct ShadBuf *shb, int tilesize);
 
 /**
  * Determines the shadow factor for a face and lamp. There is some

Modified: branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c
===================================================================
--- branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c	2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c	2008-02-02 22:37:52 UTC (rev 13525)
@@ -76,8 +76,7 @@
 /* prototypes */
 struct _ClrEntry;
 void DSM_DoTile(Render *re1, float projmat[4][4], ShadBuf *shb, DSMTile *tile, 
-				DSMBuffer *dbuf, char *mergescratch, struct _ClrEntry *rowscratch,
-				RE_BucketTile *btile, void **lastbuf, void **lastbufstrand);
+				DSMBuffer *dbuf, RE_BucketTile *btile, int threadnr);
 
 int zbuffer_dsm(Render *re, RenderPart *pa, float winmat[4][4], APixstr **lastbuf, 
 				APixstr **lastbufstrand, APixstr *APixbuf, APixstr *APixbufstrand, 
@@ -139,6 +138,203 @@
 	MEM_freeN(dbuf->vfunc_rect);
 }
 
+typedef struct DeepThreadTile {
+	struct DeepThreadTile *next, *prev;
+	DSMTile *tile;
+	int ready, assigned;
+} DeepThreadTile;
+
+typedef struct DeepThreadData {
+	ListBase *tiles;
+	int threadnr;
+	DSMBuffer *dbuf;
+	float *projmat;
+	RE_BucketBuffer *bucketbuf;
+	Render *re;
+	ShadBuf *shb;
+} DeepThreadData;
+
+void *do_dsmshadow_thread(void *vdata)
+{
+	DeepThreadData *data = vdata;
+	DeepThreadTile *tilet;
+	RE_BucketTile *buckettile;
+	DSMTile *tile;
+
+	for (tilet=data->tiles->first; tilet; tilet=tilet->next) {
+		if (data->re->test_break()) break;
+
+		BLI_lock_thread(LOCK_CUSTOM1);
+		if (tilet->assigned==0) {
+			tilet->assigned = 1;
+			BLI_unlock_thread(LOCK_CUSTOM1);
+			
+			tile = tilet->tile;
+
+			tile->arena = BLI_memarena_new(DSM_TILE_MEMARENASIZE); /*FIXMEGREP: tweak this to find optimal value.*/
+			BLI_memarena_use_mapalloc(tile->arena);
+			tile->r_rect = BLI_memarena_alloc(tile->arena, sizeof(void*)*tile->sizex*tile->sizey);
+			tile->g_rect = BLI_memarena_alloc(tile->arena, sizeof(void*)*tile->sizex*tile->sizey);
+			tile->b_rect = BLI_memarena_alloc(tile->arena, sizeof(void*)*tile->sizex*tile->sizey);
+
+			printf("Rendering a dsm shadow tile! Tile %d of %d\n", tile->y*data->dbuf->tilex+tile->x, data->dbuf->tilex*data->dbuf->tiley);
+			memset(data->dbuf->lastbuf[data->threadnr], 0, sizeof(void*)*tile->sizex*tile->sizey);
+			memset(data->dbuf->lastbufstrand[data->threadnr], 0, sizeof(void*)*tile->sizex*tile->sizey);
+
+			buckettile = TCS_GetAndLockTile(data->bucketbuf, tile->x, tile->y, 0);
+			DSM_DoTile(data->re, data->projmat, data->shb, tile, data->dbuf, buckettile, data->threadnr);
+			TCS_UnlockTile(buckettile);
+
+			_DSM_maketile((TCS_TileBuffer*)data->dbuf, deepbuffer_pool, (TCS_Tile*)tile);
+			tilet->ready = 1;
+		} else BLI_unlock_thread(LOCK_CUSTOM1);
+	}
+
+	return NULL;
+}
+
+/*copied over from shadbuf.c code*/
+static volatile int g_break= 0;
+static int dsm_thread_break(void)
+{
+	return g_break;
+}
+
+void DSM_threaded_makebuffer(Render *re, float *projmat, ShadBuf *shb, int tilesize)
+{
+	DSMBuffer *dbuf = MEM_mapallocN(sizeof(DSMBuffer), "DSMBuffer");
+	DSMTile *tile;
+	DeepThreadTile *threadtile;
+	ListBase threads = {NULL, NULL}, threadtiles = {NULL, NULL};
+	RE_BucketBuffer *bucketbuf;
+	DeepThreadData thread_data[BLENDER_MAX_THREADS];
+	int (*test_break)(void);
+	float fac;
+	int x, y, lastsizex, lastsizey, stop=0;
+	int mergescratchlen, lay = (1<<20)-1;
+
+	/*use 30,000 max elements per pixel for the 2 preallocated
+	  pixel arrays DSM_DoTile() allocates.  the *6
+	  iirc is because the visibilty function calculation
+	  kindof expands things (note this is usually more then
+	  compensated by the final visibility function compression).*/
+	
+	if (G.rt==71)
+		dbuf->max_depth = 2000*6;
+	else
+		dbuf->max_depth = 30000*6;
+
+	dbuf->max_layers = 0;
+	
+	mergescratchlen = sizeof(_ClrEntry) > sizeof(DSMLayerSample) ? sizeof(_ClrEntry)*dbuf->max_depth : sizeof(DSMLayerSample)*dbuf->max_depth;
+	for (x=0; x<re->r.threads; x++) {
+		dbuf->s1[x] = MEM_mapallocN(mergescratchlen, "_ClrEntry");
+		dbuf->s2[x] = MEM_mapallocN(mergescratchlen, "mergescratch in zbuf.c");
+		dbuf->s3[x] = MEM_mapallocN(mergescratchlen, "dbuf->s2");
+		dbuf->s4[x] = MEM_mapallocN(mergescratchlen, "dbuf->s3");
+		dbuf->lastbuf[x] = MEM_callocN(sizeof(void*)*tilesize*tilesize, "lastbuf");
+		dbuf->lastbufstrand[x] = MEM_callocN(sizeof(void*)*tilesize*tilesize, "lastbufstrand");
+	}
+
+	if (tilesize > shb->size) tilesize = shb->size;
+	

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list