[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [13525] branches/soc-2007-joeedh: DSM now has its own multithreading.
Joseph Eagar
joeedh at gmail.com
Sat Feb 2 23:37:52 CET 2008
Revision: 13525
http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=13525
Author: joeedh
Date: 2008-02-02 23:37:52 +0100 (Sat, 02 Feb 2008)
Log Message:
-----------
DSM now has its own multithreading. Rather than using the
generic shadow buffer threading (which simply renders multiple
shadow buffers at once) DSM now renders one DSM map at a time,
and within that map it will use multiple threads to render the
tiles.
Note that the per-thread overhead of DSM can be significant,
especially if you're rendering a lot of hair/fur.
Modified Paths:
--------------
branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h
branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c
branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c
branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h
branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c
branches/soc-2007-joeedh/source/blender/render/intern/source/shadbuf.c
branches/soc-2007-joeedh/source/blender/render/intern/source/zbuf.c
branches/soc-2007-joeedh/tools/Blender.py
branches/soc-2007-joeedh/tools/btools.py
Modified: branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h
===================================================================
--- branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h 2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/blenkernel/BKE_dsm.h 2008-02-02 22:37:52 UTC (rev 13525)
@@ -1,6 +1,7 @@
#ifndef BKE_DSM_H
#define BKE_DSM_H
+#include "BLI_threads.h"
#include "BKE_tile.h"
struct MemArena;
@@ -92,10 +93,11 @@
struct MemArena *bucketarena;
/*needed for zbuffering*/
- void **lastbuf, **lastbufstrand;
+ void **lastbuf[BLENDER_MAX_THREADS], **lastbufstrand[BLENDER_MAX_THREADS];
ListBase *buckets;
- void *s2, *s3;
+ void *s1[BLENDER_MAX_THREADS], *s2[BLENDER_MAX_THREADS];
+ void *s3[BLENDER_MAX_THREADS], *s4[BLENDER_MAX_THREADS];
} DSMBuffer;
//32 16 8 4 2 1
Modified: branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c
===================================================================
--- branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c 2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/blenkernel/intern/tcs_cache.c 2008-02-02 22:37:52 UTC (rev 13525)
@@ -59,7 +59,7 @@
pool->maxmem = max_mem_bytes;
/*this is to group cache runs so they process no less then 5 megs at a time.*/
- pool->grace_period = 1024*1024*2;
+ pool->grace_period = 1024*1024*5;
return pool;
}
Modified: branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c
===================================================================
--- branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c 2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/blenlib/intern/threads.c 2008-02-02 22:37:52 UTC (rev 13525)
@@ -83,6 +83,103 @@
BLI_end_threads(&lb);
************************************************ */
+
+#ifdef BF_PROFILE_GNU
+/*********************begin gnu profiling fix file****************/
+
+/* gprof-helper.c -- preload library to profile pthread-enabled programs
+ *
+ * Authors: Sam Hocevar <sam at zoy dot org>
+ * Daniel Jönsson <danieljo at fagotten dot org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the Do What The Fuck You Want To
+ * Public License as published by Banlu Kemiyatorn. See
+ * http://sam.zoy.org/projects/COPYING.WTFPL for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <sys/time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <pthread.h>
+
+static void * wrapper_routine(void *);
+
+/* Hand-off record that pthread_create_with_profiling() passes to wrapper_routine() */
+typedef struct wrapper_s
+{
+ void * (*start_routine)(void *); /* the caller's real thread entry point */
+ void * arg; /* argument forwarded to start_routine */
+ /* lock + wait: the new thread signals `wait` (while holding `lock`) once it has read what it needs from this record */
+ pthread_mutex_t lock;
+ pthread_cond_t wait;
+ /* ITIMER_PROF setting read with getitimer() by the creating thread and applied with setitimer() by the new thread */
+ struct itimerval itimer;
+
+} wrapper_t;
+
+/* Thread entry point used instead of the caller's: applies the creator's ITIMER_PROF setting, then runs the real routine and returns its result */
+static void * wrapper_routine(void * data)
+{
+ /* Copy the real entry point and its argument into plain locals; these are the only values still needed after the hand-off below */
+ void * (*start_routine)(void *) = ((wrapper_t*)data)->start_routine;
+ void * arg = ((wrapper_t*)data)->arg;
+
+ /* Set the profile timer value from the copy stored in the wrapper (the return value is not checked) */
+ setitimer(ITIMER_PROF, &((wrapper_t*)data)->itimer, NULL);
+
+ /* Tell the calling thread that we don't need its data anymore (nothing after the unlock touches `data`) */
+ pthread_mutex_lock(&((wrapper_t*)data)->lock);
+ pthread_cond_signal(&((wrapper_t*)data)->wait);
+ pthread_mutex_unlock(&((wrapper_t*)data)->lock);
+
+ /* Call the real function and pass its return value through as the thread's result */
+ return start_routine(arg);
+}
+
+/* Our wrapper function for the real pthread_create(): takes the same arguments, starts the thread in wrapper_routine(), and returns pthread_create()'s result */
+int pthread_create_with_profiling(pthread_t *__restrict thread,
+ __const pthread_attr_t *__restrict attr,
+ void * (*start_routine)(void *),
+ void *__restrict arg)
+{
+ wrapper_t wrapper_data; /* lives on this stack frame, so this function must not return while the new thread still reads it */
+ int i_return;
+
+ /* Initialize the wrapper structure; the mutex is locked before the thread is created, so the new thread cannot signal until the wait below has released it */
+ wrapper_data.start_routine = start_routine;
+ wrapper_data.arg = arg;
+ getitimer(ITIMER_PROF, &wrapper_data.itimer);
+ pthread_cond_init(&wrapper_data.wait, NULL);
+ pthread_mutex_init(&wrapper_data.lock, NULL);
+ pthread_mutex_lock(&wrapper_data.lock);
+
+ /* The real pthread_create call */
+ i_return = pthread_create(thread,
+ attr,
+ &wrapper_routine,
+ &wrapper_data);
+
+ /* If the thread was successfully spawned, wait for the data to be released.
+ * NOTE(review): the wait has no predicate loop; POSIX permits spurious wakeups, so a "released" flag may be needed */
+ if(i_return == 0)
+ {
+ pthread_cond_wait(&wrapper_data.wait, &wrapper_data.lock);
+ }
+ /* Hand-off finished (or creation failed): release and destroy the synchronisation objects */
+ pthread_mutex_unlock(&wrapper_data.lock);
+ pthread_mutex_destroy(&wrapper_data.lock);
+ pthread_cond_destroy(&wrapper_data.wait);
+
+ return i_return;
+}
+/***********end profiling fix file****************/
+#endif /* BF_PROFILE_GNU */
+
+
static pthread_mutex_t _malloc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _custom1_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -171,7 +268,11 @@
if(tslot->avail) {
tslot->avail= 0;
tslot->callerdata= callerdata;
+ #ifdef BF_PROFILE_GNU
+ pthread_create_with_profiling(&tslot->pthread, NULL, tslot->do_thread, tslot->callerdata);
+ #else
pthread_create(&tslot->pthread, NULL, tslot->do_thread, tslot->callerdata);
+ #endif
return;
}
}
Modified: branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h
===================================================================
--- branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h 2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/render/intern/include/shadbuf.h 2008-02-02 22:37:52 UTC (rev 13525)
@@ -48,6 +48,7 @@
void freeshadowbuf(struct LampRen *lar);
void threaded_makeshadowbufs(struct Render *re);
+void DSM_threaded_makebuffer(struct Render *re, float *projmat, struct ShadBuf *shb, int tilesize);
/**
* Determines the shadow factor for a face and lamp. There is some
Modified: branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c
===================================================================
--- branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c 2008-02-02 22:28:37 UTC (rev 13524)
+++ branches/soc-2007-joeedh/source/blender/render/intern/source/dsm.c 2008-02-02 22:37:52 UTC (rev 13525)
@@ -76,8 +76,7 @@
/* prototypes */
struct _ClrEntry;
void DSM_DoTile(Render *re1, float projmat[4][4], ShadBuf *shb, DSMTile *tile,
- DSMBuffer *dbuf, char *mergescratch, struct _ClrEntry *rowscratch,
- RE_BucketTile *btile, void **lastbuf, void **lastbufstrand);
+ DSMBuffer *dbuf, RE_BucketTile *btile, int threadnr);
int zbuffer_dsm(Render *re, RenderPart *pa, float winmat[4][4], APixstr **lastbuf,
APixstr **lastbufstrand, APixstr *APixbuf, APixstr *APixbufstrand,
@@ -139,6 +138,203 @@
MEM_freeN(dbuf->vfunc_rect);
}
+typedef struct DeepThreadTile { /* list node: one DSM tile plus the flags the worker threads use to claim and complete it */
+ struct DeepThreadTile *next, *prev; /* list links; do_dsmshadow_thread() walks the list through ->next */
+ DSMTile *tile; /* the tile this node stands for */
+ int ready, assigned; /* assigned: set to 1 under LOCK_CUSTOM1 by the worker that takes the tile; ready: set to 1 by that worker once the tile is done */
+} DeepThreadTile;
+/* Argument block that do_dsmshadow_thread() receives through its void* parameter */
+typedef struct DeepThreadData {
+ ListBase *tiles; /* list of DeepThreadTile nodes scanned by the worker */
+ int threadnr; /* selects this worker's slot in dbuf->lastbuf[] / dbuf->lastbufstrand[]; also passed to DSM_DoTile() */
+ DSMBuffer *dbuf; /* buffer whose per-thread scratch arrays and tilex/tiley counts the worker uses */
+ float *projmat; /* forwarded unchanged to DSM_DoTile() */
+ RE_BucketBuffer *bucketbuf; /* bucket tiles are fetched from here with TCS_GetAndLockTile() */
+ Render *re; /* supplies the test_break() callback; forwarded to DSM_DoTile() */
+ ShadBuf *shb; /* forwarded unchanged to DSM_DoTile() */
+} DeepThreadData;
+/* Worker thread body: takes not-yet-assigned tiles from data->tiles and renders each one. vdata must point to a DeepThreadData. Always returns NULL. */
+void *do_dsmshadow_thread(void *vdata)
+{
+ DeepThreadData *data = vdata;
+ DeepThreadTile *tilet;
+ RE_BucketTile *buckettile;
+ DSMTile *tile;
+ /* Each worker scans the whole list once; the `assigned` flag is tested and set under LOCK_CUSTOM1 so a tile is taken by one worker only */
+ for (tilet=data->tiles->first; tilet; tilet=tilet->next) {
+ if (data->re->test_break()) break; /* leave the loop as soon as the render's test_break() callback returns non-zero */
+
+ BLI_lock_thread(LOCK_CUSTOM1);
+ if (tilet->assigned==0) {
+ tilet->assigned = 1;
+ BLI_unlock_thread(LOCK_CUSTOM1);
+ /* the tile is now ours; everything below runs without LOCK_CUSTOM1 */
+ tile = tilet->tile;
+ /* give the tile its own arena and carve the r/g/b rects out of it, each sizex*sizey pointer-sized entries */
+ tile->arena = BLI_memarena_new(DSM_TILE_MEMARENASIZE); /*FIXMEGREP: tweak this to find optimal value.*/
+ BLI_memarena_use_mapalloc(tile->arena);
+ tile->r_rect = BLI_memarena_alloc(tile->arena, sizeof(void*)*tile->sizex*tile->sizey);
+ tile->g_rect = BLI_memarena_alloc(tile->arena, sizeof(void*)*tile->sizex*tile->sizey);
+ tile->b_rect = BLI_memarena_alloc(tile->arena, sizeof(void*)*tile->sizex*tile->sizey);
+ /* progress output, then clear this thread's scratch arrays; assumes sizex*sizey fits in the tilesize*tilesize entries allocated for them - TODO confirm */
+ printf("Rendering a dsm shadow tile! Tile %d of %d\n", tile->y*data->dbuf->tilex+tile->x, data->dbuf->tilex*data->dbuf->tiley);
+ memset(data->dbuf->lastbuf[data->threadnr], 0, sizeof(void*)*tile->sizex*tile->sizey);
+ memset(data->dbuf->lastbufstrand[data->threadnr], 0, sizeof(void*)*tile->sizex*tile->sizey);
+ /* the bucket tile at the same x/y is held locked only for the duration of DSM_DoTile() */
+ buckettile = TCS_GetAndLockTile(data->bucketbuf, tile->x, tile->y, 0);
+ DSM_DoTile(data->re, data->projmat, data->shb, tile, data->dbuf, buckettile, data->threadnr);
+ TCS_UnlockTile(buckettile);
+ /* NOTE(review): `ready` is written below without holding LOCK_CUSTOM1 - confirm how its readers synchronise */
+ _DSM_maketile((TCS_TileBuffer*)data->dbuf, deepbuffer_pool, (TCS_Tile*)tile);
+ tilet->ready = 1;
+ } else BLI_unlock_thread(LOCK_CUSTOM1); /* tile was already assigned: drop the lock and move on to the next node */
+ }
+
+ return NULL;
+}
+
+/*copied over from shadbuf.c code: a file-local break flag plus a test_break-style callback (int (*)(void)) that returns the flag's current value*/
+static volatile int g_break= 0;
+static int dsm_thread_break(void)
+{
+ return g_break;
+}
+
+void DSM_threaded_makebuffer(Render *re, float *projmat, ShadBuf *shb, int tilesize)
+{
+ DSMBuffer *dbuf = MEM_mapallocN(sizeof(DSMBuffer), "DSMBuffer");
+ DSMTile *tile;
+ DeepThreadTile *threadtile;
+ ListBase threads = {NULL, NULL}, threadtiles = {NULL, NULL};
+ RE_BucketBuffer *bucketbuf;
+ DeepThreadData thread_data[BLENDER_MAX_THREADS];
+ int (*test_break)(void);
+ float fac;
+ int x, y, lastsizex, lastsizey, stop=0;
+ int mergescratchlen, lay = (1<<20)-1;
+
+ /*use 30,000 max elements per pixel for the 2 preallocated
+ pixel arrays DSM_DoTile() allocates. the *6
+ iirc is because the visibility function calculation
+ kind of expands things (note this is usually more than
+ compensated by the final visibility function compression).*/
+
+ if (G.rt==71)
+ dbuf->max_depth = 2000*6;
+ else
+ dbuf->max_depth = 30000*6;
+
+ dbuf->max_layers = 0;
+
+ mergescratchlen = sizeof(_ClrEntry) > sizeof(DSMLayerSample) ? sizeof(_ClrEntry)*dbuf->max_depth : sizeof(DSMLayerSample)*dbuf->max_depth;
+ for (x=0; x<re->r.threads; x++) {
+ dbuf->s1[x] = MEM_mapallocN(mergescratchlen, "_ClrEntry");
+ dbuf->s2[x] = MEM_mapallocN(mergescratchlen, "mergescratch in zbuf.c");
+ dbuf->s3[x] = MEM_mapallocN(mergescratchlen, "dbuf->s2");
+ dbuf->s4[x] = MEM_mapallocN(mergescratchlen, "dbuf->s3");
+ dbuf->lastbuf[x] = MEM_callocN(sizeof(void*)*tilesize*tilesize, "lastbuf");
+ dbuf->lastbufstrand[x] = MEM_callocN(sizeof(void*)*tilesize*tilesize, "lastbufstrand");
+ }
+
+ if (tilesize > shb->size) tilesize = shb->size;
+
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list