[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [59163] branches/soc-2013-depsgraph_mt: Speedup for guarded allocator
Sergey Sharybin
sergey.vfx at gmail.com
Thu Aug 15 14:13:01 CEST 2013
Revision: 59163
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=59163
Author: nazgul
Date: 2013-08-15 12:13:01 +0000 (Thu, 15 Aug 2013)
Log Message:
-----------
Speedup for guarded allocator
- Re-arrange locks, so no actual memory allocation
(which is relatively slow) happens from inside
the lock. The operating system will take care of any
locks which might be needed there on its own.
- Use a spin lock instead of a mutex: only list
operations happen inside the lock, so there is
no need for a mutex here.
- Use atomic operations for memory in use and total
used blocks counters.
This makes the guarded allocator almost as fast
as the non-guarded one in files from the Tube project.
There is still MemHead/MemTail overhead, which might
be bad for CPU cache utilization.
TODO: We need a smarter 32/64bit compile-time check;
currently I'm afraid only the x86 CPU family is
detected reliably.
Modified Paths:
--------------
branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h
branches/soc-2013-depsgraph_mt/intern/guardedalloc/CMakeLists.txt
branches/soc-2013-depsgraph_mt/intern/guardedalloc/SConscript
branches/soc-2013-depsgraph_mt/intern/guardedalloc/intern/mallocn.c
branches/soc-2013-depsgraph_mt/source/blender/blenlib/intern/threads.c
branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/CMakeLists.txt
branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/SConscript
branches/soc-2013-depsgraph_mt/source/blender/makesrna/SConscript
branches/soc-2013-depsgraph_mt/source/blender/makesrna/intern/CMakeLists.txt
branches/soc-2013-depsgraph_mt/source/gameengine/GamePlayer/ghost/GPG_ghost.cpp
Modified: branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h
===================================================================
--- branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h 2013-08-15 12:13:01 UTC (rev 59163)
@@ -44,6 +44,7 @@
# endif
#endif
+/* TODO(sergey): check on other 64bit platforms. */
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)
# define LG_SIZEOF_PTR 3
# define LG_SIZEOF_INT 3
Modified: branches/soc-2013-depsgraph_mt/intern/guardedalloc/CMakeLists.txt
===================================================================
--- branches/soc-2013-depsgraph_mt/intern/guardedalloc/CMakeLists.txt 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/intern/guardedalloc/CMakeLists.txt 2013-08-15 12:13:01 UTC (rev 59163)
@@ -25,6 +25,7 @@
set(INC
.
+ ../atomic
)
set(INC_SYS
Modified: branches/soc-2013-depsgraph_mt/intern/guardedalloc/SConscript
===================================================================
--- branches/soc-2013-depsgraph_mt/intern/guardedalloc/SConscript 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/intern/guardedalloc/SConscript 2013-08-15 12:13:01 UTC (rev 59163)
@@ -38,6 +38,6 @@
sources.append('cpp/mallocn.cpp')
defs.append('WITH_CXX_GUARDEDALLOC')
-incs = '.'
+incs = '. ../atomic'
env.BlenderLib ('bf_intern_guardedalloc', sources, Split(incs), defs, libtype=['intern','player'], priority = [5,150] )
Modified: branches/soc-2013-depsgraph_mt/intern/guardedalloc/intern/mallocn.c
===================================================================
--- branches/soc-2013-depsgraph_mt/intern/guardedalloc/intern/mallocn.c 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/intern/guardedalloc/intern/mallocn.c 2013-08-15 12:13:01 UTC (rev 59163)
@@ -50,6 +50,8 @@
#include "MEM_guardedalloc.h"
+#include "atomic_ops.h"
+
/* should always be defined except for experimental cases */
#ifdef WITH_GUARDEDALLOC
@@ -210,9 +212,21 @@
/* --------------------------------------------------------------------- */
-static volatile int totblock = 0;
-static volatile uintptr_t mem_in_use = 0, mmap_in_use = 0, peak_mem = 0;
+/* TODO(sergey): need smarter check for 64bit platform. */
+#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)
+typedef uint64_t mem_uintptr_t;
+# define mem_atomic_uint_sub atomic_sub_uint64
+# define mem_atomic_uint_add atomic_add_uint64
+#else
+typedef int32_t mem_int_t;
+typedef uint32_t mem_uintptr_t;
+# define mem_atomic_uint_sub atomic_sub_uint32
+# define mem_atomic_uint_add atomic_add_uint32
+#endif
+static mem_uintptr_t totblock = 0;
+static mem_uintptr_t mem_in_use = 0, mmap_in_use = 0, peak_mem = 0;
+
static volatile struct localListBase _membase;
static volatile struct localListBase *membase = &_membase;
static void (*error_callback)(const char *) = NULL;
@@ -493,31 +507,29 @@
memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + len);
memt->tag3 = MEMTAG3;
-
+
+ mem_atomic_uint_add(&totblock, 1);
+ mem_atomic_uint_add(&mem_in_use, len);
+
+ mem_lock_thread();
addtail(membase, &memh->next);
if (memh->next) {
memh->nextname = MEMNEXT(memh->next)->name;
}
-
- totblock++;
- mem_in_use += len;
-
peak_mem = mem_in_use > peak_mem ? mem_in_use : peak_mem;
+ mem_unlock_thread();
}
void *MEM_mallocN(size_t len, const char *str)
{
MemHead *memh;
- mem_lock_thread();
-
len = (len + 3) & ~3; /* allocate in units of 4 */
memh = (MemHead *)malloc(len + sizeof(MemHead) + sizeof(MemTail));
if (memh) {
make_memhead_header(memh, len, str);
- mem_unlock_thread();
if (malloc_debug_memset && len)
memset(memh + 1, 255, len);
@@ -528,7 +540,6 @@
#endif
return (++memh);
}
- mem_unlock_thread();
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len), str, (unsigned int) mem_in_use);
return NULL;
@@ -538,15 +549,12 @@
{
MemHead *memh;
- mem_lock_thread();
-
len = (len + 3) & ~3; /* allocate in units of 4 */
memh = (MemHead *)calloc(len + sizeof(MemHead) + sizeof(MemTail), 1);
if (memh) {
make_memhead_header(memh, len, str);
- mem_unlock_thread();
#ifdef DEBUG_MEMCOUNTER
if (_mallocn_count == DEBUG_MEMCOUNTER_ERROR_VAL)
memcount_raise(__func__);
@@ -554,7 +562,6 @@
#endif
return (++memh);
}
- mem_unlock_thread();
print_error("Calloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len), str, (unsigned int) mem_in_use);
return NULL;
@@ -565,8 +572,6 @@
{
MemHead *memh;
- mem_lock_thread();
-
len = (len + 3) & ~3; /* allocate in units of 4 */
memh = mmap(NULL, len + sizeof(MemHead) + sizeof(MemTail),
@@ -575,7 +580,8 @@
if (memh != (MemHead *)-1) {
make_memhead_header(memh, len, str);
memh->mmap = 1;
- mmap_in_use += len;
+ mem_atomic_uint_add(&mmap_in_use, len);
+ mem_lock_thread();
peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem;
mem_unlock_thread();
#ifdef DEBUG_MEMCOUNTER
@@ -586,7 +592,6 @@
return (++memh);
}
else {
- mem_unlock_thread();
print_error("Mapalloc returns null, fallback to regular malloc: "
"len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len), str, (unsigned int) mmap_in_use);
@@ -844,7 +849,6 @@
return;
}
- mem_lock_thread();
if ((memh->tag1 == MEMTAG1) &&
(memh->tag2 == MEMTAG2) &&
((memh->len & 0x3) == 0))
@@ -858,8 +862,6 @@
/* after tags !!! */
rem_memblock(memh);
- mem_unlock_thread();
-
return;
}
MemorY_ErroR(memh->name, "end corrupt");
@@ -869,7 +871,9 @@
}
}
else {
+ mem_lock_thread();
name = check_memlist(memh);
+ mem_unlock_thread();
if (name == NULL)
MemorY_ErroR("free", "pointer not in memlist");
else
@@ -879,8 +883,6 @@
totblock--;
/* here a DUMP should happen */
- mem_unlock_thread();
-
return;
}
@@ -927,6 +929,7 @@
static void rem_memblock(MemHead *memh)
{
+ mem_lock_thread();
remlink(membase, &memh->next);
if (memh->prev) {
if (memh->next)
@@ -934,9 +937,10 @@
else
MEMNEXT(memh->prev)->nextname = NULL;
}
+ mem_unlock_thread();
- totblock--;
- mem_in_use -= memh->len;
+ mem_atomic_uint_sub(&totblock, 1);
+ mem_atomic_uint_sub(&mem_in_use, memh->len);
#ifdef DEBUG_MEMDUPLINAME
if (memh->need_free_name)
@@ -944,7 +948,7 @@
#endif
if (memh->mmap) {
- mmap_in_use -= memh->len;
+ mem_atomic_uint_sub(&mmap_in_use, memh->len);
if (munmap(memh, memh->len + sizeof(MemHead) + sizeof(MemTail)))
printf("Couldn't unmap memory %s\n", memh->name);
}
Modified: branches/soc-2013-depsgraph_mt/source/blender/blenlib/intern/threads.c
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/blenlib/intern/threads.c 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/source/blender/blenlib/intern/threads.c 2013-08-15 12:13:01 UTC (rev 59163)
@@ -111,7 +111,7 @@
* BLI_end_threads(&lb);
*
************************************************ */
-static pthread_mutex_t _malloc_lock = PTHREAD_MUTEX_INITIALIZER;
+static SpinLock _malloc_lock;
static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _image_draw_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _viewer_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -138,22 +138,25 @@
static void BLI_lock_malloc_thread(void)
{
- pthread_mutex_lock(&_malloc_lock);
+ BLI_spin_lock(&_malloc_lock);
}
static void BLI_unlock_malloc_thread(void)
{
- pthread_mutex_unlock(&_malloc_lock);
+ BLI_spin_unlock(&_malloc_lock);
}
void BLI_threadapi_init(void)
{
mainid = pthread_self();
+
+ BLI_spin_init(&_malloc_lock);
}
void BLI_threadapi_exit(void)
{
BLI_task_scheduler_free(task_scheduler);
+ BLI_spin_end(&_malloc_lock);
}
TaskScheduler *BLI_task_scheduler_get(void)
Modified: branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/CMakeLists.txt
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/CMakeLists.txt 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/CMakeLists.txt 2013-08-15 12:13:01 UTC (rev 59163)
@@ -29,6 +29,7 @@
blender_include_dirs(
../../../../intern/guardedalloc
+ ../../../../intern/atomic
../../blenlib
..
)
Modified: branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/SConscript
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/SConscript 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/source/blender/makesdna/intern/SConscript 2013-08-15 12:13:01 UTC (rev 59163)
@@ -46,6 +46,7 @@
makesdna_tool.Append(CCFLAGS = '-DBASE_HEADER="\\"source/blender/makesdna/\\"" ')
makesdna_tool.Append (CPPPATH = ['#/intern/guardedalloc',
+ '#/intern/atomic',
'../../makesdna', '../../bmesh'])
if env['OURPLATFORM'] == 'linuxcross':
Modified: branches/soc-2013-depsgraph_mt/source/blender/makesrna/SConscript
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/makesrna/SConscript 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/source/blender/makesrna/SConscript 2013-08-15 12:13:01 UTC (rev 59163)
@@ -36,6 +36,7 @@
'.',
'./intern',
'#/intern/guardedalloc',
+ '#/intern/atomic',
'#/intern/memutil',
'#/extern/glew/include',
'#/intern/audaspace/intern',
Modified: branches/soc-2013-depsgraph_mt/source/blender/makesrna/intern/CMakeLists.txt
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/makesrna/intern/CMakeLists.txt 2013-08-15 12:08:18 UTC (rev 59162)
+++ branches/soc-2013-depsgraph_mt/source/blender/makesrna/intern/CMakeLists.txt 2013-08-15 12:13:01 UTC (rev 59163)
@@ -276,6 +276,7 @@
../../../../intern/audaspace/intern
../../../../intern/cycles/blender
../../../../intern/guardedalloc
+ ../../../../intern/atomic
../../../../intern/memutil
../../../../intern/smoke/extern
)
Modified: branches/soc-2013-depsgraph_mt/source/gameengine/GamePlayer/ghost/GPG_ghost.cpp
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list