[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [59086] branches/soc-2013-depsgraph_mt: Use atomic operations instead of spin lock for threaded update
Sergey Sharybin
sergey.vfx at gmail.com
Mon Aug 12 16:37:15 CEST 2013
Revision: 59086
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=59086
Author: nazgul
Date: 2013-08-12 14:37:15 +0000 (Mon, 12 Aug 2013)
Log Message:
-----------
Use atomic operations instead of spin lock for threaded update
This replaces code (pseudo-code):
spin_lock();
update_child_dag_nodes();
schedule_new_nodes();
spin_unlock();
with:
update_child_dag_nodes_with_atomic_ops();
schedule_new_nodes();
The reason for this is that scheduling new nodes implies
a mutex lock, and holding a spin lock around it is a bad idea.
An alternative would have been to use a spin lock around
the child nodes update only, but that would imply either having
a per-node spin lock or collecting the nodes which are
ready for update into an array.
Didn't like the alternatives; using atomic operations makes
the code much easier to follow and keeps the data flow on the CPU nice.
Same atomic ops might be used in other performance-critical
areas later.
Using atomic ops implementation from jemalloc project.
Modified Paths:
--------------
branches/soc-2013-depsgraph_mt/source/blender/blenkernel/CMakeLists.txt
branches/soc-2013-depsgraph_mt/source/blender/blenkernel/SConscript
branches/soc-2013-depsgraph_mt/source/blender/blenkernel/depsgraph_private.h
branches/soc-2013-depsgraph_mt/source/blender/blenkernel/intern/depsgraph.c
branches/soc-2013-depsgraph_mt/source/blender/blenkernel/intern/scene.c
Added Paths:
-----------
branches/soc-2013-depsgraph_mt/intern/atomic/
branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h
Added: branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h
===================================================================
--- branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h (rev 0)
+++ branches/soc-2013-depsgraph_mt/intern/atomic/atomic_ops.h 2013-08-12 14:37:15 UTC (rev 59086)
@@ -0,0 +1,291 @@
+/*
+ * Adopted from jemalloc with this license:
+ *
+ * Copyright (C) 2002-2013 Jason Evans <jasone at canonware.com>.
+ * All rights reserved.
+ * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
+ * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
+
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice(s),
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice(s),
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ATOMIC_OPS_H__
+#define ATOMIC_OPS_H__
+
+/* needed for int types */
+#include "../../source/blender/blenlib/BLI_sys_types.h"
+
+/* ATOMIC_INLINE: the storage-class / inlining qualifiers put in front of
+ * every function in this header. Spelled per compiler so that the inline
+ * keyword works everywhere. */
+#if defined(_MSC_VER)
+# define ATOMIC_INLINE static __forceinline
+#else
+# if (defined(__APPLE__) && defined(__ppc__))
+/* static inline __attribute__ here breaks osx ppc gcc42 build */
+# define ATOMIC_INLINE static __attribute__((always_inline))
+# else
+# define ATOMIC_INLINE static inline __attribute__((always_inline))
+# endif
+#endif
+
+/* Base-2 logarithms of sizeof(void *) and sizeof(int). They select whether
+ * the size_t and unsigned wrappers further down are built on the 32-bit or
+ * on the 64-bit primitives.
+ *
+ * int stays 4 bytes wide on x86-64 (LP64 as well as LLP64), so only the
+ * pointer size differs between the two branches. Declaring int as 8 bytes
+ * would make the unsigned wrappers run a 64-bit atomic on a 32-bit object
+ * and overwrite the memory next to it. */
+#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)
+# define LG_SIZEOF_PTR 3
+# define LG_SIZEOF_INT 2
+#else
+# define LG_SIZEOF_PTR 2
+# define LG_SIZEOF_INT 2
+#endif
+
+/******************************************************************************/
+/* 64-bit operations. */
+#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+/* 64-bit add on top of the GCC __sync builtin.
+ * Atomically adds x to *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+ const uint64_t updated = __sync_add_and_fetch(p, x);
+ return updated;
+}
+
+/* 64-bit subtract on top of the GCC __sync builtin.
+ * Atomically subtracts x from *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+ const uint64_t updated = __sync_sub_and_fetch(p, x);
+ return updated;
+}
+#elif (defined(_MSC_VER))
+/* 64-bit add for MSVC.
+ * InterlockedExchangeAdd64() hands back the value the variable held
+ * *before* the addition, so the addend is applied once more to return the
+ * updated value, like the other implementations of this function do.
+ * The pointer is cast because the intrinsic operates on a signed 64-bit
+ * integer. */
+ATOMIC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+ return ((uint64_t)InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x);
+}
+
+/* 64-bit subtract for MSVC: adds the negated operand, then corrects the
+ * previous value reported by the intrinsic into the updated value. */
+ATOMIC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+ return ((uint64_t)InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x);
+}
+#elif (defined(JEMALLOC_OSATOMIC))
+/* 64-bit add on top of OSAtomicAdd64(), used when JEMALLOC_OSATOMIC is
+ * defined. Returns whatever OSAtomicAdd64() reports, converted to
+ * uint64_t. */
+ATOMIC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+ int64_t *target = (int64_t *)p;
+ return (OSAtomicAdd64((int64_t)x, target));
+}
+
+/* 64-bit subtract on top of OSAtomicAdd64(): adds the negated operand. */
+ATOMIC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+ int64_t *target = (int64_t *)p;
+ return (OSAtomicAdd64(-((int64_t)x), target));
+}
+# elif (defined(__amd64__) || defined(__x86_64__))
+/* 64-bit add in x86-64 inline assembly.
+ * "lock xadd" stores the sum in *p and leaves the value *p held *before*
+ * the addition in the register operand. The addend is therefore kept in x
+ * and applied to that previous value, so the function returns the updated
+ * value like the other implementations do. */
+ATOMIC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+ uint64_t t = x;
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+ return (t + x);
+}
+
+/* 64-bit subtract in x86-64 inline assembly: adds the two's complement of
+ * x and returns the updated value. */
+ATOMIC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+ uint64_t t;
+ /* Unsigned negation wraps by definition; no signed overflow involved. */
+ x = (uint64_t)0 - x;
+ t = x;
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+ return (t + x);
+}
+# elif (defined(JEMALLOC_ATOMIC9))
+/* 64-bit add on top of atomic_fetchadd_long(), used when JEMALLOC_ATOMIC9
+ * is defined. The fetch-add result plus x is returned.
+ * NOTE(review): assert() is used here but this header does not include
+ * <assert.h> itself - confirm it is in scope where this branch is built. */
+ATOMIC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+ unsigned long fetched;
+
+ /*
+ * atomic_fetchadd_64() doesn't exist, but we only ever use this
+ * function on LP64 systems, so atomic_fetchadd_long() will do.
+ */
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ fetched = atomic_fetchadd_long(p, (unsigned long)x);
+ return (fetched + x);
+}
+
+/* 64-bit subtract on top of atomic_fetchadd_long(): adds the negated
+ * operand and returns the fetch-add result minus x. */
+ATOMIC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+ unsigned long fetched;
+
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ fetched = atomic_fetchadd_long(p, (unsigned long)(-(long)x));
+ return (fetched - x);
+}
+# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
+/* 64-bit add on top of the __sync builtin, used when
+ * JE_FORCE_SYNC_COMPARE_AND_SWAP_8 is defined.
+ * Atomically adds x to *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+ const uint64_t updated = __sync_add_and_fetch(p, x);
+ return updated;
+}
+
+/* 64-bit subtract on top of the __sync builtin.
+ * Atomically subtracts x from *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+ const uint64_t updated = __sync_sub_and_fetch(p, x);
+ return updated;
+}
+# else
+# error "Missing implementation for 64-bit atomic operations"
+# endif
+#endif
+
+/******************************************************************************/
+/* 32-bit operations. */
+#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+/* 32-bit add on top of the GCC __sync builtin.
+ * Atomically adds x to *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+ const uint32_t updated = __sync_add_and_fetch(p, x);
+ return updated;
+}
+
+/* 32-bit subtract on top of the GCC __sync builtin.
+ * Atomically subtracts x from *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+ const uint32_t updated = __sync_sub_and_fetch(p, x);
+ return updated;
+}
+#elif (defined(_MSC_VER))
+/* 32-bit add for MSVC.
+ * InterlockedExchangeAdd() hands back the value the variable held *before*
+ * the addition, so the addend is applied once more to return the updated
+ * value, like the other implementations of this function do.
+ * The pointer is cast because the intrinsic operates on a (32-bit) long. */
+ATOMIC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+ return ((uint32_t)InterlockedExchangeAdd((long *)p, (long)x) + x);
+}
+
+/* 32-bit subtract for MSVC: adds the negated operand, then corrects the
+ * previous value reported by the intrinsic into the updated value. */
+ATOMIC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+ return ((uint32_t)InterlockedExchangeAdd((long *)p, -((int32_t)x)) - x);
+}
+#elif (defined(JEMALLOC_OSATOMIC))
+/* 32-bit add on top of OSAtomicAdd32(), used when JEMALLOC_OSATOMIC is
+ * defined. Returns whatever OSAtomicAdd32() reports, converted to
+ * uint32_t. */
+ATOMIC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+ int32_t *target = (int32_t *)p;
+ return (OSAtomicAdd32((int32_t)x, target));
+}
+
+/* 32-bit subtract on top of OSAtomicAdd32(): adds the negated operand. */
+ATOMIC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+ int32_t *target = (int32_t *)p;
+ return (OSAtomicAdd32(-((int32_t)x), target));
+}
+#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+/* 32-bit add in x86 inline assembly.
+ * "lock xadd" stores the sum in *p and leaves the value *p held *before*
+ * the addition in the register operand. The addend is therefore kept in x
+ * and applied to that previous value, so the function returns the updated
+ * value like the other implementations do. */
+ATOMIC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+ uint32_t t = x;
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+ return (t + x);
+}
+
+/* 32-bit subtract in x86 inline assembly: adds the two's complement of x
+ * and returns the updated value. */
+ATOMIC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+ uint32_t t;
+ /* Unsigned negation wraps by definition; no signed overflow involved. */
+ x = (uint32_t)0 - x;
+ t = x;
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+ return (t + x);
+}
+#elif (defined(JEMALLOC_ATOMIC9))
+/* 32-bit add on top of atomic_fetchadd_32(), used when JEMALLOC_ATOMIC9 is
+ * defined. The fetch-add result plus x is returned. */
+ATOMIC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+ const uint32_t fetched = atomic_fetchadd_32(p, x);
+ return (fetched + x);
+}
+
+/* 32-bit subtract on top of atomic_fetchadd_32(): adds the negated operand
+ * and returns the fetch-add result minus x. */
+ATOMIC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+ const uint32_t negated = (uint32_t)(-(int32_t)x);
+ const uint32_t fetched = atomic_fetchadd_32(p, negated);
+ return (fetched - x);
+}
+#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
+/* 32-bit add on top of the __sync builtin, used when
+ * JE_FORCE_SYNC_COMPARE_AND_SWAP_4 is defined.
+ * Atomically adds x to *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+ const uint32_t updated = __sync_add_and_fetch(p, x);
+ return updated;
+}
+
+/* 32-bit subtract on top of the __sync builtin.
+ * Atomically subtracts x from *p and returns the value *p holds afterwards. */
+ATOMIC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+ const uint32_t updated = __sync_sub_and_fetch(p, x);
+ return updated;
+}
+#else
+# error "Missing implementation for 32-bit atomic operations"
+#endif
+
+/******************************************************************************/
+/* size_t operations. */
+/* size_t add: forwards to the fixed-width primitive whose width is given
+ * by LG_SIZEOF_PTR and converts the result back to size_t. */
+ATOMIC_INLINE size_t
+atomic_add_z(size_t *p, size_t x)
+{
+#if (LG_SIZEOF_PTR == 3)
+ uint64_t *target = (uint64_t *)p;
+ return ((size_t)atomic_add_uint64(target, (uint64_t)x));
+#elif (LG_SIZEOF_PTR == 2)
+ uint32_t *target = (uint32_t *)p;
+ return ((size_t)atomic_add_uint32(target, (uint32_t)x));
+#endif
+}
+
+/* size_t subtract: implemented as an atomic add of the negated operand on
+ * the fixed-width primitive selected by LG_SIZEOF_PTR. */
+ATOMIC_INLINE size_t
+atomic_sub_z(size_t *p, size_t x)
+{
+#if (LG_SIZEOF_PTR == 3)
+ const uint64_t negated = (uint64_t)-((int64_t)x);
+ return ((size_t)atomic_add_uint64((uint64_t *)p, negated));
+#elif (LG_SIZEOF_PTR == 2)
+ const uint32_t negated = (uint32_t)-((int32_t)x);
+ return ((size_t)atomic_add_uint32((uint32_t *)p, negated));
+#endif
+}
+
+/******************************************************************************/
+/* unsigned operations. */
+/* unsigned add: forwards to the fixed-width primitive whose width is given
+ * by LG_SIZEOF_INT and converts the result back to unsigned.
+ * NOTE(review): the 64-bit branch is only valid when unsigned really is
+ * 8 bytes wide - confirm LG_SIZEOF_INT matches sizeof(unsigned) on the
+ * target. */
+ATOMIC_INLINE unsigned
+atomic_add_u(unsigned *p, unsigned x)
+{
+#if (LG_SIZEOF_INT == 3)
+ uint64_t *target = (uint64_t *)p;
+ return ((unsigned)atomic_add_uint64(target, (uint64_t)x));
+#elif (LG_SIZEOF_INT == 2)
+ uint32_t *target = (uint32_t *)p;
+ return ((unsigned)atomic_add_uint32(target, (uint32_t)x));
+#endif
+}
+
+/* unsigned subtract: implemented as an atomic add of the negated operand
+ * on the fixed-width primitive selected by LG_SIZEOF_INT. */
+ATOMIC_INLINE unsigned
+atomic_sub_u(unsigned *p, unsigned x)
+{
+#if (LG_SIZEOF_INT == 3)
+ const uint64_t negated = (uint64_t)-((int64_t)x);
+ return ((unsigned)atomic_add_uint64((uint64_t *)p, negated));
+#elif (LG_SIZEOF_INT == 2)
+ const uint32_t negated = (uint32_t)-((int32_t)x);
+ return ((unsigned)atomic_add_uint32((uint32_t *)p, negated));
+#endif
+}
+
+#endif /* ATOMIC_OPS_H__ */
Modified: branches/soc-2013-depsgraph_mt/source/blender/blenkernel/CMakeLists.txt
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/blenkernel/CMakeLists.txt 2013-08-12 13:52:13 UTC (rev 59085)
+++ branches/soc-2013-depsgraph_mt/source/blender/blenkernel/CMakeLists.txt 2013-08-12 14:37:15 UTC (rev 59086)
@@ -45,6 +45,7 @@
../../../intern/raskter
../../../intern/smoke/extern
../../../extern/libmv
+ ../../../intern/atomic
# XXX - BAD LEVEL CALL WM_api.h
../windowmanager
Modified: branches/soc-2013-depsgraph_mt/source/blender/blenkernel/SConscript
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/blenkernel/SConscript 2013-08-12 13:52:13 UTC (rev 59085)
+++ branches/soc-2013-depsgraph_mt/source/blender/blenkernel/SConscript 2013-08-12 14:37:15 UTC (rev 59086)
@@ -52,6 +52,7 @@
'#/intern/iksolver/extern',
'#/intern/opennl/extern',
'#/intern/smoke/extern',
+ '#/intern/atomic',
'../avi',
'../blenfont',
'../blenlib',
Modified: branches/soc-2013-depsgraph_mt/source/blender/blenkernel/depsgraph_private.h
===================================================================
--- branches/soc-2013-depsgraph_mt/source/blender/blenkernel/depsgraph_private.h 2013-08-12 13:52:13 UTC (rev 59085)
+++ branches/soc-2013-depsgraph_mt/source/blender/blenkernel/depsgraph_private.h 2013-08-12 14:37:15 UTC (rev 59086)
@@ -94,11 +94,11 @@
struct DagNode *next;
/* Threaded evaluation routines */
- int valency; /* valency of the node is a number of parents which are not updated yet
- * this node has got.
- * Used by threaded update for faster detect whether node could be
- * updated aready.
- */
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list