[Bf-blender-cvs] [fdc2b7bfa4a] master: Intern: Adding atomic_load/store support for different types.
YimingWu
noreply at git.blender.org
Thu May 26 17:07:36 CEST 2022
Commit: fdc2b7bfa4a42951b7fce42544bb1ec16233d681
Author: YimingWu
Date: Thu May 26 23:06:36 2022 +0800
Branches: master
https://developer.blender.org/rBfdc2b7bfa4a42951b7fce42544bb1ec16233d681
Intern: Adding atomic_load/store support for different types.
Mostly using built-in `__atomic` functions, with a special code path
using `MemoryBarrier()`on windows.
Authored By: Sergey Sharybin (sergey)
Reviewed By: Sergey Sharybin (sergey), Ray molenkamp (LazyDodo)
Ref D15020
===================================================================
M intern/atomic/atomic_ops.h
M intern/atomic/intern/atomic_ops_ext.h
M intern/atomic/intern/atomic_ops_msvc.h
M intern/atomic/intern/atomic_ops_unix.h
M intern/atomic/tests/atomic_test.cc
===================================================================
diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h
index 6a4d6d263c0..2bedce1b4f0 100644
--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -64,16 +64,22 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
+ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v);
+ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v);
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new);
+ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v);
+ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v);
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);
+ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v);
+ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v);
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x);
@@ -82,6 +88,8 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new);
+ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v);
+ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v);
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x);
@@ -104,6 +112,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
+ATOMIC_INLINE size_t atomic_load_z(const size_t *v);
+ATOMIC_INLINE void atomic_store_z(size_t *p, size_t v);
/* Uses CAS loop, see warning below. */
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x);
diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h
index aedf0985169..6ecc47f18be 100644
--- a/intern/atomic/intern/atomic_ops_ext.h
+++ b/intern/atomic/intern/atomic_ops_ext.h
@@ -102,6 +102,24 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
#endif
}
+ATOMIC_INLINE size_t atomic_load_z(const size_t *v)
+{
+#if (LG_SIZEOF_PTR == 8)
+ return (size_t)atomic_load_uint64((const uint64_t *)v);
+#elif (LG_SIZEOF_PTR == 4)
+ return (size_t)atomic_load_uint32((const uint32_t *)v);
+#endif
+}
+
+ATOMIC_INLINE void atomic_store_z(size_t *p, size_t v)
+{
+#if (LG_SIZEOF_PTR == 8)
+ atomic_store_uint64((uint64_t *)p, v);
+#elif (LG_SIZEOF_PTR == 4)
+ atomic_store_uint32((uint32_t *)p, v);
+#endif
+}
+
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
{
size_t prev_value;
diff --git a/intern/atomic/intern/atomic_ops_msvc.h b/intern/atomic/intern/atomic_ops_msvc.h
index ea5ae666db9..e65691d3970 100644
--- a/intern/atomic/intern/atomic_ops_msvc.h
+++ b/intern/atomic/intern/atomic_ops_msvc.h
@@ -49,6 +49,16 @@
# pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#endif
+/* TODO(sergey): On x64 platform both read and write of a variable aligned to its type size is
+ * atomic, so in theory it is possible to avoid memory barrier and gain performance. The downside
+ * of that would be that it will impose requirement to value which is being operated on. */
+#define __atomic_impl_load_generic(v) (MemoryBarrier(), *(v))
+#define __atomic_impl_store_generic(p, v) \
+ do { \
+ *(p) = (v); \
+ MemoryBarrier(); \
+ } while (0)
+
/* 64-bit operations. */
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
@@ -66,6 +76,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return InterlockedCompareExchange64((int64_t *)v, _new, old);
}
+ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
+{
+ return __atomic_impl_load_generic(v);
+}
+
+ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
+{
+ __atomic_impl_store_generic(p, v);
+}
+
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
@@ -92,6 +112,16 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
return InterlockedCompareExchange64(v, _new, old);
}
+ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
+{
+ return __atomic_impl_load_generic(v);
+}
+
+ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
+{
+ __atomic_impl_store_generic(p, v);
+}
+
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x);
@@ -120,6 +150,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return InterlockedCompareExchange((long *)v, _new, old);
}
+ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
+{
+ return __atomic_impl_load_generic(v);
+}
+
+ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
+{
+ __atomic_impl_store_generic(p, v);
+}
+
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x);
@@ -151,6 +191,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return InterlockedCompareExchange((long *)v, _new, old);
}
+ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
+{
+ return __atomic_impl_load_generic(v);
+}
+
+ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
+{
+ __atomic_impl_store_generic(p, v);
+}
+
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd((long *)p, x);
@@ -225,6 +275,9 @@ ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
#endif
}
+#undef __atomic_impl_load_generic
+#undef __atomic_impl_store_generic
+
#if defined(__clang__)
# pragma GCC diagnostic pop
#endif
diff --git a/intern/atomic/intern/atomic_ops_unix.h b/intern/atomic/intern/atomic_ops_unix.h
index 2fcfe34d03c..8c703fc4a8d 100644
--- a/intern/atomic/intern/atomic_ops_unix.h
+++ b/intern/atomic/intern/atomic_ops_unix.h
@@ -98,6 +98,22 @@ ATOMIC_INLINE void atomic_spin_unlock(volatile AtomicSpinLock *lock)
/** \} */
+/* -------------------------------------------------------------------- */
+/** \name Common part of x64 implementation
+ * \{ */
+
+/* TODO(sergey): On x64 platform both read and write of a variable aligned to its type size is
+ * atomic, so in theory it is possible to avoid memory barrier and gain performance. The downside
+ * of that would be that it will impose requirement to value which is being operated on. */
+#define __atomic_impl_load_generic(v) (__sync_synchronize(), *(v))
+#define __atomic_impl_store_generic(p, v) \
+ do { \
+ *(p) = (v); \
+ __sync_synchronize(); \
+ } while (0)
+
+/** \} */
+
/* -------------------------------------------------------------------- */
/** \name Common part of locking fallback implementation
* \{ */
@@ -158,6 +174,23 @@ static _ATOMIC_MAYBE_UNUSED AtomicSpinLock _atomic_global_lock = {0};
return original_value; \
}
+#define ATOMIC_LOCKING_LOAD_DEFINE(_type) \
+ ATOMIC_INLINE _type##_t atomic_load_##_type(const _type##_t *v) \
+ { \
+ atomic_spin_lock(&_atomic_global_lock); \
+ const _type##_t value = *v; \
+ atomic_spin_unlock(&_atomic_global_lock); \
+ return value; \
+ }
+
+#define ATOMIC_LOCKING_STORE_DEFINE(_type) \
+ ATOMIC_INLINE void atomic_store_##_type(_type##_t *p, const _type##_t v) \
+ { \
+ atomic_spin_lock(&_atomic_global_lock); \
+ *p = v; \
+ atomic_spin_unlock(&_atomic_global_lock); \
+ }
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -192,6 +225,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
+ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
+{
+ return __atomic_load_n(v, __ATOMIC_SEQ_CST);
+}
+
+ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
+{
+ __atomic_store(p, &v, __ATOMIC_SEQ_CST);
+}
+
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
@@ -218,6 +261,16 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
return __sync_val_compare_and_swap(v, old, _new);
}
+ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
+{
+ return __atomic_load_n(v, __ATOMIC_SEQ_CST);
+}
+
+ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
+{
+ __atomic_store(p, &v, __ATOMIC_SEQ_CST);
+}
+
#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
@@ -256,6 +309,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return ret;
}
+ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
+{
+ return __atomic_impl_load_generic(v);
+}
+
+ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
+{
+ __atomic_impl_store_generic(p, v);
+}
+
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
@@ -292,6 +355,17 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list