Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler- and
architecture-dependent code over all the locations employing atomic ops.

For several of the potential usages it would be problematic to maintain both
an atomics-using implementation and one using spinlocks or similar; in all
likelihood one of the two would not get tested regularly under concurrency.
To avoid that scenario the new API provides an automatic fallback from atomic
operations to spinlocks. All properties of atomic operations are maintained.
This fallback is obviously not as fast as just using atomic ops, but it is
not bad either: for one of the future users the atomics-on-top-of-spinlocks
implementation was actually slightly faster than the old purely
spinlock-using implementation. That is important because it reduces the fear
of regressing older platforms when improving the scalability for new ones.

The API, loosely modeled after the C11 atomics support, currently provides
'atomic flags' and 32 bit unsigned integers. If the platform efficiently
supports atomic 64 bit unsigned integers, those are also provided.

To implement atomics support for a platform/architecture/compiler, only
32 bit compare-and-exchange needs to be implemented. If available and more
efficient, native support for flags, 32 bit atomic addition, and the
corresponding 64 bit operations may also be provided. Additional useful
atomic operations are implemented generically on top of these.

The implementations for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HP-UX acc
* IBM xlc
are included but have never been tested. These will likely require fixes
based on buildfarm and user feedback.

As atomic operations also require barriers for some operations, the existing
barrier support has been moved into the atomics code.

Author: Andres Freund, with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
    20131015123303.GH5300@awork2.anarazel.de,
    20131028205522.GI20248@awork2.anarazel.de
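For illustration, a minimal sketch of how callers use the wrapper API
declared in atomics.h (the counter variable and function here are
hypothetical; the wrappers dispatch to per-platform _impl functions such as
the ones in generic-gcc.h below):

    #include "port/atomics.h"

    static pg_atomic_uint32 counter;    /* would normally live in shared memory */

    static void
    counter_example(void)
    {
        uint32      old;

        /* initialize before any concurrent access */
        pg_atomic_init_u32(&counter, 0);

        /* atomic increment; returns the value before the addition */
        old = pg_atomic_fetch_add_u32(&counter, 1);

        /* compare-and-exchange loop: atomically double the current value */
        old = pg_atomic_read_u32(&counter);
        while (!pg_atomic_compare_exchange_u32(&counter, &old, old * 2))
        {
            /* on failure, old was updated to the current value; just retry */
        }
    }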
/*-------------------------------------------------------------------------
 *
 * generic-gcc.h
 *	  Atomic operations, implemented using gcc (or compatible) intrinsics.
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * Documentation:
 * * Legacy __sync Built-in Functions for Atomic Memory Access
 *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html
 * * Built-in functions for memory model aware atomic operations
 *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html
 *
 * src/include/port/atomics/generic-gcc.h
 *
 *-------------------------------------------------------------------------
 */

/* intentionally no include guards, should only be included by atomics.h */
#ifndef INSIDE_ATOMICS_H
#error "should be included via atomics.h"
#endif

/*
 * icc provides all the same intrinsics but doesn't understand gcc's inline asm
 */
#if defined(__INTEL_COMPILER)
/* NB: Yes, __memory_barrier() is actually just a compiler barrier */
#define pg_compiler_barrier_impl() __memory_barrier()
#else
#define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory")
#endif

/*
 * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier
 * out of this compiler built-in. But we prefer to rely on platform specific
 * definitions where possible, and use this only as a fallback.
 */
#if !defined(pg_memory_barrier_impl)
# if defined(HAVE_GCC__ATOMIC_INT64_CAS)
#  define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST)
# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
#  define pg_memory_barrier_impl() __sync_synchronize()
# endif
#endif /* !defined(pg_memory_barrier_impl) */

#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT64_CAS)
/* acquire semantics include read barrier semantics */
# define pg_read_barrier_impl() __atomic_thread_fence(__ATOMIC_ACQUIRE)
#endif

#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT64_CAS)
/* release semantics include write barrier semantics */
# define pg_write_barrier_impl() __atomic_thread_fence(__ATOMIC_RELEASE)
#endif

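/*
 * A sketch of typical barrier usage, purely for illustration (the
 * pg_read_barrier()/pg_write_barrier() wrappers over these _impl macros are
 * provided by atomics.h):
 *
 *	writer:  data = 42;  pg_write_barrier();  flag = true;
 *	reader:  if (flag) { pg_read_barrier();  Assert(data == 42); }
 *
 * The write barrier keeps the data store from being reordered after the flag
 * store; the read barrier keeps the data load from being reordered before
 * the flag load.
 */
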
#ifdef HAVE_ATOMICS

/* generic gcc based atomic flag implementation */
#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \
	&& (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS))

#define PG_HAVE_ATOMIC_FLAG_SUPPORT
typedef struct pg_atomic_flag
{
	/* some platforms only have an 8 bit wide TAS */
#ifdef HAVE_GCC__SYNC_CHAR_TAS
	volatile char value;
#else
	/* but an int works on more platforms */
	volatile int value;
#endif
} pg_atomic_flag;

#endif /* !ATOMIC_FLAG_SUPPORT && SYNC_INT32_TAS */

/* generic gcc based atomic uint32 implementation */
#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) \
	&& (defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS))

#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;

#endif /* defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS) */

/* generic gcc based atomic uint64 implementation */
#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) \
	&& !defined(PG_DISABLE_64_BIT_ATOMICS) \
	&& (defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS))

#define PG_HAVE_ATOMIC_U64_SUPPORT

typedef struct pg_atomic_uint64
{
	volatile uint64 value;
} pg_atomic_uint64;

#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */

/*
 * Implementation follows. Inlined or directly included from atomics.c
 */
#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)

#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && \
	(defined(HAVE_GCC__SYNC_CHAR_TAS) || defined(HAVE_GCC__SYNC_INT32_TAS))
#define PG_HAVE_ATOMIC_TEST_SET_FLAG
static inline bool
pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
{
	/* NB: only an acquire barrier, not a full one */
	/* some platforms only support storing a 1 here */
	return __sync_lock_test_and_set(&ptr->value, 1) == 0;
}
#endif /* !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(HAVE_GCC__SYNC_*_TAS) */

#ifndef PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
static inline bool
pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
{
	return ptr->value == 0;
}
#endif

#ifndef PG_HAVE_ATOMIC_CLEAR_FLAG
#define PG_HAVE_ATOMIC_CLEAR_FLAG
static inline void
pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
{
	/*
	 * XXX: It would be nicer to use __sync_lock_release here, but gcc insists
	 * on making that an atomic op which is far too expensive and a stronger
	 * guarantee than what we actually need.
	 */
	pg_write_barrier_impl();
	ptr->value = 0;
}
#endif

#ifndef PG_HAVE_ATOMIC_INIT_FLAG
#define PG_HAVE_ATOMIC_INIT_FLAG
static inline void
pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
{
	pg_atomic_clear_flag_impl(ptr);
}
#endif

/* prefer __atomic, it has a better API */
#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_CAS)
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	/* FIXME: we can probably use a lower consistency model */
	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
#endif

#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	bool	ret;
	uint32	current;

	/*
	 * __sync_val_compare_and_swap returns the value the variable held before
	 * the operation; writing it back through *expected emulates the __atomic
	 * compare-exchange interface above.
	 */
	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
	ret = current == *expected;
	*expected = current;
	return ret;
}
#endif
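
/*
 * A sketch of how further operations can be derived generically from
 * compare-exchange when no native builtin is available (the actual fallbacks
 * of this kind live in atomics/generic.h); the function name here is
 * hypothetical and shown purely for illustration:
 *
 *	static inline uint32
 *	fetch_add_via_cas(volatile pg_atomic_uint32 *ptr, int32 add_)
 *	{
 *		uint32		old = ptr->value;
 *
 *		while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
 *			;	/* on failure, old holds the current value; retry */
 *		return old;
 *	}
 */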

#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	return __sync_fetch_and_add(&ptr->value, add_);
}
#endif


#if !defined(PG_DISABLE_64_BIT_ATOMICS)

#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_CAS)
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
#endif

#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	bool	ret;
	uint64	current;

	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
	ret = current == *expected;
	*expected = current;
	return ret;
}
#endif

#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	return __sync_fetch_and_add(&ptr->value, add_);
}
#endif

#endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */

#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */

#endif /* defined(HAVE_ATOMICS) */