From: Olivier Dion
Date: Tue, 22 Aug 2023 20:23:17 +0000 (-0400)
Subject: uatomic/x86: Remove redundant memory barriers
X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=ae5712d110b720768dcc30522171506a4b7f28a4;p=userspace-rcu.git

uatomic/x86: Remove redundant memory barriers

When liburcu is configured to _not_ use atomic builtins, the implementation of atomic operations is done using inline assembler for each architecture. Because we control the emitted assembler, we know whether specific operations (e.g. lock; cmpxchg) already have an implicit memory barrier. In those cases, emitting an explicit cmm_smp_mb() before/after the operation is redundant and hurts performance.

Remove those redundant barriers on x86.

Change-Id: Ic1f6cfe9c2afe250946549cf6187f8fa88f5b009
Signed-off-by: Olivier Dion
Signed-off-by: Mathieu Desnoyers
---
diff --git a/include/urcu/uatomic/generic.h b/include/urcu/uatomic/generic.h index 8f8c437..ed655bb 100644 --- a/include/urcu/uatomic/generic.h +++ b/include/urcu/uatomic/generic.h @@ -15,7 +15,6 @@ */ #include <stdint.h> -#include <stdlib.h> #include <urcu/compiler.h> #include <urcu/system.h> @@ -27,125 +26,61 @@ extern "C" { #define uatomic_set(addr, v) ((void) CMM_STORE_SHARED(*(addr), (v))) #endif -#define uatomic_load_store_return_op(op, addr, v, mo) \ - __extension__ \ - ({ \ - \ - switch (mo) { \ - case CMM_ACQUIRE: \ - case CMM_CONSUME: \ - case CMM_RELAXED: \ - break; \ - case CMM_RELEASE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ - \ - __typeof__((*addr)) _value = op(addr, v); \ - \ - switch (mo) { \ - case CMM_CONSUME: \ - cmm_smp_read_barrier_depends(); \ - break; \ - case CMM_ACQUIRE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - case CMM_RELAXED: \ - case CMM_RELEASE: \ - break; \ - default: \ - abort(); \ - } \ - _value; \ +/* + * Can be defined for the architecture. + * + * What needs to be emitted _before_ the `operation' with memory ordering `mo'. + */ +#ifndef _cmm_compat_c11_smp_mb__before_mo +# define _cmm_compat_c11_smp_mb__before_mo(operation, mo) cmm_smp_mb() +#endif + +/* + * Can be defined for the architecture. + * + * What needs to be emitted _after_ the `operation' with memory ordering `mo'.
+ */ +#ifndef _cmm_compat_c11_smp_mb__after_mo +# define _cmm_compat_c11_smp_mb__after_mo(operation, mo) cmm_smp_mb() +#endif + +#define uatomic_load_store_return_op(op, addr, v, mo) \ + __extension__ \ + ({ \ + _cmm_compat_c11_smp_mb__before_mo(op, mo); \ + __typeof__((*addr)) _value = op(addr, v); \ + _cmm_compat_c11_smp_mb__after_mo(op, mo); \ + \ + _value; \ }) -#define uatomic_load_store_op(op, addr, v, mo) \ - do { \ - switch (mo) { \ - case CMM_ACQUIRE: \ - case CMM_CONSUME: \ - case CMM_RELAXED: \ - break; \ - case CMM_RELEASE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ - \ - op(addr, v); \ - \ - switch (mo) { \ - case CMM_CONSUME: \ - cmm_smp_read_barrier_depends(); \ - break; \ - case CMM_ACQUIRE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - case CMM_RELAXED: \ - case CMM_RELEASE: \ - break; \ - default: \ - abort(); \ - } \ +#define uatomic_load_store_op(op, addr, v, mo) \ + do { \ + _cmm_compat_c11_smp_mb__before_mo(op, mo); \ + op(addr, v); \ + _cmm_compat_c11_smp_mb__after_mo(op, mo); \ } while (0) -#define uatomic_store(addr, v, mo) \ - do { \ - switch (mo) { \ - case CMM_RELAXED: \ - break; \ - case CMM_RELEASE: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ - \ - uatomic_set(addr, v); \ - \ - switch (mo) { \ - case CMM_RELAXED: \ - case CMM_RELEASE: \ - break; \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ +#define uatomic_store(addr, v, mo) \ + do { \ + _cmm_compat_c11_smp_mb__before_mo(uatomic_set, mo); \ + uatomic_set(addr, v); \ + _cmm_compat_c11_smp_mb__after_mo(uatomic_set, mo); \ } while (0) -#define uatomic_and_mo(addr, v, mo) \ +#define uatomic_and_mo(addr, v, mo) \ uatomic_load_store_op(uatomic_and, addr, v, mo) -#define uatomic_or_mo(addr, v, mo) \ +#define uatomic_or_mo(addr, v, mo) \ uatomic_load_store_op(uatomic_or, addr, v, mo) -#define uatomic_add_mo(addr, v, mo) \ +#define uatomic_add_mo(addr, v, mo) \ uatomic_load_store_op(uatomic_add, addr, v, mo) -#define uatomic_sub_mo(addr, v, mo) \ +#define uatomic_sub_mo(addr, v, mo) \ uatomic_load_store_op(uatomic_sub, addr, v, mo) -#define uatomic_inc_mo(addr, mo) \ +#define uatomic_inc_mo(addr, mo) \ uatomic_load_store_op(uatomic_add, addr, 1, mo) #define uatomic_dec_mo(addr, mo) \ @@ -157,58 +92,14 @@ extern "C" { #define uatomic_cmpxchg_mo(addr, old, new, mos, mof) \ __extension__ \ ({ \ - switch (mos) { \ - case CMM_ACQUIRE: \ - case CMM_CONSUME: \ - case CMM_RELAXED: \ - break; \ - case CMM_RELEASE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ - \ + _cmm_compat_c11_smp_mb__before_mo(uatomic_cmpxchg, mos); \ __typeof__(*(addr)) _value = uatomic_cmpxchg(addr, old, \ new); \ \ if (_value == (old)) { \ - switch (mos) { \ - case CMM_CONSUME: \ - cmm_smp_read_barrier_depends(); \ - break; \ - case CMM_ACQUIRE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - case CMM_RELAXED: \ - case CMM_RELEASE: \ - break; \ - default: \ - abort(); \ - } \ + _cmm_compat_c11_smp_mb__after_mo(uatomic_cmpxchg, mos); \ } else { \ - switch (mof) { \ - case CMM_CONSUME: \ - cmm_smp_read_barrier_depends(); \ - break; \ - case CMM_ACQUIRE: \ - case CMM_ACQ_REL: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - 
cmm_smp_mb(); \ - break; \ - case CMM_RELAXED: \ - case CMM_RELEASE: \ - break; \ - default: \ - abort(); \ - } \ + _cmm_compat_c11_smp_mb__after_mo(uatomic_cmpxchg, mof); \ } \ _value; \ }) @@ -222,7 +113,6 @@ extern "C" { #define uatomic_sub_return_mo(addr, v, mo) \ uatomic_load_store_return_op(uatomic_sub_return, addr, v) - #ifndef uatomic_read #define uatomic_read(addr) CMM_LOAD_SHARED(*(addr)) #endif @@ -230,35 +120,9 @@ extern "C" { #define uatomic_load(addr, mo) \ __extension__ \ ({ \ - switch (mo) { \ - case CMM_ACQUIRE: \ - case CMM_CONSUME: \ - case CMM_RELAXED: \ - break; \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ - \ + _cmm_compat_c11_smp_mb__before_mo(uatomic_read, mo); \ __typeof__(*(addr)) _rcu_value = uatomic_read(addr); \ - \ - switch (mo) { \ - case CMM_RELAXED: \ - break; \ - case CMM_CONSUME: \ - cmm_smp_read_barrier_depends(); \ - break; \ - case CMM_ACQUIRE: \ - case CMM_SEQ_CST: \ - case CMM_SEQ_CST_FENCE: \ - cmm_smp_mb(); \ - break; \ - default: \ - abort(); \ - } \ + _cmm_compat_c11_smp_mb__after_mo(uatomic_read, mo); \ \ _rcu_value; \ }) diff --git a/include/urcu/uatomic/x86.h b/include/urcu/uatomic/x86.h index b5725e0..616eee9 100644 --- a/include/urcu/uatomic/x86.h +++ b/include/urcu/uatomic/x86.h @@ -8,6 +8,8 @@ #ifndef _URCU_ARCH_UATOMIC_X86_H #define _URCU_ARCH_UATOMIC_X86_H +#include <stdlib.h> /* For abort(3). */ + /* * Code inspired from libuatomic_ops-1.2, inherited in part from the * Boehm-Demers-Weiser conservative garbage collector. */ @@ -630,6 +632,474 @@ extern unsigned long _compat_uatomic_add_return(void *addr, #define cmm_smp_mb__before_uatomic_dec() cmm_barrier() #define cmm_smp_mb__after_uatomic_dec() cmm_barrier() +static inline void _cmm_compat_c11_smp_mb__before_uatomic_read_mo(enum cmm_memorder mo) +{ + /* + * An SMP barrier is not necessary for CMM_SEQ_CST because only a + * previous store can be reordered with the load. However, emitting the + * memory barrier after the store is sufficient to prevent reordering + * between the two. This follows the toolchains' decision of emitting the + * memory fence on the stores instead of the loads. + * + * A compiler barrier is necessary because the underlying operation does + * not clobber the registers. + */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + cmm_barrier(); + break; + case CMM_ACQ_REL: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + default: + abort(); + break; + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_read_mo(enum cmm_memorder mo) +{ + /* + * An SMP barrier is not necessary for CMM_SEQ_CST because following + * loads and stores cannot be reordered with the load. + * + * An SMP barrier is, however, necessary for CMM_SEQ_CST_FENCE to respect + * the memory model, since the underlying operation does not have a lock + * prefix. + * + * A compiler barrier is necessary because the underlying operation does + * not clobber the registers.
+ */ + switch (mo) { + case CMM_SEQ_CST_FENCE: + cmm_smp_mb(); + break; + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_SEQ_CST: + cmm_barrier(); + break; + case CMM_ACQ_REL: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + default: + abort(); + break; + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_set_mo(enum cmm_memorder mo) +{ + /* + * An SMP barrier is not necessary for CMM_SEQ_CST because the store can + * only be reordered with later loads. + * + * A compiler barrier is necessary because the underlying operation does + * not clobber the registers. + */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + cmm_barrier(); + break; + case CMM_ACQ_REL: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + default: + abort(); + break; + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_set_mo(enum cmm_memorder mo) +{ + /* + * An SMP barrier is necessary for CMM_SEQ_CST because the store can be + * reordered with later loads. Since no memory barrier is being emitted + * before loads, one has to be emitted after the store. This follows + * the toolchains' decision of emitting the memory fence on the stores instead + * of the loads. + * + * An SMP barrier is necessary for CMM_SEQ_CST_FENCE to respect the + * memory model, since the underlying store does not have a lock prefix. + * + * A compiler barrier is necessary because the underlying operation does + * not clobber the registers. + */ + switch (mo) { + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + cmm_smp_mb(); + break; + case CMM_RELAXED: /* Fall-through */ + case CMM_RELEASE: + cmm_barrier(); + break; + case CMM_ACQ_REL: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + default: + abort(); + break; + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_xchg_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_xchg has implicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_xchg_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_xchg has implicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_cmpxchg_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_cmpxchg has implicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_cmpxchg_mo(enum cmm_memorder mo) +{ + /* NOP.
uatomic_cmpxchg has implicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_and_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_and has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_and_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_and has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_or_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_or has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_or_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_or has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_add_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_add has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_add_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_add has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_sub_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_sub has explicit lock prefix. 
*/ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_sub_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_sub has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_inc_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_inc has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_inc_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_inc has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_dec_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_dec has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_dec_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_dec has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_add_return_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_add_return has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_add_return_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_add_return has explicit lock prefix. 
*/ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__before_uatomic_sub_return_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_sub_return has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +static inline void _cmm_compat_c11_smp_mb__after_uatomic_sub_return_mo(enum cmm_memorder mo) +{ + /* NOP. uatomic_sub_return has explicit lock prefix. */ + switch (mo) { + case CMM_RELAXED: /* Fall-through */ + case CMM_ACQUIRE: /* Fall-through */ + case CMM_CONSUME: /* Fall-through */ + case CMM_RELEASE: /* Fall-through */ + case CMM_ACQ_REL: /* Fall-through */ + case CMM_SEQ_CST: /* Fall-through */ + case CMM_SEQ_CST_FENCE: + break; + default: + abort(); + } +} + +#define _cmm_compat_c11_smp_mb__before_mo(operation, mo) \ + do { \ + _cmm_compat_c11_smp_mb__before_ ## operation ## _mo (mo); \ + } while (0) + +#define _cmm_compat_c11_smp_mb__after_mo(operation, mo) \ + do { \ + _cmm_compat_c11_smp_mb__after_ ## operation ## _mo (mo); \ + } while (0) + + #ifdef __cplusplus } #endif
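
For illustration only, here is a minimal caller-side sketch of what the x86 hooks above mean for code built without atomic builtins. It is not part of the patch; the `flag' variable and the publish()/try_claim() helpers are hypothetical. A plain store still gets an explicit fence after it for CMM_SEQ_CST, whereas a lock-prefixed cmpxchg gets no extra fence at all.

/* Hypothetical usage example, not from the patch. */
#include <urcu/uatomic.h>	/* uatomic_store(), uatomic_cmpxchg_mo(), CMM_* orderings */

static unsigned long flag;

static void publish(void)
{
	/*
	 * Plain store: no lock prefix, so for CMM_SEQ_CST the
	 * _cmm_compat_c11_smp_mb__after_uatomic_set_mo() hook above
	 * still emits cmm_smp_mb() after the store.
	 */
	uatomic_store(&flag, 1, CMM_SEQ_CST);
}

static int try_claim(void)
{
	/*
	 * lock; cmpxchg is already a full barrier, so the cmpxchg hooks
	 * above emit no fence for any ordering; the explicit cmm_smp_mb()
	 * calls previously emitted by the generic code are gone.
	 */
	return uatomic_cmpxchg_mo(&flag, 0, 1, CMM_SEQ_CST, CMM_RELAXED) == 0;
}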