+#ifndef _URCU_ARCH_UATOMIC_X86_H
+#define _URCU_ARCH_UATOMIC_X86_H
+
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2009 Mathieu Desnoyers
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ * Code inspired from libuatomic_ops-1.2, inherited in part from the
+ * Boehm-Demers-Weiser conservative garbage collector.
+ */
+
+#include <urcu/compiler.h>
+#include <urcu/system.h>
+
+#define UATOMIC_HAS_ATOMIC_BYTE
+#define UATOMIC_HAS_ATOMIC_SHORT
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Derived from AO_compare_and_swap() and AO_test_and_set_full().
+ */
+
+struct __uatomic_dummy {
+ unsigned long v[10];
+};
+#define __hp(x) ((struct __uatomic_dummy *)(x))
+
+#define _uatomic_set(addr, v) CMM_STORE_SHARED(*(addr), (v))
+
+/* cmpxchg */
+
+static inline __attribute__((always_inline))
+unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
+ unsigned long _new, int len)
+{
+ switch (len) {
+ case 1:
+ {
+ unsigned char result = old;
+
+ __asm__ __volatile__(
+ "lock; cmpxchgb %2, %1"
+ : "+a"(result), "+m"(*__hp(addr))
+ : "q"((unsigned char)_new)
+ : "memory");
+ return result;
+ }
+ case 2:
+ {
+ unsigned short result = old;
+
+ __asm__ __volatile__(
+ "lock; cmpxchgw %2, %1"
+ : "+a"(result), "+m"(*__hp(addr))
+ : "r"((unsigned short)_new)
+ : "memory");
+ return result;
+ }
+ case 4:
+ {
+ unsigned int result = old;
+
+ __asm__ __volatile__(
+ "lock; cmpxchgl %2, %1"
+ : "+a"(result), "+m"(*__hp(addr))
+ : "r"((unsigned int)_new)
+ : "memory");
+ return result;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ unsigned long result = old;
+
+ __asm__ __volatile__(
+ "lock; cmpxchgq %2, %1"
+ : "+a"(result), "+m"(*__hp(addr))
+ : "r"((unsigned long)_new)
+ : "memory");
+ return result;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return 0;
+}
+
+#define _uatomic_cmpxchg(addr, old, _new) \
+ ((__typeof__(*(addr))) __uatomic_cmpxchg((addr), (unsigned long)(old),\
+ (unsigned long)(_new), \
+ sizeof(*(addr))))
+
+/* xchg */
+
+static inline __attribute__((always_inline))
+unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
+{
+ /* Note: the "xchg" instruction does not need a "lock" prefix. */
+ switch (len) {
+ case 1:
+ {
+ unsigned char result;
+ __asm__ __volatile__(
+ "xchgb %0, %1"
+ : "=q"(result), "+m"(*__hp(addr))
+ : "0" ((unsigned char)val)
+ : "memory");
+ return result;
+ }
+ case 2:
+ {
+ unsigned short result;
+ __asm__ __volatile__(
+ "xchgw %0, %1"
+ : "=r"(result), "+m"(*__hp(addr))
+ : "0" ((unsigned short)val)
+ : "memory");
+ return result;
+ }
+ case 4:
+ {
+ unsigned int result;
+ __asm__ __volatile__(
+ "xchgl %0, %1"
+ : "=r"(result), "+m"(*__hp(addr))
+ : "0" ((unsigned int)val)
+ : "memory");
+ return result;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ unsigned long result;
+ __asm__ __volatile__(
+ "xchgq %0, %1"
+ : "=r"(result), "+m"(*__hp(addr))
+ : "0" ((unsigned long)val)
+ : "memory");
+ return result;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return 0;
+}
+
+#define _uatomic_xchg(addr, v) \
+ ((__typeof__(*(addr))) __uatomic_exchange((addr), (unsigned long)(v), \
+ sizeof(*(addr))))
+
+/* uatomic_add_return */
+
+static inline __attribute__((always_inline))
+unsigned long __uatomic_add_return(void *addr, unsigned long val,
+ int len)
+{
+ switch (len) {
+ case 1:
+ {
+ unsigned char result = val;
+
+ __asm__ __volatile__(
+ "lock; xaddb %1, %0"
+ : "+m"(*__hp(addr)), "+q" (result)
+ :
+ : "memory");
+ return result + (unsigned char)val;
+ }
+ case 2:
+ {
+ unsigned short result = val;
+
+ __asm__ __volatile__(
+ "lock; xaddw %1, %0"
+ : "+m"(*__hp(addr)), "+r" (result)
+ :
+ : "memory");
+ return result + (unsigned short)val;
+ }
+ case 4:
+ {
+ unsigned int result = val;
+
+ __asm__ __volatile__(
+ "lock; xaddl %1, %0"
+ : "+m"(*__hp(addr)), "+r" (result)
+ :
+ : "memory");
+ return result + (unsigned int)val;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ unsigned long result = val;
+
+ __asm__ __volatile__(
+ "lock; xaddq %1, %0"
+ : "+m"(*__hp(addr)), "+r" (result)
+ :
+ : "memory");
+ return result + (unsigned long)val;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return 0;
+}
+
+#define _uatomic_add_return(addr, v) \
+ ((__typeof__(*(addr))) __uatomic_add_return((addr), \
+ (unsigned long)(v), \
+ sizeof(*(addr))))
+
+/* uatomic_and */
+
+static inline __attribute__((always_inline))
+void __uatomic_and(void *addr, unsigned long val, int len)
+{
+ switch (len) {
+ case 1:
+ {
+ __asm__ __volatile__(
+ "lock; andb %1, %0"
+ : "=m"(*__hp(addr))
+ : "iq" ((unsigned char)val)
+ : "memory");
+ return;
+ }
+ case 2:
+ {
+ __asm__ __volatile__(
+ "lock; andw %1, %0"
+ : "=m"(*__hp(addr))
+ : "ir" ((unsigned short)val)
+ : "memory");
+ return;
+ }
+ case 4:
+ {
+ __asm__ __volatile__(
+ "lock; andl %1, %0"
+ : "=m"(*__hp(addr))
+ : "ir" ((unsigned int)val)
+ : "memory");
+ return;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ __asm__ __volatile__(
+ "lock; andq %1, %0"
+ : "=m"(*__hp(addr))
+ : "er" ((unsigned long)val)
+ : "memory");
+ return;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return;
+}
+
+#define _uatomic_and(addr, v) \
+ (__uatomic_and((addr), (unsigned long)(v), sizeof(*(addr))))
+
+/* uatomic_or */
+
+static inline __attribute__((always_inline))
+void __uatomic_or(void *addr, unsigned long val, int len)
+{
+ switch (len) {
+ case 1:
+ {
+ __asm__ __volatile__(
+ "lock; orb %1, %0"
+ : "=m"(*__hp(addr))
+ : "iq" ((unsigned char)val)
+ : "memory");
+ return;
+ }
+ case 2:
+ {
+ __asm__ __volatile__(
+ "lock; orw %1, %0"
+ : "=m"(*__hp(addr))
+ : "ir" ((unsigned short)val)
+ : "memory");
+ return;
+ }
+ case 4:
+ {
+ __asm__ __volatile__(
+ "lock; orl %1, %0"
+ : "=m"(*__hp(addr))
+ : "ir" ((unsigned int)val)
+ : "memory");
+ return;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ __asm__ __volatile__(
+ "lock; orq %1, %0"
+ : "=m"(*__hp(addr))
+ : "er" ((unsigned long)val)
+ : "memory");
+ return;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return;
+}
+
+#define _uatomic_or(addr, v) \
+ (__uatomic_or((addr), (unsigned long)(v), sizeof(*(addr))))
+
+/* uatomic_add */
+
+static inline __attribute__((always_inline))
+void __uatomic_add(void *addr, unsigned long val, int len)
+{
+ switch (len) {
+ case 1:
+ {
+ __asm__ __volatile__(
+ "lock; addb %1, %0"
+ : "=m"(*__hp(addr))
+ : "iq" ((unsigned char)val)
+ : "memory");
+ return;
+ }
+ case 2:
+ {
+ __asm__ __volatile__(
+ "lock; addw %1, %0"
+ : "=m"(*__hp(addr))
+ : "ir" ((unsigned short)val)
+ : "memory");
+ return;
+ }
+ case 4:
+ {
+ __asm__ __volatile__(
+ "lock; addl %1, %0"
+ : "=m"(*__hp(addr))
+ : "ir" ((unsigned int)val)
+ : "memory");
+ return;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ __asm__ __volatile__(
+ "lock; addq %1, %0"
+ : "=m"(*__hp(addr))
+ : "er" ((unsigned long)val)
+ : "memory");
+ return;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return;
+}
+
+#define _uatomic_add(addr, v) \
+ (__uatomic_add((addr), (unsigned long)(v), sizeof(*(addr))))
+
+
+/* uatomic_inc */
+
+static inline __attribute__((always_inline))
+void __uatomic_inc(void *addr, int len)
+{
+ switch (len) {
+ case 1:
+ {
+ __asm__ __volatile__(
+ "lock; incb %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+ case 2:
+ {
+ __asm__ __volatile__(
+ "lock; incw %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+ case 4:
+ {
+ __asm__ __volatile__(
+ "lock; incl %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ __asm__ __volatile__(
+ "lock; incq %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return;
+}
+
+#define _uatomic_inc(addr) (__uatomic_inc((addr), sizeof(*(addr))))
+
+/* uatomic_dec */
+
+static inline __attribute__((always_inline))
+void __uatomic_dec(void *addr, int len)
+{
+ switch (len) {
+ case 1:
+ {
+ __asm__ __volatile__(
+ "lock; decb %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+ case 2:
+ {
+ __asm__ __volatile__(
+ "lock; decw %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+ case 4:
+ {
+ __asm__ __volatile__(
+ "lock; decl %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+#if (CAA_BITS_PER_LONG == 64)
+ case 8:
+ {
+ __asm__ __volatile__(
+ "lock; decq %0"
+ : "=m"(*__hp(addr))
+ :
+ : "memory");
+ return;
+ }
+#endif
+ }
+ /* generate an illegal instruction. Cannot catch this with linker tricks
+ * when optimizations are disabled. */
+ __asm__ __volatile__("ud2");
+ return;
+}
+
+#define _uatomic_dec(addr) (__uatomic_dec((addr), sizeof(*(addr))))
+
+#if ((CAA_BITS_PER_LONG != 64) && defined(CONFIG_RCU_COMPAT_ARCH))
+extern int __rcu_cas_avail;
+extern int __rcu_cas_init(void);
+
+#define UATOMIC_COMPAT(insn) \
+ ((likely(__rcu_cas_avail > 0)) \
+ ? (_uatomic_##insn) \
+ : ((unlikely(__rcu_cas_avail < 0) \
+ ? ((__rcu_cas_init() > 0) \
+ ? (_uatomic_##insn) \
+ : (compat_uatomic_##insn)) \
+ : (compat_uatomic_##insn))))
+
+extern unsigned long _compat_uatomic_set(void *addr,
+ unsigned long _new, int len);
+#define compat_uatomic_set(addr, _new) \
+ ((__typeof__(*(addr))) _compat_uatomic_set((addr), \
+ (unsigned long)(_new), \
+ sizeof(*(addr))))
+
+
+extern unsigned long _compat_uatomic_xchg(void *addr,
+ unsigned long _new, int len);
+#define compat_uatomic_xchg(addr, _new) \
+ ((__typeof__(*(addr))) _compat_uatomic_xchg((addr), \
+ (unsigned long)(_new), \
+ sizeof(*(addr))))
+
+extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
+ unsigned long _new, int len);
+#define compat_uatomic_cmpxchg(addr, old, _new) \
+ ((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr), \
+ (unsigned long)(old), \
+ (unsigned long)(_new), \
+ sizeof(*(addr))))
+
+extern unsigned long _compat_uatomic_and(void *addr,
+ unsigned long _new, int len);
+#define compat_uatomic_and(addr, v) \
+ ((__typeof__(*(addr))) _compat_uatomic_and((addr), \
+ (unsigned long)(v), \
+ sizeof(*(addr))))
+
+extern unsigned long _compat_uatomic_or(void *addr,
+ unsigned long _new, int len);
+#define compat_uatomic_or(addr, v) \
+ ((__typeof__(*(addr))) _compat_uatomic_or((addr), \
+ (unsigned long)(v), \
+ sizeof(*(addr))))
+
+extern unsigned long _compat_uatomic_add_return(void *addr,
+ unsigned long _new, int len);
+#define compat_uatomic_add_return(addr, v) \
+ ((__typeof__(*(addr))) _compat_uatomic_add_return((addr), \
+ (unsigned long)(v), \
+ sizeof(*(addr))))
+
+#define compat_uatomic_add(addr, v) \
+ ((void)compat_uatomic_add_return((addr), (v)))
+#define compat_uatomic_inc(addr) \
+ (compat_uatomic_add((addr), 1))
+#define compat_uatomic_dec(addr) \
+ (compat_uatomic_add((addr), -1))
+
+#else
+#define UATOMIC_COMPAT(insn) (_uatomic_##insn)
+#endif
+
+/* Read is atomic even in compat mode */
+#define uatomic_set(addr, v) \
+ UATOMIC_COMPAT(set(addr, v))
+
+#define uatomic_cmpxchg(addr, old, _new) \
+ UATOMIC_COMPAT(cmpxchg(addr, old, _new))
+#define uatomic_xchg(addr, v) \
+ UATOMIC_COMPAT(xchg(addr, v))
+#define uatomic_and(addr, v) \
+ UATOMIC_COMPAT(and(addr, v))
+#define uatomic_or(addr, v) \
+ UATOMIC_COMPAT(or(addr, v))
+#define uatomic_add_return(addr, v) \
+ UATOMIC_COMPAT(add_return(addr, v))
+
+#define uatomic_add(addr, v) UATOMIC_COMPAT(add(addr, v))
+#define uatomic_inc(addr) UATOMIC_COMPAT(inc(addr))
+#define uatomic_dec(addr) UATOMIC_COMPAT(dec(addr))
+
+#ifdef __cplusplus
+}
+#endif
+
+#include <urcu/uatomic/generic.h>
+
+#endif /* _URCU_ARCH_UATOMIC_X86_H */