From: Mathieu Desnoyers
Date: Tue, 12 May 2009 19:28:55 +0000 (-0400)
Subject: Implementation of xchg primitives derived from MIT license
X-Git-Tag: v0.1~236
X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=0114ba7f23f86623c237baeb28ec8e4b39b9bb84;p=userspace-rcu.git

Implementation of xchg primitives derived from MIT license

See LICENSE for details.

Signed-off-by: Mathieu Desnoyers
---

diff --git a/LICENSE b/LICENSE
index 6a52b4b..4aba776 100644
--- a/LICENSE
+++ b/LICENSE
@@ -2,6 +2,9 @@ Userspace RCU library licensing
 Mathieu Desnoyers
 May 10, 2009
 
+
+* LGPLv2.1
+
 The library part is distributed under LGPLv2.1 or later.
 See lgpl-2.1.txt for details. This applies to :
 
@@ -20,6 +23,23 @@ Dynamic-only linking with the LGPL library is used if
 _LGPL_SOURCE is not defined. It permits relinking with newer versions of the
 library, which is required by the LGPL license.
 
+
+* MIT-style license :
+
+The xchg() primitive has been rewritten from scratch, starting from
+atomic_ops 1.2, which has an MIT-style license that is intended to allow use
+in both free and proprietary software:
+	http://www.hpl.hp.com/research/linux/atomic_ops/LICENSING.txt
+	http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/
+
+This license applies to :
+
+arch_atomic_x86.h
+arch_atomic_ppc.h
+
+
+* GPLv2
+
 Library test code is distributed under the GPLv2 license.
 See gpl-2.0.txt for details. This applies to :
 
@@ -31,3 +51,10 @@ test_urcu.c
 test_urcu_yield.c
 test_rwlock_timing.c
 urcu-asm.c
+
+
+Various details :
+
+ACCESS_ONCE(), likely(), unlikely() and barrier() are considered trivial
+enough that copyright does not apply to them. I (Mathieu Desnoyers) re-typed
+them from scratch in a mail client just to prove it.
diff --git a/Makefile b/Makefile
index 20024c6..079a9e3 100644
--- a/Makefile
+++ b/Makefile
@@ -23,10 +23,12 @@ arch-api: api.h arch.h
 pthreads-x86: clean
 	cp api_x86.h api.h
 	cp arch_x86.h arch.h
+	cp arch_atomic_x86.h arch_atomic.h
 
 pthreads-ppc: clean
 	cp api_ppc.h api.h
 	cp arch_ppc.h arch.h
+	cp arch_atomic_ppc.h arch_atomic.h
 
 test_urcu: urcu.o test_urcu.c urcu.h
 	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
@@ -68,9 +70,9 @@ urcutorture-yield: urcutorture.c urcu-yield.o urcu.h rcutorture.h
 
 install: liburcu.so
 	cp -f liburcu.so /usr/lib/
-	cp -f arch.h compiler.h urcu.h urcu-static.h /usr/include/
+	cp -f arch.h arch_atomic.h compiler.h urcu.h urcu-static.h /usr/include/
 
 clean:
 	rm -f *.o test_urcu test_urcu_timing test_rwlock_timing urcu-asm.S \
 	test_urcu_yield urcutorture urcutorture-yield liburcu.so \
-	test_urcu_dynamic_link api.h arch.h
+	test_urcu_dynamic_link api.h arch.h arch_atomic.h
diff --git a/arch_atomic_ppc.h b/arch_atomic_ppc.h
new file mode 100644
index 0000000..13d56b4
--- /dev/null
+++ b/arch_atomic_ppc.h
@@ -0,0 +1,99 @@
+#ifndef _ARCH_ATOMIC_PPC_H
+#define _ARCH_ATOMIC_PPC_H
+
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2009 Mathieu Desnoyers
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ * Code inspired from libatomic_ops-1.2, inherited in part from the
+ * Boehm-Demers-Weiser conservative garbage collector.
+ */
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG	(__SIZEOF_LONG__ * 8)
+#endif
+
+#define ILLEGAL_INSTR	".long 0xd00d00"
+
+#ifndef _INCLUDE_API_H
+
+/*
+ * Use an isync as the second barrier of the exchange to provide acquire
+ * semantics. According to atomic_ops/sysdeps/gcc/powerpc.h, the documentation
+ * is "fairly explicit that this also has acquire semantics."
+ * Derived from AO_compare_and_swap(), but with the comparison removed.
+ */
+
+static __attribute__((always_inline))
+unsigned int atomic_exchange_32(volatile unsigned int *addr, unsigned int val)
+{
+	unsigned int result;
+
+	__asm__ __volatile__(
+		"lwsync\n"
+	"1:\t"	"lwarx %0,0,%1\n"	/* load and reserve */
+		"stwcx. %2,0,%1\n"	/* store conditional */
+		"bne- 1b\n"		/* retry if lost reservation */
+		"isync\n"
+			: "=&r"(result)
+			: "r"(addr), "r"(val)
+			: "memory", "cc");
+
+	return result;
+}
+
+#if (BITS_PER_LONG == 64)
+
+static __attribute__((always_inline))
+unsigned long atomic_exchange_64(volatile unsigned long *addr,
+				 unsigned long val)
+{
+	unsigned long result;
+
+	__asm__ __volatile__(
+		"lwsync\n"
+	"1:\t"	"ldarx %0,0,%1\n"	/* load and reserve */
+		"stdcx. %2,0,%1\n"	/* store conditional */
+		"bne- 1b\n"		/* retry if lost reservation */
+		"isync\n"
+			: "=&r"(result)
+			: "r"(addr), "r"(val)
+			: "memory", "cc");
+
+	return result;
+}
+
+#endif
+
+static __attribute__((always_inline))
+unsigned long _atomic_exchange(volatile void *addr, unsigned long val, int len)
+{
+	switch (len) {
+	case 4:	return atomic_exchange_32(addr, val);
+#if (BITS_PER_LONG == 64)
+	case 8:	return atomic_exchange_64(addr, val);
+#endif
+	}
+	/*
+	 * Generate an illegal instruction. Cannot catch this with linker
+	 * tricks when optimizations are disabled.
+	 */
+	__asm__ __volatile__(ILLEGAL_INSTR);
+	return 0;
+}
+
+#define xchg(addr, v)							\
+	((__typeof__(*(addr))) _atomic_exchange((addr), (unsigned long)(v), \
+						sizeof(*(addr))))
+
+#endif /* #ifndef _INCLUDE_API_H */
+
+#endif /* _ARCH_ATOMIC_PPC_H */
diff --git a/arch_atomic_x86.h b/arch_atomic_x86.h
new file mode 100644
index 0000000..e9a0b3e
--- /dev/null
+++ b/arch_atomic_x86.h
@@ -0,0 +1,92 @@
+#ifndef _ARCH_ATOMIC_X86_H
+#define _ARCH_ATOMIC_X86_H
+
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2009 Mathieu Desnoyers
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ * Code inspired from libatomic_ops-1.2, inherited in part from the
+ * Boehm-Demers-Weiser conservative garbage collector.
+ */ + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#endif + +#ifndef _INCLUDE_API_H + +/* + * Using a isync as second barrier for exchange to provide acquire semantic. + * According to atomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly + * explicit that this also has acquire semantics." + * Derived from AO_compare_and_swap() and AO_test_and_set_full(). + */ + +static __attribute__((always_inline)) +unsigned int atomic_exchange_32(volatile unsigned int *addr, unsigned int val) +{ + unsigned int result; + + __asm__ __volatile__( + /* Note: the "xchg" instruction does not need a "lock" prefix */ + "xchgl %0, %1" + : "=&r"(result), "=m"(*addr) + : "0" (val), "m"(*addr) + : "memory"); + + return result; +} + +#if (BITS_PER_LONG == 64) + +static __attribute__((always_inline)) +unsigned long atomic_exchange_64(volatile unsigned long *addr, + unsigned long val) +{ + unsigned long result; + + __asm__ __volatile__( + /* Note: the "xchg" instruction does not need a "lock" prefix */ + "xchgq %0, %1" + : "=&r"(result), "=m"(*addr) + : "0" (val), "m"(*addr) + : "memory"); + + return result; +} + +#endif + +static __attribute__((always_inline)) +unsigned long _atomic_exchange(volatile void *addr, unsigned long val, int len) +{ + switch (len) { + case 4: return atomic_exchange_32(addr, val); +#if (BITS_PER_LONG == 64) + case 8: return atomic_exchange_64(addr, val); +#endif + } + /* generate an illegal instruction. Cannot catch this with linker tricks + * when optimizations are disabled. */ + __asm__ __volatile__("ud2"); + return 0; +} + +#define xchg(addr, v) \ + ((__typeof__(*(addr))) _atomic_exchange((addr), (unsigned long)(v), \ + sizeof(*(addr)))) + +#endif /* #ifndef _INCLUDE_API_H */ + +#endif /* ARCH_ATOMIC_X86_H */ diff --git a/arch_ppc.h b/arch_ppc.h index 794c9fc..c68790f 100644 --- a/arch_ppc.h +++ b/arch_ppc.h @@ -23,6 +23,7 @@ */ #include +#include #define CONFIG_HAVE_FENCE 1 #define CONFIG_HAVE_MEM_COHERENCY @@ -77,84 +78,6 @@ static inline void cpu_relax(void) barrier(); } -#define PPC405_ERR77(ra,rb) -#define LWSYNC_ON_SMP "\n\tlwsync\n" -#define ISYNC_ON_SMP "\n\tisync\n" - -struct __xchg_dummy { - unsigned long a[100]; -}; -#define __xg(x) ((struct __xchg_dummy *)(x)) - -#ifndef _INCLUDE_API_H - -/* - * Exchange the 32-bits value pointed to by p, returns the old value. - * Might not work with PPC405 (see err 77). - */ -static __always_inline -unsigned int __xchg_u32(volatile void *p, unsigned int val) -{ - unsigned int prev; - - __asm__ __volatile__(LWSYNC_ON_SMP - "1:\t" "lwarx %0,0,%2\n" - "stwcx. %3,0,%2\n" - "bne- 1b" - ISYNC_ON_SMP - : "=&r" (prev), "+m" (*(volatile unsigned int *)p) - : "r" (p), "r" (val) - : "cc", "memory"); - return prev; -} - -#if (BITS_PER_LONG == 64) -/* - * Exchange the 64-bits value pointed to by p, returns the old value. - * Might not work with PPC405 (see err 77). - */ -static __always_inline -unsigned long __xchg_u64(volatile void *p, unsigned long val) -{ - unsigned long prev; - - __asm__ __volatile__(LWSYNC_ON_SMP - "1:\t" "ldarx %0,0,%2\n" - "stdcx. 
diff --git a/arch_ppc.h b/arch_ppc.h
index 794c9fc..c68790f 100644
--- a/arch_ppc.h
+++ b/arch_ppc.h
@@ -23,6 +23,7 @@
  */
 
 #include <compiler.h>
+#include <arch_atomic.h>
 
 #define CONFIG_HAVE_FENCE 1
 #define CONFIG_HAVE_MEM_COHERENCY
@@ -77,84 +78,6 @@ static inline void cpu_relax(void)
 	barrier();
 }
 
-#define PPC405_ERR77(ra,rb)
-#define LWSYNC_ON_SMP	"\n\tlwsync\n"
-#define ISYNC_ON_SMP	"\n\tisync\n"
-
-struct __xchg_dummy {
-	unsigned long a[100];
-};
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
-#ifndef _INCLUDE_API_H
-
-/*
- * Exchange the 32-bits value pointed to by p, returns the old value.
- * Might not work with PPC405 (see err 77).
- */
-static __always_inline
-unsigned int __xchg_u32(volatile void *p, unsigned int val)
-{
-	unsigned int prev;
-
-	__asm__ __volatile__(LWSYNC_ON_SMP
-	"1:\t"	"lwarx %0,0,%2\n"
-		"stwcx. %3,0,%2\n"
-		"bne- 1b"
-		ISYNC_ON_SMP
-		: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
-		: "r" (p), "r" (val)
-		: "cc", "memory");
-	return prev;
-}
-
-#if (BITS_PER_LONG == 64)
-/*
- * Exchange the 64-bits value pointed to by p, returns the old value.
- * Might not work with PPC405 (see err 77).
- */
-static __always_inline
-unsigned long __xchg_u64(volatile void *p, unsigned long val)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__(LWSYNC_ON_SMP
-	"1:\t"	"ldarx %0,0,%2\n"
-		"stdcx. %3,0,%2\n"
-		"bne- 1b"
-		ISYNC_ON_SMP
-		: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
-		: "r" (p), "r" (val)
-		: "cc", "memory");
-	return prev;
-}
-#endif
-
-static __always_inline
-unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
-{
-	switch (size) {
-	case 4:
-		return __xchg_u32(ptr, x);
-#if (BITS_PER_LONG == 64)
-	case 8:
-		return __xchg_u64(ptr, x);
-#endif
-	}
-	return x;
-}
-
-/*
- * note : xchg should only be used with pointers to 32 or 64-bits elements.
- * No build-time check is done on the element size because depending on
- * non-referenced unexisting symbol at link time to provide an error message
- * only work when compiling with optimizations.
- */
-#define xchg(ptr, v)	\
-	((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(v), sizeof(*(ptr))))
-
-#endif /* #ifndef _INCLUDE_API_H */
-
 #define mftbl()						\
 	({						\
 		unsigned long rval;			\
diff --git a/arch_x86.h b/arch_x86.h
index e899684..cc3ab01 100644
--- a/arch_x86.h
+++ b/arch_x86.h
@@ -23,6 +23,7 @@
  */
 
 #include <compiler.h>
+#include <arch_atomic.h>
 
 /* Assume P4 or newer */
 #define CONFIG_HAVE_FENCE 1
@@ -94,56 +95,6 @@ static inline void cpu_relax(void)
 	rep_nop();
 }
 
-#define xchg(ptr, v)	\
-	((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(v), sizeof(*(ptr))))
-
-struct __xchg_ptr_as_array {
-	unsigned long a[100];
-};
-
-#define __xchg_ptr_as_array(x) ((struct __xchg_ptr_as_array *)(x))
-
-/*
- * xchg always implies a "lock" prefix, even on UP. See Intel documentation.
- * volatile attribute is neccessary due to xchg side effect.
- * *ptr is an output argument.
- * x is considered local, ptr is considered remote.
- */
-static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
-				   int size)
-{
-	switch (size) {
-	case 1:
-		asm volatile("xchgb %b0,%1"
-			     : "=q" (x)
-			     : "m" (*__xchg_ptr_as_array(ptr)), "0" (x)
-			     : "memory");
-		break;
-	case 2:
-		asm volatile("xchgw %w0,%1"
-			     : "=r" (x)
-			     : "m" (*__xchg_ptr_as_array(ptr)), "0" (x)
-			     : "memory");
-		break;
-	case 4:
-		asm volatile("xchgl %k0,%1"
-			     : "=r" (x)
-			     : "m" (*__xchg_ptr_as_array(ptr)), "0" (x)
-			     : "memory");
-		break;
-#if (BITS_PER_LONG == 64)
-	case 8:
-		asm volatile("xchgq %0,%1"
-			     : "=r" (x)
-			     : "m" (*__xchg_ptr_as_array(ptr)), "0" (x)
-			     : "memory");
-		break;
-#endif
-	}
-	smp_wmc();
-	return x;
-}
-
 #define rdtscll(val)					  \
 	do {						  \
 		unsigned int __a, __d;		 	  \
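
A caveat, not stated in the patch itself: the removed x86 __xchg() handled
1-, 2-, 4- and 8-byte operands, while the new _atomic_exchange() only
dispatches on 4 and 8 bytes. Any other operand size compiles, falls through
the switch, and executes an illegal instruction ("ud2" on x86,
".long 0xd00d00" on PowerPC) at run time. A hypothetical example:

	unsigned short flag = 0;

	/*
	 * Compiles, but sizeof(flag) == 2 matches no case in
	 * _atomic_exchange(), so this traps on ud2 at run time.
	 */
	unsigned short old = xchg(&flag, (unsigned short)1);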