aarch64: Implement caa_cpu_relax as yield instruction
[userspace-rcu.git] / include / urcu / uatomic / x86.h
1 #ifndef _URCU_ARCH_UATOMIC_X86_H
2 #define _URCU_ARCH_UATOMIC_X86_H
3
4 /*
5 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
6 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
7 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
8 * Copyright (c) 2009 Mathieu Desnoyers
9 *
10 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
11 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
12 *
13 * Permission is hereby granted to use or copy this program
14 * for any purpose, provided the above notices are retained on all copies.
15 * Permission to modify the code and to distribute modified code is granted,
16 * provided the above notices are retained, and a notice that the code was
17 * modified is included with the above copyright notice.
18 *
19 * Code inspired from libuatomic_ops-1.2, inherited in part from the
20 * Boehm-Demers-Weiser conservative garbage collector.
21 */
22
23 #include <urcu/arch.h>
24 #include <urcu/config.h>
25 #include <urcu/compiler.h>
26 #include <urcu/system.h>
27
/*
 * Feature flags: the helpers in this header implement 1-byte and 2-byte
 * variants of every operation.
 * NOTE(review): presumably tested by the generic uatomic code to enable
 * its char/short paths - confirm against <urcu/uatomic/generic.h>.
 */
#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT
30
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34
35 /*
36 * Derived from AO_compare_and_swap() and AO_test_and_set_full().
37 */
38
/*
 * __hp(size, x): reinterpret the pointer @x as a pointer to a structure
 * that wraps a char array of @size bytes. Dereferencing the result lets the
 * inline functions below hand their @addr argument to the assembly as an
 * "m" or "+m" operand of exactly the accessed width.
 *
 * @size must be a literal constant (1, 2, 4 or 8): it is pasted into the
 * type name, which supports compilers such as clang that do not support
 * VLA. The types are declared as typedefs up front because C++ does not
 * allow a type to be defined inside a cast.
 */

typedef struct {
	char v[1];
} __hp_1;

typedef struct {
	char v[2];
} __hp_2;

typedef struct {
	char v[4];
} __hp_4;

typedef struct {
	char v[8];
} __hp_8;

#define __hp(size, x)	((__hp_##size *)(x))
54
/*
 * _uatomic_set(addr, v): store @v into *@addr through CMM_STORE_SHARED().
 * The expansion is cast to void, so the macro yields no value.
 */
#define _uatomic_set(addr, v)	\
	((void) CMM_STORE_SHARED(*(addr), (v)))
56
57 /* cmpxchg */
58
59 static inline __attribute__((always_inline))
60 unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
61 unsigned long _new, int len)
62 {
63 switch (len) {
64 case 1:
65 {
66 unsigned char result = old;
67
68 __asm__ __volatile__(
69 "lock; cmpxchgb %2, %1"
70 : "+a"(result), "+m"(*__hp(1, addr))
71 : "q"((unsigned char)_new)
72 : "memory");
73 return result;
74 }
75 case 2:
76 {
77 unsigned short result = old;
78
79 __asm__ __volatile__(
80 "lock; cmpxchgw %2, %1"
81 : "+a"(result), "+m"(*__hp(2, addr))
82 : "r"((unsigned short)_new)
83 : "memory");
84 return result;
85 }
86 case 4:
87 {
88 unsigned int result = old;
89
90 __asm__ __volatile__(
91 "lock; cmpxchgl %2, %1"
92 : "+a"(result), "+m"(*__hp(4, addr))
93 : "r"((unsigned int)_new)
94 : "memory");
95 return result;
96 }
97 #if (CAA_BITS_PER_LONG == 64)
98 case 8:
99 {
100 unsigned long result = old;
101
102 __asm__ __volatile__(
103 "lock; cmpxchgq %2, %1"
104 : "+a"(result), "+m"(*__hp(8, addr))
105 : "r"((unsigned long)_new)
106 : "memory");
107 return result;
108 }
109 #endif
110 }
111 /*
112 * generate an illegal instruction. Cannot catch this with
113 * linker tricks when optimizations are disabled.
114 */
115 __asm__ __volatile__("ud2");
116 return 0;
117 }
118
/*
 * _uatomic_cmpxchg(addr, old, _new): size-generic front end for
 * __uatomic_cmpxchg(). The operand size is sizeof(*(addr)) and the result
 * is cast back to the type of *(addr).
 */
#define _uatomic_cmpxchg(addr, old, _new)			\
	((__typeof__(*(addr)))					\
		__uatomic_cmpxchg((addr),			\
			caa_cast_long_keep_sign(old),		\
			caa_cast_long_keep_sign(_new),		\
			sizeof(*(addr))))
124
125 /* xchg */
126
127 static inline __attribute__((always_inline))
128 unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
129 {
130 /* Note: the "xchg" instruction does not need a "lock" prefix. */
131 switch (len) {
132 case 1:
133 {
134 unsigned char result;
135 __asm__ __volatile__(
136 "xchgb %0, %1"
137 : "=q"(result), "+m"(*__hp(1, addr))
138 : "0" ((unsigned char)val)
139 : "memory");
140 return result;
141 }
142 case 2:
143 {
144 unsigned short result;
145 __asm__ __volatile__(
146 "xchgw %0, %1"
147 : "=r"(result), "+m"(*__hp(2, addr))
148 : "0" ((unsigned short)val)
149 : "memory");
150 return result;
151 }
152 case 4:
153 {
154 unsigned int result;
155 __asm__ __volatile__(
156 "xchgl %0, %1"
157 : "=r"(result), "+m"(*__hp(4, addr))
158 : "0" ((unsigned int)val)
159 : "memory");
160 return result;
161 }
162 #if (CAA_BITS_PER_LONG == 64)
163 case 8:
164 {
165 unsigned long result;
166 __asm__ __volatile__(
167 "xchgq %0, %1"
168 : "=r"(result), "+m"(*__hp(8, addr))
169 : "0" ((unsigned long)val)
170 : "memory");
171 return result;
172 }
173 #endif
174 }
175 /*
176 * generate an illegal instruction. Cannot catch this with
177 * linker tricks when optimizations are disabled.
178 */
179 __asm__ __volatile__("ud2");
180 return 0;
181 }
182
/*
 * _uatomic_xchg(addr, v): size-generic front end for __uatomic_exchange().
 * The operand size is sizeof(*(addr)) and the result is cast back to the
 * type of *(addr).
 */
#define _uatomic_xchg(addr, v)					\
	((__typeof__(*(addr)))					\
		__uatomic_exchange((addr),			\
			caa_cast_long_keep_sign(v),		\
			sizeof(*(addr))))
187
188 /* uatomic_add_return */
189
190 static inline __attribute__((always_inline))
191 unsigned long __uatomic_add_return(void *addr, unsigned long val,
192 int len)
193 {
194 switch (len) {
195 case 1:
196 {
197 unsigned char result = val;
198
199 __asm__ __volatile__(
200 "lock; xaddb %1, %0"
201 : "+m"(*__hp(1, addr)), "+q" (result)
202 :
203 : "memory");
204 return result + (unsigned char)val;
205 }
206 case 2:
207 {
208 unsigned short result = val;
209
210 __asm__ __volatile__(
211 "lock; xaddw %1, %0"
212 : "+m"(*__hp(2, addr)), "+r" (result)
213 :
214 : "memory");
215 return result + (unsigned short)val;
216 }
217 case 4:
218 {
219 unsigned int result = val;
220
221 __asm__ __volatile__(
222 "lock; xaddl %1, %0"
223 : "+m"(*__hp(4, addr)), "+r" (result)
224 :
225 : "memory");
226 return result + (unsigned int)val;
227 }
228 #if (CAA_BITS_PER_LONG == 64)
229 case 8:
230 {
231 unsigned long result = val;
232
233 __asm__ __volatile__(
234 "lock; xaddq %1, %0"
235 : "+m"(*__hp(8, addr)), "+r" (result)
236 :
237 : "memory");
238 return result + (unsigned long)val;
239 }
240 #endif
241 }
242 /*
243 * generate an illegal instruction. Cannot catch this with
244 * linker tricks when optimizations are disabled.
245 */
246 __asm__ __volatile__("ud2");
247 return 0;
248 }
249
/*
 * _uatomic_add_return(addr, v): size-generic front end for
 * __uatomic_add_return(). The operand size is sizeof(*(addr)) and the
 * result is cast back to the type of *(addr).
 */
#define _uatomic_add_return(addr, v)				\
	((__typeof__(*(addr)))					\
		__uatomic_add_return((addr),			\
			caa_cast_long_keep_sign(v),		\
			sizeof(*(addr))))
254
255 /* uatomic_and */
256
257 static inline __attribute__((always_inline))
258 void __uatomic_and(void *addr, unsigned long val, int len)
259 {
260 switch (len) {
261 case 1:
262 {
263 __asm__ __volatile__(
264 "lock; andb %1, %0"
265 : "=m"(*__hp(1, addr))
266 : "iq" ((unsigned char)val)
267 : "memory");
268 return;
269 }
270 case 2:
271 {
272 __asm__ __volatile__(
273 "lock; andw %1, %0"
274 : "=m"(*__hp(2, addr))
275 : "ir" ((unsigned short)val)
276 : "memory");
277 return;
278 }
279 case 4:
280 {
281 __asm__ __volatile__(
282 "lock; andl %1, %0"
283 : "=m"(*__hp(4, addr))
284 : "ir" ((unsigned int)val)
285 : "memory");
286 return;
287 }
288 #if (CAA_BITS_PER_LONG == 64)
289 case 8:
290 {
291 __asm__ __volatile__(
292 "lock; andq %1, %0"
293 : "=m"(*__hp(8, addr))
294 : "er" ((unsigned long)val)
295 : "memory");
296 return;
297 }
298 #endif
299 }
300 /*
301 * generate an illegal instruction. Cannot catch this with
302 * linker tricks when optimizations are disabled.
303 */
304 __asm__ __volatile__("ud2");
305 return;
306 }
307
/*
 * _uatomic_and(addr, v): size-generic front end for __uatomic_and(), with
 * the operand size taken from sizeof(*(addr)). Yields no value.
 */
#define _uatomic_and(addr, v)					\
	(__uatomic_and((addr),					\
		caa_cast_long_keep_sign(v),			\
		sizeof(*(addr))))
310
311 /* uatomic_or */
312
313 static inline __attribute__((always_inline))
314 void __uatomic_or(void *addr, unsigned long val, int len)
315 {
316 switch (len) {
317 case 1:
318 {
319 __asm__ __volatile__(
320 "lock; orb %1, %0"
321 : "=m"(*__hp(1, addr))
322 : "iq" ((unsigned char)val)
323 : "memory");
324 return;
325 }
326 case 2:
327 {
328 __asm__ __volatile__(
329 "lock; orw %1, %0"
330 : "=m"(*__hp(2, addr))
331 : "ir" ((unsigned short)val)
332 : "memory");
333 return;
334 }
335 case 4:
336 {
337 __asm__ __volatile__(
338 "lock; orl %1, %0"
339 : "=m"(*__hp(4, addr))
340 : "ir" ((unsigned int)val)
341 : "memory");
342 return;
343 }
344 #if (CAA_BITS_PER_LONG == 64)
345 case 8:
346 {
347 __asm__ __volatile__(
348 "lock; orq %1, %0"
349 : "=m"(*__hp(8, addr))
350 : "er" ((unsigned long)val)
351 : "memory");
352 return;
353 }
354 #endif
355 }
356 /*
357 * generate an illegal instruction. Cannot catch this with
358 * linker tricks when optimizations are disabled.
359 */
360 __asm__ __volatile__("ud2");
361 return;
362 }
363
/*
 * _uatomic_or(addr, v): size-generic front end for __uatomic_or(), with
 * the operand size taken from sizeof(*(addr)). Yields no value.
 */
#define _uatomic_or(addr, v)					\
	(__uatomic_or((addr),					\
		caa_cast_long_keep_sign(v),			\
		sizeof(*(addr))))
366
367 /* uatomic_add */
368
369 static inline __attribute__((always_inline))
370 void __uatomic_add(void *addr, unsigned long val, int len)
371 {
372 switch (len) {
373 case 1:
374 {
375 __asm__ __volatile__(
376 "lock; addb %1, %0"
377 : "=m"(*__hp(1, addr))
378 : "iq" ((unsigned char)val)
379 : "memory");
380 return;
381 }
382 case 2:
383 {
384 __asm__ __volatile__(
385 "lock; addw %1, %0"
386 : "=m"(*__hp(2, addr))
387 : "ir" ((unsigned short)val)
388 : "memory");
389 return;
390 }
391 case 4:
392 {
393 __asm__ __volatile__(
394 "lock; addl %1, %0"
395 : "=m"(*__hp(4, addr))
396 : "ir" ((unsigned int)val)
397 : "memory");
398 return;
399 }
400 #if (CAA_BITS_PER_LONG == 64)
401 case 8:
402 {
403 __asm__ __volatile__(
404 "lock; addq %1, %0"
405 : "=m"(*__hp(8, addr))
406 : "er" ((unsigned long)val)
407 : "memory");
408 return;
409 }
410 #endif
411 }
412 /*
413 * generate an illegal instruction. Cannot catch this with
414 * linker tricks when optimizations are disabled.
415 */
416 __asm__ __volatile__("ud2");
417 return;
418 }
419
/*
 * _uatomic_add(addr, v): size-generic front end for __uatomic_add(), with
 * the operand size taken from sizeof(*(addr)). Yields no value.
 */
#define _uatomic_add(addr, v)					\
	(__uatomic_add((addr),					\
		caa_cast_long_keep_sign(v),			\
		sizeof(*(addr))))
422
423
424 /* uatomic_inc */
425
426 static inline __attribute__((always_inline))
427 void __uatomic_inc(void *addr, int len)
428 {
429 switch (len) {
430 case 1:
431 {
432 __asm__ __volatile__(
433 "lock; incb %0"
434 : "=m"(*__hp(1, addr))
435 :
436 : "memory");
437 return;
438 }
439 case 2:
440 {
441 __asm__ __volatile__(
442 "lock; incw %0"
443 : "=m"(*__hp(2, addr))
444 :
445 : "memory");
446 return;
447 }
448 case 4:
449 {
450 __asm__ __volatile__(
451 "lock; incl %0"
452 : "=m"(*__hp(4, addr))
453 :
454 : "memory");
455 return;
456 }
457 #if (CAA_BITS_PER_LONG == 64)
458 case 8:
459 {
460 __asm__ __volatile__(
461 "lock; incq %0"
462 : "=m"(*__hp(8, addr))
463 :
464 : "memory");
465 return;
466 }
467 #endif
468 }
469 /* generate an illegal instruction. Cannot catch this with linker tricks
470 * when optimizations are disabled. */
471 __asm__ __volatile__("ud2");
472 return;
473 }
474
/*
 * _uatomic_inc(addr): size-generic front end for __uatomic_inc(), with the
 * operand size taken from sizeof(*(addr)). Yields no value.
 */
#define _uatomic_inc(addr)	\
	(__uatomic_inc((addr), sizeof(*(addr))))
476
477 /* uatomic_dec */
478
479 static inline __attribute__((always_inline))
480 void __uatomic_dec(void *addr, int len)
481 {
482 switch (len) {
483 case 1:
484 {
485 __asm__ __volatile__(
486 "lock; decb %0"
487 : "=m"(*__hp(1, addr))
488 :
489 : "memory");
490 return;
491 }
492 case 2:
493 {
494 __asm__ __volatile__(
495 "lock; decw %0"
496 : "=m"(*__hp(2, addr))
497 :
498 : "memory");
499 return;
500 }
501 case 4:
502 {
503 __asm__ __volatile__(
504 "lock; decl %0"
505 : "=m"(*__hp(4, addr))
506 :
507 : "memory");
508 return;
509 }
510 #if (CAA_BITS_PER_LONG == 64)
511 case 8:
512 {
513 __asm__ __volatile__(
514 "lock; decq %0"
515 : "=m"(*__hp(8, addr))
516 :
517 : "memory");
518 return;
519 }
520 #endif
521 }
522 /*
523 * generate an illegal instruction. Cannot catch this with
524 * linker tricks when optimizations are disabled.
525 */
526 __asm__ __volatile__("ud2");
527 return;
528 }
529
/*
 * _uatomic_dec(addr): size-generic front end for __uatomic_dec(), with the
 * operand size taken from sizeof(*(addr)). Yields no value.
 */
#define _uatomic_dec(addr)	\
	(__uatomic_dec((addr), sizeof(*(addr))))
531
#ifdef URCU_ARCH_X86_NO_CAS

/* For backwards compat */
#define CONFIG_RCU_COMPAT_ARCH 1

/*
 * State consulted by UATOMIC_COMPAT(): a value > 0 selects the native
 * _uatomic_*() operations, a value < 0 causes __rcu_cas_init() to be
 * called (its result > 0 also selects the native operations), and anything
 * else selects the compat_uatomic_*() fallbacks.
 * NOTE(review): presumably "< 0" means "not probed yet" - confirm against
 * the definition of __rcu_cas_init().
 */
extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

/*
 * UATOMIC_COMPAT(insn): expand to _uatomic_<insn> when the native
 * operations are usable, and to compat_uatomic_<insn> otherwise. @insn is
 * the operation name together with its argument list, e.g. xchg(addr, v).
 */
#define UATOMIC_COMPAT(insn)					\
	((caa_likely(__rcu_cas_avail > 0)			\
	  || (caa_unlikely(__rcu_cas_avail < 0)			\
	      && (__rcu_cas_init() > 0)))			\
		? (_uatomic_##insn)				\
		: (compat_uatomic_##insn))

/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
		unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)				\
	((void) _compat_uatomic_set((addr),			\
			caa_cast_long_keep_sign(_new),		\
			sizeof(*(addr))))

/* Fallback exchange; result is cast back to the type of *(addr). */
extern unsigned long _compat_uatomic_xchg(void *addr,
		unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)				\
	((__typeof__(*(addr)))					\
		_compat_uatomic_xchg((addr),			\
			caa_cast_long_keep_sign(_new),		\
			sizeof(*(addr))))

/* Fallback compare-and-swap; result is cast back to the type of *(addr). */
extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
		unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)			\
	((__typeof__(*(addr)))					\
		_compat_uatomic_cmpxchg((addr),			\
			caa_cast_long_keep_sign(old),		\
			caa_cast_long_keep_sign(_new),		\
			sizeof(*(addr))))

/* Fallback AND; yields no value. */
extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)				\
	(_compat_uatomic_and((addr),				\
			caa_cast_long_keep_sign(v),		\
			sizeof(*(addr))))

/* Fallback OR; yields no value. */
extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)				\
	(_compat_uatomic_or((addr),				\
			caa_cast_long_keep_sign(v),		\
			sizeof(*(addr))))

/* Fallback add-and-return; result is cast back to the type of *(addr). */
extern unsigned long _compat_uatomic_add_return(void *addr,
		unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)			\
	((__typeof__(*(addr)))					\
		_compat_uatomic_add_return((addr),		\
			caa_cast_long_keep_sign(v),		\
			sizeof(*(addr))))

/* add/inc/dec fallbacks are built on add_return with the result dropped. */
#define compat_uatomic_add(addr, v)				\
	((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)				\
	(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)				\
	(compat_uatomic_add((addr), -1))

#else
/* No compat layer configured: always use the native operations. */
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif
605
/*
 * Public uatomic operations. Each one expands through UATOMIC_COMPAT(),
 * which selects either the _uatomic_*() implementation above or, when the
 * compat layer is configured, the compat_uatomic_*() fallback.
 *
 * Read is atomic even in compat mode.
 * NOTE(review): no uatomic_read is defined in this header; presumably it
 * is provided by <urcu/uatomic/generic.h>, included below - confirm.
 *
 * The cmm_smp_mb__before/after_uatomic_*() hooks all expand to
 * cmm_barrier().
 * NOTE(review): presumably sufficient because the lock-prefixed
 * instructions used above already order memory on x86 - confirm, in
 * particular for the compat fallbacks.
 */
#define uatomic_set(addr, v)			UATOMIC_COMPAT(set(addr, v))

#define uatomic_cmpxchg(addr, old, _new)	UATOMIC_COMPAT(cmpxchg(addr, old, _new))
#define uatomic_xchg(addr, v)			UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and(addr, v)			UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or(addr, v)			UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return(addr, v)		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add(addr, v)			UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc(addr)			UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec(addr)			UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
639
640 #ifdef __cplusplus
641 }
642 #endif
643
644 #include <urcu/uatomic/generic.h>
645
646 #endif /* _URCU_ARCH_UATOMIC_X86_H */
This page took 0.043627 seconds and 4 git commands to generate.