// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_PPC_H
#define _URCU_ARCH_PPC_H

/*
 * arch_ppc.h: trivial definitions for the powerpc architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Most powerpc machines have 128-byte cache lines, but to make sure
 * there is no false sharing on all known Power hardware, use the
 * largest known cache line size, which is the physical size of POWER5
 * L3 cache lines (256 bytes).
 *
 * "Each slice [of the L3] is 12-way set-associative, with 4,096
 * congruence classes of 256-byte lines managed as two 128-byte sectors
 * to match the L2 line size."
 *
 * From: "POWER5 system microarchitecture",
 *       IBM Journal of Research & Development,
 *       vol. 49, no. 4/5, July/September 2005
 *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
 *
 * This value is a compile-time constant, which prevents us from
 * querying the processor for the cache line size at runtime. We
 * therefore need to be pessimistic and assume the largest known cache
 * line size.
 *
 * This value is exposed through public headers, so tuning it for
 * specific environments is a concern for ABI compatibility between
 * applications and liburcu.
 */
#define CAA_CACHE_LINE_SIZE	256

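/*
 * Usage sketch (illustrative, not part of the original header): padding
 * a concurrently updated structure to CAA_CACHE_LINE_SIZE keeps
 * neighbouring instances from false-sharing a cache line. The struct
 * name is hypothetical.
 *
 *	struct per_cpu_counter {
 *		uint64_t count;
 *	} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 */
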
/*
 * __NO_LWSYNC__ is predefined by the compiler when targeting processors
 * that do not implement lwsync; fall back to the heavier sync there.
 */
#ifdef __NO_LWSYNC__
#define LWSYNC_OPCODE	"sync\n"
#else
#define LWSYNC_OPCODE	"lwsync\n"
#endif

/*
 * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
 * preserve ordering of cacheable vs. non-cacheable accesses, so it
 * should not be used to order with respect to MMIO operations. An
 * eieio+lwsync pair is also not enough for cmm_rmb, because it will
 * order cacheable and non-cacheable memory operations separately,
 * i.e. not the latter against the former.
 */
#define cmm_mb()	__asm__ __volatile__ ("sync":::"memory")

/*
 * lwsync orders loads in cacheable memory with respect to other loads,
 * and stores in cacheable memory with respect to other stores.
 * Therefore, use it for barriers ordering accesses to cacheable memory
 * only.
 */
#define cmm_smp_rmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
#define cmm_smp_wmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")

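/*
 * Ordering sketch (illustrative, not part of the original header),
 * assuming both variables live in ordinary cacheable memory and using
 * CMM_STORE_SHARED()/CMM_LOAD_SHARED() from <urcu/system.h>:
 *
 *	// Publisher				// Consumer
 *	data = 42;				while (!CMM_LOAD_SHARED(ready)) {}
 *	cmm_smp_wmb();				cmm_smp_rmb();
 *	CMM_STORE_SHARED(ready, 1);		assert(data == 42);
 *
 * The wmb/rmb pair ensures that a consumer observing ready == 1 also
 * observes the earlier store to data.
 */
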
/* Read the lower 32 bits of the time base (full register on ppc64). */
#define mftbl() \
	__extension__ \
	({ \
		unsigned long rval; \
		__asm__ __volatile__ ("mftb %0" : "=r" (rval)); \
		rval; \
	})

/* Read the upper 32 bits of the time base. */
#define mftbu() \
	__extension__ \
	({ \
		unsigned long rval; \
		__asm__ __volatile__ ("mftbu %0" : "=r" (rval)); \
		rval; \
	})

/* Read the full 64-bit time base (used on 64-bit processors). */
#define mftb() \
	__extension__ \
	({ \
		unsigned long long rval; \
		__asm__ __volatile__ ("mftb %0" : "=r" (rval)); \
		rval; \
	})

#define HAS_CAA_GET_CYCLES

typedef uint64_t caa_cycles_t;

#ifdef __powerpc64__
static inline caa_cycles_t caa_get_cycles(void)
{
	return (caa_cycles_t) mftb();
}
#else
static inline caa_cycles_t caa_get_cycles(void)
{
	unsigned long h, l;

	/*
	 * The 64-bit time base cannot be read atomically on 32-bit:
	 * read the upper half, then the lower half, then re-read the
	 * upper half. Retry if the upper half changed, which indicates
	 * a carry from TBL into TBU between the reads.
	 */
	for (;;) {
		h = mftbu();
		cmm_barrier();
		l = mftbl();
		cmm_barrier();
		if (mftbu() == h)
			return (((caa_cycles_t) h) << 32) + l;
	}
}
#endif

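/*
 * Usage sketch (illustrative, not part of the original header): deltas
 * are expressed in time-base ticks, not core clock cycles, so they must
 * be scaled by the platform's time-base frequency to obtain wall time.
 * do_work() is a hypothetical workload.
 *
 *	caa_cycles_t t0 = caa_get_cycles();
 *	do_work();
 *	caa_cycles_t delta = caa_get_cycles() - t0;
 */
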
/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#define __NR_membarrier		365
#endif

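/*
 * Usage sketch (illustrative, not part of the original header): the
 * system call is invoked through syscall(2); MEMBARRIER_CMD_SHARED is
 * the command defined by <linux/membarrier.h>.
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	(void) syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
 */
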
#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_PPC_H */