ppc: Document cache line size choice
[urcu.git] / include / urcu / arch / ppc.h
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_PPC_H
#define _URCU_ARCH_PPC_H

/*
 * arch_ppc.h: trivial definitions for the powerpc architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Most powerpc machines have 128-byte cache lines, but to make sure
 * there is no false sharing on any known Power hardware, use the
 * largest known cache line size, which is the physical size of POWER5
 * L3 cache lines (256 bytes).
 *
 * "Each slice [of the L3] is 12-way set-associative, with 4,096
 * congruence classes of 256-byte lines managed as two 128-byte sectors
 * to match the L2 line size."
 *
 * From: "POWER5 system microarchitecture",
 *       IBM Journal of Research & Development,
 *       vol. 49, no. 4/5, July/September 2005
 *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
 *
 * This value is a compile-time constant, which prevents us from
 * querying the processor for its cache line size at runtime. We
 * therefore need to be pessimistic and assume the largest known cache
 * line size.
 *
 * This value is exposed through public headers, so tuning it for
 * specific environments is a concern for ABI compatibility between
 * applications and liburcu.
 */
#define CAA_CACHE_LINE_SIZE	256
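
/*
 * Illustrative sketch (not part of this header): one way application code
 * could use CAA_CACHE_LINE_SIZE to keep per-thread data on distinct cache
 * lines and avoid false sharing. The struct and array names below are
 * hypothetical, not liburcu API.
 */
struct example_per_thread_count {
	unsigned long count;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/* Adjacent slots land on distinct 256-byte lines. */
static struct example_per_thread_count example_counts[64];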

#ifdef __NO_LWSYNC__
#define LWSYNC_OPCODE	"sync\n"
#else
#define LWSYNC_OPCODE	"lwsync\n"
#endif

/*
 * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
 * preserve ordering of cacheable vs. non-cacheable accesses, so it
 * should not be used to order with respect to MMIO operations. An
 * eieio+lwsync pair is also not enough for cmm_rmb, because it will
 * order cacheable and non-cacheable memory operations separately, i.e.
 * not the latter against the former.
 */
#define cmm_mb()	__asm__ __volatile__ ("sync":::"memory")

/*
 * lwsync orders loads in cacheable memory with respect to other loads,
 * and stores in cacheable memory with respect to other stores.
 * Therefore, use it for barriers ordering accesses to cacheable memory
 * only.
 */
#define cmm_smp_rmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
#define cmm_smp_wmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")

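/*
 * Illustrative sketch (not part of this header): the message-passing
 * pattern these barriers are meant for, in cacheable memory. On ppc both
 * barriers expand to lwsync (or sync when __NO_LWSYNC__ is defined). The
 * functions and variables below are hypothetical; real liburcu code would
 * also use CMM_LOAD_SHARED()/CMM_STORE_SHARED() for the flag accesses.
 */
static int example_payload;
static volatile int example_ready;

static void example_producer(void)
{
	example_payload = 42;
	cmm_smp_wmb();		/* order the payload store before the flag store */
	example_ready = 1;
}

static int example_consumer(void)
{
	while (!example_ready)
		;		/* spin until the producer publishes */
	cmm_smp_rmb();		/* order the flag load before the payload load */
	return example_payload;
}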
#define mftbl()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbl %0" : "=r" (rval));	\
		rval;					\
	})

#define mftbu()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbu %0" : "=r" (rval));	\
		rval;					\
	})

#define mftb()						\
	__extension__					\
	({						\
		unsigned long long rval;		\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define HAS_CAA_GET_CYCLES

typedef uint64_t caa_cycles_t;

#ifdef __powerpc64__
static inline caa_cycles_t caa_get_cycles(void)
{
	return (caa_cycles_t) mftb();
}
#else
/*
 * On 32-bit powerpc, read the 64-bit timebase as two 32-bit halves and
 * retry whenever the upper half changed between reads (i.e. the lower
 * half wrapped around during the sequence).
 */
static inline caa_cycles_t caa_get_cycles(void)
{
	unsigned long h, l;

	for (;;) {
		h = mftbu();
		cmm_barrier();
		l = mftbl();
		cmm_barrier();
		if (mftbu() == h)
			return (((caa_cycles_t) h) << 32) + l;
	}
}
#endif

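/*
 * Illustrative sketch (not part of this header): timing a code region with
 * caa_get_cycles(). The result is in timebase ticks, not nanoseconds;
 * converting to wall-clock time requires the platform timebase frequency.
 * example_work() is a hypothetical function being measured.
 */
extern void example_work(void);

static inline unsigned long long example_time_work(void)
{
	caa_cycles_t start, end;

	start = caa_get_cycles();
	example_work();
	end = caa_get_cycles();
	return (unsigned long long) (end - start);	/* elapsed timebase ticks */
}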
/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#define __NR_membarrier		365
#endif

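/*
 * Illustrative sketch (not part of this header): invoking membarrier(2)
 * through syscall() with the number defined above. MEMBARRIER_CMD_QUERY (0)
 * comes from the Linux UAPI header <linux/membarrier.h>; the helper name
 * below is hypothetical.
 */
#include <unistd.h>	/* for syscall() */

/*
 * Returns the bitmask of membarrier commands supported by the running
 * kernel, or -1 with errno set to ENOSYS on kernels without the system call.
 */
static inline int example_membarrier_query(void)
{
	return syscall(__NR_membarrier, 0 /* MEMBARRIER_CMD_QUERY */, 0);
}
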
#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_PPC_H */