ppc: Document cache line size choice
[urcu.git] / include / urcu / arch / ppc.h
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_PPC_H
#define _URCU_ARCH_PPC_H

/*
 * arch_ppc.h: trivial definitions for the powerpc architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Most powerpc machines have 128-byte cache lines, but to make sure
 * there is no false sharing on any known Power hardware, use the
 * largest known cache line size, which is the physical size of POWER5
 * L3 cache lines (256 bytes).
 *
 * "Each slice [of the L3] is 12-way set-associative, with 4,096
 * congruence classes of 256-byte lines managed as two 128-byte sectors
 * to match the L2 line size."
 *
 * From: "POWER5 system microarchitecture",
 *       IBM Journal of Research & Development,
 *       vol. 49, no. 4/5, July/September 2005
 *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
 *
 * This value is a compile-time constant, which prevents us from
 * querying the processor for its cache line size at runtime. We
 * therefore need to be pessimistic and assume the largest known cache
 * line size.
 *
 * This value is exposed through public headers, so tuning it for
 * specific environments is a concern for ABI compatibility between
 * applications and liburcu.
 */
#define CAA_CACHE_LINE_SIZE	256
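
/*
 * Illustrative sketch (not part of this header): one way application code
 * could use CAA_CACHE_LINE_SIZE to keep per-thread data on distinct cache
 * lines and avoid false sharing. The struct and array names below are
 * hypothetical, not liburcu API.
 */
struct example_per_thread_count {
	unsigned long count;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/* Adjacent slots land on distinct 256-byte lines. */
static struct example_per_thread_count example_counts[64];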

#ifdef __NO_LWSYNC__
#define LWSYNC_OPCODE	"sync\n"
#else
#define LWSYNC_OPCODE	"lwsync\n"
#endif

/*
 * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
 * preserve ordering of cacheable vs. non-cacheable accesses, so it
 * should not be used to order with respect to MMIO operations. An
 * eieio+lwsync pair is also not enough for cmm_rmb, because it will
 * order cacheable and non-cacheable memory operations separately, i.e.
 * not the latter against the former.
 */
#define cmm_mb()	__asm__ __volatile__ ("sync":::"memory")

/*
 * lwsync orders loads in cacheable memory with respect to other loads,
 * and stores in cacheable memory with respect to other stores.
 * Therefore, use it for barriers ordering accesses to cacheable memory
 * only.
 */
#define cmm_smp_rmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
#define cmm_smp_wmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")

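/*
 * Illustrative sketch (not part of this header): the message-passing
 * pattern these barriers are meant for, in cacheable memory. On ppc both
 * barriers expand to lwsync (or sync when __NO_LWSYNC__ is defined). The
 * functions and variables below are hypothetical; real liburcu code would
 * also use CMM_LOAD_SHARED()/CMM_STORE_SHARED() for the flag accesses.
 */
static int example_payload;
static volatile int example_ready;

static void example_producer(void)
{
	example_payload = 42;
	cmm_smp_wmb();		/* order the payload store before the flag store */
	example_ready = 1;
}

static int example_consumer(void)
{
	while (!example_ready)
		;		/* spin until the producer publishes */
	cmm_smp_rmb();		/* order the flag load before the payload load */
	return example_payload;
}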
#define mftbl()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbl %0" : "=r" (rval));	\
		rval;					\
	})

#define mftbu()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbu %0" : "=r" (rval));	\
		rval;					\
	})

#define mftb()						\
	__extension__					\
	({						\
		unsigned long long rval;		\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define HAS_CAA_GET_CYCLES

typedef uint64_t caa_cycles_t;

#ifdef __powerpc64__
static inline caa_cycles_t caa_get_cycles(void)
{
	return (caa_cycles_t) mftb();
}
#else
/*
 * On 32-bit powerpc, read the 64-bit timebase as two 32-bit halves and
 * retry whenever the upper half changed between reads (i.e. the lower
 * half wrapped around during the sequence).
 */
static inline caa_cycles_t caa_get_cycles(void)
{
	unsigned long h, l;

	for (;;) {
		h = mftbu();
		cmm_barrier();
		l = mftbl();
		cmm_barrier();
		if (mftbu() == h)
			return (((caa_cycles_t) h) << 32) + l;
	}
}
#endif

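/*
 * Illustrative sketch (not part of this header): timing a code region with
 * caa_get_cycles(). The result is in timebase ticks, not nanoseconds;
 * converting to wall-clock time requires the platform timebase frequency.
 * example_work() is a hypothetical function being measured.
 */
extern void example_work(void);

static inline unsigned long long example_time_work(void)
{
	caa_cycles_t start, end;

	start = caa_get_cycles();
	example_work();
	end = caa_get_cycles();
	return (unsigned long long) (end - start);	/* elapsed timebase ticks */
}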
/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#define __NR_membarrier		365
#endif

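/*
 * Illustrative sketch (not part of this header): invoking membarrier(2)
 * through syscall() with the number defined above. MEMBARRIER_CMD_QUERY (0)
 * comes from the Linux UAPI header <linux/membarrier.h>; the helper name
 * below is hypothetical.
 */
#include <unistd.h>	/* for syscall() */

/*
 * Returns the bitmask of membarrier commands supported by the running
 * kernel, or -1 with errno set to ENOSYS on kernels without the system call.
 */
static inline int example_membarrier_query(void)
{
	return syscall(__NR_membarrier, 0 /* MEMBARRIER_CMD_QUERY */, 0);
}
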
#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_PPC_H */