This is the second patch in the series reducing namespace pollution.
The caa_ prefix stands for Concurrent Architecture Abstraction.
Again, suggested by Mathieu Desnoyers and Paul E. McKenney.
Every define, macro, and function specific to the architecture
abstraction of liburcu now carries that prefix.
Signed-off-by: David Goulet <david.goulet@polymtl.ca>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
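For code built against liburcu, the rename is purely mechanical. A minimal sketch of caller code after this patch (the header names and the struct are assumptions for illustration, not part of the patch):

/* Hedged sketch: shows the renamed identifiers only; <urcu/arch.h> and
 * <urcu/compiler.h> are assumed to be the installed liburcu headers. */
#include <urcu/arch.h>      /* CAA_CACHE_LINE_SIZE, caa_cpu_relax(), caa_get_cycles() */
#include <urcu/compiler.h>  /* caa_container_of() */

/* Was: __attribute__((aligned(CACHE_LINE_SIZE))) */
struct counter {
	unsigned long long v;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

static void spin(unsigned long loops)
{
	while (loops-- != 0)
		caa_cpu_relax();	/* was cpu_relax() */
}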
* Machine parameters.
*/
-/* #define CACHE_LINE_SIZE 64 */
+/* #define CAA_CACHE_LINE_SIZE 64 */
#define ____cacheline_internodealigned_in_smp \
__attribute__((__aligned__(1 << 6)))
* Default machine parameters.
*/
-#ifndef CACHE_LINE_SIZE
-/* #define CACHE_LINE_SIZE 128 */
-#endif /* #ifndef CACHE_LINE_SIZE */
+#ifndef CAA_CACHE_LINE_SIZE
+/* #define CAA_CACHE_LINE_SIZE 128 */
+#endif /* #ifndef CAA_CACHE_LINE_SIZE */
/*
* Exclusive locking primitives.
#define DEFINE_PER_THREAD(type, name) \
struct { \
__typeof__(type) v \
- __attribute__((__aligned__(CACHE_LINE_SIZE))); \
+ __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
} __per_thread_##name[NR_THREADS];
#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
#define DEFINE_PER_CPU(type, name) \
struct { \
__typeof__(type) v \
- __attribute__((__aligned__(CACHE_LINE_SIZE))); \
+ __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
} __per_cpu_##name[NR_CPUS]
#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
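As a reading aid, a hedged sketch of how these test helpers are meant to be used, assuming the test-suite helper header that defines DEFINE_PER_THREAD() and NR_THREADS is included; the counter name and thread id are illustrative:

/* One cache-line-aligned slot per thread, to avoid false sharing. */
DEFINE_PER_THREAD(unsigned long long, n_reads);

static void count_read(int tid)
{
	/* DEFINE_PER_THREAD() expands to an array of { v } wrappers,
	 * each aligned on CAA_CACHE_LINE_SIZE. */
	__per_thread_n_reads[tid].v++;
}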
* @member: the name of the list_struct within the struct.
*/
#define list_entry(ptr, type, member) \
- container_of(ptr, type, member)
+ caa_container_of(ptr, type, member)
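list_entry() is now just a thin wrapper around caa_container_of(). A hedged usage sketch (the struct and field names are illustrative):

struct job {
	int id;
	struct list_head list;	/* embedded list node */
};

static struct job *first_job(struct list_head *head)
{
	/* Recover the enclosing struct job from the embedded list_head. */
	return list_entry(head->next, struct job, list);
}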
/**
* list_first_entry - get the first element from a list
old->first = NULL;
}
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+#define hlist_entry(ptr, type, member) caa_container_of(ptr,type,member)
#define hlist_for_each(pos, head) \
for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
#define CONFIG_PPC64
-/*#define CACHE_LINE_SIZE 128 */
+/*#define CAA_CACHE_LINE_SIZE 128 */
#define ____cacheline_internodealigned_in_smp \
__attribute__((__aligned__(1 << 7)))
* Default machine parameters.
*/
-#ifndef CACHE_LINE_SIZE
-/* #define CACHE_LINE_SIZE 128 */
-#endif /* #ifndef CACHE_LINE_SIZE */
+#ifndef CAA_CACHE_LINE_SIZE
+/* #define CAA_CACHE_LINE_SIZE 128 */
+#endif /* #ifndef CAA_CACHE_LINE_SIZE */
/*
* Exclusive locking primitives.
#define DEFINE_PER_THREAD(type, name) \
struct { \
__typeof__(type) v \
- __attribute__((__aligned__(CACHE_LINE_SIZE))); \
+ __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
} __per_thread_##name[NR_THREADS];
#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
#define DEFINE_PER_CPU(type, name) \
struct { \
__typeof__(type) v \
- __attribute__((__aligned__(CACHE_LINE_SIZE))); \
+ __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
} __per_cpu_##name[NR_CPUS]
#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
* @member: the name of the list_struct within the struct.
*/
#define list_entry(ptr, type, member) \
- container_of(ptr, type, member)
+ caa_container_of(ptr, type, member)
/**
* list_first_entry - get the first element from a list
old->first = NULL;
}
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+#define hlist_entry(ptr, type, member) caa_container_of(ptr,type,member)
#define hlist_for_each(pos, head) \
for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
* Machine parameters.
*/
-/* #define CACHE_LINE_SIZE 64 */
+/* #define CAA_CACHE_LINE_SIZE 64 */
#define ____cacheline_internodealigned_in_smp \
__attribute__((__aligned__(1 << 6)))
* Default machine parameters.
*/
-#ifndef CACHE_LINE_SIZE
-/* #define CACHE_LINE_SIZE 128 */
-#endif /* #ifndef CACHE_LINE_SIZE */
+#ifndef CAA_CACHE_LINE_SIZE
+/* #define CAA_CACHE_LINE_SIZE 128 */
+#endif /* #ifndef CAA_CACHE_LINE_SIZE */
/*
* Exclusive locking primitives.
#define DEFINE_PER_THREAD(type, name) \
struct { \
__typeof__(type) v \
- __attribute__((__aligned__(CACHE_LINE_SIZE))); \
+ __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
} __per_thread_##name[NR_THREADS];
#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
#define DEFINE_PER_CPU(type, name) \
struct { \
__typeof__(type) v \
- __attribute__((__aligned__(CACHE_LINE_SIZE))); \
+ __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
} __per_cpu_##name[NR_CPUS]
#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
* @member: the name of the list_struct within the struct.
*/
#define list_entry(ptr, type, member) \
- container_of(ptr, type, member)
+ caa_container_of(ptr, type, member)
/**
* list_first_entry - get the first element from a list
old->first = NULL;
}
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+#define hlist_entry(ptr, type, member) caa_container_of(ptr,type,member)
#define hlist_for_each(pos, head) \
for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
#define GOFLAG_RUN 1
#define GOFLAG_STOP 2
-int goflag __attribute__((__aligned__(CACHE_LINE_SIZE))) = GOFLAG_INIT;
+int goflag __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))) = GOFLAG_INIT;
#define RCU_READ_RUN 1000
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
int main()
{
cycles_t time1, time2;
- time1 = get_cycles();
+ time1 = caa_get_cycles();
loop_sleep(NR_LOOPS);
- time2 = get_cycles();
+ time2 = caa_get_cycles();
printf("CPU clock cycles per loop: %g\n", (time2 - time1) /
(double)NR_LOOPS);
}
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
#define LOOPS 1048576
double cpl;
for (i = 0; i < TESTS; i++) {
- time1 = get_cycles();
+ time1 = caa_get_cycles();
loop_sleep(LOOPS);
- time2 = get_cycles();
+ time2 = caa_get_cycles();
time_tot += time2 - time1;
}
cpl = ((double)time_tot) / (double)TESTS / (double)LOOPS;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static unsigned long long __thread nr_reads;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_reads;
static unsigned int nr_readers;
static unsigned int nr_writers;
struct per_thread_lock {
pthread_mutex_t lock;
-} __attribute__((aligned(CACHE_LINE_SIZE))); /* cache-line aligned */
+} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); /* cache-line aligned */
static struct per_thread_lock *per_thread_lock;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static unsigned long long __thread nr_reads;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_reads;
static unsigned int nr_readers;
static unsigned int nr_writers;
struct per_thread_lock {
pthread_mutex_t lock;
-} __attribute__((aligned(CACHE_LINE_SIZE))); /* cache-line aligned */
+} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); /* cache-line aligned */
static struct per_thread_lock *per_thread_lock;
#define NR_READ num_read
#define NR_WRITE num_write
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
void *thr_reader(void *arg)
{
"reader", pthread_self(), (unsigned long)gettid());
sleep(2);
- time1 = get_cycles();
+ time1 = caa_get_cycles();
for (i = 0; i < OUTER_READ_LOOP; i++) {
for (j = 0; j < INNER_READ_LOOP; j++) {
pthread_mutex_lock(&per_thread_lock[tidx].lock);
pthread_mutex_unlock(&per_thread_lock[tidx].lock);
}
}
- time2 = get_cycles();
+ time2 = caa_get_cycles();
reader_time[tidx] = time2 - time1;
for (i = 0; i < OUTER_WRITE_LOOP; i++) {
for (j = 0; j < INNER_WRITE_LOOP; j++) {
- time1 = get_cycles();
+ time1 = caa_get_cycles();
for (tidx = 0; tidx < NR_READ; tidx++) {
pthread_mutex_lock(&per_thread_lock[tidx].lock);
}
for (tidx = NR_READ - 1; tidx >= 0; tidx--) {
pthread_mutex_unlock(&per_thread_lock[tidx].lock);
}
- time2 = get_cycles();
+ time2 = caa_get_cycles();
writer_time[(unsigned long)arg] += time2 - time1;
usleep(1);
}
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
void rcu_copy_mutex_lock(void)
tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
- < CACHE_LINE_SIZE)
+ < CAA_CACHE_LINE_SIZE)
for (i = 0; i < nr_writers; i++)
- pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
+ pending_reclaims[i].queue = calloc(1, CAA_CACHE_LINE_SIZE);
else
for (i = 0; i < nr_writers; i++)
pending_reclaims[i].queue = calloc(reclaim_batch,
#define NR_READ num_read
#define NR_WRITE num_write
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
void *thr_reader(void *arg)
{
rcu_register_thread();
- time1 = get_cycles();
+ time1 = caa_get_cycles();
for (i = 0; i < OUTER_READ_LOOP; i++) {
for (j = 0; j < INNER_READ_LOOP; j++) {
_rcu_read_lock();
}
_rcu_quiescent_state();
}
- time2 = get_cycles();
+ time2 = caa_get_cycles();
rcu_unregister_thread();
for (i = 0; i < OUTER_WRITE_LOOP; i++) {
for (j = 0; j < INNER_WRITE_LOOP; j++) {
- time1 = get_cycles();
+ time1 = caa_get_cycles();
new = malloc(sizeof(struct test_array));
rcu_copy_mutex_lock();
old = test_rcu_pointer;
old->a = 0;
}
free(old);
- time2 = get_cycles();
+ time2 = caa_get_cycles();
writer_time[(unsigned long)arg] += time2 - time1;
usleep(1);
}
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
#define NR_READ num_read
#define NR_WRITE num_write
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
void *thr_reader(void *arg)
{
"reader", pthread_self(), (unsigned long)gettid());
sleep(2);
- time1 = get_cycles();
+ time1 = caa_get_cycles();
for (i = 0; i < OUTER_READ_LOOP; i++) {
for (j = 0; j < INNER_READ_LOOP; j++) {
pthread_rwlock_rdlock(&lock);
pthread_rwlock_unlock(&lock);
}
}
- time2 = get_cycles();
+ time2 = caa_get_cycles();
reader_time[(unsigned long)arg] = time2 - time1;
for (i = 0; i < OUTER_WRITE_LOOP; i++) {
for (j = 0; j < INNER_WRITE_LOOP; j++) {
- time1 = get_cycles();
+ time1 = caa_get_cycles();
pthread_rwlock_wrlock(&lock);
test_array.a = 8;
pthread_rwlock_unlock(&lock);
- time2 = get_cycles();
+ time2 = caa_get_cycles();
writer_time[(unsigned long)arg] += time2 - time1;
usleep(1);
}
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static unsigned long long __thread nr_reads;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
static unsigned int nr_readers;
static unsigned int nr_writers;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static unsigned long long __thread nr_reads;
static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
static unsigned int nr_readers;
static unsigned int nr_writers;
tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
- < CACHE_LINE_SIZE)
+ < CAA_CACHE_LINE_SIZE)
for (i = 0; i < nr_writers; i++)
- pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
+ pending_reclaims[i].queue = calloc(1, CAA_CACHE_LINE_SIZE);
else
for (i = 0; i < nr_writers; i++)
pending_reclaims[i].queue = calloc(reclaim_batch,
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static void rcu_release_node(struct urcu_ref *ref)
{
- struct rcu_lfq_node *node = container_of(ref, struct rcu_lfq_node, ref);
+ struct rcu_lfq_node *node = caa_container_of(ref, struct rcu_lfq_node, ref);
defer_rcu(free, node);
//synchronize_rcu();
//free(node);
static void release_node(struct urcu_ref *ref)
{
- struct rcu_lfq_node *node = container_of(ref, struct rcu_lfq_node, ref);
+ struct rcu_lfq_node *node = caa_container_of(ref, struct rcu_lfq_node, ref);
free(node);
}
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
#define NR_READ num_read
#define NR_WRITE num_write
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
void *thr_reader(void *arg)
{
rcu_register_thread();
- time1 = get_cycles();
+ time1 = caa_get_cycles();
for (i = 0; i < OUTER_READ_LOOP; i++) {
for (j = 0; j < INNER_READ_LOOP; j++) {
rcu_read_lock();
rcu_read_unlock();
}
}
- time2 = get_cycles();
+ time2 = caa_get_cycles();
rcu_unregister_thread();
for (i = 0; i < OUTER_WRITE_LOOP; i++) {
for (j = 0; j < INNER_WRITE_LOOP; j++) {
- time1 = get_cycles();
+ time1 = caa_get_cycles();
new = malloc(sizeof(struct test_array));
rcu_copy_mutex_lock();
old = test_rcu_pointer;
old->a = 0;
}
free(old);
- time2 = get_cycles();
+ time2 = caa_get_cycles();
writer_time[(unsigned long)arg] += time2 - time1;
usleep(1);
}
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
static inline void loop_sleep(unsigned long l)
{
while(l-- != 0)
- cpu_relax();
+ caa_cpu_relax();
}
static int verbose_mode;
/* Data used by both reader and synchronize_rcu() */
long ctr;
/* Data used for registry */
- struct list_head node __attribute__((aligned(CACHE_LINE_SIZE)));
+ struct list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
pthread_t tid;
int alloc; /* registry entry allocated */
};
* Make sure both tests below are done on the same version of *value
* to insure consistency.
*/
- v = LOAD_SHARED(*value);
+ v = CAA_LOAD_SHARED(*value);
return (v & RCU_GP_CTR_NEST_MASK) &&
((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE);
}
* RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE)
*/
if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) {
- _STORE_SHARED(rcu_reader->ctr, _LOAD_SHARED(rcu_gp_ctr));
+ _CAA_STORE_SHARED(rcu_reader->ctr, _CAA_LOAD_SHARED(rcu_gp_ctr));
/*
* Set active readers count for outermost nesting level before
* accessing the pointer.
*/
cmm_smp_mb();
} else {
- _STORE_SHARED(rcu_reader->ctr, tmp + RCU_GP_COUNT);
+ _CAA_STORE_SHARED(rcu_reader->ctr, tmp + RCU_GP_COUNT);
}
}
* Finish using rcu before decrementing the pointer.
*/
cmm_smp_mb();
- _STORE_SHARED(rcu_reader->ctr, rcu_reader->ctr - RCU_GP_COUNT);
+ _CAA_STORE_SHARED(rcu_reader->ctr, rcu_reader->ctr - RCU_GP_COUNT);
cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */
}
struct rcu_reader *index, *tmp;
/* Switch parity: 0 -> 1, 1 -> 0 */
- STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
+ CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
/*
* Must commit qparity update to memory before waiting for other parity
* quiescent state. Failure to do so could result in the writer waiting
* forever while new readers are always accessing data (no progress).
- * Ensured by STORE_SHARED and LOAD_SHARED.
+ * Ensured by CAA_STORE_SHARED and CAA_LOAD_SHARED.
*/
/*
if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
usleep(RCU_SLEEP_DELAY);
else
- cpu_relax();
+ caa_cpu_relax();
}
}
/* put back the reader list in the registry */
mutex_lock(&rcu_defer_mutex);
list_for_each_entry(index, &registry, list) {
- head = LOAD_SHARED(index->head);
+ head = CAA_LOAD_SHARED(index->head);
num_items += head - index->tail;
}
mutex_unlock(&rcu_defer_mutex);
for (i = queue->tail; i != head;) {
cmm_smp_rmb(); /* read head before q[]. */
- p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
if (unlikely(DQ_IS_FCT_BIT(p))) {
DQ_CLEAR_FCT_BIT(p);
queue->last_fct_out = p;
- p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
} else if (unlikely(p == DQ_FCT_MARK)) {
- p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
queue->last_fct_out = p;
- p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
}
fct = queue->last_fct_out;
fct(p);
}
cmm_smp_mb(); /* push tail after having used q[] */
- STORE_SHARED(queue->tail, i);
+ CAA_STORE_SHARED(queue->tail, i);
}
static void _rcu_defer_barrier_thread(void)
mutex_lock(&rcu_defer_mutex);
list_for_each_entry(index, &registry, list) {
- index->last_head = LOAD_SHARED(index->head);
+ index->last_head = CAA_LOAD_SHARED(index->head);
num_items += index->last_head - index->tail;
}
if (likely(!num_items)) {
* thread.
*/
head = defer_queue.head;
- tail = LOAD_SHARED(defer_queue.tail);
+ tail = CAA_LOAD_SHARED(defer_queue.tail);
/*
* If queue is full, or reached threshold. Empty queue ourself.
if (unlikely(head - tail >= DEFER_QUEUE_SIZE - 2)) {
assert(head - tail <= DEFER_QUEUE_SIZE);
rcu_defer_barrier_thread();
- assert(head - LOAD_SHARED(defer_queue.tail) == 0);
+ assert(head - CAA_LOAD_SHARED(defer_queue.tail) == 0);
}
if (unlikely(defer_queue.last_fct_in != fct)) {
* marker, write DQ_FCT_MARK followed by the function
* pointer.
*/
- _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ _CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
DQ_FCT_MARK);
- _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ _CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
fct);
} else {
DQ_SET_FCT_BIT(fct);
- _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ _CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
fct);
}
} else {
* If the data to encode is not aligned or the marker,
* write DQ_FCT_MARK followed by the function pointer.
*/
- _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ _CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
DQ_FCT_MARK);
- _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ _CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
fct);
}
}
- _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
+ _CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
cmm_smp_wmb(); /* Publish new pointer before head */
/* Write q[] before head. */
- STORE_SHARED(defer_queue.head, head);
+ CAA_STORE_SHARED(defer_queue.head, head);
cmm_smp_mb(); /* Write queue head before read futex */
/*
* Wake-up any waiting defer thread.
* Inserts memory barriers on architectures that require them (currently only
* Alpha) and documents which pointers are protected by RCU.
*
- * The compiler memory barrier in LOAD_SHARED() ensures that value-speculative
+ * The compiler memory barrier in CAA_LOAD_SHARED() ensures that value-speculative
* optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the
* data read before the pointer read by speculating the value of the pointer.
* Correct ordering is ensured because the pointer is read as a volatile access.
*/
#define _rcu_dereference(p) ({ \
- typeof(p) _________p1 = LOAD_SHARED(p); \
+ typeof(p) _________p1 = CAA_LOAD_SHARED(p); \
cmm_smp_read_barrier_depends(); \
(_________p1); \
})
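For context, a hedged sketch of the public-facing reader pattern this macro backs (the struct, its field, the global pointer, and the header name are illustrative assumptions, not part of the patch):

#include <urcu.h>	/* assumed default-flavor header: rcu_read_lock(), rcu_dereference() */

struct config {
	int threshold;
};

static struct config *global_config;	/* published with rcu_assign_pointer() */

static int read_threshold(void)
{
	struct config *c;
	int t = 0;

	rcu_read_lock();
	c = rcu_dereference(global_config);	/* volatile load + dependency barrier */
	if (c)
		t = c->threshold;
	rcu_read_unlock();
	return t;
}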
/* Data used by both reader and synchronize_rcu() */
unsigned long ctr;
/* Data used for registry */
- struct list_head node __attribute__((aligned(CACHE_LINE_SIZE)));
+ struct list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
pthread_t tid;
};
{
unsigned long v;
- v = LOAD_SHARED(*ctr);
+ v = CAA_LOAD_SHARED(*ctr);
return v && (v != rcu_gp_ctr);
}
static inline void _rcu_quiescent_state(void)
{
cmm_smp_mb();
- _STORE_SHARED(rcu_reader.ctr, _LOAD_SHARED(rcu_gp_ctr));
+ _CAA_STORE_SHARED(rcu_reader.ctr, _CAA_LOAD_SHARED(rcu_gp_ctr));
cmm_smp_mb(); /* write rcu_reader.ctr before read futex */
wake_up_gp();
cmm_smp_mb();
static inline void _rcu_thread_offline(void)
{
cmm_smp_mb();
- STORE_SHARED(rcu_reader.ctr, 0);
+ CAA_STORE_SHARED(rcu_reader.ctr, 0);
cmm_smp_mb(); /* write rcu_reader.ctr before read futex */
wake_up_gp();
cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */
static inline void _rcu_thread_online(void)
{
cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */
- _STORE_SHARED(rcu_reader.ctr, LOAD_SHARED(rcu_gp_ctr));
+ _CAA_STORE_SHARED(rcu_reader.ctr, CAA_LOAD_SHARED(rcu_gp_ctr));
cmm_smp_mb();
}
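A hedged sketch of a QSBR reader thread driving these primitives through the public wrappers (<urcu-qsbr.h> and the loop bound are assumptions for illustration):

#include <urcu-qsbr.h>	/* assumed QSBR flavor header */

static void *reader_thread(void *arg)
{
	unsigned long i;

	(void)arg;
	rcu_register_thread();
	for (i = 0; i < 1000000; i++) {
		rcu_read_lock();	/* no-op under QSBR, kept for symmetry */
		/* ... dereference RCU-protected data ... */
		rcu_read_unlock();
		rcu_quiescent_state();	/* report a quiescent state, as above */
	}
	rcu_unregister_thread();
	return NULL;
}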
#if (BITS_PER_LONG < 64)
/* Switch parity: 0 -> 1, 1 -> 0 */
- STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+ CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
#else /* !(BITS_PER_LONG < 64) */
/* Increment current G.P. */
- STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+ CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
#endif /* !(BITS_PER_LONG < 64) */
/*
wait_gp();
} else {
#ifndef HAS_INCOHERENT_CACHES
- cpu_relax();
+ caa_cpu_relax();
#else /* #ifndef HAS_INCOHERENT_CACHES */
cmm_smp_mb();
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
* threads registered as readers.
*/
if (was_online)
- STORE_SHARED(rcu_reader.ctr, 0);
+ CAA_STORE_SHARED(rcu_reader.ctr, 0);
mutex_lock(&rcu_gp_lock);
* freed.
*/
if (was_online)
- _STORE_SHARED(rcu_reader.ctr, LOAD_SHARED(rcu_gp_ctr));
+ _CAA_STORE_SHARED(rcu_reader.ctr, CAA_LOAD_SHARED(rcu_gp_ctr));
cmm_smp_mb();
}
#else /* !(BITS_PER_LONG < 64) */
*/
cmm_smp_mb();
if (was_online)
- STORE_SHARED(rcu_reader.ctr, 0);
+ CAA_STORE_SHARED(rcu_reader.ctr, 0);
mutex_lock(&rcu_gp_lock);
if (list_empty(&registry))
mutex_unlock(&rcu_gp_lock);
if (was_online)
- _STORE_SHARED(rcu_reader.ctr, LOAD_SHARED(rcu_gp_ctr));
+ _CAA_STORE_SHARED(rcu_reader.ctr, CAA_LOAD_SHARED(rcu_gp_ctr));
cmm_smp_mb();
}
#endif /* !(BITS_PER_LONG < 64) */
unsigned long ctr;
char need_mb;
/* Data used for registry */
- struct list_head node __attribute__((aligned(CACHE_LINE_SIZE)));
+ struct list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
pthread_t tid;
};
* Make sure both tests below are done on the same version of *value
* to insure consistency.
*/
- v = LOAD_SHARED(*ctr);
+ v = CAA_LOAD_SHARED(*ctr);
return (v & RCU_GP_CTR_NEST_MASK) &&
((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE);
}
* RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE)
*/
if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) {
- _STORE_SHARED(rcu_reader.ctr, _LOAD_SHARED(rcu_gp_ctr));
+ _CAA_STORE_SHARED(rcu_reader.ctr, _CAA_LOAD_SHARED(rcu_gp_ctr));
/*
* Set active readers count for outermost nesting level before
* accessing the pointer. See smp_mb_master().
*/
smp_mb_slave(RCU_MB_GROUP);
} else {
- _STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT);
+ _CAA_STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT);
}
}
*/
if (likely((tmp & RCU_GP_CTR_NEST_MASK) == RCU_GP_COUNT)) {
smp_mb_slave(RCU_MB_GROUP);
- _STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+ _CAA_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
/* write rcu_reader.ctr before read futex */
smp_mb_slave(RCU_MB_GROUP);
wake_up_gp();
} else {
- _STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+ _CAA_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
}
cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */
}
perror("Error in pthread mutex lock");
exit(-1);
}
- if (LOAD_SHARED(rcu_reader.need_mb)) {
+ if (CAA_LOAD_SHARED(rcu_reader.need_mb)) {
cmm_smp_mb();
- _STORE_SHARED(rcu_reader.need_mb, 0);
+ _CAA_STORE_SHARED(rcu_reader.need_mb, 0);
cmm_smp_mb();
}
poll(NULL,0,10);
* cache flush is enforced.
*/
list_for_each_entry(index, &registry, node) {
- STORE_SHARED(index->need_mb, 1);
+ CAA_STORE_SHARED(index->need_mb, 1);
pthread_kill(index->tid, SIGRCU);
}
/*
* the Linux Test Project (LTP).
*/
list_for_each_entry(index, &registry, node) {
- while (LOAD_SHARED(index->need_mb)) {
+ while (CAA_LOAD_SHARED(index->need_mb)) {
pthread_kill(index->tid, SIGRCU);
poll(NULL, 0, 1);
}
struct rcu_reader *index, *tmp;
/* Switch parity: 0 -> 1, 1 -> 0 */
- STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
+ CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
/*
* Must commit rcu_gp_ctr update to memory before waiting for quiescent
if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
wait_gp();
else
- cpu_relax();
+ caa_cpu_relax();
}
#else /* #ifndef HAS_INCOHERENT_CACHES */
/*
wait_loops = 0;
break; /* only escape switch */
default:
- cpu_relax();
+ caa_cpu_relax();
}
}
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
* executed on.
*/
cmm_smp_mb();
- _STORE_SHARED(rcu_reader.need_mb, 0);
+ _CAA_STORE_SHARED(rcu_reader.need_mb, 0);
cmm_smp_mb();
}
extern "C" {
#endif
-#ifndef CACHE_LINE_SIZE
-#define CACHE_LINE_SIZE 64
+#ifndef CAA_CACHE_LINE_SIZE
+#define CAA_CACHE_LINE_SIZE 64
#endif
#if !defined(cmm_mc) && !defined(cmm_rmc) && !defined(cmm_wmc)
#define cmm_smp_read_barrier_depends()
#endif
-#ifndef cpu_relax
-#define cpu_relax() cmm_barrier()
+#ifndef caa_cpu_relax
+#define caa_cpu_relax() cmm_barrier()
#endif
#ifdef __cplusplus
#endif
/* Include size of POWER5+ L3 cache lines: 256 bytes */
-#define CACHE_LINE_SIZE 256
+#define CAA_CACHE_LINE_SIZE 256
#define cmm_mb() asm volatile("sync":::"memory")
extern "C" {
#endif
-#define CACHE_LINE_SIZE 128
+#define CAA_CACHE_LINE_SIZE 128
#define cmm_mb() __asm__ __volatile__("bcr 15,0" : : : "memory")
extern "C" {
#endif
-#define CACHE_LINE_SIZE 256
+#define CAA_CACHE_LINE_SIZE 256
/*
* Inspired from the Linux kernel. Workaround Spitfire bug #51.
extern "C" {
#endif
-#define CACHE_LINE_SIZE 128
+#define CAA_CACHE_LINE_SIZE 128
#ifdef CONFIG_RCU_HAVE_FENCE
#define cmm_mb() asm volatile("mfence":::"memory")
#define cmm_wmb() asm volatile("lock; addl $0,0(%%esp)"::: "memory")
#endif
-#define cpu_relax() asm volatile("rep; nop" : : : "memory");
+#define caa_cpu_relax() asm volatile("rep; nop" : : : "memory");
#define rdtscll(val) \
do { \
typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void)
+static inline cycles_t caa_get_cycles(void)
{
cycles_t ret = 0;
/*
* Instruct the compiler to perform only a single access to a variable
* (prohibits merging and refetching). The compiler is also forbidden to reorder
- * successive instances of ACCESS_ONCE(), but only when the compiler is aware of
+ * successive instances of CAA_ACCESS_ONCE(), but only when the compiler is aware of
* particular ordering. Compiler ordering can be ensured, for example, by
- * putting two ACCESS_ONCE() in separate C statements.
+ * putting two CAA_ACCESS_ONCE() in separate C statements.
*
* This macro does absolutely -nothing- to prevent the CPU from reordering,
* merging, or refetching absolutely anything at any time. Its main intended
* use is to mediate communication between process-level code and irq/NMI
* handlers, all running on the same CPU.
*/
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+#define CAA_ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
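The semantics are unchanged by the rename; a hedged sketch of the intended use (the flag and the spin loop are illustrative, and caa_cpu_relax() comes from the arch header):

static int stop_flag;	/* written by another thread */

static void wait_for_stop(void)
{
	/* Force one real load of stop_flag per iteration; the compiler may
	 * neither cache the value across iterations nor refetch it mid-test. */
	while (!CAA_ACCESS_ONCE(stop_flag))
		caa_cpu_relax();
}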
#ifndef max
#define max(a,b) ((a)>(b)?(a):(b))
#endif
#if defined(__SIZEOF_LONG__)
-#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#define CAA_BITS_PER_LONG (__SIZEOF_LONG__ * 8)
#elif defined(_LP64)
-#define BITS_PER_LONG 64
+#define CAA_BITS_PER_LONG 64
#else
-#define BITS_PER_LONG 32
+#define CAA_BITS_PER_LONG 32
#endif
-#define container_of(ptr, type, member) \
+#define caa_container_of(ptr, type, member) \
({ \
const typeof(((type *)NULL)->member) * __ptr = (ptr); \
(type *)((char *)__ptr - offsetof(type, member)); \
/*
* Identify a shared load. A cmm_smp_rmc() or cmm_smp_mc() should come before the load.
*/
-#define _LOAD_SHARED(p) ACCESS_ONCE(p)
+#define _CAA_LOAD_SHARED(p) CAA_ACCESS_ONCE(p)
/*
* Load a data from shared memory, doing a cache flush if required.
*/
-#define LOAD_SHARED(p) \
+#define CAA_LOAD_SHARED(p) \
({ \
cmm_smp_rmc(); \
- _LOAD_SHARED(p); \
+ _CAA_LOAD_SHARED(p); \
})
/*
* Identify a shared store. A cmm_smp_wmc() or cmm_smp_mc() should follow the store.
*/
-#define _STORE_SHARED(x, v) ({ ACCESS_ONCE(x) = (v); })
+#define _CAA_STORE_SHARED(x, v) ({ CAA_ACCESS_ONCE(x) = (v); })
/*
* Store v into x, where x is located in shared memory. Performs the required
* cache flush after writing. Returns v.
*/
-#define STORE_SHARED(x, v) \
+#define CAA_STORE_SHARED(x, v) \
({ \
- typeof(x) _v = _STORE_SHARED(x, v); \
+ typeof(x) _v = _CAA_STORE_SHARED(x, v); \
cmm_smp_wmc(); \
_v; \
})
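A hedged sketch of the intended pairing (the index variable is illustrative): the producer publishes with CAA_STORE_SHARED(), which is followed by cmm_smp_wmc(); the consumer samples with CAA_LOAD_SHARED(), which is preceded by cmm_smp_rmc(); both cache-flush hooks are no-ops on cache-coherent machines.

static unsigned long head;	/* producer-owned index, sampled by the consumer */

static void publish_head(unsigned long new_head)
{
	CAA_STORE_SHARED(head, new_head);	/* store + cmm_smp_wmc() */
}

static unsigned long sample_head(void)
{
	return CAA_LOAD_SHARED(head);		/* cmm_smp_rmc() + load */
}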
};
#define __hp(x) ((struct __uatomic_dummy *)(x))
-#define _uatomic_set(addr, v) STORE_SHARED(*(addr), (v))
+#define _uatomic_set(addr, v) CAA_STORE_SHARED(*(addr), (v))
/* cmpxchg */
#endif
#ifndef uatomic_set
-#define uatomic_set(addr, v) STORE_SHARED(*(addr), (v))
+#define uatomic_set(addr, v) CAA_STORE_SHARED(*(addr), (v))
#endif
#ifndef uatomic_read
-#define uatomic_read(addr) LOAD_SHARED(*(addr))
+#define uatomic_read(addr) CAA_LOAD_SHARED(*(addr))
#endif
#if !defined __OPTIMIZE__ || defined UATOMIC_NO_LINK_ERROR
* that the queue is being appended to. The following store will append
* "node" to the queue from a dequeuer perspective.
*/
- STORE_SHARED(*old_tail, node);
+ CAA_STORE_SHARED(*old_tail, node);
}
/*
/*
* Queue is empty if it only contains the dummy node.
*/
- if (q->head == &q->dummy && LOAD_SHARED(q->tail) == &q->dummy.next)
+ if (q->head == &q->dummy && CAA_LOAD_SHARED(q->tail) == &q->dummy.next)
return NULL;
node = q->head;
/*
* Adaptative busy-looping waiting for enqueuer to complete enqueue.
*/
- while ((next = LOAD_SHARED(node->next)) == NULL) {
+ while ((next = CAA_LOAD_SHARED(node->next)) == NULL) {
if (++attempt >= WFQ_ADAPT_ATTEMPTS) {
poll(NULL, 0, WFQ_WAIT); /* Wait for 10ms */
attempt = 0;
} else
- cpu_relax();
+ caa_cpu_relax();
}
/*
* Move queue head forward.
* At this point, dequeuers see a NULL node->next, they should busy-wait
* until node->next is set to old_head.
*/
- STORE_SHARED(node->next, old_head);
+ CAA_STORE_SHARED(node->next, old_head);
}
/*
int attempt = 0;
retry:
- head = LOAD_SHARED(s->head);
+ head = CAA_LOAD_SHARED(s->head);
if (head == WF_STACK_END)
return NULL;
/*
* Adaptative busy-looping waiting for push to complete.
*/
- while ((next = LOAD_SHARED(head->next)) == NULL) {
+ while ((next = CAA_LOAD_SHARED(head->next)) == NULL) {
if (++attempt >= WFS_ADAPT_ATTEMPTS) {
poll(NULL, 0, WFS_WAIT); /* Wait for 10ms */
attempt = 0;
} else
- cpu_relax();
+ caa_cpu_relax();
}
if (uatomic_cmpxchg(&s->head, head, next) == head)
return head;