From: Mathieu Desnoyers Date: Mon, 9 Feb 2009 04:56:15 +0000 (-0500) Subject: Remove parameter from rcu_read_lock() X-Git-Tag: v0.1~314 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=1430ee0bdca4cb454d534ef7fc84af3e0692f26b;p=urcu.git Remove parameter from rcu_read_lock() Also makes the read fast-path twice as fast: 7 cycles instead of 14 on an 8-core x86_64. Mathieu: I limited the number of nested readers to 256. Should be enough and lets us use testb generically. Changed the 64-bit code to make it the same as the 32-bit code. I prefer to have the exact same behavior on both architectures. Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers --- diff --git a/Makefile b/Makefile index cac6b5c..8c343cc 100644 --- a/Makefile +++ b/Makefile @@ -6,18 +6,19 @@ LDFLAGS=-lpthread SRC_DEP=`echo $^ | sed 's/[^ ]*.h//g'` -all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield +all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield urcu-asm.S \ + urcu-asm.o -test_urcu: urcu.o test_urcu.c +test_urcu: urcu.o test_urcu.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) -test_urcu_yield: urcu-yield.o test_urcu.c +test_urcu_yield: urcu-yield.o test_urcu.c urcu.h $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) -test_urcu_timing: urcu.o test_urcu_timing.c +test_urcu_timing: urcu.o test_urcu_timing.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) -test_rwlock_timing: urcu.o test_rwlock_timing.c +test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) urcu.o: urcu.c urcu.h @@ -26,7 +27,14 @@ urcu.o: urcu.c urcu.h urcu-yield.o: urcu.c urcu.h $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP) +urcu-asm.S: urcu-asm.c urcu.h + $(CC) ${CFLAGS} -S -o $@ $(SRC_DEP) + +urcu-asm.o: urcu-asm.c urcu.h + $(CC) ${CFLAGS} -c -o $@ $(SRC_DEP) + .PHONY: clean clean: - rm -f urcu.o test_urcu test_urcu_timing + rm -f *.o test_urcu test_urcu_timing test_rwlock_timing urcu-asm.S \ + test_urcu_yield 
diff --git a/test_urcu.c b/test_urcu.c index 93bc013..39408a0 100644 --- a/test_urcu.c +++ b/test_urcu.c @@ -88,7 +88,6 @@ void rcu_copy_mutex_unlock(void) void *thr_reader(void *arg) { - int qparity; struct test_array *local_ptr; printf("thread_begin %s, thread id : %lx, tid %lu\n", @@ -97,11 +96,11 @@ void *thr_reader(void *arg) urcu_register_thread(); for (;;) { - rcu_read_lock(&qparity); + rcu_read_lock(); local_ptr = rcu_dereference(test_rcu_pointer); if (local_ptr) assert(local_ptr->a == 8); - rcu_read_unlock(&qparity); + rcu_read_unlock(); if (!test_duration()) break; } @@ -144,6 +143,15 @@ void *thr_writer(void *arg) return ((void*)2); } +void show_usage(int argc, char **argv) +{ + printf("Usage : %s duration (s)", argv[0]); +#ifdef DEBUG_YIELD + printf(" [-r] [-w] (yield reader and/or writer)"); +#endif + printf("\n"); +} + int main(int argc, char **argv) { int err; @@ -152,15 +160,13 @@ int main(int argc, char **argv) int i; if (argc < 2) { - printf("Usage : %s duration (s) [-r] [-w] " - "(yield reader and/or writer)\n", argv[0]); + show_usage(argc, argv); return -1; } err = sscanf(argv[1], "%lu", &duration); if (err != 1) { - printf("Usage : %s duration (s) [-r] [-w] " - "(yield reader and/or writer)\n", argv[0]); + show_usage(argc, argv); return -1; } diff --git a/test_urcu_timing.c b/test_urcu_timing.c index 57fda4f..9903705 100644 --- a/test_urcu_timing.c +++ b/test_urcu_timing.c @@ -94,7 +94,7 @@ static cycles_t reader_time[NR_READ] __attribute__((aligned(128))); void *thr_reader(void *arg) { - int qparity, i, j; + int i, j; struct test_array *local_ptr; cycles_t time1, time2; @@ -107,12 +107,12 @@ void *thr_reader(void *arg) time1 = get_cycles(); for (i = 0; i < OUTER_READ_LOOP; i++) { for (j = 0; j < INNER_READ_LOOP; j++) { - rcu_read_lock(&qparity); + rcu_read_lock(); local_ptr = rcu_dereference(test_rcu_pointer); if (local_ptr) { assert(local_ptr->a == 8); } - rcu_read_unlock(&qparity); + rcu_read_unlock(); } } time2 = get_cycles(); diff --git 
a/urcu.c b/urcu.c index 83d2fe4..31cdf6f 100644 --- a/urcu.c +++ b/urcu.c @@ -19,10 +19,10 @@ pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER; -/* Global quiescent period parity */ -int urcu_qparity; +/* Global grace period counter */ +int urcu_gp_ctr; -int __thread urcu_active_readers[2]; +int __thread urcu_active_readers; /* Thread IDs of registered readers */ #define INIT_NUM_THREADS 4 @@ -65,11 +65,9 @@ void internal_urcu_unlock(void) /* * called with urcu_mutex held. */ -static int switch_next_urcu_qparity(void) +static void switch_next_urcu_qparity(void) { - int old_parity = urcu_qparity; - urcu_qparity = 1 - old_parity; - return old_parity; + urcu_gp_ctr ^= RCU_GP_CTR_BIT; } static void force_mb_all_threads(void) @@ -101,7 +99,7 @@ static void force_mb_all_threads(void) debug_yield_write(); } -void wait_for_quiescent_state(int parity) +void wait_for_quiescent_state(void) { struct reader_data *index; @@ -113,7 +111,7 @@ void wait_for_quiescent_state(int parity) /* * BUSY-LOOP. */ - while (index->urcu_active_readers[parity] != 0) + while (rcu_old_gp_ongoing(index->urcu_active_readers)) barrier(); } /* @@ -127,19 +125,17 @@ void wait_for_quiescent_state(int parity) static void switch_qparity(void) { - int prev_parity; - /* All threads should read qparity before accessing data structure. */ /* Write ptr before changing the qparity */ force_mb_all_threads(); debug_yield_write(); - prev_parity = switch_next_urcu_qparity(); + switch_next_urcu_qparity(); debug_yield_write(); /* * Wait for previous parity to be empty of readers. */ - wait_for_quiescent_state(prev_parity); + wait_for_quiescent_state(); } void synchronize_rcu(void) @@ -212,7 +208,7 @@ void urcu_add_reader(pthread_t id) } reader_data[num_readers].tid = id; /* reference to the TLS of _this_ reader thread. 
*/ - reader_data[num_readers].urcu_active_readers = urcu_active_readers; + reader_data[num_readers].urcu_active_readers = &urcu_active_readers; num_readers++; } diff --git a/urcu.h b/urcu.h index c77b26f..01a4c68 100644 --- a/urcu.h +++ b/urcu.h @@ -17,6 +17,8 @@ * Distributed under GPLv2 */ +#include + /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") @@ -108,25 +110,43 @@ static inline void debug_yield_init(void) } #endif -/* Global quiescent period parity */ -extern int urcu_qparity; +/* + * Limiting the nesting level to 256 to keep instructions small in the read + * fast-path. + */ +#define RCU_GP_COUNT (1U << 0) +#define RCU_GP_CTR_BIT (1U << 8) +#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1) + +/* Global quiescent period counter with low-order bits unused. */ +extern int urcu_gp_ctr; -extern int __thread urcu_active_readers[2]; +extern int __thread urcu_active_readers; -static inline int get_urcu_qparity(void) +static inline int rcu_old_gp_ongoing(int *value) { - return urcu_qparity; + int v; + + if (value == NULL) + return 0; + debug_yield_write(); + v = ACCESS_ONCE(*value); + debug_yield_write(); + return (v & RCU_GP_CTR_NEST_MASK) && + ((v ^ ACCESS_ONCE(urcu_gp_ctr)) & RCU_GP_CTR_BIT); } -/* - * urcu_parity should be declared on the caller's stack. - */ -static inline void rcu_read_lock(int *urcu_parity) +static inline void rcu_read_lock(void) { + int tmp; + debug_yield_read(); - *urcu_parity = get_urcu_qparity(); + tmp = urcu_active_readers; debug_yield_read(); - urcu_active_readers[*urcu_parity]++; + if (!(tmp & RCU_GP_CTR_NEST_MASK)) + urcu_active_readers = urcu_gp_ctr + RCU_GP_COUNT; + else + urcu_active_readers = tmp + RCU_GP_COUNT; debug_yield_read(); /* * Increment active readers count before accessing the pointer. 
@@ -136,7 +156,7 @@ static inline void rcu_read_lock(int *urcu_parity) debug_yield_read(); } -static inline void rcu_read_unlock(int *urcu_parity) +static inline void rcu_read_unlock(void) { debug_yield_read(); barrier(); @@ -145,7 +165,7 @@ static inline void rcu_read_unlock(int *urcu_parity) * Finish using rcu before decrementing the pointer. * See force_mb_all_threads(). */ - urcu_active_readers[*urcu_parity]--; + urcu_active_readers -= RCU_GP_COUNT; debug_yield_read(); }