Remove parameter from rcu_read_lock()
author Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Mon, 9 Feb 2009 04:56:15 +0000 (23:56 -0500)
committer Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Mon, 9 Feb 2009 04:56:15 +0000 (23:56 -0500)
Also makes the read fast-path twice as fast:

7 cycles instead of 14 on an 8-core x86_64.

Mathieu :

I limited the number of nested readers to 256. That should be enough, and it
lets us use testb generically.

Changed the 64-bit code to make it the same as the 32-bit code. I prefer to
have the exact same behavior on both architectures.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Makefile
test_urcu.c
test_urcu_timing.c
urcu.c
urcu.h

index cac6b5cada5f090e46d7d32ed33e05efd17d9f61..8c343ccd5c039244af33b173d38c8e9cdc119ed6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,18 +6,19 @@ LDFLAGS=-lpthread
 
 SRC_DEP=`echo $^ | sed 's/[^ ]*.h//g'`
 
-all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield
+all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield urcu-asm.S \
+       urcu-asm.o
 
-test_urcu: urcu.o test_urcu.c
+test_urcu: urcu.o test_urcu.c urcu.h
        $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
 
-test_urcu_yield: urcu-yield.o test_urcu.c
+test_urcu_yield: urcu-yield.o test_urcu.c urcu.h
        $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
 
-test_urcu_timing: urcu.o test_urcu_timing.c
+test_urcu_timing: urcu.o test_urcu_timing.c urcu.h
        $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
 
-test_rwlock_timing: urcu.o test_rwlock_timing.c
+test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h
        $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
 
 urcu.o: urcu.c urcu.h
@@ -26,7 +27,14 @@ urcu.o: urcu.c urcu.h
 urcu-yield.o: urcu.c urcu.h
        $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
 
+urcu-asm.S: urcu-asm.c urcu.h
+       $(CC) ${CFLAGS} -S -o $@ $(SRC_DEP)
+
+urcu-asm.o: urcu-asm.c urcu.h
+       $(CC) ${CFLAGS} -c -o $@ $(SRC_DEP)
+
 .PHONY: clean
 
 clean:
-       rm -f urcu.o test_urcu test_urcu_timing
+       rm -f *.o test_urcu test_urcu_timing test_rwlock_timing urcu-asm.S \
+               test_urcu_yield
index 93bc013b4a3a3575e598ef10eb61096871db7aa1..39408a0bdd6d7141915b66222a9d64a7f655a7c3 100644 (file)
@@ -88,7 +88,6 @@ void rcu_copy_mutex_unlock(void)
 
 void *thr_reader(void *arg)
 {
-       int qparity;
        struct test_array *local_ptr;
 
        printf("thread_begin %s, thread id : %lx, tid %lu\n",
@@ -97,11 +96,11 @@ void *thr_reader(void *arg)
        urcu_register_thread();
 
        for (;;) {
-               rcu_read_lock(&qparity);
+               rcu_read_lock();
                local_ptr = rcu_dereference(test_rcu_pointer);
                if (local_ptr)
                        assert(local_ptr->a == 8);
-               rcu_read_unlock(&qparity);
+               rcu_read_unlock();
                if (!test_duration())
                        break;
        }
@@ -144,6 +143,15 @@ void *thr_writer(void *arg)
        return ((void*)2);
 }
 
+void show_usage(int argc, char **argv)
+{
+       printf("Usage : %s duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+       printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+       printf("\n");
+}
+
 int main(int argc, char **argv)
 {
        int err;
@@ -152,15 +160,13 @@ int main(int argc, char **argv)
        int i;
 
        if (argc < 2) {
-               printf("Usage : %s duration (s) [-r] [-w] "
-                      "(yield reader and/or writer)\n", argv[0]);
+               show_usage(argc, argv);
                return -1;
        }
 
        err = sscanf(argv[1], "%lu", &duration);
        if (err != 1) {
-               printf("Usage : %s duration (s) [-r] [-w] "
-                      "(yield reader and/or writer)\n", argv[0]);
+               show_usage(argc, argv);
                return -1;
        }
 
index 57fda4fd4b9719b576aa152c4c62f0199ace4a04..9903705c6c74fbcf08efed40eb72b5050f069dff 100644 (file)
@@ -94,7 +94,7 @@ static cycles_t reader_time[NR_READ] __attribute__((aligned(128)));
 
 void *thr_reader(void *arg)
 {
-       int qparity, i, j;
+       int i, j;
        struct test_array *local_ptr;
        cycles_t time1, time2;
 
@@ -107,12 +107,12 @@ void *thr_reader(void *arg)
        time1 = get_cycles();
        for (i = 0; i < OUTER_READ_LOOP; i++) {
                for (j = 0; j < INNER_READ_LOOP; j++) {
-                       rcu_read_lock(&qparity);
+                       rcu_read_lock();
                        local_ptr = rcu_dereference(test_rcu_pointer);
                        if (local_ptr) {
                                assert(local_ptr->a == 8);
                        }
-                       rcu_read_unlock(&qparity);
+                       rcu_read_unlock();
                }
        }
        time2 = get_cycles();
diff --git a/urcu.c b/urcu.c
index 83d2fe4d6d9c367796d123fb05286f8305dfc332..31cdf6fc08b61c14e0a8f47d93154c0d54599ccc 100644 (file)
--- a/urcu.c
+++ b/urcu.c
 
 pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-/* Global quiescent period parity */
-int urcu_qparity;
+/* Global grace period counter */
+int urcu_gp_ctr;
 
-int __thread urcu_active_readers[2];
+int __thread urcu_active_readers;
 
 /* Thread IDs of registered readers */
 #define INIT_NUM_THREADS 4
@@ -65,11 +65,9 @@ void internal_urcu_unlock(void)
 /*
  * called with urcu_mutex held.
  */
-static int switch_next_urcu_qparity(void)
+static void switch_next_urcu_qparity(void)
 {
-       int old_parity = urcu_qparity;
-       urcu_qparity = 1 - old_parity;
-       return old_parity;
+       urcu_gp_ctr ^= RCU_GP_CTR_BIT;
 }
 
 static void force_mb_all_threads(void)
@@ -101,7 +99,7 @@ static void force_mb_all_threads(void)
        debug_yield_write();
 }
 
-void wait_for_quiescent_state(int parity)
+void wait_for_quiescent_state(void)
 {
        struct reader_data *index;
 
@@ -113,7 +111,7 @@ void wait_for_quiescent_state(int parity)
                /*
                 * BUSY-LOOP.
                 */
-               while (index->urcu_active_readers[parity] != 0)
+               while (rcu_old_gp_ongoing(index->urcu_active_readers))
                        barrier();
        }
        /*
@@ -127,19 +125,17 @@ void wait_for_quiescent_state(int parity)
 
 static void switch_qparity(void)
 {
-       int prev_parity;
-
        /* All threads should read qparity before accessing data structure. */
        /* Write ptr before changing the qparity */
        force_mb_all_threads();
        debug_yield_write();
-       prev_parity = switch_next_urcu_qparity();
+       switch_next_urcu_qparity();
        debug_yield_write();
 
        /*
         * Wait for previous parity to be empty of readers.
         */
-       wait_for_quiescent_state(prev_parity);
+       wait_for_quiescent_state();
 }
 
 void synchronize_rcu(void)
@@ -212,7 +208,7 @@ void urcu_add_reader(pthread_t id)
        }
        reader_data[num_readers].tid = id;
        /* reference to the TLS of _this_ reader thread. */
-       reader_data[num_readers].urcu_active_readers = urcu_active_readers;
+       reader_data[num_readers].urcu_active_readers = &urcu_active_readers;
        num_readers++;
 }
 
diff --git a/urcu.h b/urcu.h
index c77b26f958e34b8521f1b8363d83b6492cb46b0f..01a4c6857d7e6fd31668dec57615be03e94ccc91 100644 (file)
--- a/urcu.h
+++ b/urcu.h
@@ -17,6 +17,8 @@
  * Distributed under GPLv2
  */
 
+#include <stdlib.h>
+
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
 
@@ -108,25 +110,43 @@ static inline void debug_yield_init(void)
 }
 #endif
 
-/* Global quiescent period parity */
-extern int urcu_qparity;
+/*
+ * Limiting the nesting level to 256 to keep instructions small in the read
+ * fast-path.
+ */
+#define RCU_GP_COUNT           (1U << 0)
+#define RCU_GP_CTR_BIT         (1U << 8)
+#define RCU_GP_CTR_NEST_MASK   (RCU_GP_CTR_BIT - 1)
+
+/* Global quiescent period counter with low-order bits unused. */
+extern int urcu_gp_ctr;
 
-extern int __thread urcu_active_readers[2];
+extern int __thread urcu_active_readers;
 
-static inline int get_urcu_qparity(void)
+static inline int rcu_old_gp_ongoing(int *value)
 {
-       return urcu_qparity;
+       int v;
+
+       if (value == NULL)
+               return 0;
+       debug_yield_write();
+       v = ACCESS_ONCE(*value);
+       debug_yield_write();
+       return (v & RCU_GP_CTR_NEST_MASK) &&
+                ((v ^ ACCESS_ONCE(urcu_gp_ctr)) & RCU_GP_CTR_BIT);
 }
 
-/*
- * urcu_parity should be declared on the caller's stack.
- */
-static inline void rcu_read_lock(int *urcu_parity)
+static inline void rcu_read_lock(void)
 {
+       int tmp;
+
        debug_yield_read();
-       *urcu_parity = get_urcu_qparity();
+       tmp = urcu_active_readers;
        debug_yield_read();
-       urcu_active_readers[*urcu_parity]++;
+       if (!(tmp & RCU_GP_CTR_NEST_MASK))
+               urcu_active_readers = urcu_gp_ctr + RCU_GP_COUNT;
+       else
+               urcu_active_readers = tmp + RCU_GP_COUNT;
        debug_yield_read();
        /*
         * Increment active readers count before accessing the pointer.
@@ -136,7 +156,7 @@ static inline void rcu_read_lock(int *urcu_parity)
        debug_yield_read();
 }
 
-static inline void rcu_read_unlock(int *urcu_parity)
+static inline void rcu_read_unlock(void)
 {
        debug_yield_read();
        barrier();
@@ -145,7 +165,7 @@ static inline void rcu_read_unlock(int *urcu_parity)
         * Finish using rcu before decrementing the pointer.
         * See force_mb_all_threads().
         */
-       urcu_active_readers[*urcu_parity]--;
+       urcu_active_readers -= RCU_GP_COUNT;
        debug_yield_read();
 }
 
This page took 0.030464 seconds and 4 git commands to generate.