generic urcu deferral (call_rcu())
author Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Sun, 20 Sep 2009 02:20:16 +0000 (22:20 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Sun, 20 Sep 2009 02:20:16 +0000 (22:20 -0400)
Found a way to encode the queue so that, in the common case, each call_rcu()
uses a single pointer slot.

Uses more space for:
- unaligned function pointers.
- unaligned data pointers.
- a function/data value of -2L (reserved as the marker; the value is arbitrary).

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
tests/test_urcu_defer.c
urcu-defer-static.h
urcu-defer.c
urcu-defer.h

index e56dffe2e308374dad1554b32013e3718309f4f1..233b38498ced552ed04ba753af5974892d89ec2f 100644 (file)
@@ -209,6 +209,14 @@ void *thr_reader(void *_count)
 
 }
 
+static void test_cb2(void *data)
+{
+}
+
+static void test_cb1(void *data)
+{
+}
+
 void *thr_writer(void *data)
 {
        unsigned long wtidx = (unsigned long)data;
@@ -230,7 +238,16 @@ void *thr_writer(void *data)
                new = malloc(sizeof(*new));
                new->a = 8;
                old = rcu_xchg_pointer(&test_rcu_pointer, new);
-               rcu_defer_queue(old);
+               call_rcu(free, old);
+#if 0
+               call_rcu(test_cb1, old);
+               call_rcu(test_cb1, (void *)-2L);
+               call_rcu(test_cb1, (void *)-2L);
+               call_rcu(test_cb1, old);
+               call_rcu(test_cb2, (void *)-2L);
+#endif //0
+               call_rcu(test_cb2, (void *)-4L);
+               //call_rcu(test_cb2, (void *)-2L);
                nr_writes++;
                if (unlikely(!test_duration_write()))
                        break;
index 25137690465fa9c32ef5939be20d1cec3bdd17ad..7d0ed52cf88b624d5a236a5a35c3d1f8920ef661 100644 (file)
 #define DEFER_QUEUE_SIZE       (1 << 12)
 #define DEFER_QUEUE_MASK       (DEFER_QUEUE_SIZE - 1)
 
+/*
+ * Typically, data is aligned at least on the architecture size.
+ * Use lowest bit to indicate that the current callback is changing.
+ * Assumes that (void *)-2L is not used often. Used to encode non-aligned
+ * functions and non-aligned data using extra space.
+ * We encode the (void *)-2L fct as: -2L, fct, data.
+ * We encode the (void *)-2L data as: -2L, fct, data.
+ * Here, DQ_FCT_MARK == ~DQ_FCT_BIT. Required for the test order.
+ */
+#define DQ_FCT_BIT             (1 << 0)
+#define DQ_IS_FCT_BIT(x)       ((unsigned long)(x) & DQ_FCT_BIT)
+#define DQ_SET_FCT_BIT(x)      \
+       (x = (void *)((unsigned long)(x) | DQ_FCT_BIT))
+#define DQ_CLEAR_FCT_BIT(x)    \
+       (x = (void *)((unsigned long)(x) & ~DQ_FCT_BIT))
+#define DQ_FCT_MARK            ((void *)(~DQ_FCT_BIT))
+
 /*
  * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
  */
 #define rcu_assert(args...)
 #endif
 
+/*
+ * defer queue.
+ * Contains pointers. Encoded to save space when same callback is often used.
+ * When looking up the next item:
+ * - if DQ_FCT_BIT is set, set the current callback to DQ_CLEAR_FCT_BIT(ptr)
+ *   - next element contains pointer to data.
+ * - else if item == DQ_FCT_MARK
+ *   - set the current callback to next element ptr
+ *   - following next element contains pointer to data.
+ * - else current element contains data
+ */
 struct defer_queue {
        unsigned long head;     /* add element at head */
+       void *last_fct_in;      /* last fct pointer encoded */
        unsigned long tail;     /* next element to remove at tail */
+       void *last_fct_out;     /* last fct pointer decoded */
        void **q;
 };
 
@@ -99,7 +129,7 @@ extern void rcu_defer_barrier_thread(void);
 /*
  * not signal-safe.
  */
-static inline void _rcu_defer_queue(void *p)
+static inline void _rcu_defer_queue(void (*fct)(void *p), void *p)
 {
        unsigned long head, tail;
 
@@ -112,17 +142,49 @@ static inline void _rcu_defer_queue(void *p)
 
        /*
         * If queue is full, empty it ourself.
+        * Worst-case: must allow 2 supplementary entries for fct pointer.
         */
-       if (unlikely(head - tail >= DEFER_QUEUE_SIZE)) {
-               assert(head - tail == DEFER_QUEUE_SIZE);
+       if (unlikely(head - tail >= DEFER_QUEUE_SIZE - 2)) {
+               assert(head - tail <= DEFER_QUEUE_SIZE);
                rcu_defer_barrier_thread();
                assert(head - LOAD_SHARED(defer_queue.tail) == 0);
        }
 
        smp_wmb();      /* Publish new pointer before write q[] */
-       _STORE_SHARED(defer_queue.q[head & DEFER_QUEUE_MASK], p);
+       if (unlikely(defer_queue.last_fct_in != fct)) {
+               //printf("fct diff %p %p\n", defer_queue.last_fct, fct);
+               defer_queue.last_fct_in = fct;
+               if (unlikely(DQ_IS_FCT_BIT(fct) || fct == DQ_FCT_MARK)) {
+                       /*
+                        * If the function to encode is not aligned or the
+                        * marker, write DQ_FCT_MARK followed by the function
+                        * pointer.
+                        */
+                       _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                     DQ_FCT_MARK);
+                       _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                     fct);
+               } else {
+                       DQ_SET_FCT_BIT(fct);
+                       _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                     fct);
+               }
+       } else {
+               //printf("fct same %p\n", fct);
+               if (unlikely(DQ_IS_FCT_BIT(p) || p == DQ_FCT_MARK)) {
+                       /*
+                        * If the data to encode is not aligned or the marker,
+                        * write DQ_FCT_MARK followed by the function pointer.
+                        */
+                       _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                     DQ_FCT_MARK);
+                       _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                     fct);
+               }
+       }
+       _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
        smp_wmb();      /* Write q[] before head. */
-       STORE_SHARED(defer_queue.head, head + 1);
+       STORE_SHARED(defer_queue.head, head);
 }
 
 #endif /* _URCU_DEFER_STATIC_H */
index 1e5d2b6d6dba842c3aaa13eae8aed8d92dd17ebc..00dd3c60b3bcf1a2ea7895b3612953a0203af6f6 100644 (file)
@@ -101,18 +101,35 @@ static void internal_urcu_unlock(pthread_mutex_t *mutex)
  * Must be called after Q.S. is reached.
  */
 static void rcu_defer_barrier_queue(struct defer_queue *queue,
-                                     unsigned long head)
+                                   unsigned long head)
 {
        unsigned long i;
+       void (*fct)(void *p);
+       void *p;
 
        /*
         * Tail is only modified when lock is held.
         * Head is only modified by owner thread.
         */
 
-       for (i = queue->tail; i != head; i++) {
+       for (i = queue->tail; i != head;) {
                smp_rmb();       /* read head before q[]. */
-               free(LOAD_SHARED(queue->q[i & DEFER_QUEUE_MASK]));
+               p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+               if (unlikely(DQ_IS_FCT_BIT(p))) {
+                       //printf("%lu fct bit %p\n", i-1, p);
+                       DQ_CLEAR_FCT_BIT(p);
+                       queue->last_fct_out = p;
+                       p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+               } else if (unlikely(p == DQ_FCT_MARK)) {
+                       //printf("%lu fct mark %p\n", i-1, p);
+                       p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+                       queue->last_fct_out = p;
+                       p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+               }// else
+                       //printf("%lu data %p\n", i-1, p);
+               fct = queue->last_fct_out;
+               //printf("tid %lu %lu last_fct %p data %p\n", pthread_self(), i-1, fct, p);
+               fct(p);
        }
        smp_mb();       /* push tail after having used q[] */
        STORE_SHARED(queue->tail, i);
@@ -168,9 +185,9 @@ void *thr_defer(void *args)
  * library wrappers to be used by non-LGPL compatible source code.
  */
 
-void rcu_defer_queue(void *p)
+void rcu_defer_queue(void (*fct)(void *p), void *p)
 {
-       _rcu_defer_queue(p);
+       _rcu_defer_queue(fct, p);
 }
 
 static void rcu_add_deferer(pthread_t id)
@@ -195,6 +212,7 @@ static void rcu_add_deferer(pthread_t id)
        registry[num_deferers].tid = id;
        /* reference to the TLS of _this_ deferer thread. */
        registry[num_deferers].defer_queue = &defer_queue;
+       registry[num_deferers].last_head = 0;
        num_deferers++;
 }
 
@@ -213,6 +231,7 @@ static void rcu_remove_deferer(pthread_t id)
                                sizeof(struct deferer_registry));
                        registry[num_deferers - 1].tid = 0;
                        registry[num_deferers - 1].defer_queue = NULL;
+                       registry[num_deferers - 1].last_head = 0;
                        num_deferers--;
                        return;
                }
index 456b8b7ff9bcbc635731cca576a609dc6de8c289..3e04c00fddb09d1cdd8465945eba4acbe385967a 100644 (file)
  * library wrappers to be used by non-LGPL compatible source code.
  */
 
-extern void rcu_defer_queue(void *p);
+extern void rcu_defer_queue(void (*fct)(void *p), void *p);
 
 #endif /* !_LGPL_SOURCE */
 
+#define call_rcu               rcu_defer_queue
+#define rcu_reclaim_queue(p)   rcu_defer_queue(free, p)
+
 /*
  * Thread registration for reclamation.
  */
This page took 0.029952 seconds and 4 git commands to generate.