From: Mathieu Desnoyers
Date: Sun, 20 Sep 2009 02:20:16 +0000 (-0400)
Subject: generic urcu deferral (call_rcu())
X-Git-Tag: v0.1~64
X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=804b437535f97b519db3e8715807950f5818cf27;p=userspace-rcu.git

generic urcu deferral (call_rcu())

Found a way to encode the queues so that the common case uses a single
pointer per call_rcu(). More space is needed only for:
- unaligned function pointers,
- unaligned data pointers,
- the function/data value -2L (this choice is arbitrary).

Signed-off-by: Mathieu Desnoyers
---
diff --git a/tests/test_urcu_defer.c b/tests/test_urcu_defer.c
index e56dffe..233b384 100644
--- a/tests/test_urcu_defer.c
+++ b/tests/test_urcu_defer.c
@@ -209,6 +209,14 @@ void *thr_reader(void *_count)
 
 }
 
+static void test_cb2(void *data)
+{
+}
+
+static void test_cb1(void *data)
+{
+}
+
 void *thr_writer(void *data)
 {
 	unsigned long wtidx = (unsigned long)data;
@@ -230,7 +238,16 @@ void *thr_writer(void *data)
 		new = malloc(sizeof(*new));
 		new->a = 8;
 		old = rcu_xchg_pointer(&test_rcu_pointer, new);
-		rcu_defer_queue(old);
+		call_rcu(free, old);
+#if 0
+		call_rcu(test_cb1, old);
+		call_rcu(test_cb1, (void *)-2L);
+		call_rcu(test_cb1, (void *)-2L);
+		call_rcu(test_cb1, old);
+		call_rcu(test_cb2, (void *)-2L);
+#endif //0
+		call_rcu(test_cb2, (void *)-4L);
+		//call_rcu(test_cb2, (void *)-2L);
 		nr_writes++;
 		if (unlikely(!test_duration_write()))
 			break;
diff --git a/urcu-defer-static.h b/urcu-defer-static.h
index 2513769..7d0ed52 100644
--- a/urcu-defer-static.h
+++ b/urcu-defer-static.h
@@ -42,6 +42,23 @@
 #define DEFER_QUEUE_SIZE	(1 << 12)
 #define DEFER_QUEUE_MASK	(DEFER_QUEUE_SIZE - 1)
 
+/*
+ * Typically, data is aligned at least on the architecture size.
+ * Use lowest bit to indicate that the current callback is changing.
+ * Assumes that (void *)-2L is not used often. Used to encode non-aligned
+ * functions and non-aligned data using extra space.
+ * We encode the (void *)-2L fct as: -2L, fct, data.
+ * We encode the (void *)-2L data as: -2L, fct, data.
+ * Here, DQ_FCT_MARK == ~DQ_FCT_BIT. Required for the test order.
+ */
+#define DQ_FCT_BIT		(1 << 0)
+#define DQ_IS_FCT_BIT(x)	((unsigned long)(x) & DQ_FCT_BIT)
+#define DQ_SET_FCT_BIT(x)	\
+	(x = (void *)((unsigned long)(x) | DQ_FCT_BIT))
+#define DQ_CLEAR_FCT_BIT(x)	\
+	(x = (void *)((unsigned long)(x) & ~DQ_FCT_BIT))
+#define DQ_FCT_MARK		((void *)(~DQ_FCT_BIT))
+
 /*
  * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
  */
@@ -86,9 +103,22 @@
 #define rcu_assert(args...)
 #endif
 
+/*
+ * defer queue.
+ * Contains pointers. Encoded to save space when same callback is often used.
+ * When looking up the next item:
+ * - if DQ_FCT_BIT is set, set the current callback to DQ_CLEAR_FCT_BIT(ptr)
+ *   - next element contains pointer to data.
+ * - else if item == DQ_FCT_MARK
+ *   - set the current callback to next element ptr
+ *   - following next element contains pointer to data.
+ * - else current element contains data
+ */
 struct defer_queue {
 	unsigned long head;	/* add element at head */
+	void *last_fct_in;	/* last fct pointer encoded */
 	unsigned long tail;	/* next element to remove at tail */
+	void *last_fct_out;	/* last fct pointer encoded */
 	void **q;
 };
 
@@ -99,7 +129,7 @@ extern void rcu_defer_barrier_thread(void);
 /*
  * not signal-safe.
  */
-static inline void _rcu_defer_queue(void *p)
+static inline void _rcu_defer_queue(void (*fct)(void *p), void *p)
 {
 	unsigned long head, tail;
 
@@ -112,17 +142,49 @@ static inline void _rcu_defer_queue(void *p)
 
 	/*
 	 * If queue is full, empty it ourself.
+	 * Worse-case: must allow 2 supplementary entries for fct pointer.
 	 */
-	if (unlikely(head - tail >= DEFER_QUEUE_SIZE)) {
-		assert(head - tail == DEFER_QUEUE_SIZE);
+	if (unlikely(head - tail >= DEFER_QUEUE_SIZE - 2)) {
+		assert(head - tail <= DEFER_QUEUE_SIZE);
 		rcu_defer_barrier_thread();
 		assert(head - LOAD_SHARED(defer_queue.tail) == 0);
 	}
 
 	smp_wmb();	/* Publish new pointer before write q[] */
-	_STORE_SHARED(defer_queue.q[head & DEFER_QUEUE_MASK], p);
+	if (unlikely(defer_queue.last_fct_in != fct)) {
+		//printf("fct diff %p %p\n", defer_queue.last_fct, fct);
+		defer_queue.last_fct_in = fct;
+		if (unlikely(DQ_IS_FCT_BIT(fct) || fct == DQ_FCT_MARK)) {
+			/*
+			 * If the function to encode is not aligned or the
+			 * marker, write DQ_FCT_MARK followed by the function
+			 * pointer.
+			 */
+			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+				      DQ_FCT_MARK);
+			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+				      fct);
+		} else {
+			DQ_SET_FCT_BIT(fct);
+			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+				      fct);
+		}
+	} else {
+		//printf("fct same %p\n", fct);
+		if (unlikely(DQ_IS_FCT_BIT(p) || p == DQ_FCT_MARK)) {
+			/*
+			 * If the data to encode is not aligned or the marker,
+			 * write DQ_FCT_MARK followed by the function pointer.
+			 */
+			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+				      DQ_FCT_MARK);
+			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+				      fct);
+		}
+	}
+	_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
 	smp_wmb();	/* Write q[] before head. */
-	STORE_SHARED(defer_queue.head, head + 1);
+	STORE_SHARED(defer_queue.head, head);
 }
 
 #endif /* _URCU_DEFER_STATIC_H */
diff --git a/urcu-defer.c b/urcu-defer.c
index 1e5d2b6..00dd3c6 100644
--- a/urcu-defer.c
+++ b/urcu-defer.c
@@ -101,18 +101,35 @@ static void internal_urcu_unlock(pthread_mutex_t *mutex)
  * Must be called after Q.S. is reached.
  */
 static void rcu_defer_barrier_queue(struct defer_queue *queue,
-			unsigned long head)
+				    unsigned long head)
 {
 	unsigned long i;
+	void (*fct)(void *p);
+	void *p;
 
 	/*
 	 * Tail is only modified when lock is held.
	 * Head is only modified by owner thread.
 	 */
 
-	for (i = queue->tail; i != head; i++) {
+	for (i = queue->tail; i != head;) {
 		smp_rmb();	/* read head before q[]. */
-		free(LOAD_SHARED(queue->q[i & DEFER_QUEUE_MASK]));
+		p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+		if (unlikely(DQ_IS_FCT_BIT(p))) {
+			//printf("%lu fct bit %p\n", i-1, p);
+			DQ_CLEAR_FCT_BIT(p);
+			queue->last_fct_out = p;
+			p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+		} else if (unlikely(p == DQ_FCT_MARK)) {
+			//printf("%lu fct mark %p\n", i-1, p);
+			p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+			queue->last_fct_out = p;
+			p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+		}// else
+			//printf("%lu data %p\n", i-1, p);
+		fct = queue->last_fct_out;
+		//printf("tid %lu %lu last_fct %p data %p\n", pthread_self(), i-1, fct, p);
+		fct(p);
 	}
 	smp_mb();	/* push tail after having used q[] */
 	STORE_SHARED(queue->tail, i);
@@ -168,9 +185,9 @@ void *thr_defer(void *args)
  * library wrappers to be used by non-LGPL compatible source code.
  */
 
-void rcu_defer_queue(void *p)
+void rcu_defer_queue(void (*fct)(void *p), void *p)
 {
-	_rcu_defer_queue(p);
+	_rcu_defer_queue(fct, p);
 }
 
 static void rcu_add_deferer(pthread_t id)
@@ -195,6 +212,7 @@ static void rcu_add_deferer(pthread_t id)
 	registry[num_deferers].tid = id;
 	/* reference to the TLS of _this_ deferer thread. */
 	registry[num_deferers].defer_queue = &defer_queue;
+	registry[num_deferers].last_head = 0;
 	num_deferers++;
 }
 
@@ -213,6 +231,7 @@ static void rcu_remove_deferer(pthread_t id)
 			sizeof(struct deferer_registry));
 		registry[num_deferers - 1].tid = 0;
 		registry[num_deferers - 1].defer_queue = NULL;
+		registry[num_deferers - 1].last_head = 0;
 		num_deferers--;
 		return;
 	}
diff --git a/urcu-defer.h b/urcu-defer.h
index 456b8b7..3e04c00 100644
--- a/urcu-defer.h
+++ b/urcu-defer.h
@@ -57,10 +57,13 @@
  * library wrappers to be used by non-LGPL compatible source code.
  */
 
-extern void rcu_defer_queue(void *p);
+extern void rcu_defer_queue(void (*fct)(void *p), void *p);
 
 #endif /* !_LGPL_SOURCE */
 
+#define call_rcu	rcu_defer_queue
+#define rcu_reclaim_queue(p)	rcu_defer_queue(free, p)
+
 /*
  * Thread registration for reclamation.
  */
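
For readers following the encoding logic, below is a minimal, single-threaded
sketch of the pointer-tagging scheme described in the urcu-defer-static.h
comment. It reuses the DQ_* macros from the patch, but the surrounding harness
(the fixed-size q[] array, enqueue(), drain(), print_cb() and main()) is
purely illustrative: it omits the memory barriers, the ring-buffer masking,
the flush-on-full path and the per-thread registration, so it shows only how
entries are packed and decoded, not the real implementation.

/*
 * Illustration only, not part of the patch: single-threaded sketch of the
 * queue encoding. Common case is one slot per call; extra slots are used
 * when the callback changes or when fct/data collide with the tag values.
 */
#include <stdio.h>
#include <stdlib.h>

#define DQ_FCT_BIT		(1 << 0)
#define DQ_IS_FCT_BIT(x)	((unsigned long)(x) & DQ_FCT_BIT)
#define DQ_SET_FCT_BIT(x)	\
	(x = (void *)((unsigned long)(x) | DQ_FCT_BIT))
#define DQ_CLEAR_FCT_BIT(x)	\
	(x = (void *)((unsigned long)(x) & ~DQ_FCT_BIT))
#define DQ_FCT_MARK		((void *)(~DQ_FCT_BIT))

static void *q[64];
static unsigned long head;
static void (*last_fct_in)(void *);

/* Producer side: same idea as _rcu_defer_queue(), without barriers. */
static void enqueue(void (*fct)(void *), void *p)
{
	void *f;

	if (fct != last_fct_in) {
		last_fct_in = fct;
		f = (void *)fct;
		if (DQ_IS_FCT_BIT(f) || f == DQ_FCT_MARK) {
			/* Unaligned fct (or the marker itself): emit mark + fct. */
			q[head++] = DQ_FCT_MARK;
			q[head++] = f;
		} else {
			/* Aligned fct: tag its low bit, one extra slot only. */
			DQ_SET_FCT_BIT(f);
			q[head++] = f;
		}
	} else if (DQ_IS_FCT_BIT(p) || p == DQ_FCT_MARK) {
		/* Data that could be mistaken for a tag: re-emit mark + fct. */
		q[head++] = DQ_FCT_MARK;
		q[head++] = (void *)fct;
	}
	q[head++] = p;	/* Common case: a single slot per call. */
}

/* Consumer side: same decoding order as rcu_defer_barrier_queue(). */
static void drain(void)
{
	void (*last_fct_out)(void *) = NULL;
	unsigned long i = 0;
	void *p;

	while (i < head) {
		p = q[i++];
		if (DQ_IS_FCT_BIT(p)) {
			/* Tagged callback pointer: next slot is the data. */
			DQ_CLEAR_FCT_BIT(p);
			last_fct_out = (void (*)(void *))p;
			p = q[i++];
		} else if (p == DQ_FCT_MARK) {
			/* Marker: next slot is the callback, then the data. */
			last_fct_out = (void (*)(void *))q[i++];
			p = q[i++];
		}
		last_fct_out(p);
	}
	head = 0;
}

static void print_cb(void *p)
{
	printf("callback got %p\n", p);
}

int main(void)
{
	enqueue(free, malloc(8));	/* 2 slots: tagged fct + data */
	enqueue(free, malloc(8));	/* 1 slot: same fct as before */
	enqueue(print_cb, (void *)-4L);	/* 2 slots: fct changed */
	enqueue(print_cb, (void *)-2L);	/* 3 slots: data == DQ_FCT_MARK */
	drain();
	return 0;
}

The four calls above consume 2 + 1 + 2 + 3 slots, which also shows why the
flush check in _rcu_defer_queue() keeps 2 spare entries: a single call_rcu()
may need to re-emit the function pointer in addition to the data slot.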