}
+/* No-op RCU callback stub: used only to exercise callback-pointer encoding. */
+static void test_cb2(void *data)
+{
+}
+
+/* No-op RCU callback stub: a second distinct fct pointer for the encoder. */
+static void test_cb1(void *data)
+{
+}
+
void *thr_writer(void *data)
{
unsigned long wtidx = (unsigned long)data;
new = malloc(sizeof(*new));
new->a = 8;
old = rcu_xchg_pointer(&test_rcu_pointer, new);
- rcu_defer_queue(old);
+ /* Defer reclamation of the old pointer until a grace period elapses. */
+ call_rcu(free, old);
+#if 0
+ /* Disabled: stress sequences mixing aligned and marker-colliding data. */
+ call_rcu(test_cb1, old);
+ call_rcu(test_cb1, (void *)-2L);
+ call_rcu(test_cb1, (void *)-2L);
+ call_rcu(test_cb1, old);
+ call_rcu(test_cb2, (void *)-2L);
+#endif //0
+ /* (void *)-4L: aligned data that does not collide with DQ_FCT_MARK. */
+ call_rcu(test_cb2, (void *)-4L);
+ //call_rcu(test_cb2, (void *)-2L);
nr_writes++;
if (unlikely(!test_duration_write()))
break;
#define DEFER_QUEUE_SIZE (1 << 12)
#define DEFER_QUEUE_MASK (DEFER_QUEUE_SIZE - 1)
+/*
+ * Typically, data is aligned at least on the architecture word size.
+ * Use the lowest bit of an entry to indicate that the current callback
+ * is changing.  Assumes that (void *)-2L is not often used as data.
+ * Extra queue space is used to encode function pointers that cannot
+ * carry the low bit (non-aligned) and data values that would collide
+ * with the marker.
+ * A non-encodable fct is written as the sequence: -2L, fct, data.
+ * A colliding data value is written likewise as: -2L, fct, data.
+ * Here, DQ_FCT_MARK == ~DQ_FCT_BIT, so the marker has its low bit
+ * clear; this is required so the DQ_FCT_BIT test can be done first.
+ */
+#define DQ_FCT_BIT (1 << 0)
+#define DQ_IS_FCT_BIT(x) ((unsigned long)(x) & DQ_FCT_BIT)
+#define DQ_SET_FCT_BIT(x) \
+ (x = (void *)((unsigned long)(x) | DQ_FCT_BIT))
+#define DQ_CLEAR_FCT_BIT(x) \
+ (x = (void *)((unsigned long)(x) & ~DQ_FCT_BIT))
+#define DQ_FCT_MARK ((void *)(~DQ_FCT_BIT))
+
/*
* Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
*/
#define rcu_assert(args...)
#endif
+/*
+ * defer queue.
+ * Contains pointers. Encoded to save space when same callback is often used.
+ * When looking up the next item:
+ * - if DQ_FCT_BIT is set, set the current callback to DQ_CLEAR_FCT_BIT(ptr)
+ * - next element contains pointer to data.
+ * - else if item == DQ_FCT_MARK
+ * - set the current callback to next element ptr
+ * - following next element contains pointer to data.
+ * - else current element contains data
+ */
struct defer_queue {
unsigned long head; /* add element at head */
+ void *last_fct_in; /* last fct pointer enqueued (producer-side encoding state) */
unsigned long tail; /* next element to remove at tail */
+ void *last_fct_out; /* last fct pointer decoded (consumer-side decoding state) */
void **q;
};
/*
* not signal-safe.
*/
-static inline void _rcu_defer_queue(void *p)
+/*
+ * Enqueue a (fct, p) pair for deferred invocation after a grace period.
+ * The fct pointer is written into the queue only when it differs from
+ * the previously enqueued one, using the DQ_FCT_BIT/DQ_FCT_MARK
+ * encoding described above.  Not signal-safe.
+ */
+static inline void _rcu_defer_queue(void (*fct)(void *p), void *p)
{
unsigned long head, tail;
/*
* If queue is full, empty it ourself.
+ * Worst-case: must allow 2 supplementary entries for fct pointer.
*/
- if (unlikely(head - tail >= DEFER_QUEUE_SIZE)) {
- assert(head - tail == DEFER_QUEUE_SIZE);
+ if (unlikely(head - tail >= DEFER_QUEUE_SIZE - 2)) {
+ assert(head - tail <= DEFER_QUEUE_SIZE);
rcu_defer_barrier_thread();
assert(head - LOAD_SHARED(defer_queue.tail) == 0);
}
smp_wmb(); /* Publish new pointer before write q[] */
+ /* Callback changed since last enqueue: encode the new fct pointer. */
+ if (unlikely(defer_queue.last_fct_in != fct)) {
+ //printf("fct diff %p %p\n", defer_queue.last_fct, fct);
+ defer_queue.last_fct_in = fct;
+ if (unlikely(DQ_IS_FCT_BIT(fct) || fct == DQ_FCT_MARK)) {
+ /*
+ * If the function to encode is not aligned or the
+ * marker, write DQ_FCT_MARK followed by the function
+ * pointer.
+ */
+ _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ DQ_FCT_MARK);
+ _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ fct);
+ } else {
+ /* Aligned fct: flag it inline with the low bit. */
+ DQ_SET_FCT_BIT(fct);
+ _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ fct);
+ }
+ } else {
+ //printf("fct same %p\n", fct);
+ if (unlikely(DQ_IS_FCT_BIT(p) || p == DQ_FCT_MARK)) {
+ /*
+ * If the data to encode is not aligned or the marker,
+ * write DQ_FCT_MARK followed by the function pointer.
+ */
+ _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ DQ_FCT_MARK);
+ _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+ fct);
+ }
+ }
+ /* Finally, store the data pointer itself. */
+ _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
smp_wmb(); /* Write q[] before head. */
- STORE_SHARED(defer_queue.head, head + 1);
+ /* head already advanced past all slots written above. */
+ STORE_SHARED(defer_queue.head, head);
}
#endif /* _URCU_DEFER_STATIC_H */
* Must be called after Q.S. is reached.
*/
static void rcu_defer_barrier_queue(struct defer_queue *queue,
- unsigned long head)
+ unsigned long head)
{
unsigned long i;
+ void (*fct)(void *p);
+ void *p;
/*
* Tail is only modified when lock is held.
* Head is only modified by owner thread.
*/
- for (i = queue->tail; i != head; i++) {
+ /*
+ * i advances by 1, 2 or 3 slots per callback depending on the
+ * entry encoding (plain data, fct-bit, or fct-mark sequence).
+ */
+ for (i = queue->tail; i != head;) {
smp_rmb(); /* read head before q[]. */
- free(LOAD_SHARED(queue->q[i & DEFER_QUEUE_MASK]));
+ p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ if (unlikely(DQ_IS_FCT_BIT(p))) {
+ /* Low-bit-flagged entry: new callback, next slot is data. */
+ //printf("%lu fct bit %p\n", i-1, p);
+ DQ_CLEAR_FCT_BIT(p);
+ queue->last_fct_out = p;
+ p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ } else if (unlikely(p == DQ_FCT_MARK)) {
+ /* Escape sequence: marker, then callback, then data. */
+ //printf("%lu fct mark %p\n", i-1, p);
+ p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ queue->last_fct_out = p;
+ p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+ }// else
+ //printf("%lu data %p\n", i-1, p);
+ /* Invoke the most recently decoded callback on the data. */
+ fct = queue->last_fct_out;
+ //printf("tid %lu %lu last_fct %p data %p\n", pthread_self(), i-1, fct, p);
+ fct(p);
}
smp_mb(); /* push tail after having used q[] */
STORE_SHARED(queue->tail, i);
* library wrappers to be used by non-LGPL compatible source code.
*/
-void rcu_defer_queue(void *p)
+/*
+ * Library wrapper around _rcu_defer_queue(), callable from non-LGPL
+ * compatible code: queue fct(p) for execution after a grace period.
+ */
+void rcu_defer_queue(void (*fct)(void *p), void *p)
{
- _rcu_defer_queue(p);
+ _rcu_defer_queue(fct, p);
}
static void rcu_add_deferer(pthread_t id)
registry[num_deferers].tid = id;
/* reference to the TLS of _this_ deferer thread. */
registry[num_deferers].defer_queue = &defer_queue;
+ registry[num_deferers].last_head = 0;
num_deferers++;
}
sizeof(struct deferer_registry));
registry[num_deferers - 1].tid = 0;
registry[num_deferers - 1].defer_queue = NULL;
+ registry[num_deferers - 1].last_head = 0;
num_deferers--;
return;
}