--- /dev/null
- static DEFINE_RCU_RBTREE(rbtree, tree_comp, malloc, free);
+/*
+ * test_urcu_rbtree.c
+ *
+ * Userspace RCU library - test program for RB tree
+ *
+ * Copyright February 2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../config.h"
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+#include <errno.h>
+#include <time.h>
+
+#include <urcu/arch.h>
+
+extern int __thread disable_debug;
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/* number of insert/delete */
+#define NR_RAND 6
+//#define NR_RAND 7
+
+/*
+ * gettid() compatibility: use the raw syscall where available, else
+ * fall back to getpid() (thread id == process id approximation).
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+ return getpid();
+}
+#endif
+
+#include <urcu.h>
+#include <urcu/rcurbtree.h>
+#include <urcu-defer.h>
+
+/*
+ * 3-way comparator on pointer-sized integer keys (qsort-style):
+ * returns -1, 0 or 1.  Used as the tree's key comparison function.
+ */
+int tree_comp(void *a, void *b)
+{
+ if ((unsigned long)a < (unsigned long)b)
+ return -1;
+ else if ((unsigned long)a > (unsigned long)b)
+ return 1;
+ else
+ return 0;
+}
+
+/* The shared tree under test: tree_comp keys, malloc/free node storage,
+ * call_rcu for deferred reclamation. */
++static DEFINE_RCU_RBTREE(rbtree, tree_comp, malloc, free, call_rcu);
+
+/* Start/stop flags polled by all test threads (set by main). */
+static volatile int test_go, test_stop;
+
+/* Delay between writer iterations, in loops. */
+static unsigned long wdelay;
+
+/* Total test duration, in seconds. */
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+/* write-side C.S. duration, in loops */
+static unsigned long wduration;
+
+/* Busy-wait for l iterations; used as a calibrated in-loop delay. */
+static inline void loop_sleep(unsigned long l)
+{
+ while(l-- != 0)
+ caa_cpu_relax();
+}
+
+static int verbose_mode;
+
+/* printf only when -v was given on the command line. */
+#define printf_verbose(fmt, args...) \
+ do { \
+ if (verbose_mode) \
+ printf(fmt, args); \
+ } while (0)
+
+/* CPUs collected from -a options; handed out round-robin to threads. */
+static unsigned int cpu_affinities[NR_CPUS];
+static unsigned int next_aff = 0;
+static int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Minimal cpu_set_t emulation for platforms lacking it (<= 8*sizeof(long) CPUs). */
+#ifndef HAVE_CPU_SET_T
+typedef unsigned long cpu_set_t;
+# define CPU_ZERO(cpuset) do { *(cpuset) = 0; } while(0)
+# define CPU_SET(cpu, cpuset) do { *(cpuset) |= (1UL << (cpu)); } while(0)
+#endif
+
+/*
+ * Pin the calling thread to the next CPU from cpu_affinities[],
+ * round-robin under affinity_mutex.  No-op unless -a was given, or
+ * when sched_setaffinity is unavailable.
+ * NOTE(review): pthread_mutex_* return an error number and do not set
+ * errno, so perror here prints an unrelated message; also the
+ * sched_setaffinity return value is ignored (test code).
+ */
+static void set_affinity(void)
+{
+ cpu_set_t mask;
+ int cpu;
+ int ret;
+
+ if (!use_affinity)
+ return;
+
+#if HAVE_SCHED_SETAFFINITY
+ ret = pthread_mutex_lock(&affinity_mutex);
+ if (ret) {
+ perror("Error in pthread mutex lock");
+ exit(-1);
+ }
+ cpu = cpu_affinities[next_aff++];
+ ret = pthread_mutex_unlock(&affinity_mutex);
+ if (ret) {
+ perror("Error in pthread mutex unlock");
+ exit(-1);
+ }
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+#if SCHED_SETAFFINITY_ARGS == 2
+ sched_setaffinity(0, &mask);
+#else
+ sched_setaffinity(0, sizeof(mask), &mask);
+#endif
+#endif /* HAVE_SCHED_SETAFFINITY */
+}
+
+/*
+ * returns 0 if test should end.
+ */
+/* Both poll the global stop flag set by main() after `duration` seconds. */
+static int test_duration_write(void)
+{
+ return !test_stop;
+}
+
+static int test_duration_read(void)
+{
+ return !test_stop;
+}
+
+/* Per-thread operation counters, copied out through the *_count args. */
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+/* Keys inserted once by main() and continuously validated by readers
+ * (count set by -g; defaults to 0, making reader checks vacuous). */
+static unsigned long global_items;
+static void **global_key = NULL;
+
+/* Serializes all tree updates (writers and teardown). */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Acquire the writer-serialization mutex; abort the test on failure.
+ * NOTE(review): perror reports errno, but pthread_mutex_* return the
+ * error code without setting errno. */
+void rcu_copy_mutex_lock(void)
+{
+ int ret;
+ ret = pthread_mutex_lock(&rcu_copy_mutex);
+ if (ret) {
+ perror("Error in pthread mutex lock");
+ exit(-1);
+ }
+}
+
+void rcu_copy_mutex_unlock(void)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&rcu_copy_mutex);
+ if (ret) {
+ perror("Error in pthread mutex unlock");
+ exit(-1);
+ }
+}
+
+/*
+ * Mark the first not-yet-hit global_key[] slot whose key matches this
+ * node's begin.  The !lookup_hit guard makes duplicate keys consume
+ * distinct slots, so a full traversal must hit every slot exactly once.
+ * NOTE(review): int i compared against unsigned long global_items.
+ */
+static
+void set_lookup_index(struct rcu_rbtree_node *node,
+ char *lookup_hit)
+{
+ int i;
+
+ for (i = 0; i < global_items; i++) {
+ if (node->begin == global_key[i]
+ && !lookup_hit[i]) {
+ lookup_hit[i] = 1;
+ break;
+ }
+ }
+}
+
+/*
+ * Reader thread: repeatedly validates the tree against the global_key[]
+ * set inserted by main().  Each pass performs point searches at the
+ * bottom (+0), end (+3) and middle (+1..+2) of every inserted range
+ * [key, key+4), an exact begin-key lookup, then full min->next and
+ * max->prev traversals asserting every global key is visited once.
+ * All tree accesses run under rcu_read_lock.
+ * (Fix: removed unused local `index`.)
+ */
+void *thr_reader(void *_count)
+{
+ unsigned long long *count = _count;
+ struct rcu_rbtree_node *node;
+ int i;
+ char *lookup_hit;
+
+ printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+ "reader", pthread_self(), (unsigned long)gettid());
+
+ set_affinity();
+
+ rcu_register_thread();
+
+ /* NOTE(review): malloc result unchecked (test code). */
+ lookup_hit = malloc(sizeof(*lookup_hit) * global_items);
+
+ /* Busy-wait for main() to release all threads at once. */
+ while (!test_go)
+ {
+ }
+ cmm_smp_mb();
+
+ for (;;) {
+ /* search bottom of range */
+ for (i = 0; i < global_items; i++) {
+ rcu_read_lock();
+ node = rcu_rbtree_search(&rbtree,
+ rcu_dereference(rbtree.root),
+ global_key[i]);
+ assert(!rcu_rbtree_is_nil(&rbtree, node));
+ rcu_read_unlock();
+ }
+
+ /* search end of range */
+ for (i = 0; i < global_items; i++) {
+ rcu_read_lock();
+ node = rcu_rbtree_search(&rbtree,
+ rcu_dereference(rbtree.root),
+ (void*) ((unsigned long) global_key[i] + 3));
+ assert(!rcu_rbtree_is_nil(&rbtree, node));
+ rcu_read_unlock();
+ }
+
+ /* search range (middle) */
+ for (i = 0; i < global_items; i++) {
+ rcu_read_lock();
+ node = rcu_rbtree_search_range(&rbtree,
+ rcu_dereference(rbtree.root),
+ (void*) ((unsigned long) global_key[i] + 1),
+ (void*) ((unsigned long) global_key[i] + 2));
+ assert(!rcu_rbtree_is_nil(&rbtree, node));
+ rcu_read_unlock();
+ }
+
+ /* search begin key */
+ for (i = 0; i < global_items; i++) {
+ rcu_read_lock();
+ node = rcu_rbtree_search_begin_key(&rbtree,
+ rcu_dereference(rbtree.root),
+ global_key[i]);
+ assert(!rcu_rbtree_is_nil(&rbtree, node));
+ rcu_read_unlock();
+ }
+
+ /* min + next */
+ memset(lookup_hit, 0, sizeof(*lookup_hit) * global_items);
+
+ rcu_read_lock();
+ node = rcu_rbtree_min(&rbtree,
+ rcu_dereference(rbtree.root));
+ while (!rcu_rbtree_is_nil(&rbtree, node)) {
+ set_lookup_index(node, lookup_hit);
+ node = rcu_rbtree_next(&rbtree, node);
+ }
+ rcu_read_unlock();
+
+ for (i = 0; i < global_items; i++)
+ assert(lookup_hit[i]);
+
+ /* max + prev */
+ memset(lookup_hit, 0, sizeof(*lookup_hit) * global_items);
+
+ rcu_read_lock();
+ node = rcu_rbtree_max(&rbtree,
+ rcu_dereference(rbtree.root));
+ while (!rcu_rbtree_is_nil(&rbtree, node)) {
+ set_lookup_index(node, lookup_hit);
+ node = rcu_rbtree_prev(&rbtree, node);
+ }
+ rcu_read_unlock();
+
+ for (i = 0; i < global_items; i++)
+ assert(lookup_hit[i]);
+
+ debug_yield_read();
+ if (unlikely(rduration))
+ loop_sleep(rduration);
+ nr_reads++;
+ if (unlikely(!test_duration_read()))
+ break;
+ }
+
+ rcu_unregister_thread();
+
+ /* test extra thread registration */
+ rcu_register_thread();
+ rcu_unregister_thread();
+
+ free(lookup_hit);
+
+ *count = nr_reads;
+ printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+ "reader", pthread_self(), (unsigned long)gettid());
+ return ((void*)1);
+
+}
+
+/*
+ * Writer thread: under rcu_copy_mutex (tree updates are serialized),
+ * inserts NR_RAND random ranges [key, key+4) with keys that are
+ * multiples of 4 below 2048 (same universe as the global keys, so
+ * duplicates can occur), then looks each one up again and removes it.
+ */
+void *thr_writer(void *_count)
+{
+ unsigned long long *count = _count;
+ struct rcu_rbtree_node *node;
+ void *key[NR_RAND];
+ int i;
+
+ printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+ "writer", pthread_self(), (unsigned long)gettid());
+
+ set_affinity();
+
+ //disable_debug = 1;
+
+ rcu_register_thread();
+
+ /* Busy-wait for main() to release all threads at once. */
+ while (!test_go)
+ {
+ }
+ cmm_smp_mb();
+
+ for (;;) {
+ rcu_copy_mutex_lock();
+
+ for (i = 0; i < NR_RAND; i++) {
+ //key[i] = (void *)(unsigned long)(rand() % 2048);
+ key[i] = (void *)(unsigned long)(((unsigned long) rand() * 4) % 2048);
+ //For more collisions
+ //key[i] = (void *)(unsigned long)(rand() % 6);
+ //node->begin = key[i];
+ //node->end = (void *)((unsigned long) key[i] + 1);
+ //node->end = (void *)((unsigned long) key[i] + 4);
+ rcu_read_lock();
+ rcu_rbtree_insert(&rbtree, key[i],
+ (void *)((unsigned long) key[i] + 4));
+ rcu_read_unlock();
+ }
+ rcu_copy_mutex_unlock();
+
+ if (unlikely(wduration))
+ loop_sleep(wduration);
+
+ rcu_copy_mutex_lock();
+ for (i = 0; i < NR_RAND; i++) {
+#if 0
+ node = rcu_rbtree_min(rbtree, rbtree->root);
+ while (!rcu_rbtree_is_nil(&rbtree, node)) {
+ printf("{ 0x%lX p:%lX r:%lX l:%lX %s %s %s} ",
+ (unsigned long)node->key,
+ node->p->key,
+ node->right->key,
+ node->left->key,
+ node->color ? "red" : "black",
+ node->pos ? "right" : "left",
+ node->nil ? "nil" : "");
+ node = rcu_rbtree_next(rbtree, node);
+ }
+ printf("\n");
+#endif
+ rcu_read_lock();
+ node = rcu_rbtree_search(&rbtree, rbtree.root, key[i]);
+ assert(!rcu_rbtree_is_nil(&rbtree, node));
+ rcu_rbtree_remove(&rbtree, node);
+ rcu_read_unlock();
+ }
+
+ rcu_copy_mutex_unlock();
+ nr_writes++;
+ if (unlikely(!test_duration_write()))
+ break;
+ if (unlikely(wdelay))
+ loop_sleep(wdelay);
+ }
+
+ rcu_unregister_thread();
+
+ printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+ "writer", pthread_self(), (unsigned long)gettid());
+ *count = nr_writes;
+ return ((void*)2);
+}
+
+/*
+ * Print command-line usage.  (Fix: document the -g option, which is
+ * parsed by main() but was missing from the usage text.)
+ */
+void show_usage(int argc, char **argv)
+{
+ printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+ printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+ printf(" [-d delay] (writer period (us))");
+ printf(" [-c duration] (reader C.S. duration (in loops))");
+ printf(" [-e duration] (writer C.S. duration (in loops))");
+ printf(" [-v] (verbose output)");
+ printf(" [-g num] (number of global items)");
+ printf(" [-a cpu#] [-a cpu#]... (affinity)");
+ printf("\n");
+}
+
+/*
+ * Test driver: parse arguments, spawn reader/writer threads, insert
+ * global_items reference keys, run for `duration` seconds, join, then
+ * remove the reference keys and print per-thread operation totals.
+ * NOTE(review): malloc results below are unchecked (test code).
+ */
+int main(int argc, char **argv)
+{
+ int err;
+ pthread_t *tid_reader, *tid_writer;
+ void *tret;
+ unsigned long long *count_reader, *count_writer;
+ unsigned long long tot_reads = 0, tot_writes = 0;
+ int i, a;
+ struct rcu_rbtree_node *node;
+
+ /* Three mandatory positional arguments. */
+ if (argc < 4) {
+ show_usage(argc, argv);
+ return -1;
+ }
+
+ err = sscanf(argv[1], "%u", &nr_readers);
+ if (err != 1) {
+ show_usage(argc, argv);
+ return -1;
+ }
+
+ err = sscanf(argv[2], "%u", &nr_writers);
+ if (err != 1) {
+ show_usage(argc, argv);
+ return -1;
+ }
+
+ err = sscanf(argv[3], "%lu", &duration);
+ if (err != 1) {
+ show_usage(argc, argv);
+ return -1;
+ }
+
+ /* Optional flags; non-dash tokens are skipped. */
+ for (i = 4; i < argc; i++) {
+ if (argv[i][0] != '-')
+ continue;
+ switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+ case 'r':
+ yield_active |= YIELD_READ;
+ break;
+ case 'w':
+ yield_active |= YIELD_WRITE;
+ break;
+#endif
+ case 'a':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ return -1;
+ }
+ a = atoi(argv[++i]);
+ cpu_affinities[next_aff++] = a;
+ use_affinity = 1;
+ printf_verbose("Adding CPU %d affinity\n", a);
+ break;
+ case 'c':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ return -1;
+ }
+ rduration = atol(argv[++i]);
+ break;
+ case 'd':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ return -1;
+ }
+ wdelay = atol(argv[++i]);
+ break;
+ case 'e':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ return -1;
+ }
+ wduration = atol(argv[++i]);
+ break;
+ case 'v':
+ verbose_mode = 1;
+ break;
+ case 'g':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ return -1;
+ }
+ global_items = atol(argv[++i]);
+ break;
+ }
+ }
+
+ printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+ duration, nr_readers, nr_writers);
+ printf_verbose("Writer delay : %lu loops.\n", wdelay);
+ printf_verbose("Reader duration : %lu loops.\n", rduration);
+ printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+ "main", pthread_self(), (unsigned long)gettid());
+
+ tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+ tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+ count_reader = malloc(sizeof(*count_reader) * nr_readers);
+ count_writer = malloc(sizeof(*count_writer) * nr_writers);
+ global_key = malloc(sizeof(*global_key) * global_items);
+
+ srand(time(NULL));
+
+ next_aff = 0;
+
+ for (i = 0; i < nr_readers; i++) {
+ err = pthread_create(&tid_reader[i], NULL, thr_reader,
+ &count_reader[i]);
+ if (err != 0)
+ exit(1);
+ }
+ for (i = 0; i < nr_writers; i++) {
+ err = pthread_create(&tid_writer[i], NULL, thr_writer,
+ &count_writer[i]);
+ if (err != 0)
+ exit(1);
+ }
+
+ rcu_register_thread();
+ rcu_read_lock();
+ /* Insert items looked up by readers */
+ for (i = 0; i < global_items; i++) {
+ /* Keys are multiples of 4 below 2048; ranges are [key, key+4). */
+ global_key[i] = (void *)(unsigned long)(((unsigned long) rand() * 4) % 2048);
+ //global_key[i] = (void *)(unsigned long)(rand() % 2048);
+ //For more collisions
+ //global_key[i] = (void *)(unsigned long)(rand() % 6);
+ //node->begin = global_key[i];
+ //node->end = (void *)((unsigned long) global_key[i] + 1);
+ //node->end = (void *)((unsigned long) global_key[i] + 4);
+ rcu_rbtree_insert(&rbtree, global_key[i],
+ (void *)((unsigned long) global_key[i] + 4));
+ }
+ rcu_read_unlock();
+
+ /* Publish all setup before releasing the busy-waiting threads. */
+ cmm_smp_mb();
+
+ test_go = 1;
+
+ sleep(duration);
+
+ test_stop = 1;
+
+ for (i = 0; i < nr_readers; i++) {
+ err = pthread_join(tid_reader[i], &tret);
+ if (err != 0)
+ exit(1);
+ tot_reads += count_reader[i];
+ }
+ for (i = 0; i < nr_writers; i++) {
+ err = pthread_join(tid_writer[i], &tret);
+ if (err != 0)
+ exit(1);
+ tot_writes += count_writer[i];
+ }
+
+ /* Tear down the reference keys (all threads have exited). */
+ rcu_read_lock();
+ for (i = 0; i < global_items; i++) {
+ node = rcu_rbtree_search(&rbtree, rbtree.root, global_key[i]);
+ assert(!rcu_rbtree_is_nil(&rbtree, node));
+ rcu_rbtree_remove(&rbtree, node);
+ }
+ rcu_read_unlock();
+ rcu_unregister_thread();
+
+ printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+ tot_writes);
+ printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu wdur %6lu "
+ "nr_writers %3u "
+ "wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu "
+ "global_items %6lu\n",
+ argv[0], duration, nr_readers, rduration, wduration,
+ nr_writers, wdelay, tot_reads, tot_writes,
+ tot_reads + tot_writes, global_items);
+ free(tid_reader);
+ free(tid_writer);
+ free(count_reader);
+ free(count_writer);
+ free(global_key);
+ return 0;
+}
--- /dev/null
- call_rcu(&x->head, _rcu_rbtree_free_node);
+/*
+ * urcu-rbtree.c
+ *
+ * Userspace RCU library - Red-Black Tree
+ *
+ * Copyright (c) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Implementation of RCU-adapted data structures and operations based on the RB
+ * tree algorithms found in chapter 12 of:
+ *
+ * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and
+ * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT
+ * Press, September 2009.
+ */
+
+#define _BSD_SOURCE
+#define _LGPL_SOURCE
+
+#include <stdio.h>
+#include <pthread.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <urcu/rcurbtree.h>
+#include <urcu-pointer.h>
+#include <urcu-call-rcu.h>
+#include <urcu/compiler.h>
+
+/*
+ * Explanation of next/prev walk coherency and search coherency when
+ * performed concurrently with updates.
+ *
+ * next/prev walk coherency with respect to concurrent updates:
+ *
+ * There are 3 scenarios for which we need to model and validate this:
+ * rotation, transplant and "teleportation" (the latter being a remote
+ * transplant in a remove non-nil case).
+ *
+ * - rotation left (right is symmetric)
+ * xl and yr point to the same parent nodes before/after left
+ * rotation. yll and ylr also point to the same parent node
+ * before/after left rotation.
+ * As we are copying x, y and yl (the 3 nodes which parent/child
+ * relationship are changed) to "new" version of this node cluster,
+ * all external references to the cluster either point to the old
+ * cluster or the new one. If we take this cluster as a "black box"
+ * from the point of view of next/prev traversal, all we have to
+ * ensure is that the old and the new cluster behave in the exact same
+ * way with respect to traversal order.
+ *
+ * - transplant
+ * In this operation, we transplant a copy of "v" into its parent
+ * location (u), thus replacing it. The children of "v", vl and vr,
+ * still point to v (new version) after the transplant, so it does not
+ * change the behavior when considering the next/prev traversal. "v"
+ * being copied to a new version ensures that the parent pointers of v
+ * are pointing to its new parent (parent of u) before it is published
+ * to readers (by setting the child pointer of u's parent to the new
+ * copy of v).
+ *
+ * - teleportation
+ * This one is probably the most tricky and will require some ascii
+ * art to explain.
+ *
+ * We want to remove z from this tree:
+ *
+ * zp
+ * \
+ * z
+ * / \
+ * zl zr
+ * /
+ * a
+ * / \
+ * b ar
+ * / \
+ * y br
+ * \
+ * yr
+ * / \
+ * yrl yrr
+ *
+ * What we are going to do is to "teleport" y into z's location,
+ * reparenting yr to b. We are taking care to create a new cluster
+ * copy that is isolated from any reader. We will represent the new
+ * members of the cluster with capital letters.
+ *
+ * zp
+ * \
+ * Y
+ * / \
+ * zl ZR
+ * /
+ * A
+ * / \
+ * B ar
+ * / \
+ * YR br
+ * / \
+ * yrl yrr
+ *
+ * In this transient state, we notice that the pointers within the
+ * cluster all point to the new cluster nodes, and they point to the
+ * correct external nodes. However, no external pointer point to the
+ * cluster (yet). The first pointer to point to this cluster will be
+ * "zp->right". It will therefore make the cluster visible for search.
+ *
+ * In this intermediate state, we can walk through the new cluster
+ * when coming from the top (in a next/prev traversal), but can come
+ * back to the old cluster when going back up from the children nodes.
+ * All we have to ensure is that the two clusters, taken as a black
+ * box from a next/prev traversal perspective, yield to the exact same
+ * result.
+ *
+ * Search coherency with concurrent updates:
+ *
+ * Simple "search" (only going down the tree) is also handled by this
+ * cluster scheme. The explanation is a subset of the prev/next
+ * explanation, where we don't have to care about the intermediate
+ * stages where the children point to the old cluster, because we only
+ * ever use the top level pointers to go down into the children nodes,
+ * we never go back up. So by simply making sure that all the cluster
+ * internal nodes pointers are setup correctly before making the
+ * cluster visible to the readers (by setting the parent pointer to
+ * the topmost new node in the cluster), we are sure that readers will
+ * see a coherent view of the cluster at all times.
+ */
+
+#ifdef DEBUG
+#define dbg_printf(args...) printf(args)
+#define dbg_usleep(usecs) usleep(usecs)
+#else
+#define dbg_printf(args...)
+#define dbg_usleep(usecs)
+#endif
+
+/*
+ * Undefine this to enable the non-RCU rotate and transplant functions
+ * (for debugging). Note that these versions don't support the tree
+ * max_end updates, so lookups must be performed with
+ * rcu_rbtree_search_begin_key when using this debug facility.
+ */
+#define RBTREE_RCU_SUPPORT_ROTATE_LEFT
+#define RBTREE_RCU_SUPPORT_ROTATE_RIGHT
+#define RBTREE_RCU_SUPPORT_TRANSPLANT
+
+/* Extra-debug serialization helpers (compiled only with EXTRA_DEBUG). */
+#ifdef EXTRA_DEBUG
+static pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t outer_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static
+void lock_outer_mutex(void)
+{
+ pthread_mutex_lock(&outer_mutex);
+}
+
+static
+void unlock_outer_mutex(void)
+{
+ pthread_mutex_unlock(&outer_mutex);
+}
+
+static
+void lock_test_mutex(void)
+{
+ pthread_mutex_lock(&test_mutex);
+}
+
+static
+void unlock_test_mutex(void)
+{
+ pthread_mutex_unlock(&test_mutex);
+}
+#endif
+
+/*
+ * The parent pointer and the child position (IS_LEFT/IS_RIGHT) are
+ * packed into one word: low bit = position, remaining bits = parent
+ * address.  This lets readers fetch both with a single atomic load.
+ */
+static
+void set_parent(struct rcu_rbtree_node *node,
+ struct rcu_rbtree_node *parent,
+ unsigned int pos)
+{
+ _CMM_STORE_SHARED(node->parent, ((unsigned long) parent) | pos);
+}
+
+static
+struct rcu_rbtree_node *get_parent(struct rcu_rbtree_node *node)
+{
+ return (struct rcu_rbtree_node *) (node->parent & ~1UL);
+}
+
+static
+unsigned int get_pos(struct rcu_rbtree_node *node)
+{
+ return (unsigned int) (node->parent & 1UL);
+}
+
+/* Read parent and position coherently from one rcu_dereference. */
+static
+struct rcu_rbtree_node *get_parent_and_pos(struct rcu_rbtree_node *node,
+ unsigned int *pos)
+{
+ unsigned long parent_pos = rcu_dereference(node->parent);
+
+ *pos = (unsigned int) (parent_pos & 1UL);
+ return (struct rcu_rbtree_node *) (parent_pos & ~1UL);
+}
+
+/*
+ * "Decay" chains an old node version to its replacement copy, so code
+ * holding a stale pointer can reach the current version.
+ */
+static
+void set_decay(struct rcu_rbtree_node *x, struct rcu_rbtree_node *xc)
+{
+ x->decay_next = xc;
+}
+
+/* Follow the decay chain to the newest version of x (NULL-safe). */
+static
+struct rcu_rbtree_node *get_decay(struct rcu_rbtree_node *x)
+{
+ if (!x)
+ return NULL;
+ while (x->decay_next)
+ x = x->decay_next;
+ return x;
+}
+
+/* Non-NULL iff x has been superseded by a newer copy. */
+static
+struct rcu_rbtree_node *is_decay(struct rcu_rbtree_node *x)
+{
+ return x->decay_next;
+}
+
+static
+struct rcu_rbtree_node *_rcu_rbtree_alloc_node(struct rcu_rbtree *rbtree)
+{
+ return rbtree->rballoc(sizeof(struct rcu_rbtree_node));
+}
+
+/* call_rcu callback: free a node once no reader can still reference it. */
+static
+void _rcu_rbtree_free_node(struct rcu_head *head)
+{
+ struct rcu_rbtree_node *node =
+ caa_container_of(head, struct rcu_rbtree_node, head);
+ node->rbtree->rbfree(node);
+}
+
+/*
+ * Copy x into a fresh node, mark x as decayed to the copy, and defer
+ * reclamation of x to after a grace period.  The nil sentinel is
+ * returned unchanged.
+ * NOTE(review): allocation result unchecked (see TODO below).
+ */
+static
+struct rcu_rbtree_node *dup_decay_node(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *xc;
+
+ if (rcu_rbtree_is_nil(rbtree, x))
+ return x;
+
+ xc = _rcu_rbtree_alloc_node(rbtree);
+ memcpy(xc, x, sizeof(*xc));
+ xc->decay_next = NULL;
+ set_decay(x, xc);
- call_rcu(&z->head, _rcu_rbtree_free_node);
++ rbtree->call_rcu(&x->head, _rcu_rbtree_free_node);
+ return xc;
+}
+
+/*
+ * Info for range lookups:
+ * Range lookup information is only valid when used when searching for
+ * ranges. It should never be used in next/prev traversal because the
+ * pointers to parents are not in sync with the parent vision of the
+ * children range.
+ */
+/* rbtree parameter kept for API symmetry; only node is written. */
+static
+void set_left(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
+ struct rcu_rbtree_node *left)
+{
+ node->_left = left;
+}
+
+static
+void set_right(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
+ struct rcu_rbtree_node *right)
+{
+ node->_right = right;
+}
+
+/*
+ * Interval-tree augmentation: a node's max_end is the maximum of its
+ * own end and the max_end of each non-nil child.
+ */
+static
+void *calculate_node_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node)
+{
+ void *max_end;
+
+ max_end = node->end;
+ if (!rcu_rbtree_is_nil(rbtree, node->_right)) {
+ if (rbtree->comp(max_end, node->_right->max_end) < 0)
+ max_end = node->_right->max_end;
+ }
+ if (!rcu_rbtree_is_nil(rbtree, node->_left)) {
+ if (rbtree->comp(max_end, node->_left->max_end) < 0)
+ max_end = node->_left->max_end;
+ }
+ return max_end;
+}
+
+/*
+ * TODO
+ * Deal with memory allocation errors.
+ * Can be ensured by reserving a pool of memory entries before doing the
+ * insertion, which will have to be function of number of
+ * transplantations/rotations required for the operation (which is a
+ * multiple of the tree height).
+ */
+
+/* DEBUG builds dump the tree in-order and verify max_end invariants;
+ * non-DEBUG builds compile both helpers down to no-ops. */
+#ifdef DEBUG
+static
+void show_tree(struct rcu_rbtree *rbtree)
+{
+ struct rcu_rbtree_node *node;
+
+ node = rcu_rbtree_min(rbtree, rbtree->root);
+ while (!rcu_rbtree_is_nil(rbtree, node)) {
+ assert(!is_decay(node));
+ printf("{ b:%lX e:%lX pb: %lX r:%lX l:%lX %s %s %s} ",
+ (unsigned long) node->begin,
+ (unsigned long) node->end,
+ (unsigned long) get_parent(node)->begin,
+ (unsigned long) node->_right->begin,
+ (unsigned long) node->_left->begin,
+ node->color ? "red" : "black",
+ get_pos(node) ? "right" : "left",
+ rcu_rbtree_is_nil(rbtree, node) ? "nil" : "");
+ node = rcu_rbtree_next(rbtree, node);
+ }
+ printf("\n");
+}
+
+#define check_max_end(rbtree, x) \
+ do { \
+ if (rcu_rbtree_is_nil(rbtree, x)) \
+ break; \
+ assert(rbtree->comp(x->max_end, \
+ calculate_node_max_end(rbtree, x)) == 0); \
+ } while (0)
+
+#else /* DEBUG */
+static
+void show_tree(struct rcu_rbtree *rbtree)
+{
+}
+
+static
+void check_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *x)
+{
+}
+#endif /* DEBUG */
+
+/* Return the tree's embedded nil sentinel node. */
+static
+struct rcu_rbtree_node *make_nil(struct rcu_rbtree *rbtree)
+{
+ return &rbtree->nil_node;
+}
+
+/*
+ * Iterative interval search: return a node whose range [begin, end)
+ * contains point, or the nil node if none does.  Descends left while
+ * the left subtree may still hold a match (its max_end > point),
+ * otherwise tests the current node, otherwise goes right.  Caller is
+ * expected to hold the RCU read lock (as the test code does).
+ */
+struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x,
+ void *point)
+{
+ struct rcu_rbtree_node *xl;
+
+ dbg_printf("searching point 0x%lx\n", (unsigned long) point);
+ x = rcu_dereference(x);
+
+ while (!rcu_rbtree_is_nil(rbtree, x)) {
+ dbg_usleep(10);
+ xl = rcu_dereference(x->_left);
+ dbg_printf("search x %lx x_end %lx x_max_end %lx\n", (unsigned long) x->begin,
+ (unsigned long) x->end, (unsigned long) x->max_end);
+ dbg_printf("search xl %lx xl_end %lx xl_max_end %lx\n", (unsigned long) xl->begin,
+ (unsigned long) xl->end, (unsigned long) xl->max_end);
+ if (!rcu_rbtree_is_nil(rbtree, xl)
+ && (rbtree->comp(xl->max_end, point) > 0)) {
+ dbg_printf("go left\n");
+ x = xl;
+ } else if (rbtree->comp(x->begin, point) <= 0
+ && rbtree->comp(point, x->end) < 0) {
+ dbg_printf("got it!\n");
+ break;
+ } else if (rbtree->comp(point, x->begin) > 0) {
+ dbg_printf("go right\n");
+ x = rcu_dereference(x->_right);
+ } else {
+ dbg_printf("not found!\n");
+ x = make_nil(rbtree);
+ }
+ }
+ if (rcu_rbtree_is_nil(rbtree, x))
+ dbg_printf("Reached bottom of tree.\n");
+
+ return x;
+}
+
+/*
+ * Find a node whose range fully covers [begin, end).
+ * Returns the nil node when begin matches no interval, but NULL when a
+ * node contains begin yet ends before end.
+ * NOTE(review): mixed nil/NULL failure sentinels — callers must check both.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x,
+ void *begin, void *end)
+{
+ struct rcu_rbtree_node *node;
+
+ node = rcu_rbtree_search(rbtree, x, begin);
+ if (rcu_rbtree_is_nil(rbtree, node))
+ return node;
+ if (rbtree->comp(node->end, end) < 0)
+ return NULL; /* High is outside lookup range */
+ return node;
+}
+
+/*
+ * Exact-match binary search on the range begin key (ignores the
+ * max_end augmentation).  Returns the nil node when not found.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x,
+ void *k)
+{
+ x = rcu_dereference(x);
+ int comp; /* assigned by the loop condition before any use (short-circuit) */
+
+ while (!rcu_rbtree_is_nil(rbtree, x) && (comp = rbtree->comp(k, x->begin)) != 0) {
+ dbg_usleep(10);
+ if (comp < 0)
+ x = rcu_dereference(x->_left);
+ else
+ x = rcu_dereference(x->_right);
+ }
+ return x;
+}
+
+/*
+ * Walk to the minimum of subtree x, duplicating (decaying) every node
+ * on the left spine so the caller owns a private copy of the path.
+ * *zr receives the (copied) subtree root; the copied minimum is returned.
+ */
+static
+struct rcu_rbtree_node *rcu_rbtree_min_dup_decay(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x,
+ struct rcu_rbtree_node **zr)
+{
+ struct rcu_rbtree_node *xl;
+
+ x = rcu_dereference(x);
+
+ if (rcu_rbtree_is_nil(rbtree, x)) {
+ *zr = x;
+ return x;
+ } else
+ *zr = x = dup_decay_node(rbtree, x);
+
+ while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
+ x = dup_decay_node(rbtree, xl);
+ /* Re-link the copy under the newest version of its parent. */
+ set_parent(x, get_decay(get_parent(x)), get_pos(x));
+ get_parent(x)->_left = get_decay(get_parent(x)->_left);
+ }
+ return x;
+}
+
+/*
+ * Walk to the minimum of subtree x, re-pointing each visited node's
+ * children at the newest (decayed) versions of their parents along
+ * the way.  No copies are made here.
+ */
+static
+struct rcu_rbtree_node *rcu_rbtree_min_update_decay(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *xl;
+
+ x = rcu_dereference(x);
+
+ if (rcu_rbtree_is_nil(rbtree, x))
+ return x;
+ else {
+ set_parent(x->_right, get_decay(get_parent(x->_right)),
+ get_pos(x->_right));
+ set_parent(x->_left, get_decay(get_parent(x->_left)),
+ get_pos(x->_left));
+ }
+
+ while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
+ x = xl;
+ set_parent(x->_right, get_decay(get_parent(x->_right)),
+ get_pos(x->_right));
+ set_parent(x->_left, get_decay(get_parent(x->_left)),
+ get_pos(x->_left));
+ }
+ return x;
+}
+
+/* Leftmost node of subtree x, or nil for an empty subtree. */
+struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *xl;
+
+ x = rcu_dereference(x);
+
+ if (rcu_rbtree_is_nil(rbtree, x))
+ return x;
+
+ while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left)))
+ x = xl;
+ return x;
+}
+
+/* Rightmost node of subtree x, or nil for an empty subtree. */
+struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *xr;
+
+ x = rcu_dereference(x);
+
+ if (rcu_rbtree_is_nil(rbtree, x))
+ return x;
+
+ while (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right)))
+ x = xr;
+ return x;
+}
+
+/*
+ * RCU read lock must be held across the next/prev calls to ensure validity of
+ * the returned node.
+ */
+/* In-order successor of x (nil when x is the maximum). */
+struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *xr, *y;
+ unsigned int x_pos;
+
+ x = rcu_dereference(x);
+
+ /* Successor is the min of the right subtree when one exists... */
+ if (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right)))
+ return rcu_rbtree_min(rbtree, xr);
+ /* ...otherwise the first ancestor reached from a left child. */
+ y = get_parent_and_pos(x, &x_pos);
+ while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_RIGHT) {
+ x = y;
+ y = get_parent_and_pos(y, &x_pos);
+ }
+ return y;
+}
+
+/* In-order predecessor of x (nil when x is the minimum). */
+struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *xl, *y;
+ unsigned int x_pos;
+
+ x = rcu_dereference(x);
+
+ if (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left)))
+ return rcu_rbtree_max(rbtree, xl);
+ y = get_parent_and_pos(x, &x_pos);
+ while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_LEFT) {
+ x = y;
+ y = get_parent_and_pos(y, &x_pos);
+ }
+ return y;
+}
+
+/*
+ * "node" needs to be non-visible by readers.
+ */
+/*
+ * Propagate max_end changes from node up toward the root.  When
+ * copy_parents is set, each modified ancestor is duplicated
+ * (dup_decay_node) and the new branch is atomically republished so
+ * readers only ever see a coherent cluster; propagation ends early
+ * when max_end stabilizes, at the stop node, or at the root.
+ */
+static
+void populate_node_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
+ unsigned int copy_parents, struct rcu_rbtree_node *stop)
+{
+ struct rcu_rbtree_node *prev = NULL, *orig_node = node, *top;
+
+ do {
+ void *max_end;
+
+ assert(node);
+ assert(!rcu_rbtree_is_nil(rbtree, node));
+
+ if (prev && copy_parents) {
+ /* Copy this ancestor and hook the previous copy under it. */
+ node = dup_decay_node(rbtree, node);
+ if (get_pos(prev) == IS_RIGHT)
+ node->_right = prev;
+ else
+ node->_left = prev;
+ set_parent(prev, node, get_pos(prev));
+ }
+
+ max_end = calculate_node_max_end(rbtree, node);
+ /*
+ * Compare the node max_end keys to make sure we replace
+ * references to a key belonging to a node we remove
+ * from the tree. Otherwise we would still be using this
+ * pointer as an invalid reference after garbage
+ * collection of the node and of its associated
+ * begin/end pointers.
+ */
+ if (max_end != node->max_end) {
+ node->max_end = max_end;
+ } else {
+ /* max_end stable: publish the copied branch and stop. */
+ top = get_parent(node);
+ cmm_smp_wmb(); /* write into node before publish */
+ /* make new branch visible to readers */
+ if (rcu_rbtree_is_nil(rbtree, top))
+ _CMM_STORE_SHARED(rbtree->root, node);
+ /* NOTE(review): no "else" here — when top is nil the
+ * store below also writes the nil node's child slot;
+ * left_rotate uses "else if" for the same publication.
+ * Confirm this is intended. */
+ if (get_pos(node) == IS_LEFT)
+ _CMM_STORE_SHARED(top->_left, node);
+ else
+ _CMM_STORE_SHARED(top->_right, node);
+ goto end;
+ }
+
+ /* Check for propagation stop */
+ if (node == stop)
+ return;
+
+ prev = node;
+ node = get_parent(node);
+ } while (!rcu_rbtree_is_nil(rbtree, node));
+
+ /* Reached the root: publish the whole copied branch. */
+ top = node; /* nil */
+ cmm_smp_wmb(); /* write into node before publish */
+ /* make new branch visible to readers */
+ _CMM_STORE_SHARED(rbtree->root, prev);
+
+end:
+ if (!copy_parents)
+ return;
+ /* update children */
+ node = orig_node;
+ do {
+ assert(!rcu_rbtree_is_nil(rbtree, node));
+ set_parent(node->_left, get_decay(get_parent(node->_left)), IS_LEFT);
+ set_parent(node->_right, get_decay(get_parent(node->_right)), IS_RIGHT);
+ } while ((node = get_parent(node)) != top);
+}
+
+/*
+ * We have to ensure these assumptions are correct for prev/next
+ * traversal:
+ *
+ * with x being a right child, the assumption that:
+ * get_parent(x)->_right == x
+ * or if x is a left child, the assumption that:
+ * get_parent(x)->_left == x
+ *
+ * This explains why we have to allocate a vc copy of the node for left_rotate,
+ * right_rotate and transplant operations.
+ *
+ * We always ensure that the right/left child and correct parent is set in the
+ * node copies *before* we reparent the children and make the upper-level point
+ * to the copy.
+ */
+
+/* RCU: copy x and y, atomically point to new versions. GC old. */
+/* Should be eventually followed by a cmm_smp_wmc() */
+
+#ifdef RBTREE_RCU_SUPPORT_ROTATE_LEFT
+
+/*
+ * RCU left rotation around x: x's right child y takes x's place, and x
+ * becomes y's left child.  Works on duplicated ("decay") copies of x, y
+ * and y->_left so that concurrent readers keep seeing a consistent old
+ * branch; the new branch is made visible with a single pointer store
+ * after a write barrier.  Old copies are garbage-collected later.
+ */
+static
+void left_rotate(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *y, *y_left;
+
+ dbg_printf("left rotate %lx\n", (unsigned long) x->begin);
+
+ y = x->_right;
+ y_left = y->_left;
+
+ /* Now operate on new copy, decay old versions */
+ x = dup_decay_node(rbtree, x);
+ y = dup_decay_node(rbtree, y);
+ y_left = dup_decay_node(rbtree, y_left);
+
+ check_max_end(rbtree, get_parent(x));
+ check_max_end(rbtree, x);
+ check_max_end(rbtree, y);
+
+ /* Internal node modifications */
+ set_parent(y, get_parent(x), get_pos(x));
+ set_parent(x, y, IS_LEFT);
+ set_left(rbtree, y, x);
+ set_right(rbtree, x, y_left);
+
+ /* y's old left subtree becomes x's right subtree. */
+ if (!rcu_rbtree_is_nil(rbtree, y_left))
+ set_parent(y_left, x, IS_RIGHT);
+
+ /*
+ * We only changed the relative position of x and y wrt their
+ * children, and reparented y (but are keeping the same nodes in
+ * place, so its parent does not need to have end value
+ * recalculated).
+ */
+ x->max_end = calculate_node_max_end(rbtree, x);
+ y->max_end = calculate_node_max_end(rbtree, y);
+
+ cmm_smp_wmb(); /* write into node before publish */
+
+ /* External references update (visible by readers) */
+ if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
+ _CMM_STORE_SHARED(rbtree->root, y);
+ else if (get_pos(y) == IS_LEFT)
+ _CMM_STORE_SHARED(get_parent(y)->_left, y);
+ else
+ _CMM_STORE_SHARED(get_parent(y)->_right, y);
+
+ /* Point children to new copy (parent only used by updates/next/prev) */
+ set_parent(x->_left, get_decay(get_parent(x->_left)),
+ get_pos(x->_left));
+ set_parent(y->_right, get_decay(get_parent(y->_right)),
+ get_pos(y->_right));
+ if (!rcu_rbtree_is_nil(rbtree, y_left)) {
+ set_parent(y_left->_right,
+ get_decay(get_parent(y_left->_right)),
+ get_pos(y_left->_right));
+ set_parent(y_left->_left,
+ get_decay(get_parent(y_left->_left)),
+ get_pos(y_left->_left));
+ }
+
+ /* Sanity checks */
+ assert(y == rbtree->root || get_parent(y)->_left == y
+ || get_parent(y)->_right == y);
+ assert(x == rbtree->root || get_parent(x)->_left == x
+ || get_parent(x)->_right == x);
+ assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
+ assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
+ assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
+ assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
+ assert(!is_decay(rbtree->root));
+ assert(!is_decay(x));
+ assert(!is_decay(y));
+ assert(!is_decay(x->_right));
+ assert(!is_decay(x->_left));
+ assert(!is_decay(y->_right));
+ assert(!is_decay(y->_left));
+ check_max_end(rbtree, get_parent(y));
+ check_max_end(rbtree, x);
+ check_max_end(rbtree, y);
+}
+
+#else
+
+/* non-rcu version */
+/*
+ * Non-RCU left rotation (classic CLRS LEFT-ROTATE) protected by the
+ * test mutex instead of copy-on-update; used when
+ * RBTREE_RCU_SUPPORT_ROTATE_LEFT is not defined.
+ */
+static
+void left_rotate(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *y;
+
+ lock_test_mutex();
+ y = x->_right;
+ x->_right = y->_left;
+ if (!rcu_rbtree_is_nil(rbtree, y->_left))
+ set_parent(y->_left, x, IS_RIGHT);
+ set_parent(y, get_parent(x), get_pos(x));
+ if (rcu_rbtree_is_nil(rbtree, get_parent(x)))
+ rbtree->root = y;
+ else if (x == get_parent(x)->_left) {
+ get_parent(x)->_left = y;
+ } else {
+ get_parent(x)->_right = y;
+ }
+ y->_left = x;
+ set_parent(x, y, IS_LEFT);
+ unlock_test_mutex();
+}
+
+#endif
+
+#ifdef RBTREE_RCU_SUPPORT_ROTATE_RIGHT
+/*
+ * RCU right rotation around x: x's left child y takes x's place, and x
+ * becomes y's right child.  Exact mirror image of left_rotate(): operate
+ * on decay copies, publish the new branch with one store after a write
+ * barrier, then repoint children to the copies.
+ */
+static
+void right_rotate(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *y, *y_right;
+
+ dbg_printf("right rotate %lx\n", (unsigned long) x->begin);
+
+ y = x->_left;
+ y_right = y->_right;
+
+ /* Now operate on new copy, decay old versions */
+ x = dup_decay_node(rbtree, x);
+ y = dup_decay_node(rbtree, y);
+ y_right = dup_decay_node(rbtree, y_right);
+
+ check_max_end(rbtree, get_parent(x));
+ check_max_end(rbtree, x);
+ check_max_end(rbtree, y);
+
+ /* Internal node modifications */
+ set_parent(y, get_parent(x), get_pos(x));
+ set_parent(x, y, IS_RIGHT);
+ set_right(rbtree, y, x);
+ set_left(rbtree, x, y_right);
+
+ /* y's old right subtree becomes x's left subtree. */
+ if (!rcu_rbtree_is_nil(rbtree, y_right))
+ set_parent(y_right, x, IS_LEFT);
+
+ /*
+ * We only changed the relative position of x and y wrt their
+ * children, and reparented y (but are keeping the same nodes in
+ * place, so its parent does not need to have end value
+ * recalculated).
+ */
+ x->max_end = calculate_node_max_end(rbtree, x);
+ y->max_end = calculate_node_max_end(rbtree, y);
+
+ cmm_smp_wmb(); /* write into node before publish */
+
+ /* External references update (visible by readers) */
+ if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
+ _CMM_STORE_SHARED(rbtree->root, y);
+ else if (get_pos(y) == IS_RIGHT)
+ _CMM_STORE_SHARED(get_parent(y)->_right, y);
+ else
+ _CMM_STORE_SHARED(get_parent(y)->_left, y);
+
+ /* Point children to new copy (parent only used by updates/next/prev) */
+ set_parent(x->_right, get_decay(get_parent(x->_right)),
+ get_pos(x->_right));
+ set_parent(y->_left, get_decay(get_parent(y->_left)),
+ get_pos(y->_left));
+ if (!rcu_rbtree_is_nil(rbtree, y_right)) {
+ set_parent(y_right->_left,
+ get_decay(get_parent(y_right->_left)),
+ get_pos(y_right->_left));
+ set_parent(y_right->_right,
+ get_decay(get_parent(y_right->_right)),
+ get_pos(y_right->_right));
+ }
+
+ /* Sanity checks */
+ assert(y == rbtree->root || get_parent(y)->_right == y
+ || get_parent(y)->_left == y);
+ assert(x == rbtree->root || get_parent(x)->_right == x
+ || get_parent(x)->_left == x);
+ assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
+ assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
+ assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
+ assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
+ assert(!is_decay(rbtree->root));
+ assert(!is_decay(x));
+ assert(!is_decay(y));
+ assert(!is_decay(x->_left));
+ assert(!is_decay(x->_right));
+ assert(!is_decay(y->_left));
+ assert(!is_decay(y->_right));
+ check_max_end(rbtree, x);
+ check_max_end(rbtree, y);
+ check_max_end(rbtree, get_parent(y));
+}
+
+#else
+
+/* non-rcu version */
+/*
+ * Non-RCU right rotation (classic CLRS RIGHT-ROTATE) protected by the
+ * test mutex instead of copy-on-update; used when
+ * RBTREE_RCU_SUPPORT_ROTATE_RIGHT is not defined.
+ */
+static
+void right_rotate(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ struct rcu_rbtree_node *y;
+
+ lock_test_mutex();
+ y = x->_left;
+ x->_left = y->_right;
+ if (!rcu_rbtree_is_nil(rbtree, y->_right))
+ set_parent(y->_right, x, IS_LEFT);
+ set_parent(y, get_parent(x), get_pos(x));
+ if (rcu_rbtree_is_nil(rbtree, get_parent(x)))
+ rbtree->root = y;
+ else if (x == get_parent(x)->_right) {
+ get_parent(x)->_right = y;
+ } else {
+ get_parent(x)->_left = y;
+ }
+ y->_right = x;
+ set_parent(x, y, IS_RIGHT);
+ unlock_test_mutex();
+}
+
+#endif
+
+/*
+ * Restore red-black invariants after inserting red node z (CLRS
+ * RB-INSERT-FIXUP).  Rotations replace nodes by fresh copies, hence the
+ * get_decay() calls to chase the current version of z after a rotation.
+ */
+static void rcu_rbtree_insert_fixup(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *z)
+{
+ struct rcu_rbtree_node *y;
+
+ dbg_printf("insert fixup %p\n", z->begin);
+ assert(!is_decay(rbtree->root));
+
+ while (get_parent(z)->color == COLOR_RED) {
+ if (get_parent(z) == get_parent(get_parent(z))->_left) {
+ /* y is z's uncle (grandparent's right child). */
+ y = get_parent(get_parent(z))->_right;
+ if (y->color == COLOR_RED) {
+ /* Case 1: red uncle -> recolor, move z up two levels. */
+ get_parent(z)->color = COLOR_BLACK;
+ y->color = COLOR_BLACK;
+ get_parent(get_parent(z))->color = COLOR_RED;
+ z = get_parent(get_parent(z));
+ } else {
+ /* Case 2: z is a right child -> rotate into case 3. */
+ if (z == get_parent(z)->_right) {
+ z = get_parent(z);
+ left_rotate(rbtree, z);
+ z = get_decay(z);
+ assert(!is_decay(rbtree->root));
+ }
+ /* Case 3: recolor and rotate grandparent right. */
+ get_parent(z)->color = COLOR_BLACK;
+ get_parent(get_parent(z))->color = COLOR_RED;
+ assert(!is_decay(z));
+ assert(!is_decay(get_parent(z)));
+ assert(!is_decay(get_parent(get_parent(z))));
+ right_rotate(rbtree, get_parent(get_parent(z)));
+ assert(!is_decay(z));
+ assert(!is_decay(rbtree->root));
+ }
+ } else {
+ /* Mirror image: parent is a right child; y is the left uncle. */
+ y = get_parent(get_parent(z))->_left;
+ if (y->color == COLOR_RED) {
+ get_parent(z)->color = COLOR_BLACK;
+ y->color = COLOR_BLACK;
+ get_parent(get_parent(z))->color = COLOR_RED;
+ z = get_parent(get_parent(z));
+ } else {
+ if (z == get_parent(z)->_left) {
+ z = get_parent(z);
+ right_rotate(rbtree, z);
+ z = get_decay(z);
+ assert(!is_decay(rbtree->root));
+ }
+ get_parent(z)->color = COLOR_BLACK;
+ get_parent(get_parent(z))->color = COLOR_RED;
+ left_rotate(rbtree, get_parent(get_parent(z)));
+ assert(!is_decay(z));
+ assert(!is_decay(rbtree->root));
+ }
+ }
+ }
+ /* Root is always black. */
+ rbtree->root->color = COLOR_BLACK;
+}
+
+/*
+ * rcu_rbtree_insert - Insert a node in the RCU rbtree
+ *
+ * Returns 0 on success, or < 0 on error.
+ */
+/*
+ * rcu_rbtree_insert - Insert a [begin, end] interval node in the RCU
+ * rbtree, keyed by "begin" through rbtree->comp.  The new node is fully
+ * initialized before being published (wmb before the shared store), then
+ * rebalanced with rcu_rbtree_insert_fixup().
+ *
+ * Returns 0 on success, or -ENOMEM if node allocation fails.
+ */
+int rcu_rbtree_insert(struct rcu_rbtree *rbtree,
+ void *begin, void *end)
+{
+ struct rcu_rbtree_node *x, *y, *z;
+
+ z = _rcu_rbtree_alloc_node(rbtree);
+ if (!z)
+ return -ENOMEM;
+ z->begin = begin;
+ z->end = end;
+
+ dbg_printf("insert %p\n", z->begin);
+ assert(!is_decay(rbtree->root));
+
+ /* Standard BST descent to find z's future parent y. */
+ y = make_nil(rbtree);
+ x = rbtree->root;
+ while (!rcu_rbtree_is_nil(rbtree, x)) {
+ y = x;
+ if (rbtree->comp(z->begin, x->begin) < 0)
+ x = x->_left;
+ else
+ x = x->_right;
+ }
+
+ z->_left = make_nil(rbtree);
+ z->_right = make_nil(rbtree);
+ z->color = COLOR_RED;
+ z->decay_next = NULL;
+ z->max_end = z->end;
+ z->rbtree = rbtree;
+
+ if (rcu_rbtree_is_nil(rbtree, y)) {
+ /* Empty tree: z becomes the root. */
+ set_parent(z, y, IS_RIGHT); /* pos arbitrary for root node */
+ /*
+ * Order stores to z (children/parents) before stores
+ * that will make it visible to the rest of the tree.
+ */
+ cmm_smp_wmb();
+ _CMM_STORE_SHARED(rbtree->root, z);
+ } else if (rbtree->comp(z->begin, y->begin) < 0) {
+ /* Attach z as left child of (a decay copy of) y. */
+ y = dup_decay_node(rbtree, y);
+ set_parent(z, y, IS_LEFT);
+ if (get_pos(z) == IS_LEFT)
+ _CMM_STORE_SHARED(y->_left, z);
+ else
+ _CMM_STORE_SHARED(y->_right, z);
+ populate_node_end(rbtree, y, 1, NULL);
+ } else {
+ /* Attach z as right child of (a decay copy of) y. */
+ y = dup_decay_node(rbtree, y);
+ set_parent(z, y, IS_RIGHT);
+ if (get_pos(z) == IS_LEFT)
+ _CMM_STORE_SHARED(y->_left, z);
+ else
+ _CMM_STORE_SHARED(y->_right, z);
+ populate_node_end(rbtree, y, 1, NULL);
+ }
+ rcu_rbtree_insert_fixup(rbtree, z);
+ /*
+ * Make sure to commit all _CMM_STORE_SHARED() for non-coherent caches.
+ */
+ cmm_smp_wmc();
+ show_tree(rbtree);
+ check_max_end(rbtree, z);
+ check_max_end(rbtree, y);
+
+ return 0;
+}
+
+/*
+ * Transplant v into u position.
+ */
+
+#ifdef RBTREE_RCU_SUPPORT_TRANSPLANT
+
+/*
+ * RCU transplant: make u's parent point at v instead of u, operating on
+ * a decay copy of v (and, if copy_parents, of u's parent).  The new link
+ * is published after a write barrier; interval "end" values are then
+ * propagated upward, stopping at "stop" (see populate_node_end).
+ */
+static
+void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *u,
+ struct rcu_rbtree_node *v,
+ unsigned int copy_parents,
+ struct rcu_rbtree_node *stop)
+{
+ dbg_printf("transplant %p\n", v->begin);
+
+ /* nil sentinel is shared; only real nodes get a decay copy. */
+ if (!rcu_rbtree_is_nil(rbtree, v))
+ v = dup_decay_node(rbtree, v);
+
+ if (rcu_rbtree_is_nil(rbtree, get_parent(u))) {
+ /* pos is arbitrary for root node */
+ set_parent(v, get_parent(u), IS_RIGHT);
+ cmm_smp_wmb(); /* write into node before publish */
+ _CMM_STORE_SHARED(rbtree->root, v);
+ } else {
+ struct rcu_rbtree_node *vp;
+
+ vp = get_parent(u);
+ if (copy_parents)
+ vp = dup_decay_node(rbtree, vp);
+ set_parent(v, vp, get_pos(u));
+ if (get_pos(v) == IS_LEFT)
+ _CMM_STORE_SHARED(vp->_left, v);
+ else
+ _CMM_STORE_SHARED(vp->_right, v);
+ populate_node_end(rbtree, vp, copy_parents, stop);
+ check_max_end(rbtree, vp);
+ }
+
+ /* Point children to new copy (parent only used by updates/next/prev) */
+ if (!rcu_rbtree_is_nil(rbtree, v)) {
+ set_parent(v->_right, get_decay(get_parent(v->_right)),
+ get_pos(v->_right));
+ set_parent(v->_left, get_decay(get_parent(v->_left)),
+ get_pos(v->_left));
+ }
+ assert(!is_decay(rbtree->root));
+ check_max_end(rbtree, v);
+}
+
+#else
+
+/* Non-RCU version */
+/*
+ * Non-RCU transplant (classic CLRS RB-TRANSPLANT) under the test mutex.
+ * copy_parents and stop are unused here; kept so both variants share the
+ * same call signature.
+ */
+static
+void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *u,
+ struct rcu_rbtree_node *v,
+ unsigned int copy_parents,
+ struct rcu_rbtree_node *stop)
+{
+ dbg_printf("transplant %p\n", v->begin);
+
+ lock_test_mutex();
+ if (rcu_rbtree_is_nil(rbtree, get_parent(u)))
+ rbtree->root = v;
+ else if (u == get_parent(u)->_left)
+ get_parent(u)->_left = v;
+ else
+ get_parent(u)->_right = v;
+ set_parent(v, get_parent(u), get_pos(u));
+ unlock_test_mutex();
+}
+
+#endif
+
+/*
+ * Restore red-black invariants after removal (CLRS RB-DELETE-FIXUP).
+ * x carries the extra "double black"; w is its sibling.  Rotations may
+ * replace nodes by fresh copies, hence the get_decay() calls to chase
+ * the current version of x.
+ */
+static void rcu_rbtree_remove_fixup(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *x)
+{
+ dbg_printf("remove fixup %p\n", x->begin);
+
+ while (x != rbtree->root && x->color == COLOR_BLACK) {
+ assert(!is_decay(get_parent(x)));
+ assert(!is_decay(get_parent(x)->_left));
+ if (x == get_parent(x)->_left) {
+ struct rcu_rbtree_node *w;
+
+ w = get_parent(x)->_right;
+
+ /* Case 1: red sibling -> rotate to get a black sibling. */
+ if (w->color == COLOR_RED) {
+ w->color = COLOR_BLACK;
+ get_parent(x)->color = COLOR_RED;
+ left_rotate(rbtree, get_parent(x));
+ x = get_decay(x);
+ assert(!is_decay(rbtree->root));
+ w = get_parent(x)->_right;
+ }
+ /* Case 2: both of w's children black -> recolor, move up. */
+ if (w->_left->color == COLOR_BLACK
+ && w->_right->color == COLOR_BLACK) {
+ w->color = COLOR_RED;
+ x = get_parent(x);
+ assert(!is_decay(rbtree->root));
+ assert(!is_decay(x));
+ } else {
+ /* Case 3: w's right child black -> rotate into case 4. */
+ if (w->_right->color == COLOR_BLACK) {
+ w->_left->color = COLOR_BLACK;
+ w->color = COLOR_RED;
+ right_rotate(rbtree, w);
+ assert(!is_decay(rbtree->root));
+ x = get_decay(x);
+ w = get_parent(x)->_right;
+ }
+ /* Case 4: recolor, rotate, and terminate the loop. */
+ w->color = get_parent(x)->color;
+ get_parent(x)->color = COLOR_BLACK;
+ w->_right->color = COLOR_BLACK;
+ left_rotate(rbtree, get_parent(x));
+ assert(!is_decay(rbtree->root));
+ x = rbtree->root;
+ }
+ } else {
+ /* Mirror image: x is a right child. */
+ struct rcu_rbtree_node *w;
+
+ w = get_parent(x)->_left;
+
+ if (w->color == COLOR_RED) {
+ w->color = COLOR_BLACK;
+ get_parent(x)->color = COLOR_RED;
+ right_rotate(rbtree, get_parent(x));
+ assert(!is_decay(rbtree->root));
+ x = get_decay(x);
+ w = get_parent(x)->_left;
+ }
+ if (w->_right->color == COLOR_BLACK
+ && w->_left->color == COLOR_BLACK) {
+ w->color = COLOR_RED;
+ x = get_parent(x);
+ assert(!is_decay(rbtree->root));
+ assert(!is_decay(x));
+ } else {
+ if (w->_left->color == COLOR_BLACK) {
+ w->_right->color = COLOR_BLACK;
+ w->color = COLOR_RED;
+ left_rotate(rbtree, w);
+ assert(!is_decay(rbtree->root));
+ x = get_decay(x);
+ w = get_parent(x)->_left;
+ }
+ w->color = get_parent(x)->color;
+ get_parent(x)->color = COLOR_BLACK;
+ w->_left->color = COLOR_BLACK;
+ right_rotate(rbtree, get_parent(x));
+ assert(!is_decay(rbtree->root));
+ x = rbtree->root;
+ }
+ }
+ }
+ x->color = COLOR_BLACK;
+}
+
+/*
+ * Delete z. All non-copied children left/right positions are unchanged.
+ */
+/*
+ * Delete z. All non-copied children left/right positions are unchanged.
+ * y is z's in-order successor (minimum of z's right subtree) and takes
+ * z's place; y's own right child replaces y.  Works on decay copies so
+ * readers always see either the old or the new branch, never a torn one.
+ */
+static
+void rcu_rbtree_remove_nonil(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *z,
+ struct rcu_rbtree_node *y)
+{
+ struct rcu_rbtree_node *x;
+
+ dbg_printf("remove nonil %p\n", z->begin);
+ show_tree(rbtree);
+
+ assert(!is_decay(z));
+ assert(!is_decay(y));
+ assert(!is_decay(y->_right));
+ assert(!is_decay(get_parent(y)));
+ x = y->_right;
+ assert(!is_decay(x));
+ if (get_parent(y) == z) {
+ /* Simple case: y is z's direct right child. */
+ y = dup_decay_node(rbtree, y);
+ set_parent(x, y, get_pos(x)); /* parent for nil */
+ /* y is z's right node */
+ set_left(rbtree, y, z->_left);
+ y->max_end = calculate_node_max_end(rbtree, y);
+ rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
+ } else {
+ struct rcu_rbtree_node *oy_right, *z_right;
+
+ /*
+ * Need to make sure y is always visible by readers.
+ */
+ y = rcu_rbtree_min_dup_decay(rbtree, z->_right, &z_right);
+ assert(!is_decay(y));
+ assert(!is_decay(z));
+ oy_right = y->_right;
+
+ /*
+ * The max child begin of z_right does not change, because
+ * we're only changing its left children.
+ */
+ y->_right = z_right;
+ set_parent(y->_right, y, IS_RIGHT);
+ assert(!is_decay(z->_left));
+ y->_left = z->_left;
+ assert(!is_decay(oy_right));
+ /*
+ * Transplant of oy_right to old y's location will only
+ * trigger a "end" value update of the already copied branch
+ * (which is not visible yet). We are transplanting
+ * oy_right as a left child of old y's parent, so the
+ * min values update propagated upward necessarily stops
+ * at z_right.
+ */
+ rcu_rbtree_transplant(rbtree, y, oy_right, 0, y);
+ y->max_end = calculate_node_max_end(rbtree, y);
+ rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
+ /* Update children */
+ (void) rcu_rbtree_min_update_decay(rbtree, y->_right);
+ }
+ /* Chase the latest copy of y, give it z's color, fix child links. */
+ y = get_decay(y);
+ assert(!is_decay(z));
+ assert(!is_decay(z->_left));
+ y->color = z->color;
+ set_parent(y->_left, y, IS_LEFT);
+ set_parent(y->_right, get_decay(get_parent(y->_right)), IS_RIGHT);
+ assert(!is_decay(y->_left));
+ assert(!is_decay(y->_right));
+}
+
+/*
+ * rcu_rbtree_remove - remove node z from the RCU rbtree
+ *
+ * Mirrors CLRS RB-DELETE: z is replaced by its single child, or by its
+ * in-order successor y (rcu_rbtree_remove_nonil) when it has two
+ * children; the rebalancing fixup runs if a black node was unlinked.
+ * The unlinked node is handed to call_rcu() so it is only freed after
+ * all pre-existing readers are done with it.
+ *
+ * Returns 0 on success.
+ */
+int rcu_rbtree_remove(struct rcu_rbtree *rbtree,
+ struct rcu_rbtree_node *z)
+{
+ struct rcu_rbtree_node *x, *y;
+ unsigned int y_original_color;
+
+ assert(!is_decay(rbtree->root));
+ dbg_printf("remove %p\n", z->begin);
+ show_tree(rbtree);
+
+ assert(!is_decay(z));
+ y = z;
+ y_original_color = y->color;
+
+ if (rcu_rbtree_is_nil(rbtree, z->_left)) {
+ /* No left child: splice z's right subtree into z's position. */
+ rcu_rbtree_transplant(rbtree, z, z->_right, 1, NULL);
+ assert(!is_decay(z));
+ x = get_decay(z->_right);
+ show_tree(rbtree);
+ } else if (rcu_rbtree_is_nil(rbtree, z->_right)) {
+ /* No right child: splice z's left subtree into z's position. */
+ rcu_rbtree_transplant(rbtree, z, z->_left, 1, NULL);
+ assert(!is_decay(z));
+ x = get_decay(z->_left);
+ show_tree(rbtree);
+ } else {
+ /* Two children: replace z by its in-order successor y. */
+ y = rcu_rbtree_min(rbtree, z->_right);
+ assert(!is_decay(y));
+ y_original_color = y->color;
+ x = y->_right;
+ rcu_rbtree_remove_nonil(rbtree, z, y);
+ x = get_decay(x);
+ show_tree(rbtree);
+ }
+ if (y_original_color == COLOR_BLACK)
+ rcu_rbtree_remove_fixup(rbtree, x);
+ show_tree(rbtree);
+ check_max_end(rbtree, x);
+ check_max_end(rbtree, get_decay(y));
+ /*
+ * Commit all _CMM_STORE_SHARED().
+ */
+ cmm_smp_wmc();
+ /* Defer freeing of the unlinked node until a grace period elapses. */
+ rbtree->call_rcu(&z->head, _rcu_rbtree_free_node);
+
+ return 0;
+}