From: Mathieu Desnoyers Date: Fri, 3 Jun 2011 16:57:57 +0000 (-0400) Subject: Merge branch 'master' into rbtree2 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=1cdd423d778cf14415e4929e2b80379fc10c49bc;p=userspace-rcu.git Merge branch 'master' into rbtree2 Conflicts: Makefile.am Edited rbtree API to allow rbtree users to specify which call_rcu should be used. Signed-off-by: Mathieu Desnoyers --- 1cdd423d778cf14415e4929e2b80379fc10c49bc diff --cc Makefile.am index d51ff7a,1589884..413d880 --- a/Makefile.am +++ b/Makefile.am @@@ -30,9 -30,8 +30,8 @@@ COMPAT+=compat_futex. endif lib_LTLIBRARIES = liburcu.la liburcu-qsbr.la liburcu-mb.la liburcu-signal.la \ - liburcu-bp.la liburcu-defer.la liburcu-call.la \ - libwfqueue.la libwfstack.la librculfqueue.la \ - liburcu-rbtree.la librculfstack.la - liburcu-bp.la \ ++ liburcu-bp.la liburcu-rbtree.la \ + libwfqueue.la libwfstack.la librculfqueue.la librculfstack.la liburcu_la_SOURCES = urcu.c urcu-pointer.c $(COMPAT) @@@ -53,4 -49,6 +49,7 @@@ libwfqueue_la_SOURCES = wfqueue.c $(COM libwfstack_la_SOURCES = wfstack.c $(COMPAT) librculfqueue_la_SOURCES = rculfqueue.c $(COMPAT) librculfstack_la_SOURCES = rculfstack.c $(COMPAT) +liburcu_rbtree_la_SOURCES = urcu-rbtree.c $(COMPAT) + + pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = liburcu.pc liburcu-bp.pc ++pkgconfig_DATA = liburcu.pc liburcu-bp.pc diff --cc tests/Makefile.am index 0abfb8b,5598689..7aced3a --- a/tests/Makefile.am +++ b/tests/Makefile.am @@@ -48,7 -46,6 +47,7 @@@ WFQUEUE_LIB=$(top_builddir)/libwfqueue. 
WFSTACK_LIB=$(top_builddir)/libwfstack.la RCULFQUEUE_LIB=$(top_builddir)/librculfqueue.la RCULFSTACK_LIB=$(top_builddir)/librculfstack.la - URCU_RBTREE=$(URCU_DEFER) $(top_srcdir)/urcu-rbtree.c ++URCU_RBTREE=$(URCU) $(top_srcdir)/urcu-rbtree.c EXTRA_DIST = $(top_srcdir)/tests/api_*.h diff --cc tests/test_urcu_rbtree.c index a15045b,0000000..6003645 mode 100644,000000..100644 --- a/tests/test_urcu_rbtree.c +++ b/tests/test_urcu_rbtree.c @@@ -1,608 -1,0 +1,608 @@@ +/* + * test_urcu_rbtree.c + * + * Userspace RCU library - test program for RB tree + * + * Copyright February 2010 - Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define _GNU_SOURCE +#ifndef DYNAMIC_LINK_TEST +#define _LGPL_SOURCE +#else +#define debug_yield_read() +#endif +#include "../config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +extern int __thread disable_debug; + +/* hardcoded number of CPUs */ +#define NR_CPUS 16384 + +/* number of insert/delete */ +#define NR_RAND 6 +//#define NR_RAND 7 + +#if defined(_syscall0) +_syscall0(pid_t, gettid) +#elif defined(__NR_gettid) +static inline pid_t gettid(void) +{ + return syscall(__NR_gettid); +} +#else +#warning "use pid as tid" +static inline pid_t gettid(void) +{ + return getpid(); +} +#endif + +#include +#include +#include + +int tree_comp(void *a, void *b) +{ + if ((unsigned long)a < (unsigned long)b) + return -1; + else if ((unsigned long)a > (unsigned long)b) + return 1; + else + return 0; +} + - static DEFINE_RCU_RBTREE(rbtree, tree_comp, malloc, free); ++static DEFINE_RCU_RBTREE(rbtree, tree_comp, malloc, free, call_rcu); + +static volatile int test_go, test_stop; + +static unsigned long wdelay; + +static unsigned long duration; + +/* read-side C.S. duration, in loops */ +static unsigned long rduration; + +/* write-side C.S. duration, in loops */ +static unsigned long wduration; + +static inline void loop_sleep(unsigned long l) +{ + while(l-- != 0) + caa_cpu_relax(); +} + +static int verbose_mode; + +#define printf_verbose(fmt, args...) 
\ + do { \ + if (verbose_mode) \ + printf(fmt, args); \ + } while (0) + +static unsigned int cpu_affinities[NR_CPUS]; +static unsigned int next_aff = 0; +static int use_affinity = 0; + +pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER; + +#ifndef HAVE_CPU_SET_T +typedef unsigned long cpu_set_t; +# define CPU_ZERO(cpuset) do { *(cpuset) = 0; } while(0) +# define CPU_SET(cpu, cpuset) do { *(cpuset) |= (1UL << (cpu)); } while(0) +#endif + +static void set_affinity(void) +{ + cpu_set_t mask; + int cpu; + int ret; + + if (!use_affinity) + return; + +#if HAVE_SCHED_SETAFFINITY + ret = pthread_mutex_lock(&affinity_mutex); + if (ret) { + perror("Error in pthread mutex lock"); + exit(-1); + } + cpu = cpu_affinities[next_aff++]; + ret = pthread_mutex_unlock(&affinity_mutex); + if (ret) { + perror("Error in pthread mutex unlock"); + exit(-1); + } + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); +#if SCHED_SETAFFINITY_ARGS == 2 + sched_setaffinity(0, &mask); +#else + sched_setaffinity(0, sizeof(mask), &mask); +#endif +#endif /* HAVE_SCHED_SETAFFINITY */ +} + +/* + * returns 0 if test should end. 
+ */ +static int test_duration_write(void) +{ + return !test_stop; +} + +static int test_duration_read(void) +{ + return !test_stop; +} + +static unsigned long long __thread nr_writes; +static unsigned long long __thread nr_reads; + +static unsigned int nr_readers; +static unsigned int nr_writers; + +static unsigned long global_items; +static void **global_key = NULL; + +pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER; + +void rcu_copy_mutex_lock(void) +{ + int ret; + ret = pthread_mutex_lock(&rcu_copy_mutex); + if (ret) { + perror("Error in pthread mutex lock"); + exit(-1); + } +} + +void rcu_copy_mutex_unlock(void) +{ + int ret; + + ret = pthread_mutex_unlock(&rcu_copy_mutex); + if (ret) { + perror("Error in pthread mutex unlock"); + exit(-1); + } +} + +static +void set_lookup_index(struct rcu_rbtree_node *node, + char *lookup_hit) +{ + int i; + + for (i = 0; i < global_items; i++) { + if (node->begin == global_key[i] + && !lookup_hit[i]) { + lookup_hit[i] = 1; + break; + } + } +} + +void *thr_reader(void *_count) +{ + unsigned long long *count = _count; + struct rcu_rbtree_node *node; + int i, index; + char *lookup_hit; + + printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n", + "reader", pthread_self(), (unsigned long)gettid()); + + set_affinity(); + + rcu_register_thread(); + + lookup_hit = malloc(sizeof(*lookup_hit) * global_items); + + while (!test_go) + { + } + cmm_smp_mb(); + + for (;;) { + /* search bottom of range */ + for (i = 0; i < global_items; i++) { + rcu_read_lock(); + node = rcu_rbtree_search(&rbtree, + rcu_dereference(rbtree.root), + global_key[i]); + assert(!rcu_rbtree_is_nil(&rbtree, node)); + rcu_read_unlock(); + } + + /* search end of range */ + for (i = 0; i < global_items; i++) { + rcu_read_lock(); + node = rcu_rbtree_search(&rbtree, + rcu_dereference(rbtree.root), + (void*) ((unsigned long) global_key[i] + 3)); + assert(!rcu_rbtree_is_nil(&rbtree, node)); + rcu_read_unlock(); + } + + /* search range (middle) */ + for (i 
= 0; i < global_items; i++) { + rcu_read_lock(); + node = rcu_rbtree_search_range(&rbtree, + rcu_dereference(rbtree.root), + (void*) ((unsigned long) global_key[i] + 1), + (void*) ((unsigned long) global_key[i] + 2)); + assert(!rcu_rbtree_is_nil(&rbtree, node)); + rcu_read_unlock(); + } + + /* search begin key */ + for (i = 0; i < global_items; i++) { + rcu_read_lock(); + node = rcu_rbtree_search_begin_key(&rbtree, + rcu_dereference(rbtree.root), + global_key[i]); + assert(!rcu_rbtree_is_nil(&rbtree, node)); + rcu_read_unlock(); + } + + /* min + next */ + memset(lookup_hit, 0, sizeof(*lookup_hit) * global_items); + + rcu_read_lock(); + node = rcu_rbtree_min(&rbtree, + rcu_dereference(rbtree.root)); + while (!rcu_rbtree_is_nil(&rbtree, node)) { + set_lookup_index(node, lookup_hit); + node = rcu_rbtree_next(&rbtree, node); + } + rcu_read_unlock(); + + for (i = 0; i < global_items; i++) + assert(lookup_hit[i]); + + /* max + prev */ + memset(lookup_hit, 0, sizeof(*lookup_hit) * global_items); + + rcu_read_lock(); + node = rcu_rbtree_max(&rbtree, + rcu_dereference(rbtree.root)); + while (!rcu_rbtree_is_nil(&rbtree, node)) { + set_lookup_index(node, lookup_hit); + node = rcu_rbtree_prev(&rbtree, node); + } + rcu_read_unlock(); + + for (i = 0; i < global_items; i++) + assert(lookup_hit[i]); + + debug_yield_read(); + if (unlikely(rduration)) + loop_sleep(rduration); + nr_reads++; + if (unlikely(!test_duration_read())) + break; + } + + rcu_unregister_thread(); + + /* test extra thread registration */ + rcu_register_thread(); + rcu_unregister_thread(); + + free(lookup_hit); + + *count = nr_reads; + printf_verbose("thread_end %s, thread id : %lx, tid %lu\n", + "reader", pthread_self(), (unsigned long)gettid()); + return ((void*)1); + +} + +void *thr_writer(void *_count) +{ + unsigned long long *count = _count; + struct rcu_rbtree_node *node; + void *key[NR_RAND]; + int i; + + printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n", + "writer", pthread_self(), (unsigned 
long)gettid()); + + set_affinity(); + + //disable_debug = 1; + + rcu_register_thread(); + + while (!test_go) + { + } + cmm_smp_mb(); + + for (;;) { + rcu_copy_mutex_lock(); + + for (i = 0; i < NR_RAND; i++) { + //key[i] = (void *)(unsigned long)(rand() % 2048); + key[i] = (void *)(unsigned long)(((unsigned long) rand() * 4) % 2048); + //For more collisions + //key[i] = (void *)(unsigned long)(rand() % 6); + //node->begin = key[i]; + //node->end = (void *)((unsigned long) key[i] + 1); + //node->end = (void *)((unsigned long) key[i] + 4); + rcu_read_lock(); + rcu_rbtree_insert(&rbtree, key[i], + (void *)((unsigned long) key[i] + 4)); + rcu_read_unlock(); + } + rcu_copy_mutex_unlock(); + + if (unlikely(wduration)) + loop_sleep(wduration); + + rcu_copy_mutex_lock(); + for (i = 0; i < NR_RAND; i++) { +#if 0 + node = rcu_rbtree_min(rbtree, rbtree->root); + while (!rcu_rbtree_is_nil(&rbtree, node)) { + printf("{ 0x%lX p:%lX r:%lX l:%lX %s %s %s} ", + (unsigned long)node->key, + node->p->key, + node->right->key, + node->left->key, + node->color ? "red" : "black", + node->pos ? "right" : "left", + node->nil ? "nil" : ""); + node = rcu_rbtree_next(rbtree, node); + } + printf("\n"); +#endif + rcu_read_lock(); + node = rcu_rbtree_search(&rbtree, rbtree.root, key[i]); + assert(!rcu_rbtree_is_nil(&rbtree, node)); + rcu_rbtree_remove(&rbtree, node); + rcu_read_unlock(); + } + + rcu_copy_mutex_unlock(); + nr_writes++; + if (unlikely(!test_duration_write())) + break; + if (unlikely(wdelay)) + loop_sleep(wdelay); + } + + rcu_unregister_thread(); + + printf_verbose("thread_end %s, thread id : %lx, tid %lu\n", + "writer", pthread_self(), (unsigned long)gettid()); + *count = nr_writes; + return ((void*)2); +} + +void show_usage(int argc, char **argv) +{ + printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]); +#ifdef DEBUG_YIELD + printf(" [-r] [-w] (yield reader and/or writer)"); +#endif + printf(" [-d delay] (writer period (us))"); + printf(" [-c duration] (reader C.S. 
duration (in loops))"); + printf(" [-e duration] (writer C.S. duration (in loops))"); + printf(" [-v] (verbose output)"); + printf(" [-a cpu#] [-a cpu#]... (affinity)"); + printf("\n"); +} + +int main(int argc, char **argv) +{ + int err; + pthread_t *tid_reader, *tid_writer; + void *tret; + unsigned long long *count_reader, *count_writer; + unsigned long long tot_reads = 0, tot_writes = 0; + int i, a; + struct rcu_rbtree_node *node; + + if (argc < 4) { + show_usage(argc, argv); + return -1; + } + + err = sscanf(argv[1], "%u", &nr_readers); + if (err != 1) { + show_usage(argc, argv); + return -1; + } + + err = sscanf(argv[2], "%u", &nr_writers); + if (err != 1) { + show_usage(argc, argv); + return -1; + } + + err = sscanf(argv[3], "%lu", &duration); + if (err != 1) { + show_usage(argc, argv); + return -1; + } + + for (i = 4; i < argc; i++) { + if (argv[i][0] != '-') + continue; + switch (argv[i][1]) { +#ifdef DEBUG_YIELD + case 'r': + yield_active |= YIELD_READ; + break; + case 'w': + yield_active |= YIELD_WRITE; + break; +#endif + case 'a': + if (argc < i + 2) { + show_usage(argc, argv); + return -1; + } + a = atoi(argv[++i]); + cpu_affinities[next_aff++] = a; + use_affinity = 1; + printf_verbose("Adding CPU %d affinity\n", a); + break; + case 'c': + if (argc < i + 2) { + show_usage(argc, argv); + return -1; + } + rduration = atol(argv[++i]); + break; + case 'd': + if (argc < i + 2) { + show_usage(argc, argv); + return -1; + } + wdelay = atol(argv[++i]); + break; + case 'e': + if (argc < i + 2) { + show_usage(argc, argv); + return -1; + } + wduration = atol(argv[++i]); + break; + case 'v': + verbose_mode = 1; + break; + case 'g': + if (argc < i + 2) { + show_usage(argc, argv); + return -1; + } + global_items = atol(argv[++i]); + break; + } + } + + printf_verbose("running test for %lu seconds, %u readers, %u writers.\n", + duration, nr_readers, nr_writers); + printf_verbose("Writer delay : %lu loops.\n", wdelay); + printf_verbose("Reader duration : %lu loops.\n", 
rduration); + printf_verbose("thread %-6s, thread id : %lx, tid %lu\n", + "main", pthread_self(), (unsigned long)gettid()); + + tid_reader = malloc(sizeof(*tid_reader) * nr_readers); + tid_writer = malloc(sizeof(*tid_writer) * nr_writers); + count_reader = malloc(sizeof(*count_reader) * nr_readers); + count_writer = malloc(sizeof(*count_writer) * nr_writers); + global_key = malloc(sizeof(*global_key) * global_items); + + srand(time(NULL)); + + next_aff = 0; + + for (i = 0; i < nr_readers; i++) { + err = pthread_create(&tid_reader[i], NULL, thr_reader, + &count_reader[i]); + if (err != 0) + exit(1); + } + for (i = 0; i < nr_writers; i++) { + err = pthread_create(&tid_writer[i], NULL, thr_writer, + &count_writer[i]); + if (err != 0) + exit(1); + } + + rcu_register_thread(); + rcu_read_lock(); + /* Insert items looked up by readers */ + for (i = 0; i < global_items; i++) { + global_key[i] = (void *)(unsigned long)(((unsigned long) rand() * 4) % 2048); + //global_key[i] = (void *)(unsigned long)(rand() % 2048); + //For more collisions + //global_key[i] = (void *)(unsigned long)(rand() % 6); + //node->begin = global_key[i]; + //node->end = (void *)((unsigned long) global_key[i] + 1); + //node->end = (void *)((unsigned long) global_key[i] + 4); + rcu_rbtree_insert(&rbtree, global_key[i], + (void *)((unsigned long) global_key[i] + 4)); + } + rcu_read_unlock(); + + cmm_smp_mb(); + + test_go = 1; + + sleep(duration); + + test_stop = 1; + + for (i = 0; i < nr_readers; i++) { + err = pthread_join(tid_reader[i], &tret); + if (err != 0) + exit(1); + tot_reads += count_reader[i]; + } + for (i = 0; i < nr_writers; i++) { + err = pthread_join(tid_writer[i], &tret); + if (err != 0) + exit(1); + tot_writes += count_writer[i]; + } + + rcu_read_lock(); + for (i = 0; i < global_items; i++) { + node = rcu_rbtree_search(&rbtree, rbtree.root, global_key[i]); + assert(!rcu_rbtree_is_nil(&rbtree, node)); + rcu_rbtree_remove(&rbtree, node); + } + rcu_read_unlock(); + rcu_unregister_thread(); 
+ + printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads, + tot_writes); + printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu wdur %6lu " + "nr_writers %3u " + "wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu " + "global_items %6lu\n", + argv[0], duration, nr_readers, rduration, wduration, + nr_writers, wdelay, tot_reads, tot_writes, + tot_reads + tot_writes, global_items); + free(tid_reader); + free(tid_writer); + free(count_reader); + free(count_writer); + free(global_key); + return 0; +} diff --cc urcu-rbtree.c index bc6d130,0000000..01733f0 mode 100644,000000..100644 --- a/urcu-rbtree.c +++ b/urcu-rbtree.c @@@ -1,1288 -1,0 +1,1288 @@@ +/* + * urcu-rbtree.c + * + * Userspace RCU library - Red-Black Tree + * + * Copyright (c) 2010 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Implementation of RCU-adapted data structures and operations based on the RB + * tree algorithms found in chapter 12 of: + * + * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and + * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT + * Press, September 2009. 
+ */ + +#define _BSD_SOURCE +#define _LGPL_SOURCE + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Explanation of next/prev walk coherency and search coherency when + * performed concurrently with updates. + * + * next/prev walk coherency with respect to concurrent updates: + * + * There are 3 scenarios for which we need to model and validate this: + * rotation, transplant and "teleportation" (the latter being a remote + * transplant in a remove non-nil case). + * + * - rotation left (right is symmetric) + * xl and yr point to the same parent nodes before/after left + * rotation. yll and ylr also point to the same parent node + * before/after left rotation. + * As we are copying x, y and yl (the 3 nodes which parent/child + * relationship are changed) to "new" version of this node cluster, + * all external references to the cluster either point to the old + * cluster or the new one. If we take this cluster as a "black box" + * from the point of view of next/prev traversal, all we have to + * ensure is that the old and the new cluster behave in the exact same + * way with respect to traversal order. + * + * - transplant + * In this operation, we transplant a copy of "v" into its parent + * location (u), thus replacing it. The children of "v", vl and vr, + * still point to v (new version) after the transplant, so it does not + * change the behavior when considering the next/prev traversal. "v" + * being copied to a new version ensures that the parent pointers of v + * are pointing to its new parent (parent of u) before it is published + * to readers (by setting the child pointer of u's parent to the new + * copy of v). + * + * - teleportation + * This one is probably the most tricky and will require some ascii + * art to explain. 
+ *
+ * We want to remove z from this tree:
+ *
+ *                zp
+ *                  \
+ *                   z
+ *                  / \
+ *                zl   zr
+ *                    /
+ *                   a
+ *                  / \
+ *                 b   ar
+ *                / \
+ *               y   br
+ *                \
+ *                 yr
+ *                /  \
+ *             yrl    yrr
+ *
+ * What we are going to do is to "teleport" y into z's location,
+ * reparenting yr to b. We are taking care to create a new cluster
+ * copy that is isolated from any reader. We will represent the new
+ * members of the cluster with capital letters.
+ *
+ *                zp
+ *                  \
+ *                   Y
+ *                  / \
+ *                zl   ZR
+ *                    /
+ *                   A
+ *                  / \
+ *                 B   ar
+ *                / \
+ *              YR   br
+ *             /  \
+ *          yrl    yrr
+ *
+ * In this transient state, we notice that the pointers within the
+ * cluster all point to the new cluster nodes, and they point to the
+ * correct external nodes. However, no external pointer points to the
+ * cluster (yet). The first pointer to point to this cluster will be
+ * "zp->right". It will therefore make the cluster visible for search.
+ *
+ * In this intermediate state, we can walk through the new cluster
+ * when coming from the top (in a next/prev traversal), but can come
+ * back to the old cluster when going back up from the children nodes.
+ * All we have to ensure is that the two clusters, taken as a black
+ * box from a next/prev traversal perspective, yield the exact same
+ * result.
+ *
+ * Search coherency with concurrent updates:
+ *
+ * Simple "search" (only going down the tree) is also handled by this
+ * cluster scheme. The explanation is a subset of the prev/next
+ * explanation, where we don't have to care about the intermediate
+ * stages where the children point to the old cluster, because we only
+ * ever use the top level pointers to go down into the children nodes,
+ * we never go back up. So by simply making sure that all the cluster
+ * internal node pointers are set up correctly before making the
+ * cluster visible to the readers (by setting the parent pointer to
+ * the topmost new node in the cluster), we are sure that readers will
+ * see a coherent view of the cluster at all times.
+ */ + +#ifdef DEBUG +#define dbg_printf(args...) printf(args) +#define dbg_usleep(usecs) usleep(usecs) +#else +#define dbg_printf(args...) +#define dbg_usleep(usecs) +#endif + +/* + * Undefine this to enable the non-RCU rotate and transplant functions + * (for debugging). Note that these versions don't support the tree + * max_end updates, so lookups must be performed with + * rcu_rbtree_search_begin_key when using this debug facility. + */ +#define RBTREE_RCU_SUPPORT_ROTATE_LEFT +#define RBTREE_RCU_SUPPORT_ROTATE_RIGHT +#define RBTREE_RCU_SUPPORT_TRANSPLANT + +#ifdef EXTRA_DEBUG +static pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t outer_mutex = PTHREAD_MUTEX_INITIALIZER; + +static +void lock_outer_mutex(void) +{ + pthread_mutex_lock(&outer_mutex); +} + +static +void unlock_outer_mutex(void) +{ + pthread_mutex_unlock(&outer_mutex); +} + +static +void lock_test_mutex(void) +{ + pthread_mutex_lock(&test_mutex); +} + +static +void unlock_test_mutex(void) +{ + pthread_mutex_unlock(&test_mutex); +} +#endif + +static +void set_parent(struct rcu_rbtree_node *node, + struct rcu_rbtree_node *parent, + unsigned int pos) +{ + _CMM_STORE_SHARED(node->parent, ((unsigned long) parent) | pos); +} + +static +struct rcu_rbtree_node *get_parent(struct rcu_rbtree_node *node) +{ + return (struct rcu_rbtree_node *) (node->parent & ~1UL); +} + +static +unsigned int get_pos(struct rcu_rbtree_node *node) +{ + return (unsigned int) (node->parent & 1UL); +} + +static +struct rcu_rbtree_node *get_parent_and_pos(struct rcu_rbtree_node *node, + unsigned int *pos) +{ + unsigned long parent_pos = rcu_dereference(node->parent); + + *pos = (unsigned int) (parent_pos & 1UL); + return (struct rcu_rbtree_node *) (parent_pos & ~1UL); +} + +static +void set_decay(struct rcu_rbtree_node *x, struct rcu_rbtree_node *xc) +{ + x->decay_next = xc; +} + +static +struct rcu_rbtree_node *get_decay(struct rcu_rbtree_node *x) +{ + if (!x) + return NULL; + while (x->decay_next) 
+ x = x->decay_next; + return x; +} + +static +struct rcu_rbtree_node *is_decay(struct rcu_rbtree_node *x) +{ + return x->decay_next; +} + +static +struct rcu_rbtree_node *_rcu_rbtree_alloc_node(struct rcu_rbtree *rbtree) +{ + return rbtree->rballoc(sizeof(struct rcu_rbtree_node)); +} + +static +void _rcu_rbtree_free_node(struct rcu_head *head) +{ + struct rcu_rbtree_node *node = + caa_container_of(head, struct rcu_rbtree_node, head); + node->rbtree->rbfree(node); +} + +static +struct rcu_rbtree_node *dup_decay_node(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *xc; + + if (rcu_rbtree_is_nil(rbtree, x)) + return x; + + xc = _rcu_rbtree_alloc_node(rbtree); + memcpy(xc, x, sizeof(*xc)); + xc->decay_next = NULL; + set_decay(x, xc); - call_rcu(&x->head, _rcu_rbtree_free_node); ++ rbtree->call_rcu(&x->head, _rcu_rbtree_free_node); + return xc; +} + +/* + * Info for range lookups: + * Range lookup information is only valid when used when searching for + * ranges. It should never be used in next/prev traversal because the + * pointers to parents are not in sync with the parent vision of the + * children range. + */ +static +void set_left(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node, + struct rcu_rbtree_node *left) +{ + node->_left = left; +} + +static +void set_right(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node, + struct rcu_rbtree_node *right) +{ + node->_right = right; +} + +static +void *calculate_node_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node) +{ + void *max_end; + + max_end = node->end; + if (!rcu_rbtree_is_nil(rbtree, node->_right)) { + if (rbtree->comp(max_end, node->_right->max_end) < 0) + max_end = node->_right->max_end; + } + if (!rcu_rbtree_is_nil(rbtree, node->_left)) { + if (rbtree->comp(max_end, node->_left->max_end) < 0) + max_end = node->_left->max_end; + } + return max_end; +} + +/* + * TODO + * Deal with memory allocation errors. 
+ * Can be ensured by reserving a pool of memory entries before doing the + * insertion, which will have to be function of number of + * transplantations/rotations required for the operation (which is a + * multiple of the tree height). + */ + +#ifdef DEBUG +static +void show_tree(struct rcu_rbtree *rbtree) +{ + struct rcu_rbtree_node *node; + + node = rcu_rbtree_min(rbtree, rbtree->root); + while (!rcu_rbtree_is_nil(rbtree, node)) { + assert(!is_decay(node)); + printf("{ b:%lX e:%lX pb: %lX r:%lX l:%lX %s %s %s} ", + (unsigned long) node->begin, + (unsigned long) node->end, + (unsigned long) get_parent(node)->begin, + (unsigned long) node->_right->begin, + (unsigned long) node->_left->begin, + node->color ? "red" : "black", + get_pos(node) ? "right" : "left", + rcu_rbtree_is_nil(rbtree, node) ? "nil" : ""); + node = rcu_rbtree_next(rbtree, node); + } + printf("\n"); +} + +#define check_max_end(rbtree, x) \ + do { \ + if (rcu_rbtree_is_nil(rbtree, x)) \ + break; \ + assert(rbtree->comp(x->max_end, \ + calculate_node_max_end(rbtree, x)) == 0); \ + } while (0) + +#else /* DEBUG */ +static +void show_tree(struct rcu_rbtree *rbtree) +{ +} + +static +void check_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *x) +{ +} +#endif /* DEBUG */ + +static +struct rcu_rbtree_node *make_nil(struct rcu_rbtree *rbtree) +{ + return &rbtree->nil_node; +} + +/* + * Iterative rbtree search. 
+ */
+/*
+ * Walk down from "x" looking for a node for which
+ * comp(begin, point) <= 0 and comp(point, end) < 0, i.e. "point" lies
+ * in [begin, end).
+ *
+ * The left subtree is tried first whenever it is non-nil and its
+ * max_end compares greater than "point"; otherwise the current node is
+ * tested, then the right subtree is followed if "point" compares
+ * greater than the node's begin.
+ *
+ * Returns the matching node, or the nil node if none is found.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree,
+					  struct rcu_rbtree_node *x,
+					  void *point)
+{
+	struct rcu_rbtree_node *xl;
+
+	dbg_printf("searching point 0x%lx\n", (unsigned long) point);
+	x = rcu_dereference(x);
+
+	while (!rcu_rbtree_is_nil(rbtree, x)) {
+		dbg_usleep(10);
+		xl = rcu_dereference(x->_left);
+		dbg_printf("search x %lx x_end %lx x_max_end %lx\n", (unsigned long) x->begin,
+				(unsigned long) x->end, (unsigned long) x->max_end);
+		dbg_printf("search xl %lx xl_end %lx xl_max_end %lx\n", (unsigned long) xl->begin,
+			(unsigned long) xl->end, (unsigned long) xl->max_end);
+		if (!rcu_rbtree_is_nil(rbtree, xl)
+		    && (rbtree->comp(xl->max_end, point) > 0)) {
+			/* some range in the left subtree ends after point */
+			dbg_printf("go left\n");
+			x = xl;
+		} else if (rbtree->comp(x->begin, point) <= 0
+			   && rbtree->comp(point, x->end) < 0) {
+			/* begin <= point < end */
+			dbg_printf("got it!\n");
+			break;
+		} else if (rbtree->comp(point, x->begin) > 0) {
+			dbg_printf("go right\n");
+			x = rcu_dereference(x->_right);
+		} else {
+			dbg_printf("not found!\n");
+			x = make_nil(rbtree);
+		}
+	}
+	if (rcu_rbtree_is_nil(rbtree, x))
+		dbg_printf("Reached bottom of tree.\n");
+
+	return x;
+}
+
+/*
+ * Range lookup: find the node whose [begin, end) range contains "begin"
+ * (via rcu_rbtree_search) and accept it only if its end does not
+ * compare below "end" according to rbtree->comp.
+ *
+ * Returns the matching node, or the tree's nil node when there is no
+ * match. A miss is always reported as nil (never NULL) so that callers
+ * can test every result with rcu_rbtree_is_nil(), exactly as they do
+ * for rcu_rbtree_search().
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree,
+					  struct rcu_rbtree_node *x,
+					  void *begin, void *end)
+{
+	struct rcu_rbtree_node *node;
+
+	node = rcu_rbtree_search(rbtree, x, begin);
+	if (rcu_rbtree_is_nil(rbtree, node))
+		return node;
+	if (rbtree->comp(node->end, end) < 0)
+		return make_nil(rbtree);	/* High is outside lookup range */
+	return node;
+}
+
+/*
+ * Search by exact range start value.
+ */ +struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x, + void *k) +{ + x = rcu_dereference(x); + int comp; + + while (!rcu_rbtree_is_nil(rbtree, x) && (comp = rbtree->comp(k, x->begin)) != 0) { + dbg_usleep(10); + if (comp < 0) + x = rcu_dereference(x->_left); + else + x = rcu_dereference(x->_right); + } + return x; +} + +static +struct rcu_rbtree_node *rcu_rbtree_min_dup_decay(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x, + struct rcu_rbtree_node **zr) +{ + struct rcu_rbtree_node *xl; + + x = rcu_dereference(x); + + if (rcu_rbtree_is_nil(rbtree, x)) { + *zr = x; + return x; + } else + *zr = x = dup_decay_node(rbtree, x); + + while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) { + x = dup_decay_node(rbtree, xl); + set_parent(x, get_decay(get_parent(x)), get_pos(x)); + get_parent(x)->_left = get_decay(get_parent(x)->_left); + } + return x; +} + +static +struct rcu_rbtree_node *rcu_rbtree_min_update_decay(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *xl; + + x = rcu_dereference(x); + + if (rcu_rbtree_is_nil(rbtree, x)) + return x; + else { + set_parent(x->_right, get_decay(get_parent(x->_right)), + get_pos(x->_right)); + set_parent(x->_left, get_decay(get_parent(x->_left)), + get_pos(x->_left)); + } + + while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) { + x = xl; + set_parent(x->_right, get_decay(get_parent(x->_right)), + get_pos(x->_right)); + set_parent(x->_left, get_decay(get_parent(x->_left)), + get_pos(x->_left)); + } + return x; +} + +struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *xl; + + x = rcu_dereference(x); + + if (rcu_rbtree_is_nil(rbtree, x)) + return x; + + while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) + x = xl; + return x; +} + +struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node 
*x) +{ + struct rcu_rbtree_node *xr; + + x = rcu_dereference(x); + + if (rcu_rbtree_is_nil(rbtree, x)) + return x; + + while (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right))) + x = xr; + return x; +} + +/* + * RCU read lock must be held across the next/prev calls to ensure validity of + * the returned node. + */ +struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *xr, *y; + unsigned int x_pos; + + x = rcu_dereference(x); + + if (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right))) + return rcu_rbtree_min(rbtree, xr); + y = get_parent_and_pos(x, &x_pos); + while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_RIGHT) { + x = y; + y = get_parent_and_pos(y, &x_pos); + } + return y; +} + +struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *xl, *y; + unsigned int x_pos; + + x = rcu_dereference(x); + + if (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) + return rcu_rbtree_max(rbtree, xl); + y = get_parent_and_pos(x, &x_pos); + while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_LEFT) { + x = y; + y = get_parent_and_pos(y, &x_pos); + } + return y; +} + +/* + * "node" needs to be non-visible by readers. + */ +static +void populate_node_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node, + unsigned int copy_parents, struct rcu_rbtree_node *stop) +{ + struct rcu_rbtree_node *prev = NULL, *orig_node = node, *top; + + do { + void *max_end; + + assert(node); + assert(!rcu_rbtree_is_nil(rbtree, node)); + + if (prev && copy_parents) { + node = dup_decay_node(rbtree, node); + if (get_pos(prev) == IS_RIGHT) + node->_right = prev; + else + node->_left = prev; + set_parent(prev, node, get_pos(prev)); + } + + max_end = calculate_node_max_end(rbtree, node); + /* + * Compare the node max_end keys to make sure we replace + * references to a key belonging to a node we remove + * from the tree. 
Otherwise we would still be using this + * pointer as an invalid reference after garbage + * collection of the node and of its associated + * begin/end pointers. + */ + if (max_end != node->max_end) { + node->max_end = max_end; + } else { + top = get_parent(node); + cmm_smp_wmb(); /* write into node before publish */ + /* make new branch visible to readers */ + if (rcu_rbtree_is_nil(rbtree, top)) + _CMM_STORE_SHARED(rbtree->root, node); + if (get_pos(node) == IS_LEFT) + _CMM_STORE_SHARED(top->_left, node); + else + _CMM_STORE_SHARED(top->_right, node); + goto end; + } + + /* Check for propagation stop */ + if (node == stop) + return; + + prev = node; + node = get_parent(node); + } while (!rcu_rbtree_is_nil(rbtree, node)); + + top = node; /* nil */ + cmm_smp_wmb(); /* write into node before publish */ + /* make new branch visible to readers */ + _CMM_STORE_SHARED(rbtree->root, prev); + +end: + if (!copy_parents) + return; + /* update children */ + node = orig_node; + do { + assert(!rcu_rbtree_is_nil(rbtree, node)); + set_parent(node->_left, get_decay(get_parent(node->_left)), IS_LEFT); + set_parent(node->_right, get_decay(get_parent(node->_right)), IS_RIGHT); + } while ((node = get_parent(node)) != top); +} + +/* + * We have to ensure these assumptions are correct for prev/next + * traversal: + * + * with x being a right child, the assumption that: + * get_parent(x)->_right == x + * or if x is a left child, the assumption that: + * get_parent(x)->_left == x + * + * This explains why we have to allocate a vc copy of the node for left_rotate, + * right_rotate and transplant operations. + * + * We always ensure that the right/left child and correct parent is set in the + * node copies *before* we reparent the children and make the upper-level point + * to the copy. + */ + +/* RCU: copy x and y, atomically point to new versions. GC old. 
*/ +/* Should be eventually followed by a cmm_smp_wmc() */ + +#ifdef RBTREE_RCU_SUPPORT_ROTATE_LEFT + +static +void left_rotate(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *y, *y_left; + + dbg_printf("left rotate %lx\n", (unsigned long) x->begin); + + y = x->_right; + y_left = y->_left; + + /* Now operate on new copy, decay old versions */ + x = dup_decay_node(rbtree, x); + y = dup_decay_node(rbtree, y); + y_left = dup_decay_node(rbtree, y_left); + + check_max_end(rbtree, get_parent(x)); + check_max_end(rbtree, x); + check_max_end(rbtree, y); + + /* Internal node modifications */ + set_parent(y, get_parent(x), get_pos(x)); + set_parent(x, y, IS_LEFT); + set_left(rbtree, y, x); + set_right(rbtree, x, y_left); + + if (!rcu_rbtree_is_nil(rbtree, y_left)) + set_parent(y_left, x, IS_RIGHT); + + /* + * We only changed the relative position of x and y wrt their + * children, and reparented y (but are keeping the same nodes in + * place, so its parent does not need to have end value + * recalculated). 
+ */ + x->max_end = calculate_node_max_end(rbtree, x); + y->max_end = calculate_node_max_end(rbtree, y); + + cmm_smp_wmb(); /* write into node before publish */ + + /* External references update (visible by readers) */ + if (rcu_rbtree_is_nil(rbtree, get_parent(y))) + _CMM_STORE_SHARED(rbtree->root, y); + else if (get_pos(y) == IS_LEFT) + _CMM_STORE_SHARED(get_parent(y)->_left, y); + else + _CMM_STORE_SHARED(get_parent(y)->_right, y); + + /* Point children to new copy (parent only used by updates/next/prev) */ + set_parent(x->_left, get_decay(get_parent(x->_left)), + get_pos(x->_left)); + set_parent(y->_right, get_decay(get_parent(y->_right)), + get_pos(y->_right)); + if (!rcu_rbtree_is_nil(rbtree, y_left)) { + set_parent(y_left->_right, + get_decay(get_parent(y_left->_right)), + get_pos(y_left->_right)); + set_parent(y_left->_left, + get_decay(get_parent(y_left->_left)), + get_pos(y_left->_left)); + } + + /* Sanity checks */ + assert(y == rbtree->root || get_parent(y)->_left == y + || get_parent(y)->_right == y); + assert(x == rbtree->root || get_parent(x)->_left == x + || get_parent(x)->_right == x); + assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x); + assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x); + assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y); + assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y); + assert(!is_decay(rbtree->root)); + assert(!is_decay(x)); + assert(!is_decay(y)); + assert(!is_decay(x->_right)); + assert(!is_decay(x->_left)); + assert(!is_decay(y->_right)); + assert(!is_decay(y->_left)); + check_max_end(rbtree, get_parent(y)); + check_max_end(rbtree, x); + check_max_end(rbtree, y); +} + +#else + +/* non-rcu version */ +static +void left_rotate(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *y; + + lock_test_mutex(); + y = x->_right; + x->_right = y->_left; + if (!rcu_rbtree_is_nil(rbtree, y->_left)) + 
set_parent(y->_left, x, IS_RIGHT); + set_parent(y, get_parent(x), get_pos(x)); + if (rcu_rbtree_is_nil(rbtree, get_parent(x))) + rbtree->root = y; + else if (x == get_parent(x)->_left) { + get_parent(x)->_left = y; + } else { + get_parent(x)->_right = y; + } + y->_left = x; + set_parent(x, y, IS_LEFT); + unlock_test_mutex(); +} + +#endif + +#ifdef RBTREE_RCU_SUPPORT_ROTATE_RIGHT +static +void right_rotate(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *y, *y_right; + + dbg_printf("right rotate %lx\n", (unsigned long) x->begin); + + y = x->_left; + y_right = y->_right; + + /* Now operate on new copy, decay old versions */ + x = dup_decay_node(rbtree, x); + y = dup_decay_node(rbtree, y); + y_right = dup_decay_node(rbtree, y_right); + + check_max_end(rbtree, get_parent(x)); + check_max_end(rbtree, x); + check_max_end(rbtree, y); + + /* Internal node modifications */ + set_parent(y, get_parent(x), get_pos(x)); + set_parent(x, y, IS_RIGHT); + set_right(rbtree, y, x); + set_left(rbtree, x, y_right); + + if (!rcu_rbtree_is_nil(rbtree, y_right)) + set_parent(y_right, x, IS_LEFT); + + /* + * We only changed the relative position of x and y wrt their + * children, and reparented y (but are keeping the same nodes in + * place, so its parent does not need to have end value + * recalculated). 
+ */ + x->max_end = calculate_node_max_end(rbtree, x); + y->max_end = calculate_node_max_end(rbtree, y); + + cmm_smp_wmb(); /* write into node before publish */ + + /* External references update (visible by readers) */ + if (rcu_rbtree_is_nil(rbtree, get_parent(y))) + _CMM_STORE_SHARED(rbtree->root, y); + else if (get_pos(y) == IS_RIGHT) + _CMM_STORE_SHARED(get_parent(y)->_right, y); + else + _CMM_STORE_SHARED(get_parent(y)->_left, y); + + /* Point children to new copy (parent only used by updates/next/prev) */ + set_parent(x->_right, get_decay(get_parent(x->_right)), + get_pos(x->_right)); + set_parent(y->_left, get_decay(get_parent(y->_left)), + get_pos(y->_left)); + if (!rcu_rbtree_is_nil(rbtree, y_right)) { + set_parent(y_right->_left, + get_decay(get_parent(y_right->_left)), + get_pos(y_right->_left)); + set_parent(y_right->_right, + get_decay(get_parent(y_right->_right)), + get_pos(y_right->_right)); + } + + /* Sanity checks */ + assert(y == rbtree->root || get_parent(y)->_right == y + || get_parent(y)->_left == y); + assert(x == rbtree->root || get_parent(x)->_right == x + || get_parent(x)->_left == x); + assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x); + assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x); + assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y); + assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y); + assert(!is_decay(rbtree->root)); + assert(!is_decay(x)); + assert(!is_decay(y)); + assert(!is_decay(x->_left)); + assert(!is_decay(x->_right)); + assert(!is_decay(y->_left)); + assert(!is_decay(y->_right)); + check_max_end(rbtree, x); + check_max_end(rbtree, y); + check_max_end(rbtree, get_parent(y)); +} + +#else + +/* non-rcu version */ +static +void right_rotate(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + struct rcu_rbtree_node *y; + + lock_test_mutex(); + y = x->_left; + x->_left = y->_right; + if (!rcu_rbtree_is_nil(rbtree, 
y->_right)) + set_parent(y->_right, x, IS_LEFT); + set_parent(y, get_parent(x), get_pos(x)); + if (rcu_rbtree_is_nil(rbtree, get_parent(x))) + rbtree->root = y; + else if (x == get_parent(x)->_right) { + get_parent(x)->_right = y; + } else { + get_parent(x)->_left = y; + } + y->_right = x; + set_parent(x, y, IS_RIGHT); + unlock_test_mutex(); +} + +#endif + +static void rcu_rbtree_insert_fixup(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *z) +{ + struct rcu_rbtree_node *y; + + dbg_printf("insert fixup %p\n", z->begin); + assert(!is_decay(rbtree->root)); + + while (get_parent(z)->color == COLOR_RED) { + if (get_parent(z) == get_parent(get_parent(z))->_left) { + y = get_parent(get_parent(z))->_right; + if (y->color == COLOR_RED) { + get_parent(z)->color = COLOR_BLACK; + y->color = COLOR_BLACK; + get_parent(get_parent(z))->color = COLOR_RED; + z = get_parent(get_parent(z)); + } else { + if (z == get_parent(z)->_right) { + z = get_parent(z); + left_rotate(rbtree, z); + z = get_decay(z); + assert(!is_decay(rbtree->root)); + } + get_parent(z)->color = COLOR_BLACK; + get_parent(get_parent(z))->color = COLOR_RED; + assert(!is_decay(z)); + assert(!is_decay(get_parent(z))); + assert(!is_decay(get_parent(get_parent(z)))); + right_rotate(rbtree, get_parent(get_parent(z))); + assert(!is_decay(z)); + assert(!is_decay(rbtree->root)); + } + } else { + y = get_parent(get_parent(z))->_left; + if (y->color == COLOR_RED) { + get_parent(z)->color = COLOR_BLACK; + y->color = COLOR_BLACK; + get_parent(get_parent(z))->color = COLOR_RED; + z = get_parent(get_parent(z)); + } else { + if (z == get_parent(z)->_left) { + z = get_parent(z); + right_rotate(rbtree, z); + z = get_decay(z); + assert(!is_decay(rbtree->root)); + } + get_parent(z)->color = COLOR_BLACK; + get_parent(get_parent(z))->color = COLOR_RED; + left_rotate(rbtree, get_parent(get_parent(z))); + assert(!is_decay(z)); + assert(!is_decay(rbtree->root)); + } + } + } + rbtree->root->color = COLOR_BLACK; +} + +/* + * 
rcu_rbtree_insert - Insert a node in the RCU rbtree + * + * Returns 0 on success, or < 0 on error. + */ +int rcu_rbtree_insert(struct rcu_rbtree *rbtree, + void *begin, void *end) +{ + struct rcu_rbtree_node *x, *y, *z; + + z = _rcu_rbtree_alloc_node(rbtree); /* node that will hold the [begin, end) range */ + if (!z) + return -ENOMEM; /* allocation failure is the only error returned here */ + z->begin = begin; + z->end = end; + + dbg_printf("insert %p\n", z->begin); + assert(!is_decay(rbtree->root)); + + y = make_nil(rbtree); /* y trails x: last non-nil node visited, nil if the tree is empty */ + x = rbtree->root; + while (!rcu_rbtree_is_nil(rbtree, x)) { + y = x; + if (rbtree->comp(z->begin, x->begin) < 0) + x = x->_left; /* z->begin compares lower than x->begin */ + else + x = x->_right; /* equal or greater begin keys go right */ + } + + z->_left = make_nil(rbtree); + z->_right = make_nil(rbtree); + z->color = COLOR_RED; /* inserted red; rcu_rbtree_insert_fixup() recolors below */ + z->decay_next = NULL; + z->max_end = z->end; /* both children are nil, so only z's own end counts */ + z->rbtree = rbtree; + + if (rcu_rbtree_is_nil(rbtree, y)) { + set_parent(z, y, IS_RIGHT); /* pos arbitrary for root node */ + /* + * Order stores to z (children/parents) before stores + * that will make it visible to the rest of the tree. + */ + cmm_smp_wmb(); + _CMM_STORE_SHARED(rbtree->root, z); + } else if (rbtree->comp(z->begin, y->begin) < 0) { + y = dup_decay_node(rbtree, y); /* NOTE(review): presumably a copy of y not yet visible to readers -- confirm */ + set_parent(z, y, IS_LEFT); + if (get_pos(z) == IS_LEFT) /* NOTE(review): pos was set just above by set_parent() -- confirm get_pos() reads it back */ + _CMM_STORE_SHARED(y->_left, z); + else + _CMM_STORE_SHARED(y->_right, z); + populate_node_end(rbtree, y, 1, NULL); /* copy_parents = 1, no stop node */ + } else { + y = dup_decay_node(rbtree, y); /* NOTE(review): presumably a copy of y not yet visible to readers -- confirm */ + set_parent(z, y, IS_RIGHT); + if (get_pos(z) == IS_LEFT) /* NOTE(review): pos was set just above by set_parent() -- confirm get_pos() reads it back */ + _CMM_STORE_SHARED(y->_left, z); + else + _CMM_STORE_SHARED(y->_right, z); + populate_node_end(rbtree, y, 1, NULL); /* copy_parents = 1, no stop node */ + } + rcu_rbtree_insert_fixup(rbtree, z); /* recolors/rotates while z's parent is red */ + /* + * Make sure to commit all _CMM_STORE_SHARED() for non-coherent caches. + */ + cmm_smp_wmc(); + show_tree(rbtree); + check_max_end(rbtree, z); + check_max_end(rbtree, y); + + return 0; +} + +/* + * Transplant v into u position.
+ */ + +#ifdef RBTREE_RCU_SUPPORT_TRANSPLANT + +static +void rcu_rbtree_transplant(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *u, + struct rcu_rbtree_node *v, + unsigned int copy_parents, + struct rcu_rbtree_node *stop) +{ + dbg_printf("transplant %p\n", v->begin); + + if (!rcu_rbtree_is_nil(rbtree, v)) + v = dup_decay_node(rbtree, v); + + if (rcu_rbtree_is_nil(rbtree, get_parent(u))) { + /* pos is arbitrary for root node */ + set_parent(v, get_parent(u), IS_RIGHT); + cmm_smp_wmb(); /* write into node before publish */ + _CMM_STORE_SHARED(rbtree->root, v); + } else { + struct rcu_rbtree_node *vp; + + vp = get_parent(u); + if (copy_parents) + vp = dup_decay_node(rbtree, vp); + set_parent(v, vp, get_pos(u)); + if (get_pos(v) == IS_LEFT) + _CMM_STORE_SHARED(vp->_left, v); + else + _CMM_STORE_SHARED(vp->_right, v); + populate_node_end(rbtree, vp, copy_parents, stop); + check_max_end(rbtree, vp); + } + + /* Point children to new copy (parent only used by updates/next/prev) */ + if (!rcu_rbtree_is_nil(rbtree, v)) { + set_parent(v->_right, get_decay(get_parent(v->_right)), + get_pos(v->_right)); + set_parent(v->_left, get_decay(get_parent(v->_left)), + get_pos(v->_left)); + } + assert(!is_decay(rbtree->root)); + check_max_end(rbtree, v); +} + +#else + +/* Non-RCU version */ +static +void rcu_rbtree_transplant(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *u, + struct rcu_rbtree_node *v, + unsigned int copy_parents, + struct rcu_rbtree_node *stop) +{ + dbg_printf("transplant %p\n", v->begin); + + lock_test_mutex(); + if (rcu_rbtree_is_nil(rbtree, get_parent(u))) + rbtree->root = v; + else if (u == get_parent(u)->_left) + get_parent(u)->_left = v; + else + get_parent(u)->_right = v; + set_parent(v, get_parent(u), get_pos(u)); + unlock_test_mutex(); +} + +#endif + +static void rcu_rbtree_remove_fixup(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x) +{ + dbg_printf("remove fixup %p\n", x->begin); + + while (x != rbtree->root && x->color == COLOR_BLACK) { 
+ assert(!is_decay(get_parent(x))); + assert(!is_decay(get_parent(x)->_left)); + if (x == get_parent(x)->_left) { + struct rcu_rbtree_node *w; + + w = get_parent(x)->_right; + + if (w->color == COLOR_RED) { + w->color = COLOR_BLACK; + get_parent(x)->color = COLOR_RED; + left_rotate(rbtree, get_parent(x)); + x = get_decay(x); + assert(!is_decay(rbtree->root)); + w = get_parent(x)->_right; + } + if (w->_left->color == COLOR_BLACK + && w->_right->color == COLOR_BLACK) { + w->color = COLOR_RED; + x = get_parent(x); + assert(!is_decay(rbtree->root)); + assert(!is_decay(x)); + } else { + if (w->_right->color == COLOR_BLACK) { + w->_left->color = COLOR_BLACK; + w->color = COLOR_RED; + right_rotate(rbtree, w); + assert(!is_decay(rbtree->root)); + x = get_decay(x); + w = get_parent(x)->_right; + } + w->color = get_parent(x)->color; + get_parent(x)->color = COLOR_BLACK; + w->_right->color = COLOR_BLACK; + left_rotate(rbtree, get_parent(x)); + assert(!is_decay(rbtree->root)); + x = rbtree->root; + } + } else { + struct rcu_rbtree_node *w; + + w = get_parent(x)->_left; + + if (w->color == COLOR_RED) { + w->color = COLOR_BLACK; + get_parent(x)->color = COLOR_RED; + right_rotate(rbtree, get_parent(x)); + assert(!is_decay(rbtree->root)); + x = get_decay(x); + w = get_parent(x)->_left; + } + if (w->_right->color == COLOR_BLACK + && w->_left->color == COLOR_BLACK) { + w->color = COLOR_RED; + x = get_parent(x); + assert(!is_decay(rbtree->root)); + assert(!is_decay(x)); + } else { + if (w->_left->color == COLOR_BLACK) { + w->_right->color = COLOR_BLACK; + w->color = COLOR_RED; + left_rotate(rbtree, w); + assert(!is_decay(rbtree->root)); + x = get_decay(x); + w = get_parent(x)->_left; + } + w->color = get_parent(x)->color; + get_parent(x)->color = COLOR_BLACK; + w->_left->color = COLOR_BLACK; + right_rotate(rbtree, get_parent(x)); + assert(!is_decay(rbtree->root)); + x = rbtree->root; + } + } + } + x->color = COLOR_BLACK; +} + +/* + * Delete z. 
All non-copied children left/right positions are unchanged. + */ +static +void rcu_rbtree_remove_nonil(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *z, + struct rcu_rbtree_node *y) +{ + struct rcu_rbtree_node *x; + + dbg_printf("remove nonil %p\n", z->begin); + show_tree(rbtree); + + assert(!is_decay(z)); + assert(!is_decay(y)); + assert(!is_decay(y->_right)); + assert(!is_decay(get_parent(y))); + x = y->_right; + assert(!is_decay(x)); + if (get_parent(y) == z) { + y = dup_decay_node(rbtree, y); + set_parent(x, y, get_pos(x)); /* parent for nil */ + /* y is z's right node */ + set_left(rbtree, y, z->_left); + y->max_end = calculate_node_max_end(rbtree, y); + rcu_rbtree_transplant(rbtree, z, y, 1, NULL); + } else { + struct rcu_rbtree_node *oy_right, *z_right; + + /* + * Need to make sure y is always visible by readers. + */ + y = rcu_rbtree_min_dup_decay(rbtree, z->_right, &z_right); + assert(!is_decay(y)); + assert(!is_decay(z)); + oy_right = y->_right; + + /* + * The max child begin of z_right does not change, because + * we're only changing its left children. + */ + y->_right = z_right; + set_parent(y->_right, y, IS_RIGHT); + assert(!is_decay(z->_left)); + y->_left = z->_left; + assert(!is_decay(oy_right)); + /* + * Transplant of oy_right to old y's location will only + * trigger a "end" value update of the already copied branch + * (which is not visible yet). We are transplanting + * oy_right as a left child of old y's parent, so the + * min values update propagated upward necessarily stops + * at z_right. 
+ */ + rcu_rbtree_transplant(rbtree, y, oy_right, 0, y); + y->max_end = calculate_node_max_end(rbtree, y); + rcu_rbtree_transplant(rbtree, z, y, 1, NULL); + /* Update children */ + (void) rcu_rbtree_min_update_decay(rbtree, y->_right); + } + y = get_decay(y); + assert(!is_decay(z)); + assert(!is_decay(z->_left)); + y->color = z->color; + set_parent(y->_left, y, IS_LEFT); + set_parent(y->_right, get_decay(get_parent(y->_right)), IS_RIGHT); + assert(!is_decay(y->_left)); + assert(!is_decay(y->_right)); +} + +int rcu_rbtree_remove(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *z) +{ + struct rcu_rbtree_node *x, *y; + unsigned int y_original_color; + + assert(!is_decay(rbtree->root)); + dbg_printf("remove %p\n", z->begin); + show_tree(rbtree); + + assert(!is_decay(z)); + y = z; + y_original_color = y->color; + + if (rcu_rbtree_is_nil(rbtree, z->_left)) { + rcu_rbtree_transplant(rbtree, z, z->_right, 1, NULL); + assert(!is_decay(z)); + x = get_decay(z->_right); + show_tree(rbtree); + } else if (rcu_rbtree_is_nil(rbtree, z->_right)) { + rcu_rbtree_transplant(rbtree, z, z->_left, 1, NULL); + assert(!is_decay(z)); + x = get_decay(z->_left); + show_tree(rbtree); + } else { + y = rcu_rbtree_min(rbtree, z->_right); + assert(!is_decay(y)); + y_original_color = y->color; + x = y->_right; + rcu_rbtree_remove_nonil(rbtree, z, y); + x = get_decay(x); + show_tree(rbtree); + } + if (y_original_color == COLOR_BLACK) + rcu_rbtree_remove_fixup(rbtree, x); + show_tree(rbtree); + check_max_end(rbtree, x); + check_max_end(rbtree, get_decay(y)); + /* + * Commit all _CMM_STORE_SHARED(). 
+ */ + cmm_smp_wmc(); - call_rcu(&z->head, _rcu_rbtree_free_node); ++ rbtree->call_rcu(&z->head, _rcu_rbtree_free_node); + + return 0; +} diff --cc urcu/rcurbtree.h index db99b33,0000000..4ee639e mode 100644,000000..100644 --- a/urcu/rcurbtree.h +++ b/urcu/rcurbtree.h @@@ -1,203 -1,0 +1,206 @@@ +#ifndef URCU_RBTREE_H +#define URCU_RBTREE_H + +/* + * urcu-rbtree.h + * + * Userspace RCU library - Red-Black Tree + * + * Copyright (c) 2010 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Implementation of RCU-adapted data structures and operations based on the RB + * tree algorithms found in chapter 12 of: + * + * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and + * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT + * Press, September 2009. + */ + +#include +#include + +#define COLOR_BLACK 0 +#define COLOR_RED 1 + +#define IS_LEFT 0 +#define IS_RIGHT 1 + +/* + * Node key comparison function. + * < 0 : a lower than b. + * > 0 : a greater than b. + * == 0 : a equals b. + */ +typedef int (*rcu_rbtree_comp)(void *a, void *b); + +/* + * Allocation and deletion functions. 
+ */ +typedef void *(*rcu_rbtree_alloc)(size_t size); +typedef void (*rcu_rbtree_free)(void *ptr); + +/* + * struct rcu_rbtree_node must be aligned at least on 2 bytes. + * Lowest bit reserved for position (left/right) in pointer to parent. + * + * Set "end" to "begin" + 1 to insert single-value nodes. + */ +struct rcu_rbtree_node { + /* useful node information returned by search */ + void *begin; /* Start of range (inclusive) */ + void *end; /* range end (exclusive) */ + /* internally reserved */ + void *max_end; /* max range end of node and children */ + /* parent uses low bit for "0 -> is left, 1 -> is right" */ + unsigned long parent; + /* _left and _right must be updated with set_left(), set_right() */ + struct rcu_rbtree_node *_left, *_right; + struct rcu_rbtree_node *decay_next; + struct rcu_rbtree *rbtree; + struct rcu_head head; /* For delayed free */ + unsigned int color:1; +}; + +struct rcu_rbtree { + struct rcu_rbtree_node *root; + struct rcu_rbtree_node nil_node; + rcu_rbtree_comp comp; + rcu_rbtree_alloc rballoc; + rcu_rbtree_free rbfree; ++ void (*call_rcu)(struct rcu_head *head, ++ void (*func)(struct rcu_head *head)); +}; + - #define DEFINE_RCU_RBTREE(x, _comp, _rballoc, _rbfree) \ ++#define DEFINE_RCU_RBTREE(x, _comp, _rballoc, _rbfree, _call_rcu) \ + struct rcu_rbtree x = \ + { \ + .comp = _comp, \ + .rballoc = _rballoc, \ + .rbfree = _rbfree, \ ++ .call_rcu = _call_rcu, \ + .nil_node = { \ + .color = COLOR_BLACK, \ + }, \ + .root = &(x).nil_node, \ + }; + +/* + * Each of the search primitives and the "prev"/"next" iterations must be called with + * the RCU read-side lock held. + * + * Insertion and removal must be protected by a mutex. Removal defers the + * reclamation of the removed node through the rbtree call_rcu callback; see + * each function below for its RCU read-side lock requirement. + */ + +/* + * Node insertion. Returns 0 on success. May fail with -ENOMEM. + * Caller must have exclusive write access and hold RCU read-side lock.
+ */ +int rcu_rbtree_insert(struct rcu_rbtree *rbtree, + void *begin, void *end); + +/* + * Remove node from tree. + * Must wait for a grace period after removal before performing deletion of the + * node. Note: it is illegal to re-use the same node pointer passed to "insert" + * also to "remove", because it may have been copied and garbage-collected since + * the insertion. A "search" for the key in the tree should be done to get + * "node". + * Returns 0 on success. May fail with -ENOMEM. + * + * Caller must have exclusive write access and hold RCU read-side lock + * across "search" and "remove". + */ +int rcu_rbtree_remove(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *node); + +/* RCU read-side */ + +/* + * For all these read-side primitives, RCU read-side lock must be held + * across the duration for which the search is done and the returned + * rbtree node is expected to be valid. + */ + +/* + * Search point in range starting from node x (node x is typically the + * rbtree root node). Returns nil node if not found. + */ +struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x, + void *point); + +/* + * Search range starting from node x (typically the rbtree root node). + * Returns the first range containing the range specified as parameters. + * Returns nil node if not found. + * + * Note: ranges in the rbtree should not partially overlap when this search + * range function is used. Otherwise, a range matching the low value (but not + * containing the high value) could hide a range that would match this query. + * It is OK for the ranges to overlap entirely though. + */ +struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x, + void *begin, void *end); + +/* + * Search exact range begin value starting from node x (typically rbtree + * root node). Returns nil node if not found.
+ * This function is only useful if you do not use the range feature at + * all and only care about range begin values. + */ +struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x, + void *key); + +/* + * Search for minimum node of the tree under node x. + */ +struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x); + +/* + * Search for maximum node of the tree under node x. + */ +struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x); + +/* + * Get next node after node x. + */ +struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x); + +/* + * Get previous node before node x. + */ +struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree, + struct rcu_rbtree_node *x); + +/* + * Sentinel (bottom nodes) test: non-zero when node is the rbtree nil_node. + * Don't care about p, left, right, pos and key values. Inline: header-only. + */ +static inline +int rcu_rbtree_is_nil(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node) +{ + return node == &rbtree->nil_node; +} + +#endif /* URCU_RBTREE_H */