RCU Judy Array (rcuja) implementation urcu/rcuja-simple-int
author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Wed, 24 Aug 2016 20:02:02 +0000 (16:02 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Mon, 30 Jan 2023 11:07:17 +0000 (06:07 -0500)
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
13 files changed:
doc/Makefile.am
doc/rcuja-design.txt [new file with mode: 0644]
include/Makefile.am
include/urcu/rcuja.h [new file with mode: 0644]
src/Makefile.am
src/rcuja-internal.h [new file with mode: 0644]
src/rcuja-shadow-nodes.c [new file with mode: 0644]
src/rcuja.c [new file with mode: 0644]
tests/regression/Makefile.am
tests/regression/test_urcu_ja.c [new file with mode: 0644]
tests/regression/test_urcu_ja.h [new file with mode: 0644]
tests/utils/Makefile.am
tests/utils/rcuja-testpop.c [new file with mode: 0644]

index cf2e902c7c0210e94408ee749837283b2a71b1b0..6db23b937ff3aae4c3800ab60b0de48a8c1d0e12 100644 (file)
@@ -3,4 +3,5 @@ SUBDIRS = examples
 dist_doc_DATA = rcu-api.md \
                cds-api.md \
                solaris-build.md \
-               uatomic-api.md
+               uatomic-api.md \
+               rcuja-design.txt
diff --git a/doc/rcuja-design.txt b/doc/rcuja-design.txt
new file mode 100644 (file)
index 0000000..09c9150
--- /dev/null
@@ -0,0 +1,448 @@
+RCU Judy Array Design
+Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+March 8, 2012
+
+Initial ideas based on the released Judy Shop Manual
+(http://judy.sourceforge.net/). Judy was invented by Doug Baskins and
+implemented by Hewlett-Packard.
+
+Thresholds and RCU-specific analysis are introduced in this document.
+
+Advantages of using Judy Array (compressed nodes) for RCU tree:
+- no rebalancing
+- no transplant
+- RCU-friendly!
+- favor cache-line alignment of structures
+
+Disadvantage:
+- updates that need to reallocate nodes are slower than, e.g. non-rcu
+  red-black trees.
+
+Choice: Using 256 entries intermediate nodes (index can be represented
+on 8 bits): 4 levels on 32-bit, 8 levels on 64-bit
+
+
+* Node types (from less dense node to most dense)
+
+
+- empty node:
+
+Parent pointer is NULL.
+
+
+- Type A: sequential search in value and pointer arrays
+
++ Add/removal just needs to update value and pointer array, single-entry
+  (non-RCU...). For RCU, we might need to update the entire node anyway.
+- Requires sequential search through all value array for lookup fail
+  test.
+
+Filled at 3 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+64-bit: 1 byte + 3 bytes + 4 bytes pad + 3*8 = 32 bytes
+
+-> up to this point on 64-bit, sequential lookup and pointer read fit in
+a 32-byte cache line.
+  - lookup fail&success: 1 cache-line.
+
+Filled at 6 entries max 32-bit, 7 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 6 bytes + 1 byte pad + 6*4bytes = 32 bytes
+64-bit: 1 byte + 7 bytes + 7*8 = 64 bytes
+
+-> up to this point on 32-bit, sequential lookup and pointer read fit in
+a 32-byte cache line.
+  - lookup fail&success: 1 cache-line.
+
+Filled at 12 entries max 32-bit, 14 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 12 bytes + 3 bytes pad + 12*4bytes = 64 bytes
+64-bit: 1 byte + 14 bytes + 1 byte pad + 14*8 = 128 bytes
+
+Filled at 25 entries max 32-bit, 28 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 25 bytes + 2 bytes pad + 25*4bytes = 128 bytes
+64-bit: 1 byte + 28 bytes + 3 bytes pad + 28*8 = 256 bytes
+
+---> up to this point, on both 32-bit and 64-bit, the sequential lookup
+in values array fits in a 32-byte cache line.
+  - lookup failure: 1 cache line.
+  - lookup success: 2 cache lines.
+
+The two below are listed for completeness' sake, but because they require
+2 32-byte cache lines for lookup, these are deemed inappropriate.
+
+Filled at 51 entries max 32-bit, 56 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 51 bytes + 51*4bytes = 256 bytes
+64-bit: 1 byte + 56 bytes + 7 bytes pad + 56*8 = 512 bytes
+
+Filled at 102 entries max 32-bit, 113 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 102 bytes + 1 byte pad + 102*4bytes = 512 bytes
+64-bit: 1 byte + 113 bytes + 6 bytes pad + 113*8 = 1024 bytes
+
+
+- Type B: pools of values and pointers arrays
+
+Pools of values and pointers arrays. Each pool values array is 32-bytes
+in size (so it fits in a L1 cacheline). Each pool begins with an 8-bit
+integer, which is the number of children in this pool, followed by an
+array of 8-bit values, padding, and an array of pointers. Values and
+pointer arrays are associated as in Type A.
+
+The entries of a node are associated to their respective pool based
+on their index position.
+
++ Allows lookup failure to use 1 32-byte cache-line only. (1 cacheline)
+  lookup success: 2 cache lines.
+
++ Allows in-place updates without reallocation, except when a pool is
+  full. (this was not possible with bitmap-based nodes)
+- If one pool exhausts its space, we need to increase the node size.
+  Therefore, for very dense populations, we will end up using the
+  pigeon-hole node type sooner, thus consuming more space.
+
+Pool configuration:
+
+Per pool, filled at 25 entries (32-bit), 28 entries (64-bit)
+32-bit: 1 byte + 25 bytes + 2 bytes pad + 25*4bytes = 128 bytes
+64-bit: 1 byte + 28 bytes + 3 bytes pad + 28*8 = 256 bytes
+
+Total up to 50 entries (32-bit), 56 entries (64-bit)
+2 pools: 32-bit = 256 bytes
+2 pools: 64-bit = 512 bytes
+
+Total up to 100 entries (32-bit), 112 entries (64-bit)
+4 pools: 32-bit = 512 bytes
+4 pools: 64-bit = 1024 bytes
+
+
+* Choice of pool configuration distribution:
+
+We have pools of either 2 or 4 linear arrays. Their total size is
+between 256 bytes (32-bit 2 arrays) and 1024 bytes (64-bit 4 arrays).
+
+Alignment on 256 bytes means that we can spare the 8 least significant
+bits of the pointers. Given that the type selection already uses 3 bits,
+we have 7 bits left.
+
+Alignment on 512 bytes -> 8 bits left.
+
+We can therefore encode which bit, or which two bits, are used as
+distribution selection. We can use this technique to reequilibrate pools
+if they become unbalanced (e.g. all children are within one of the two
+linear arrays).
+
+Assuming that finding the exact sub-pool usage maximum for any given
+distribution is NP complete (not proven).
+
+Taking into account sub-class size unbalance (tested programmatically by
+randomly taking N entries from 256, calculating the distribution for
+each bit (number of nodes for which bit is one/zero), and calculating
+the difference in number of nodes for each bit, choosing the minimum
+difference -- for millions of runs).
+
+We start from the "ideal" population size (with no unbalance), and do a
+fixed point to find the appropriate population size.
+
+tot entries    subclass extra items       largest linear array (stat. approx.)
+---------------------------------------------------------------------
+48 entries:      1 (98%)                  24+1=25 (target ~50/2=25)
+54 entries:      1 (97%)                  27+1=28 (target ~56/2=28)
+
+Note: there exist rare worse cases where the unbalance is larger, but
+they happen _very_ rarely. We still need to provide a fallback if the
+subclass does not fit, but it does not need to be efficient.
+
+
+For pool of size 4, we need to approximate what is the maximum unbalance
+we can get for choice of distributions grouped by pairs of bits.
+
+tot entries     subclass extra items   largest linear array (stat. approx.)
+---------------------------------------------------------------------
+92 entries:      2 (99%)                23+2=25  (target: ~100/4=25)
+104 entries:     2 (99%)                26+2=28  (target: ~112/4=28)
+
+
+Note: there exist rare worse cases where the unbalance is larger, but
+they happen _very_ rarely. We still need to provide a fallback if the
+subclass does not fit, but it does not need to be efficient.
+
+
+* Population "does not fit" and distribution fallback
+
+When adding a child to a distribution node, if the child does not fit,
+we recalculate the best distribution. If it does not fit in that
+distribution either, we need to expand the node type.
+
+When removing a child, if the node child count is brought to the number
+of entries expected to statistically fit in the lower order node, we try
+to shrink. However, if we notice that the distribution does not actually
+fit in that shrunk node, we abort the shrink operation. If shrink
+fails, we keep a counter of insertion/removal operations on the node
+before we allow the shrink to be attempted again.
+
+
+- Type C: pigeon-hole array
+
+Filled at 47.2%/48.8% or more (32-bit: 121 entries+, 64-bit: 125 entries+)
+Array of children node pointers. Pointers NULL if no child at index.
+32-bit: 4*256 = 1024 bytes
+64-bit: 8*256 = 2048 bytes
+
+
+* Analysis of the thresholds:
+
+Analysis of number of cache-lines touched for each node, per-node-type,
+depending on the number of children per node, as we increment the number
+of children from 0 to 256. Through this, we choose number of children
+thresholds at which it is worthwhile to use a different node type.
+
+Per node:
+
+- ALWAYS 1 cache line hit for lookup failure (all cases)
+
+32-bit
+
+- Unexisting
+
+0 children
+
+- Type A: sequential search in value and pointer arrays
+- 1 cache line hit for lookup success
+- 32 bytes storage
+
+up to 6 children
+
+- 2 cache line hit for lookup success
+- 64 bytes storage
+
+up to 12 children
+
+- 128 bytes storage
+
+up to 25 children
+
+- Type B: pool
+
+- 256 bytes storage
+
+up to 50 children
+
+- 512 bytes storage
+up to 100 children
+
+- Type C: pigeon-hole array
+- 1 cache line hit for lookup success
+- 1024 bytes storage
+
+up to 256 children
+
+
+64-bit
+
+- Unexisting
+
+0 children
+
+- Type A: sequential search in value and pointer arrays
+- 1 cache line hit for lookup success
+- 32 bytes storage
+
+up to 3 children
+
+- 2 cache line hit for lookup success
+- 64 bytes storage
+
+up to 7 children
+
+- 128 bytes storage
+
+up to 14 children
+
+- 256 bytes storage
+
+up to 28 children
+
+- Type B: pool
+
+- 512 bytes storage
+up to 56 children
+
+- 1024 bytes storage
+up to 112 children
+
+- Type C: pigeon-hole array
+- 1 cache line hit for lookup success
+- 2048 bytes storage
+
+up to 256 children
+
+
+* Analysis of node type encoding and node pointers:
+
+Lookups are _always_ from the top of the tree going down. This
+facilitates RCU replacement as we only keep track of pointers going
+downward.
+
+The type of a node is encoded in its parent's pointer to it. Need to
+reserve 3 least-significant bits.
+
+Types of children:
+
+enum child_type {
+       RCU_JA_LINEAR = 0,      /* Type A */
+                       /* 32-bit: 1 to 25 children, 8 to 128 bytes */
+                       /* 64-bit: 1 to 28 children, 16 to 256 bytes */
+       RCU_JA_POOL = 1,        /* Type B */
+                       /* 32-bit: 26 to 100 children, 256 to 512 bytes */
+                       /* 64-bit: 29 to 112 children, 512 to 1024 bytes */
+       RCU_JA_PIGEON = 2,      /* Type C */
+                       /* 32-bit: 101 to 256 children, 1024 bytes */
+                       /* 64-bit: 113 to 256 children, 2048 bytes */
+       /* Leaf nodes are implicit from their height in the tree */
+};
+
+If the entire pointer is NULL, the child is empty.
+
+
+* Lookup and Update Algorithms
+
+Let's propose a quite simple scheme that uses a mutex on nodes to manage
+update concurrency. It's certainly not optimal in terms of concurrency
+management within a node, but it has the advantage of being simple to
+implement and understand.
+
+We need to keep a count of the number of children nodes (for each node),
+to keep track of when the node type thresholds are reached. It would be
+important to put a hysteresis loop so we don't change between node
+types too often for a loop on add/removal of the same node.
+
+We acquire locks from child to parent, nested. We take all locks
+required to perform a given update in the tree (but no more) to keep it
+consistent with respect to number of children per node.
+
+If check for node being gc'd (always under node lock) fails, we simply
+need to release the lock and lookup the node again.
+
+
+- Leaf lookup
+
+rcu_read_lock()
+
+RCU-lookup each level of the tree. If level is not populated, fail.
+Until we reach the leaf node.
+
+rcu_read_unlock()
+
+
+- Leaf insertion
+
+A) Lookup
+
+rcu_read_lock()
+RCU-lookup insert position. Find location in tree where nodes are
+missing for this insertion. If leaf is already present, insert fails,
+releasing the rcu read lock.  The insert location consists of a parent
+node to which we want to attach a new node.
+
+B) Lock
+
+RCU-lookup parent node. Take the parent lock. If the parent needs to be
+reallocated to make room for this insertion, RCU-lookup parent-parent
+node and take the parent-parent lock.  For each lock taken, check if
+node is being gc'd. If gc'd, release lock, re-RCU-lookup this node, and
+retry.
+
+C) Create
+
+Construct the whole branch from the new topmost intermediate node down
+to the new leaf node we are inserting.
+
+D) Populate:
+  - If parent node reallocation is needed:
+     Reallocate the parent node, adding the new branch to it, and
+     increment its node count.
+     set gc flag in old nodes.
+     call_rcu free for all old nodes.
+     Populate new parent node with rcu_assign_pointer.
+  - Else:
+    Increment parent node count.
+    Use rcu_assign_pointer to populate this new branch into the parent
+    node.
+
+E) Locks
+
+Release parent and (if taken) parent-parent locks.
+rcu_read_unlock()
+
+
+- Leaf removal
+
+A) Lookup
+
+rcu_read_lock()
+RCU-lookup leaf to remove. If leaf is missing, fail and release rcu
+read lock.
+
+B) Lock
+
+RCU-lookup parent. Take the parent lock. If the parent needs to be
+reallocated because it would be too large for the decremented number of
+children, RCU-lookup parent-parent and take the parent-parent lock. Do
+so recursively until no node reallocation is needed, or until root is
+reached.
+
+For each lock taken, check if node is being gc'd. If gc'd, release lock,
+re-RCU-lookup this node, and retry.
+
+C) Create
+
+The branch (or portion of branch) consisting of taken locks necessarily
+has a simple node removal or update as operation to do on its top node.
+
+If the operation is a node removal, then, necessarily, the entire branch
+under the node removal operation will simply disappear. No node
+allocation is needed.
+
+Else, if the operation is a child node reallocation, the child node will
+necessarily do a node removal. So _its_ entire child branch will
+disappear. So reallocate this child node without the removed branch
+(remember to decrement its nr children count).
+
+D) Populate
+
+No reallocation case: simply set the appropriate child pointer in the
+topmost locked node to NULL. Decrement its nr children count.
+
+Reallocation case: set the child pointer in the topmost locked node to
+the newly allocated node.
+set old nodes gc flag.
+call_rcu free for all old nodes.
+
+E) Locks
+
+Release all locks.
+rcu_read_unlock()
+
+
+For the various types of nodes:
+
+- sequential search (type A)
+  - RCU replacement: mutex
+  - Entry update: mutex
+
+- pools of values and pointers arrays (type B)
+  - RCU replacement: mutex
+  - Entry update: mutex
+
+- pigeon hole array (type C)
+  - RCU replacement: mutex
+  - Entry update: mutex
index 2f68aa2446538d72a811e74262529b067b4bf7d7..c6d312ac28aa110cbc841e444736933c8b8eb0f3 100644 (file)
@@ -11,7 +11,7 @@ nobase_dist_include_HEADERS = urcu/compiler.h urcu/hlist.h urcu/list.h \
                urcu/static/urcu.h urcu/static/urcu-pointer.h \
                urcu/static/urcu-qsbr.h urcu/static/wfcqueue.h \
                urcu/static/wfqueue.h urcu/static/wfstack.h \
-               urcu/tls-compat.h urcu/debug.h
+               urcu/tls-compat.h urcu/debug.h urcu/rcuja.h
 
 # Don't distribute generated headers
 nobase_nodist_include_HEADERS = urcu/arch.h urcu/uatomic.h urcu/config.h
diff --git a/include/urcu/rcuja.h b/include/urcu/rcuja.h
new file mode 100644 (file)
index 0000000..82e272b
--- /dev/null
@@ -0,0 +1,234 @@
+#ifndef _URCU_RCUJA_H
+#define _URCU_RCUJA_H
+
+/*
+ * urcu/rcuja.h
+ *
+ * Userspace RCU library - RCU Judy Array
+ *
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Include this file _after_ including your URCU flavor.
+ */
+
+#include <urcu/compiler.h>
+#include <urcu-call-rcu.h>
+#include <urcu-flavor.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Duplicate nodes with the same key are chained into a singly-linked
+ * list. The last item of this list has a NULL next pointer.
+ */
+struct cds_ja_node {
+       struct cds_ja_node *next;
+};
+
+struct cds_ja;
+
+/*
+ * cds_ja_node_init - initialize a judy array node
+ * @node: the node to initialize.
+ *
+ * Currently a no-op: @node is left untouched. This function is kept to
+ * be eventually used for debugging purposes (detection of memory
+ * corruption).
+ */
+static inline
+void cds_ja_node_init(struct cds_ja_node *node)
+{
+       /*
+        * Explicitly consume the parameter so that users of this public
+        * header do not get an unused-parameter warning.
+        */
+       (void) node;
+}
+
+/*
+ * Note: key UINT64_MAX is reserved internally for iteration.
+ */
+
+/*
+ * cds_ja_lookup - look up by key.
+ * @ja: the Judy array.
+ * @key: key to look up.
+ *
+ * Returns the first node of a duplicate chain if a match is found, else
+ * returns NULL.
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_lookup(struct cds_ja *ja, uint64_t key);
+
+/*
+ * cds_ja_lookup_below_equal - look up first node with key <= @key.
+ * @ja: the Judy array.
+ * @key: key to look up.
+ * @result_key: key found.
+ *
+ * Returns the first node of a duplicate chain if a node is present in
+ * the tree which has a key below or equal to @key, else returns NULL.
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_lookup_below_equal(struct cds_ja *ja,
+               uint64_t key, uint64_t *result_key);
+
+/*
+ * cds_ja_lookup_above_equal - look up first node with key >= @key.
+ * @ja: the Judy array.
+ * @key: key to look up.
+ * @result_key: key found.
+ *
+ * Returns the first node of a duplicate chain if a node is present in
+ * the tree which has a key above or equal to @key, else returns NULL.
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_lookup_above_equal(struct cds_ja *ja,
+               uint64_t key, uint64_t *result_key);
+
+/*
+ * cds_ja_add - Add @node at @key, allowing duplicates.
+ * @ja: the Judy array.
+ * @key: key at which @node should be added.
+ * @node: node to add.
+ *
+ * Returns 0 on success, negative error value on error.
+ * A RCU read-side lock should be held across call to this function.
+ */
+int cds_ja_add(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node);
+
+/*
+ * cds_ja_add_unique - Add @node at @key, without duplicates.
+ * @ja: the Judy array.
+ * @key: key at which @node should be added.
+ * @node: node to add.
+ *
+ * Returns @node if successfully added, else returns the already
+ * existing node (acts as a RCU lookup).
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node);
+
+/*
+ * cds_ja_del - Remove @node at @key.
+ * @ja: the Judy array.
+ * @key: key at which @node is expected.
+ * @node: node to remove.
+ *
+ * Returns 0 on success, negative error value on error.
+ * A RCU read-side lock should be held across call to this function.
+ */
+int cds_ja_del(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node);
+
+struct cds_ja *_cds_ja_new(unsigned int key_bits,
+               const struct rcu_flavor_struct *flavor);
+
+/*
+ * cds_ja_new - Create a Judy array.
+ * @key_bits: Number of bits for key.
+ *
+ * Returns non-NULL pointer on success, else NULL on error. @key_bits
+ * needs to be multiple of 8, either: 8, 16, 24, 32, 40, 48, 56, or 64.
+ *
+ * Thin wrapper around _cds_ja_new() which passes the address of the
+ * rcu_flavor symbol. That symbol is not declared in this header, which
+ * is why the URCU flavor header needs to be included before this file.
+ */
+static inline
+struct cds_ja *cds_ja_new(unsigned int key_bits)
+{
+       return _cds_ja_new(key_bits, &rcu_flavor);
+}
+
+/*
+ * cds_ja_destroy - Destroy a Judy array.
+ * @ja: the Judy array.
+ *
+ * Returns 0 on success, negative error value on error.
+ * There should be no more concurrent add, delete, nor look-up performed
+ * on the Judy array while it is being destroyed (ensured by the caller).
+ * RCU read-side lock should _not_ be held when calling this function,
+ * however, QSBR threads need to be online.
+ */
+int cds_ja_destroy(struct cds_ja *ja);
+
+/*
+ * cds_ja_for_each_duplicate_rcu: Iterate through duplicates.
+ * @pos: struct cds_ja_node *, start of duplicate list and loop cursor.
+ *
+ * Iterate through duplicates returned by cds_ja_lookup*()
+ * This must be done while rcu_read_lock() is held.
+ * Receives a struct cds_ja_node * as parameter, which is used as start
+ * of duplicate list and loop cursor.
+ * _NOT_ safe against node removal within iteration.
+ */
+#define cds_ja_for_each_duplicate_rcu(pos)                                     \
+       for (; (pos) != NULL; (pos) = rcu_dereference((pos)->next))
+
+/*
+ * cds_ja_for_each_duplicate_safe_rcu: Iterate through duplicates.
+ * @pos: struct cds_ja_node *, start of duplicate list and loop cursor.
+ * @p: struct cds_ja_node *, temporary pointer to next.
+ *
+ * Iterate through duplicates returned by cds_ja_lookup*().
+ * Safe against node removal within iteration.
+ * This must be done while rcu_read_lock() is held.
+ */
+#define cds_ja_for_each_duplicate_safe_rcu(pos, p)                     \
+       for (; (pos) != NULL ?                                          \
+                       ((p) = rcu_dereference((pos)->next), 1) : 0;    \
+                       (pos) = (p))
+
+/*
+ * cds_ja_for_each_key_rcu: Iterate over all keys in ascending order.
+ * @ja: Judy array on which iteration should be done.
+ * @key: Key cursor, needs to be a uint64_t.
+ * @pos: struct cds_ja_node *, used as loop cursor.
+ *
+ * Iterate over all keys of a RCU Judy array (_not_ duplicates) in
+ * ascending order.
+ * This must be done while rcu_read_lock() is held.
+ * Safe against node removal during iteration.
+ */
+#define cds_ja_for_each_key_rcu(ja, key, pos)                          \
+       for ((key) = 0;                                                 \
+               ((key) != UINT64_MAX ?                                  \
+                       ((pos) = cds_ja_lookup_above_equal(ja, key, &(key))) : 0); \
+               (key)++)
+
+/*
+ * cds_ja_for_each_key_prev_rcu: Iterate over all keys in descending order.
+ * @ja: Judy array on which iteration should be done.
+ * @key: Key cursor, needs to be a uint64_t.
+ * @pos: struct cds_ja_node *, used as loop cursor.
+ *
+ * Iterate over all keys of a RCU Judy array (_not_ duplicates) in
+ * descending order.
+ * This must be done while rcu_read_lock() is held.
+ * Safe against node removal during iteration.
+ */
+#define cds_ja_for_each_key_prev_rcu(ja, key, pos)                     \
+       for ((key) = UINT64_MAX - 1;                                    \
+               ((key) != UINT64_MAX ?                                  \
+                       ((pos) = cds_ja_lookup_below_equal(ja, key, &(key))) : 0); \
+               (key)--)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_RCUJA_H */
index 6a2fd7a54a7646e8e496b1280c09d03766311f1b..0cd41e19c83bccce7416014b6d951f761b6fce94 100644 (file)
@@ -26,6 +26,8 @@ COMPAT+=compat_futex.c
 RCULFHASH = rculfhash.c rculfhash-mm-order.c rculfhash-mm-chunk.c \
                rculfhash-mm-mmap.c
 
+RCUJA = rcuja.c rcuja-shadow-nodes.c rcuja-internal.h
+
 lib_LTLIBRARIES = liburcu-common.la \
                liburcu.la liburcu-qsbr.la \
                liburcu-mb.la liburcu-signal.la liburcu-bp.la \
@@ -55,7 +57,7 @@ liburcu_bp_la_SOURCES = urcu-bp.c urcu-pointer.c $(COMPAT)
 liburcu_bp_la_LIBADD = liburcu-common.la
 
 liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c lfstack.c \
-       $(RCULFHASH) $(COMPAT)
+       $(RCULFHASH) $(RCUJA) $(COMPAT)
 liburcu_cds_la_LIBADD = liburcu-common.la
 
 pkgconfigdir = $(libdir)/pkgconfig
diff --git a/src/rcuja-internal.h b/src/rcuja-internal.h
new file mode 100644 (file)
index 0000000..6ffaeb1
--- /dev/null
@@ -0,0 +1,277 @@
+#ifndef _URCU_RCUJA_INTERNAL_H
+#define _URCU_RCUJA_INTERNAL_H
+
+/*
+ * rcuja/rcuja-internal.h
+ *
+ * Userspace RCU library - RCU Judy Array Internal Header
+ *
+ * Copyright (C) 2000 - 2002 Hewlett-Packard Company
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <urcu/rculfhash.h>
+
+/*
+ * Number of least significant pointer bits reserved to represent the
+ * child type.
+ */
+#define JA_TYPE_BITS   3
+#define JA_TYPE_MAX_NR (1UL << JA_TYPE_BITS)
+#define JA_TYPE_MASK   (JA_TYPE_MAX_NR - 1)
+#define JA_PTR_MASK    (~JA_TYPE_MASK)
+
+#define JA_ENTRY_PER_NODE      256UL
+#define JA_LOG2_BITS_PER_BYTE  3U
+#define JA_BITS_PER_BYTE       (1U << JA_LOG2_BITS_PER_BYTE)
+
+#define JA_POOL_1D_MASK        ((JA_BITS_PER_BYTE - 1) << JA_TYPE_BITS)
+#define JA_POOL_2D_MASK        (JA_POOL_1D_MASK << JA_LOG2_BITS_PER_BYTE)
+
+#define JA_MAX_DEPTH   9       /* Maximum depth, including leafs */
+
+/*
+ * Entry for NULL node is at index 8 of the table. It is never encoded
+ * in flags.
+ */
+#define NODE_INDEX_NULL                8
+
+/*
+ * Number of removals needed on a fallback node before we try to shrink
+ * it.
+ */
+#define JA_FALLBACK_REMOVAL_COUNT      8
+
+/* Never declared. Opaque type used to store flagged node pointers. */
+struct cds_ja_inode_flag;
+struct cds_ja_inode;
+
+/*
+ * Shadow node contains mutex and call_rcu head associated with a node.
+ */
+struct cds_ja_shadow_node {
+       struct cds_lfht_node ht_node;   /* hash table node */
+       struct cds_ja_inode_flag *node_flag;    /* reverse mapping and hash table key */
+       /*
+        * mutual exclusion on all nodes belonging to the same tree
+        * position (e.g. both nodes before and after recompaction
+        * use the same lock).
+        */
+       pthread_mutex_t *lock;
+       unsigned int nr_child;          /* number of children in node */
+       struct rcu_head head;           /* for deferred node and shadow node reclaim */
+       int fallback_removal_count;     /* removals left keeping fallback */
+       int level;                      /* level in the tree */
+       struct cds_ja *ja;              /* toplevel judy array */
+};
+
+struct cds_ja {
+       struct cds_ja_inode_flag *root;
+       unsigned int tree_depth;
+       uint64_t key_max;
+       /*
+        * We use a hash table to associate node keys to their
+        * respective shadow node. This helps reducing lookup hot path
+        * cache footprint, especially for very small nodes.
+        */
+       struct cds_lfht *ht;
+       unsigned long nr_fallback;      /* Number of fallback nodes used */
+
+       /* For debugging */
+       unsigned long node_fallback_count_distribution[JA_ENTRY_PER_NODE];
+       unsigned long nr_nodes_allocated, nr_nodes_freed;
+};
+
+/*
+ * Build a flagged node pointer: the address of @node with @type stored
+ * in its JA_TYPE_BITS least significant bits.
+ */
+static inline
+struct cds_ja_inode_flag *ja_node_flag(struct cds_ja_inode *node,
+               unsigned long type)
+{
+       unsigned long tagged;
+
+       assert(type < (1UL << JA_TYPE_BITS));
+       tagged = (unsigned long) node;
+       tagged |= type;
+       return (struct cds_ja_inode_flag *) tagged;
+}
+
+/*
+ * Build a flagged node pointer for a 1D pool: @type goes in the
+ * JA_TYPE_BITS least significant bits, and the selection bit index
+ * @bitsel is stored right above them.
+ */
+static inline
+struct cds_ja_inode_flag *ja_node_flag_pool_1d(struct cds_ja_inode *node,
+               unsigned long type, unsigned long bitsel)
+{
+       unsigned long tagged;
+
+       assert(type < (1UL << JA_TYPE_BITS));
+       assert(bitsel < JA_BITS_PER_BYTE);
+       tagged = (unsigned long) node;
+       tagged |= bitsel << JA_TYPE_BITS;
+       tagged |= type;
+       return (struct cds_ja_inode_flag *) tagged;
+}
+
+/*
+ * Build a flagged node pointer for a 2D pool: @type goes in the
+ * JA_TYPE_BITS least significant bits, @bitsel[1] right above them, and
+ * @bitsel[0] above @bitsel[1].
+ */
+static inline
+struct cds_ja_inode_flag *ja_node_flag_pool_2d(struct cds_ja_inode *node,
+               unsigned long type, unsigned int bitsel[2])
+{
+       unsigned long tagged;
+
+       assert(type < (1UL << JA_TYPE_BITS));
+       assert(bitsel[0] < JA_BITS_PER_BYTE);
+       assert(bitsel[1] < JA_BITS_PER_BYTE);
+       tagged = (unsigned long) node;
+       tagged |= bitsel[0] << (JA_TYPE_BITS + JA_LOG2_BITS_PER_BYTE);
+       tagged |= bitsel[1] << JA_TYPE_BITS;
+       tagged |= type;
+       return (struct cds_ja_inode_flag *) tagged;
+}
+
+/*
+ * Extract the 1D pool selection bit index stored in a flagged node
+ * pointer (the field located right above the type bits).
+ */
+static inline
+unsigned long ja_node_pool_1d_bitsel(struct cds_ja_inode_flag *node)
+{
+       unsigned long v = (unsigned long) node;
+
+       v &= JA_POOL_1D_MASK;
+       return v >> JA_TYPE_BITS;
+}
+
+/*
+ * Extract both 2D pool selection bit indexes stored in a flagged node
+ * pointer: @bits[0] receives the upper field, @bits[1] the lower one
+ * (located right above the type bits).
+ */
+static inline
+void ja_node_pool_2d_bitsel(struct cds_ja_inode_flag *node, unsigned long *bits)
+{
+       unsigned long v = (unsigned long) node;
+
+       bits[0] = (v & JA_POOL_2D_MASK) >> (JA_TYPE_BITS + JA_LOG2_BITS_PER_BYTE);
+       bits[1] = (v & JA_POOL_1D_MASK) >> JA_TYPE_BITS;
+}
+
+/* Hardcoded pool indexes for fast path */
+#define RCU_JA_POOL_IDX_5      5
+#define RCU_JA_POOL_IDX_6      6
+/*
+ * Strip the flag bits from a flagged node pointer and return the plain
+ * node address. Type index 5 additionally carries the 1D pool selection
+ * field, and type index 6 carries both the 1D and 2D fields; every
+ * other type index only carries the type bits. A NULL flagged pointer
+ * yields NULL.
+ */
+static inline
+struct cds_ja_inode *ja_node_ptr(struct cds_ja_inode_flag *node)
+{
+       unsigned long raw, flag_bits;
+
+       if (node == NULL)
+               return NULL;    /* RCU_JA_NULL */
+       raw = (unsigned long) node;
+
+       if ((raw & JA_TYPE_MASK) == RCU_JA_POOL_IDX_5) {
+               flag_bits = JA_POOL_1D_MASK | JA_TYPE_MASK;
+       } else if ((raw & JA_TYPE_MASK) == RCU_JA_POOL_IDX_6) {
+               flag_bits = JA_POOL_2D_MASK | JA_POOL_1D_MASK | JA_TYPE_MASK;
+       } else {
+               /* RCU_JA_LINEAR or RCU_JA_PIGEON */
+               flag_bits = JA_TYPE_MASK;
+       }
+       return (struct cds_ja_inode *) (raw & ~flag_bits);
+}
+
+__attribute__((visibility("protected")))
+unsigned long ja_node_type(struct cds_ja_inode_flag *node);
+
+__attribute__((visibility("protected")))
+void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node,
+               struct cds_ja_inode_flag *node_flag);
+
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_lookup_lock(struct cds_lfht *ht,
+               struct cds_ja_inode_flag *node_flag);
+
+__attribute__((visibility("protected")))
+void rcuja_shadow_unlock(struct cds_ja_shadow_node *shadow_node);
+
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_set(struct cds_lfht *ht,
+               struct cds_ja_inode_flag *new_node_flag,
+               struct cds_ja_shadow_node *inherit_from,
+               struct cds_ja *ja, int level);
+
+/* rcuja_shadow_clear flags */
+enum {
+       RCUJA_SHADOW_CLEAR_FREE_NODE = (1U << 0),
+       RCUJA_SHADOW_CLEAR_FREE_LOCK = (1U << 1),
+};
+
+__attribute__((visibility("protected")))
+int rcuja_shadow_clear(struct cds_lfht *ht,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               unsigned int flags);
+
+__attribute__((visibility("protected")))
+void rcuja_shadow_prune(struct cds_lfht *ht,
+               unsigned int flags);
+
+__attribute__((visibility("protected")))
+struct cds_lfht *rcuja_create_ht(const struct rcu_flavor_struct *flavor);
+
+__attribute__((visibility("protected")))
+int rcuja_delete_ht(struct cds_lfht *ht);
+
+__attribute__((visibility("protected")))
+void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node);
+
+/*
+ * Iterate through duplicates returned by cds_ja_lookup*()
+ * Receives a struct cds_ja_node * as parameter, which is used as start
+ * of duplicate list and loop cursor.
+ */
+#define cds_ja_for_each_duplicate(pos)                         \
+       for (; (pos) != NULL; (pos) = (pos)->next)
+
+//#define DEBUG
+//#define DEBUG_COUNTERS
+
+#ifdef __linux__
+#include <syscall.h>
+#endif
+
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+       return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+       return getpid();
+}
+#endif
+
+#ifdef DEBUG
+#define dbg_printf(fmt, args...)                               \
+       fprintf(stderr, "[debug rcuja %lu %s()@%s:%u] " fmt,    \
+               (unsigned long) gettid(), __func__,             \
+               __FILE__, __LINE__, ## args)
+
+#else
+#define dbg_printf(fmt, args...)                               \
+do {                                                           \
+       /* do nothing but check printf format */                \
+       if (0)                                                  \
+               fprintf(stderr, "[debug rcuja %lu %s()@%s:%u] " fmt, \
+                       (unsigned long) gettid(), __func__,     \
+                       __FILE__, __LINE__, ## args);           \
+} while (0)
+#endif
+
+#ifdef DEBUG_COUNTERS
+static inline
+int ja_debug_counters(void)
+{
+       return 1;
+}
+#else
+static inline
+int ja_debug_counters(void)
+{
+       return 0;
+}
+#endif
+
+#endif /* _URCU_RCUJA_INTERNAL_H */
diff --git a/src/rcuja-shadow-nodes.c b/src/rcuja-shadow-nodes.c
new file mode 100644 (file)
index 0000000..19cd143
--- /dev/null
@@ -0,0 +1,443 @@
+/*
+ * src/rcuja-shadow-nodes.c
+ *
+ * Userspace RCU library - RCU Judy Array Shadow Node Hash Table
+ *
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define _LGPL_SOURCE
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+#include <urcu/rcuja.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+#include <urcu-pointer.h>
+
+#include "rcuja-internal.h"
+
+static unsigned long hash_seed;
+
+/*
+ * Hash function
+ * Source: http://burtleburtle.net/bob/c/lookup3.c
+ * Originally Public Domain
+ */
+
+/*
+ * Rotate the 32-bit unsigned value @x left by @k bits. @k must be in
+ * the 1..31 range: k == 0 would shift right by 32 bits.
+ */
+#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+/*
+ * Mix the three 32-bit state words @a, @b and @c in place. The
+ * arguments are expanded several times and must be plain lvalues.
+ */
+#define mix(a, b, c) \
+do { \
+       a -= c; a ^= rot(c,  4); c += b; \
+       b -= a; b ^= rot(a,  6); a += c; \
+       c -= b; c ^= rot(b,  8); b += a; \
+       a -= c; a ^= rot(c, 16); c += b; \
+       b -= a; b ^= rot(a, 19); a += c; \
+       c -= b; c ^= rot(b,  4); b += a; \
+} while (0)
+
+/*
+ * Final mixing of the three 32-bit state words @a, @b and @c, applied
+ * once after the last key words were added. Unlike mix(), this expands
+ * to a bare brace block rather than a do/while (0) statement.
+ */
+#define final(a, b, c) \
+{ \
+       c ^= b; c -= rot(b, 14); \
+       a ^= c; a -= rot(c, 11); \
+       b ^= a; b -= rot(a, 25); \
+       c ^= b; c -= rot(b, 16); \
+       a ^= c; a -= rot(c,  4);\
+       b ^= a; b -= rot(a, 14); \
+       c ^= b; c -= rot(b, 24); \
+}
+
+/*
+ * Hash an array of @length 32-bit words starting at @k into a single
+ * 32-bit value, seeded with @initval.
+ *
+ * Words are consumed three at a time through mix() while more than
+ * three remain; the last one to three words go through final(). With
+ * @length == 0 the initial state word is returned without any mixing.
+ */
+static inline __attribute__((unused))
+uint32_t hash_u32(
+       const uint32_t *k,      /* the key, an array of uint32_t values */
+       size_t length,          /* the length of the key, in uint32_ts */
+       uint32_t initval)       /* the previous hash, or an arbitrary value */
+{
+       uint32_t a, b, c;
+
+       /* Set up the internal state */
+       a = b = c = 0xdeadbeef + (((uint32_t) length) << 2) + initval;
+
+       /*----------------------------------------- handle most of the key */
+       while (length > 3) {
+               a += k[0];
+               b += k[1];
+               c += k[2];
+               mix(a, b, c);
+               length -= 3;
+               k += 3;
+       }
+
+       /*----------------------------------- handle the last 3 uint32_t's */
+       switch (length) {       /* all the case statements fall through */
+       case 3: c += k[2];      /* fall through */
+       case 2: b += k[1];      /* fall through */
+       case 1: a += k[0];
+               final(a, b, c);
+               /* fall through */
+       case 0:                 /* case 0: nothing left to add */
+               break;
+       }
+       /*---------------------------------------------- report the result */
+       return c;
+}
+
+/*
+ * Hash an array of @length 32-bit words starting at @k into two 32-bit
+ * values.
+ *
+ * @pc and @pb are in/out parameters: on entry they carry the seeds, on
+ * return *pc holds the primary hash and *pb the secondary hash. The
+ * mixing schedule is the same as hash_u32(), with *pb additionally
+ * added to the initial value of the third state word.
+ */
+static inline
+void hashword2(
+       const uint32_t *k,      /* the key, an array of uint32_t values */
+       size_t length,          /* the length of the key, in uint32_ts */
+       uint32_t *pc,           /* IN: seed OUT: primary hash value */
+       uint32_t *pb)           /* IN: more seed OUT: secondary hash value */
+{
+       uint32_t a, b, c;
+
+       /* Set up the internal state */
+       a = b = c = 0xdeadbeef + ((uint32_t) (length << 2)) + *pc;
+       c += *pb;
+
+       /*----------------------------------------- handle most of the key */
+       while (length > 3) {
+               a += k[0];
+               b += k[1];
+               c += k[2];
+               mix(a, b, c);
+               length -= 3;
+               k += 3;
+       }
+
+       /*----------------------------------- handle the last 3 uint32_t's */
+       switch (length) {       /* all the case statements fall through */
+       case 3: c += k[2];      /* fall through */
+       case 2: b += k[1];      /* fall through */
+       case 1: a += k[0];
+               final(a, b, c);
+               /* fall through */
+       case 0:                 /* case 0: nothing left to add */
+               break;
+       }
+       /*---------------------------------------------- report the result */
+       *pc = c;
+       *pb = b;
+}
+
+/*
+ * hash_pointer(): hash the value of pointer @_key (the pointed-to data
+ * is never read), seeded with @seed.
+ *
+ * When CAA_BITS_PER_LONG is 32, the pointer is hashed as one 32-bit
+ * word with hash_u32(). Otherwise the pointer and the seed are each
+ * split into two 32-bit halves: the seed halves initialize
+ * hashword2(), and the two resulting hash words are returned
+ * recombined into one 64-bit value.
+ */
+#if (CAA_BITS_PER_LONG == 32)
+static
+unsigned long hash_pointer(const void *_key, unsigned long seed)
+{
+       unsigned int key = (unsigned int) _key;
+
+       return hash_u32(&key, 1, seed);
+}
+#else
+static
+unsigned long hash_pointer(const void *_key, unsigned long seed)
+{
+       union {
+               uint64_t v64;
+               uint32_t v32[2];
+       } v;
+       union {
+               uint64_t v64;
+               uint32_t v32[2];
+       } key;
+
+       v.v64 = (uint64_t) seed;
+       key.v64 = (uint64_t) _key;
+       /* v.v32[] is both the seed input and the hash output. */
+       hashword2(key.v32, 2, &v.v32[0], &v.v32[1]);
+       return v.v64;
+}
+#endif
+
+/*
+ * Hash table match callback: @node matches when the shadow node that
+ * embeds it has a node_flag equal to @key. Returns non-zero on match.
+ */
+static
+int match_pointer(struct cds_lfht_node *node, const void *key)
+{
+       const struct cds_ja_shadow_node *candidate;
+
+       candidate = caa_container_of(node, struct cds_ja_shadow_node, ht_node);
+       return candidate->node_flag == key;
+}
+
+/*
+ * Look up the shadow node associated with @node_flag in @ht and return
+ * it with its mutex held.
+ *
+ * Returns NULL when no shadow node is found, or when the node found
+ * was already removed from the hash table by the time its mutex was
+ * acquired (the mutex is then released before returning). On success
+ * the caller holds the mutex and releases it with
+ * rcuja_shadow_unlock().
+ */
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_lookup_lock(struct cds_lfht *ht,
+               struct cds_ja_inode_flag *node_flag)
+{
+       struct cds_lfht_iter iter;
+       struct cds_lfht_node *lookup_node;
+       struct cds_ja_shadow_node *shadow_node;
+       const struct rcu_flavor_struct *flavor;
+       int ret;
+
+       flavor = cds_lfht_rcu_flavor(ht);
+       flavor->read_lock();
+       cds_lfht_lookup(ht, hash_pointer(node_flag, hash_seed),
+                       match_pointer, node_flag, &iter);
+
+       lookup_node = cds_lfht_iter_get_node(&iter);
+       if (!lookup_node) {
+               shadow_node = NULL;
+               goto rcu_unlock;
+       }
+       shadow_node = caa_container_of(lookup_node,
+                       struct cds_ja_shadow_node, ht_node);
+       dbg_printf("Lock %p\n", shadow_node->lock);
+       ret = pthread_mutex_lock(shadow_node->lock);
+       assert(!ret);
+       /*
+        * Re-check with the mutex held: rcuja_shadow_clear() removes the
+        * node from the hash table while holding this same mutex.
+        */
+       if (cds_lfht_is_node_deleted(lookup_node)) {
+               ret = pthread_mutex_unlock(shadow_node->lock);
+               assert(!ret);
+               shadow_node = NULL;
+       }
+rcu_unlock:
+       flavor->read_unlock();
+       return shadow_node;
+}
+
+/*
+ * Release the mutex of @shadow_node, as acquired by
+ * rcuja_shadow_lookup_lock(). Failure to unlock trips an assertion.
+ */
+__attribute__((visibility("protected")))
+void rcuja_shadow_unlock(struct cds_ja_shadow_node *shadow_node)
+{
+       pthread_mutex_t *lock = shadow_node->lock;
+       int err;
+
+       dbg_printf("Unlock %p\n", lock);
+       err = pthread_mutex_unlock(lock);
+       assert(!err);
+}
+
+/*
+ * Allocate a shadow node for @new_node_flag and publish it in @ht.
+ *
+ * @inherit_from: when non-NULL, the new shadow node shares the lock
+ *     and copies the level of that shadow node (lock inherited from
+ *     the previous node at this position); @level is then ignored.
+ *     When NULL, a new mutex is allocated and @level is used.
+ * @ja: Judy array recorded in the shadow node.
+ *
+ * Returns the new shadow node, or NULL on allocation failure, on
+ * mutex initialization failure, or when a shadow node already exists
+ * for @new_node_flag. On every failure path, everything allocated here
+ * is released, including a freshly created mutex; an inherited lock is
+ * never freed by this function.
+ */
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_set(struct cds_lfht *ht,
+               struct cds_ja_inode_flag *new_node_flag,
+               struct cds_ja_shadow_node *inherit_from,
+               struct cds_ja *ja, int level)
+{
+       struct cds_ja_shadow_node *shadow_node;
+       struct cds_lfht_node *ret_node;
+       const struct rcu_flavor_struct *flavor;
+
+       shadow_node = calloc(1, sizeof(*shadow_node));
+       if (!shadow_node)
+               return NULL;
+
+       shadow_node->node_flag = new_node_flag;
+       shadow_node->ja = ja;
+       /*
+        * Lock can be inherited from previous node at this position.
+        */
+       if (inherit_from) {
+               shadow_node->lock = inherit_from->lock;
+               shadow_node->level = inherit_from->level;
+       } else {
+               shadow_node->lock = calloc(1, sizeof(*shadow_node->lock));
+               if (!shadow_node->lock)
+                       goto error_free_node;
+               if (pthread_mutex_init(shadow_node->lock, NULL))
+                       goto error_free_lock;
+               shadow_node->level = level;
+       }
+
+       flavor = cds_lfht_rcu_flavor(ht);
+       flavor->read_lock();
+       ret_node = cds_lfht_add_unique(ht,
+                       hash_pointer(new_node_flag, hash_seed),
+                       match_pointer,
+                       new_node_flag,
+                       &shadow_node->ht_node);
+       flavor->read_unlock();
+
+       /* Another shadow node already tracks this node: ours is unused. */
+       if (ret_node != &shadow_node->ht_node)
+               goto error_unpublished;
+       return shadow_node;
+
+error_unpublished:
+       /* The shadow node was never visible: its own lock is private. */
+       if (inherit_from)
+               goto error_free_node;
+       (void) pthread_mutex_destroy(shadow_node->lock);
+error_free_lock:
+       free(shadow_node->lock);
+error_free_node:
+       free(shadow_node);
+       return NULL;
+}
+
+/* Deferred-free callback: release the shadow node embedding @head. */
+static
+void free_shadow_node(struct rcu_head *head)
+{
+       free(caa_container_of(head, struct cds_ja_shadow_node, head));
+}
+
+/*
+ * Deferred-free callback: release the JA node tracked by the shadow
+ * node embedding @head, then the shadow node itself.
+ */
+static
+void free_shadow_node_and_node(struct rcu_head *head)
+{
+       struct cds_ja_shadow_node *sn;
+
+       sn = caa_container_of(head, struct cds_ja_shadow_node, head);
+       free_cds_ja_node(sn->ja, ja_node_ptr(sn->node_flag));
+       free(sn);
+}
+
+/*
+ * Deferred-free callback: release the lock of the shadow node
+ * embedding @head, then the shadow node itself.
+ */
+static
+void free_shadow_node_and_lock(struct rcu_head *head)
+{
+       struct cds_ja_shadow_node *sn;
+
+       sn = caa_container_of(head, struct cds_ja_shadow_node, head);
+       free(sn->lock);
+       free(sn);
+}
+
+/*
+ * Deferred-free callback: release the JA node, the lock and the shadow
+ * node embedding @head. Asserts that the shadow node level is non-zero.
+ */
+static
+void free_shadow_node_and_node_and_lock(struct rcu_head *head)
+{
+       struct cds_ja_shadow_node *sn;
+
+       sn = caa_container_of(head, struct cds_ja_shadow_node, head);
+       assert(sn->level);
+       free_cds_ja_node(sn->ja, ja_node_ptr(sn->node_flag));
+       free(sn->lock);
+       free(sn);
+}
+
+/*
+ * Remove the shadow node associated with @node_flag from @ht and queue
+ * it for deferred reclaim through the flavor's update_call_rcu().
+ *
+ * @shadow_node: when non-NULL, the mutex is neither taken nor released
+ *     here (presumably the caller already holds it -- confirm against
+ *     callers). When NULL, the shadow node is taken from the lookup
+ *     result and its mutex is held across the removal.
+ * @flags: RCUJA_SHADOW_CLEAR_FREE_NODE also frees the tracked JA node,
+ *     but only when the shadow node level is non-zero;
+ *     RCUJA_SHADOW_CLEAR_FREE_LOCK also frees the mutex.
+ *
+ * Returns -ENOENT when no shadow node is found, otherwise the return
+ * value of cds_lfht_del() (non-zero means nothing was queued).
+ */
+__attribute__((visibility("protected")))
+int rcuja_shadow_clear(struct cds_lfht *ht,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               unsigned int flags)
+{
+       struct cds_lfht_iter iter;
+       struct cds_lfht_node *lookup_node;
+       const struct rcu_flavor_struct *flavor;
+       int ret, lockret;
+       int lookup_shadow = 0;
+
+       flavor = cds_lfht_rcu_flavor(ht);
+       flavor->read_lock();
+
+       cds_lfht_lookup(ht, hash_pointer(node_flag, hash_seed),
+                       match_pointer, node_flag, &iter);
+       lookup_node = cds_lfht_iter_get_node(&iter);
+       if (!lookup_node) {
+               ret = -ENOENT;
+               goto rcu_unlock;
+       }
+
+       if (!shadow_node) {
+               shadow_node = caa_container_of(lookup_node,
+                               struct cds_ja_shadow_node, ht_node);
+               lockret = pthread_mutex_lock(shadow_node->lock);
+               assert(!lockret);
+               /* Remember that the mutex was taken here. */
+               lookup_shadow = 1;
+       }
+
+       /*
+        * Holding the mutex across deletion, and by also re-checking if
+        * the node is deleted with mutex held at lookup ensure that we
+        * don't let RCU JA use a node being removed.
+        */
+       ret = cds_lfht_del(ht, lookup_node);
+       if (ret)
+               goto unlock;
+       /* Pick the reclaim callback matching what @flags asks to free. */
+       if ((flags & RCUJA_SHADOW_CLEAR_FREE_NODE)
+                       && shadow_node->level) {
+               if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+                       flavor->update_call_rcu(&shadow_node->head,
+                               free_shadow_node_and_node_and_lock);
+               } else {
+                       flavor->update_call_rcu(&shadow_node->head,
+                               free_shadow_node_and_node);
+               }
+       } else {
+               if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+                       flavor->update_call_rcu(&shadow_node->head,
+                               free_shadow_node_and_lock);
+               } else {
+                       flavor->update_call_rcu(&shadow_node->head,
+                               free_shadow_node);
+               }
+       }
+unlock:
+       /* Only release the mutex if it was acquired in this function. */
+       if (lookup_shadow) {
+               lockret = pthread_mutex_unlock(shadow_node->lock);
+               assert(!lockret);
+       }
+rcu_unlock:
+       flavor->read_unlock();
+
+       return ret;
+}
+
+/*
+ * Delete all shadow nodes from the hash table. Depending on @flags,
+ * the JA node tracked by each shadow node
+ * (RCUJA_SHADOW_CLEAR_FREE_NODE, only for shadow nodes with a non-zero
+ * level) and its associated lock (RCUJA_SHADOW_CLEAR_FREE_LOCK) are
+ * freed as well. Every free is deferred through the flavor's
+ * update_call_rcu(). Shadow nodes for which cds_lfht_del() fails are
+ * left alone.
+ */
+__attribute__((visibility("protected")))
+void rcuja_shadow_prune(struct cds_lfht *ht,
+               unsigned int flags)
+{
+       const struct rcu_flavor_struct *flavor;
+       struct cds_ja_shadow_node *shadow_node;
+       struct cds_lfht_iter iter;
+       int ret, lockret;
+
+       flavor = cds_lfht_rcu_flavor(ht);
+       /*
+        * Read-side lock is needed to ensure hash table node existence
+        * vs concurrent resize.
+        */
+       flavor->read_lock();
+       cds_lfht_for_each_entry(ht, &iter, shadow_node, ht_node) {
+               lockret = pthread_mutex_lock(shadow_node->lock);
+               assert(!lockret);
+
+               ret = cds_lfht_del(ht, &shadow_node->ht_node);
+               if (ret)
+                       goto unlock;
+               /* Same callback selection as rcuja_shadow_clear(). */
+               if ((flags & RCUJA_SHADOW_CLEAR_FREE_NODE)
+                               && shadow_node->level) {
+                       if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+                               flavor->update_call_rcu(&shadow_node->head,
+                                       free_shadow_node_and_node_and_lock);
+                       } else {
+                               flavor->update_call_rcu(&shadow_node->head,
+                                       free_shadow_node_and_node);
+                       }
+               } else {
+                       if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+                               flavor->update_call_rcu(&shadow_node->head,
+                                       free_shadow_node_and_lock);
+                       } else {
+                               flavor->update_call_rcu(&shadow_node->head,
+                                       free_shadow_node);
+                       }
+               }
+       unlock:
+               /*
+                * The shadow node and its lock are still dereferenced
+                * here after being queued for reclaim; this relies on
+                * the callback not running before the read-side
+                * critical section ends.
+                */
+               lockret = pthread_mutex_unlock(shadow_node->lock);
+               assert(!lockret);
+       }
+       flavor->read_unlock();
+}
+
+/*
+ * Create the shadow node hash table for the given RCU @flavor, with
+ * automatic resize and accounting enabled. Returns the value of
+ * _cds_lfht_new().
+ */
+__attribute__((visibility("protected")))
+struct cds_lfht *rcuja_create_ht(const struct rcu_flavor_struct *flavor)
+{
+       const int ht_flags = CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING;
+
+       return _cds_lfht_new(1, 1, 0, ht_flags, NULL, flavor, NULL);
+}
+
+/*
+ * Destroy the shadow node hash table @ht. Returns the value of
+ * cds_lfht_destroy().
+ */
+__attribute__((visibility("protected")))
+int rcuja_delete_ht(struct cds_lfht *ht)
+{
+       int ret;
+
+       ret = cds_lfht_destroy(ht, NULL);
+       return ret;
+}
+
+/*
+ * Constructor: seed the shadow node hash function with the current
+ * time, so the seed differs between runs started at different seconds.
+ */
+__attribute__((constructor))
+void rcuja_ht_init(void)
+{
+       hash_seed = (unsigned long) time(NULL);
+}
diff --git a/src/rcuja.c b/src/rcuja.c
new file mode 100644 (file)
index 0000000..92d8d9b
--- /dev/null
@@ -0,0 +1,2757 @@
+/*
+ * src/rcuja.c
+ *
+ * Userspace RCU library - RCU Judy Array
+ *
+ * Copyright (C) 2000 - 2002 Hewlett-Packard Company
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define _LGPL_SOURCE
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <assert.h>
+#include <urcu/rcuja.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+#include <urcu-pointer.h>
+#include <urcu/uatomic.h>
+
+#include "rcuja-internal.h"
+
+/*
+ * abs_int(): absolute value of @a after conversion to int.
+ * @a is expanded several times and must not have side effects.
+ * The guard tests abs_int itself, so the macro is always available
+ * whether or not an abs macro exists.
+ */
+#ifndef abs_int
+#define abs_int(a)     ((int) (a) > 0 ? (int) (a) : -((int) (a)))
+#endif
+
+/*
+ * Classes of node layout. Several entries of ja_types[] can share the
+ * same class with different sizes.
+ * NOTE(review): the child ranges quoted below for the pool and pigeon
+ * classes do not match the min_child/max_child values of ja_types[]
+ * (e.g. pool max_child is 92 on 32-bit and 104 on 64-bit); the table
+ * is what the code uses.
+ */
+enum cds_ja_type_class {
+       RCU_JA_LINEAR = 0,      /* Type A */
+                       /* 32-bit: 1 to 25 children, 8 to 128 bytes */
+                       /* 64-bit: 1 to 28 children, 16 to 256 bytes */
+       RCU_JA_POOL = 1,        /* Type B */
+                       /* 32-bit: 26 to 100 children, 256 to 512 bytes */
+                       /* 64-bit: 29 to 112 children, 512 to 1024 bytes */
+       RCU_JA_PIGEON = 2,      /* Type C */
+                       /* 32-bit: 101 to 256 children, 1024 bytes */
+                       /* 64-bit: 113 to 256 children, 2048 bytes */
+       /* Leaf nodes are implicit from their height in the tree */
+       RCU_JA_NR_TYPES,
+
+       RCU_JA_NULL,    /* not an encoded type, but keeps code regular */
+};
+
+/*
+ * Description of one node type: its layout class, the range of child
+ * counts it is meant to hold, and its allocation size. One entry of
+ * ja_types[] exists per node type. The pool fields are only set for
+ * the RCU_JA_POOL entries of that table.
+ */
+struct cds_ja_type {
+       enum cds_ja_type_class type_class;
+       uint16_t min_child;             /* minimum number of children: 1 to 256 */
+       uint16_t max_child;             /* maximum number of children: 1 to 256 */
+       uint16_t max_linear_child;      /* per-pool max nr. children: 1 to 256 */
+       uint16_t order;                 /* node size is (1 << order), in bytes */
+       uint16_t nr_pool_order;         /* number of pools */
+       uint16_t pool_size_order;       /* pool size */
+};
+
+/*
+ * Iteration on the array to find the right node size for the number of
+ * children stops when it reaches .max_child == 256 (this is the largest
+ * possible node size, which contains 256 children).
+ * The min_child overlaps with the previous max_child to provide an
+ * hysteresis loop to reallocation for patterns of cyclic add/removal
+ * within the same node.
+ * The node type index, i.e. the index within the following arrays, is
+ * represented on 3 bits. It identifies the node type, min/max number
+ * of children, and the size order.
+ * The max_child values for the RCU_JA_POOL below result from
+ * statistical approximation: over million populations, the max_child
+ * covers between 97% and 99% of the populations generated. Therefore, a
+ * fallback should exist to cover the rare extreme population unbalance
+ * cases, but it will not have a major impact on speed nor space
+ * consumption, since those are rare cases.
+ */
+
+/*
+ * Node type tables, one variant per pointer size. For each type index
+ * the enums give the maximum number of children (max_child), the
+ * number of children a single linear array can hold
+ * (max_linear_child), and, for pool types, the order of the number of
+ * pools. ja_types[] gathers those values along with the node size
+ * order. Entries 0-4 are linear, 5-6 are pools, 7 is pigeon and 8 is
+ * the NULL type.
+ */
+#if (CAA_BITS_PER_LONG < 64)
+/* 32-bit pointers */
+enum {
+       ja_type_0_max_child = 1,
+       ja_type_1_max_child = 3,
+       ja_type_2_max_child = 6,
+       ja_type_3_max_child = 12,
+       ja_type_4_max_child = 25,
+       ja_type_5_max_child = 48,
+       ja_type_6_max_child = 92,
+       ja_type_7_max_child = 256,
+       ja_type_8_max_child = 0,        /* NULL */
+};
+
+enum {
+       ja_type_0_max_linear_child = 1,
+       ja_type_1_max_linear_child = 3,
+       ja_type_2_max_linear_child = 6,
+       ja_type_3_max_linear_child = 12,
+       ja_type_4_max_linear_child = 25,
+       ja_type_5_max_linear_child = 24,
+       ja_type_6_max_linear_child = 23,
+};
+
+enum {
+       ja_type_5_nr_pool_order = 1,
+       ja_type_6_nr_pool_order = 2,
+};
+
+const struct cds_ja_type ja_types[] = {
+       { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 3, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 4, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 5, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 4, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 6, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 7, },
+
+       /* Pools may fill sooner than max_child */
+       /* This pool is hardcoded at index 5. See ja_node_ptr(). */
+       { .type_class = RCU_JA_POOL, .min_child = 20, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 8, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 7, },
+       /* This pool is hardcoded at index 6. See ja_node_ptr(). */
+       { .type_class = RCU_JA_POOL, .min_child = 45, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 9, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 7, },
+
+       /*
+        * Upon node removal below min_child, if child pool is filled
+        * beyond capacity, we roll back to pigeon.
+        */
+       { .type_class = RCU_JA_PIGEON, .min_child = 83, .max_child = ja_type_7_max_child, .order = 10, },
+
+       { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
+};
+#else /* !(CAA_BITS_PER_LONG < 64) */
+/* 64-bit pointers */
+enum {
+       ja_type_0_max_child = 1,
+       ja_type_1_max_child = 3,
+       ja_type_2_max_child = 7,
+       ja_type_3_max_child = 14,
+       ja_type_4_max_child = 28,
+       ja_type_5_max_child = 54,
+       ja_type_6_max_child = 104,
+       ja_type_7_max_child = 256,
+       /*
+        * NOTE(review): entry 8 is the RCU_JA_NULL type; the 32-bit
+        * table above sets its max_child to 0 whereas this one uses
+        * 256 -- confirm which value is intended.
+        */
+       ja_type_8_max_child = 256,
+};
+
+enum {
+       ja_type_0_max_linear_child = 1,
+       ja_type_1_max_linear_child = 3,
+       ja_type_2_max_linear_child = 7,
+       ja_type_3_max_linear_child = 14,
+       ja_type_4_max_linear_child = 28,
+       ja_type_5_max_linear_child = 27,
+       ja_type_6_max_linear_child = 26,
+};
+
+enum {
+       ja_type_5_nr_pool_order = 1,
+       ja_type_6_nr_pool_order = 2,
+};
+
+const struct cds_ja_type ja_types[] = {
+       { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 4, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 5, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 6, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 5, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 7, },
+       { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 8, },
+
+       /* Pools may fill sooner than max_child. */
+       /* This pool is hardcoded at index 5. See ja_node_ptr(). */
+       { .type_class = RCU_JA_POOL, .min_child = 22, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 9, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 8, },
+       /* This pool is hardcoded at index 6. See ja_node_ptr(). */
+       { .type_class = RCU_JA_POOL, .min_child = 51, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 10, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 8, },
+
+       /*
+        * Upon node removal below min_child, if child pool is filled
+        * beyond capacity, we roll back to pigeon.
+        */
+       { .type_class = RCU_JA_PIGEON, .min_child = 95, .max_child = ja_type_7_max_child, .order = 11, },
+
+       { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
+};
+#endif /* !(CAA_BITS_PER_LONG < 64) */
+
+/*
+ * Build-time check that ja_types[] has at least JA_TYPE_MAX_NR
+ * entries. The function only hosts the build assertion (hence the
+ * unused attribute).
+ */
+static inline __attribute__((unused))
+void static_array_size_check(void)
+{
+       CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) < JA_TYPE_MAX_NR);
+}
+
+/*
+ * The cds_ja_node contains the compressed node data needed for
+ * read-side. For linear and pool node configurations, it starts with a
+ * byte counting the number of children in the node.  Then, the
+ * node-specific data is placed.
+ * The node mutex, if any is needed, protecting concurrent updates of
+ * each node is placed in a separate hash table indexed by node address.
+ * For the pigeon configuration, the number of children is also kept in
+ * a separate hash table, indexed by node address, because it is only
+ * required for updates.
+ */
+
+/*
+ * Anonymous struct for the linear node of type @index: a child count,
+ * followed by an array of child key bytes and an array of child
+ * pointers, both sized by ja_type_<index>_max_linear_child.
+ */
+#define DECLARE_LINEAR_NODE(index)                                                             \
+       struct {                                                                                \
+               uint8_t nr_child;                                                               \
+               uint8_t child_value[ja_type_## index ##_max_linear_child];                      \
+               struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child];      \
+       }
+
+/*
+ * Anonymous struct for the pool node of type @index: an array of
+ * (1 << ja_type_<index>_nr_pool_order) sub-nodes, each laid out like a
+ * linear node.
+ */
+#define DECLARE_POOL_NODE(index)                                                               \
+       struct {                                                                                \
+               struct {                                                                        \
+                       uint8_t nr_child;                                                       \
+                       uint8_t child_value[ja_type_## index ##_max_linear_child];              \
+                       struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
+               } linear[1U << ja_type_## index ##_nr_pool_order];                              \
+       }
+
+/*
+ * Internal node. The union overlays one layout per node type index
+ * (conf_0 to conf_7). The accessors in this file do not use the
+ * conf_N members for linear and pool nodes: they compute offsets into
+ * the raw @data byte array instead (child count in data[0], child key
+ * bytes from data[1]). @data is sized like the pigeon layout, the
+ * largest one. Nodes are allocated with the size given by the type
+ * order (see alloc_cds_ja_node()), not with sizeof(struct
+ * cds_ja_inode).
+ */
+struct cds_ja_inode {
+       union {
+               /* Linear configuration */
+               DECLARE_LINEAR_NODE(0) conf_0;
+               DECLARE_LINEAR_NODE(1) conf_1;
+               DECLARE_LINEAR_NODE(2) conf_2;
+               DECLARE_LINEAR_NODE(3) conf_3;
+               DECLARE_LINEAR_NODE(4) conf_4;
+
+               /* Pool configuration */
+               DECLARE_POOL_NODE(5) conf_5;
+               DECLARE_POOL_NODE(6) conf_6;
+
+               /* Pigeon configuration */
+               struct {
+                       struct cds_ja_inode_flag *child[ja_type_7_max_child];
+               } conf_7;
+               /* data aliasing nodes for computed accesses */
+               uint8_t data[sizeof(struct cds_ja_inode_flag *) * ja_type_7_max_child];
+       } u;
+};
+
+/*
+ * Reason for recompacting a node.
+ * NOTE(review): presumably ADD_SAME/ADD_NEXT select between keeping
+ * the node type and moving to the next one on addition, and DEL is
+ * used on removal -- confirm against the recompaction code.
+ */
+enum ja_recompact {
+       JA_RECOMPACT_ADD_SAME,
+       JA_RECOMPACT_ADD_NEXT,
+       JA_RECOMPACT_DEL,
+};
+
+/*
+ * Kind of inequality lookup.
+ * NOTE(review): presumably "below or equal" (BE) and "above or equal"
+ * (AE) -- confirm against the lookup code.
+ */
+enum ja_lookup_inequality {
+       JA_LOOKUP_BE,
+       JA_LOOKUP_AE,
+};
+
+/*
+ * Search direction within a node. In ja_linear_node_get_direction(),
+ * JA_LEFT selects the closest child with a key below the requested
+ * one, JA_RIGHT the closest child with a key above it.
+ * NOTE(review): JA_LEFTMOST/JA_RIGHTMOST presumably select the child
+ * with the lowest/highest key -- confirm against their users.
+ */
+enum ja_direction {
+       JA_LEFT,
+       JA_RIGHT,
+       JA_LEFTMOST,
+       JA_RIGHTMOST,
+};
+
+/* Mask @node with JA_PTR_MASK to obtain the inode pointer it encodes. */
+static
+struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node)
+{
+       unsigned long addr = (unsigned long) node;
+
+       addr &= JA_PTR_MASK;
+       return (struct cds_ja_inode *) addr;
+}
+
+/*
+ * Return the type index encoded in @node (its JA_TYPE_MASK bits), or
+ * NODE_INDEX_NULL when the pointer part of @node is NULL.
+ */
+unsigned long ja_node_type(struct cds_ja_inode_flag *node)
+{
+       unsigned long bits;
+
+       if (!_ja_node_mask_ptr(node))
+               return NODE_INDEX_NULL;
+
+       bits = (unsigned int) ((unsigned long) node & JA_TYPE_MASK);
+       assert(bits < (1UL << JA_TYPE_BITS));
+       return bits;
+}
+
+/*
+ * Allocate a zeroed node of (1 << ja_type->order) bytes, aligned on
+ * its own size. Returns NULL on allocation failure. When debug
+ * counters are enabled, ja->nr_nodes_allocated is incremented.
+ */
+static
+struct cds_ja_inode *alloc_cds_ja_node(struct cds_ja *ja,
+               const struct cds_ja_type *ja_type)
+{
+       const size_t node_size = 1U << ja_type->order;
+       void *mem;
+
+       if (posix_memalign(&mem, node_size, node_size) != 0 || mem == NULL)
+               return NULL;
+       memset(mem, 0, node_size);
+       if (ja_debug_counters())
+               uatomic_inc(&ja->nr_nodes_allocated);
+       return mem;
+}
+
+/*
+ * Free @node. A NULL @node is a no-op. When debug counters are
+ * enabled, ja->nr_nodes_freed is incremented for each non-NULL node.
+ * The NULL test is done before free(), since the pointer value must
+ * not be used once the memory has been released.
+ */
+void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node)
+{
+       if (!node)
+               return;
+       free(node);
+       if (ja_debug_counters())
+               uatomic_inc(&ja->nr_nodes_freed);
+}
+
+/*
+ * JA_ALIGN(v, align): round @v up to the next multiple of @align.
+ * JA_FLOOR(v, align): round @v down to a multiple of @align.
+ * Both rely on mask arithmetic, so @align must be a power of two. The
+ * mask is computed in the type of @v.
+ */
+#define __JA_ALIGN_MASK(v, mask)       (((v) + (mask)) & ~(mask))
+#define JA_ALIGN(v, align)             __JA_ALIGN_MASK(v, (typeof(v)) (align) - 1)
+#define __JA_FLOOR_MASK(v, mask)       ((v) & ~(mask))
+#define JA_FLOOR(v, align)             __JA_FLOOR_MASK(v, (typeof(v)) (align) - 1)
+
+/* Round @ptr up to the next address aligned on sizeof(void *). */
+static
+uint8_t *align_ptr_size(uint8_t *ptr)
+{
+       unsigned long addr = (unsigned long) ptr;
+
+       return (uint8_t *) JA_ALIGN(addr, sizeof(void *));
+}
+
+/*
+ * Read the child count of a linear node (or of one linear sub-node of
+ * a pool), stored in the first byte of the node data.
+ */
+static
+uint8_t ja_linear_node_get_nr_child(const struct cds_ja_type *type,
+               struct cds_ja_inode *node)
+{
+       uint8_t count;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+       count = rcu_dereference(node->u.data[0]);
+       return count;
+}
+
+/*
+ * Look up the child with key byte @n in a linear node (or in one
+ * linear sub-node of a pool).
+ *
+ * The order in which values and pointers are updated does not matter:
+ * if a value is missing, we return NULL. If a value is there, but its
+ * associated pointer is still NULL, we return NULL too.
+ *
+ * @node_flag_ptr: optional. When non-NULL, receives the address of the
+ *     matching pointer slot, or NULL when @n is not found.
+ *
+ * Returns the child pointer associated with @n, or NULL.
+ */
+static
+struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag ***node_flag_ptr,
+               uint8_t n)
+{
+       uint8_t nr_child;
+       uint8_t *values;
+       struct cds_ja_inode_flag **pointers;
+       struct cds_ja_inode_flag *ptr;
+       unsigned int i;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+
+       nr_child = ja_linear_node_get_nr_child(type, node);
+       cmm_smp_rmb();  /* read nr_child before values and pointers */
+       assert(nr_child <= type->max_linear_child);
+       assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
+
+       /* Key bytes follow the child count byte. */
+       values = &node->u.data[1];
+       for (i = 0; i < nr_child; i++) {
+               if (CMM_LOAD_SHARED(values[i]) == n)
+                       break;
+       }
+       if (i >= nr_child) {
+               if (caa_unlikely(node_flag_ptr))
+                       *node_flag_ptr = NULL;
+               return NULL;
+       }
+       /* Pointer array starts pointer-aligned after the key byte array. */
+       pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+       ptr = rcu_dereference(pointers[i]);
+       if (caa_unlikely(node_flag_ptr))
+               *node_flag_ptr = &pointers[i];
+       return ptr;
+}
+
+/*
+ * Find the populated child closest to key @n in direction @dir within
+ * a linear node (or one linear sub-node of a pool).
+ *
+ * JA_LEFT selects the child with the largest key strictly below @n,
+ * JA_RIGHT the child with the smallest key strictly above @n. Slots
+ * whose pointer is NULL are skipped. Other directions are rejected by
+ * an assertion.
+ *
+ * @result_key: receives the key of the selected child; left untouched
+ *     when no child is found.
+ *
+ * Returns the selected child pointer, or NULL when there is none.
+ */
+static
+struct cds_ja_inode_flag *ja_linear_node_get_direction(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               int n, uint8_t *result_key,
+               enum ja_direction dir)
+{
+       uint8_t nr_child;
+       uint8_t *values;
+       struct cds_ja_inode_flag **pointers;
+       struct cds_ja_inode_flag *ptr, *match_ptr = NULL;
+       unsigned int i;
+       int match_v;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+       assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+       /* Start from a sentinel just outside the valid key range. */
+       if (dir == JA_LEFT) {
+               match_v = -1;
+       } else {
+               match_v = JA_ENTRY_PER_NODE;
+       }
+
+       nr_child = ja_linear_node_get_nr_child(type, node);
+       cmm_smp_rmb();  /* read nr_child before values and pointers */
+       assert(nr_child <= type->max_linear_child);
+       assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
+
+       values = &node->u.data[1];
+       pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+       /* Keys are scanned linearly, keeping the best candidate so far. */
+       for (i = 0; i < nr_child; i++) {
+               unsigned int v;
+
+               v = CMM_LOAD_SHARED(values[i]);
+               ptr = CMM_LOAD_SHARED(pointers[i]);
+               if (!ptr)
+                       continue;
+               if (dir == JA_LEFT) {
+                       if ((int) v < n && (int) v > match_v) {
+                               match_v = v;
+                               match_ptr = ptr;
+                       }
+               } else {
+                       if ((int) v > n && (int) v < match_v) {
+                               match_v = v;
+                               match_ptr = ptr;
+                       }
+               }
+       }
+
+       if (!match_ptr) {
+               return NULL;
+       }
+       assert(match_v >= 0 && match_v < JA_ENTRY_PER_NODE);
+
+       *result_key = (uint8_t) match_v;
+       return rcu_dereference(match_ptr);
+}
+
+/*
+ * Fetch the @i-th slot of a linear node (or of one linear sub-node of
+ * a pool): its key byte is stored in @v and its child pointer in
+ * @iter. @i must be below the current child count (asserted). Plain
+ * loads are used, without memory barriers.
+ */
+static
+void ja_linear_node_get_ith_pos(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               uint8_t i,
+               uint8_t *v,
+               struct cds_ja_inode_flag **iter)
+{
+       uint8_t *keys = &node->u.data[1];
+       struct cds_ja_inode_flag **slots;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+       assert(i < ja_linear_node_get_nr_child(type, node));
+
+       *v = keys[i];
+       slots = (struct cds_ja_inode_flag **) align_ptr_size(&keys[type->max_linear_child]);
+       *iter = slots[i];
+}
+
+/*
+ * ja_pool_node_get_nth: look up key n in a pool node.
+ *
+ * One (nr_pool_order == 1) or two (nr_pool_order == 2) bits of the key,
+ * chosen by the bit selectors carried by node_flag, pick the linear
+ * sub-node; the lookup is then delegated to ja_linear_node_get_nth().
+ */
+static
+struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_inode_flag ***node_flag_ptr,
+               uint8_t n)
+{
+       struct cds_ja_inode *linear;
+       unsigned long key = (unsigned long) n;
+
+       assert(type->type_class == RCU_JA_POOL);
+
+       switch (type->nr_pool_order) {
+       case 1:
+       {
+               unsigned long sel, pool_idx;
+
+               sel = ja_node_pool_1d_bitsel(node_flag);
+               assert(sel < CHAR_BIT);
+               pool_idx = (key >> sel) & 0x1;
+               linear = (struct cds_ja_inode *)
+                       &node->u.data[pool_idx << type->pool_size_order];
+               break;
+       }
+       case 2:
+       {
+               unsigned long sel[2], high, low, pool_idx;
+
+               ja_node_pool_2d_bitsel(node_flag, sel);
+               assert(sel[0] < CHAR_BIT);
+               assert(sel[1] < CHAR_BIT);
+               /* First selector gives the high index bit, second the low one. */
+               high = ((key >> sel[0]) & 0x1) << 1;
+               low = (key >> sel[1]) & 0x1;
+               pool_idx = high | low;
+               linear = (struct cds_ja_inode *)
+                       &node->u.data[pool_idx << type->pool_size_order];
+               break;
+       }
+       default:
+               linear = NULL;
+               assert(0);
+       }
+       return ja_linear_node_get_nth(type, linear, node_flag_ptr, n);
+}
+
+/*
+ * ja_pool_node_get_ith_pool: return the i-th linear sub-node of a pool
+ * node. Sub-nodes are laid out back to back in node->u.data, each one
+ * (1 << pool_size_order) bytes apart.
+ */
+static
+struct cds_ja_inode *ja_pool_node_get_ith_pool(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               uint8_t i)
+{
+       unsigned int byte_offset;
+
+       assert(type->type_class == RCU_JA_POOL);
+       byte_offset = (unsigned int) i << type->pool_size_order;
+       return (struct cds_ja_inode *) &node->u.data[byte_offset];
+}
+
+/*
+ * ja_pool_node_get_direction: find, within a pool node, the populated
+ * key closest to n in direction dir.
+ *
+ * For JA_LEFT, the largest key strictly below n is selected; for
+ * JA_RIGHT, the smallest key strictly above n. n is an int so callers
+ * can pass -1 or JA_ENTRY_PER_NODE to search the whole key range.
+ * Every linear sub-node of the pool is scanned, since the keys are
+ * spread across sub-nodes.
+ *
+ * Returns the matching child pointer and stores its key in
+ * *result_key. Returns NULL, leaving *result_key untouched, when no
+ * populated key exists in that direction.
+ *
+ * NOTE(review): unlike ja_linear_node_get_direction(), this scan issues
+ * no cmm_smp_rmb() after reading nr_child and reads values/pointers
+ * through ja_linear_node_get_ith_pos() with plain loads -- confirm that
+ * this is safe for concurrent readers.
+ */
+static
+struct cds_ja_inode_flag *ja_pool_node_get_direction(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               int n, uint8_t *result_key,
+               enum ja_direction dir)
+{
+       unsigned int pool_nr;
+       int match_v;
+       struct cds_ja_inode_flag *match_node_flag = NULL;
+
+       assert(type->type_class == RCU_JA_POOL);
+       assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+       /* Start from a sentinel just outside the valid key range. */
+       if (dir == JA_LEFT) {
+               match_v = -1;
+       } else {
+               match_v = JA_ENTRY_PER_NODE;
+       }
+
+       /* A pool holds (1 << nr_pool_order) linear sub-nodes. */
+       for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
+               struct cds_ja_inode *pool =
+                       ja_pool_node_get_ith_pool(type,
+                               node, pool_nr);
+               uint8_t nr_child =
+                       ja_linear_node_get_nr_child(type, pool);
+               unsigned int j;
+
+               for (j = 0; j < nr_child; j++) {
+                       struct cds_ja_inode_flag *iter;
+                       uint8_t v;
+
+                       ja_linear_node_get_ith_pos(type, pool,
+                                       j, &v, &iter);
+                       /* Skip slots whose pointer is NULL. */
+                       if (!iter)
+                               continue;
+                       if (dir == JA_LEFT) {
+                               /* Keep the largest key that is still below n. */
+                               if ((int) v < n && (int) v > match_v) {
+                                       match_v = v;
+                                       match_node_flag = iter;
+                               }
+                       } else {
+                               /* Keep the smallest key that is still above n. */
+                               if ((int) v > n && (int) v < match_v) {
+                                       match_v = v;
+                                       match_node_flag = iter;
+                               }
+                       }
+               }
+       }
+       if (match_node_flag)
+               *result_key = (uint8_t) match_v;
+       return match_node_flag;
+}
+
+/*
+ * ja_pigeon_node_get_nth: look up key n in a pigeon node, where the
+ * node data is indexed directly by key as an array of child pointers.
+ *
+ * When node_flag_ptr is non-NULL, the address of the slot is also
+ * handed back through it.
+ */
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag ***node_flag_ptr,
+               uint8_t n)
+{
+       struct cds_ja_inode_flag **slot;
+       struct cds_ja_inode_flag *child;
+
+       assert(type->type_class == RCU_JA_PIGEON);
+       slot = (struct cds_ja_inode_flag **) node->u.data + n;
+       child = rcu_dereference(*slot);
+       dbg_printf("ja_pigeon_node_get_nth child_node_flag_ptr %p\n",
+               slot);
+       if (caa_unlikely(node_flag_ptr))
+               *node_flag_ptr = slot;
+       return child;
+}
+
+/*
+ * ja_pigeon_node_get_direction: find, within a pigeon node, the first
+ * populated key strictly left (JA_LEFT) or strictly right (JA_RIGHT)
+ * of n.
+ *
+ * On success the child pointer is returned and its key is stored in
+ * *result_key. NULL is returned when no populated slot exists in that
+ * direction.
+ */
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_direction(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               int n, uint8_t *result_key,
+               enum ja_direction dir)
+{
+       struct cds_ja_inode_flag **slots =
+               (struct cds_ja_inode_flag **) node->u.data;
+       struct cds_ja_inode_flag *child;
+       int pos;
+
+       assert(type->type_class == RCU_JA_PIGEON);
+       assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+       if (dir != JA_LEFT) {
+               /* Walk upwards; n + 1 is the first key right of n. */
+               for (pos = n + 1; pos < JA_ENTRY_PER_NODE; pos++) {
+                       child = rcu_dereference(slots[pos]);
+                       if (!child)
+                               continue;
+                       dbg_printf("ja_pigeon_node_get_right child_node_flag %p\n",
+                               child);
+                       *result_key = (uint8_t) pos;
+                       return child;
+               }
+               return NULL;
+       }
+
+       /* Walk downwards; n - 1 is the first key left of n. */
+       for (pos = n - 1; pos >= 0; pos--) {
+               child = rcu_dereference(slots[pos]);
+               if (!child)
+                       continue;
+               dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n",
+                       child);
+               *result_key = (uint8_t) pos;
+               return child;
+       }
+       return NULL;
+}
+
+/*
+ * ja_pigeon_node_get_ith_pos: return the child pointer stored for key i
+ * in a pigeon node. The slot address is not requested.
+ */
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               uint8_t i)
+{
+       struct cds_ja_inode_flag *child;
+
+       child = ja_pigeon_node_get_nth(type, node, NULL, i);
+       return child;
+}
+
+/*
+ * ja_node_get_nth: look up the child stored for key n in a node of any
+ * class, dispatching on the node type encoded in node_flag.
+ * node_flag is already rcu_dereference'd.
+ *
+ * When node_flag_ptr is non-NULL, it is forwarded to the per-class
+ * lookup so that the address of the child slot can be reported.
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_inode_flag ***node_flag_ptr,
+               uint8_t n)
+{
+       struct cds_ja_inode *node = ja_node_ptr(node_flag);
+       const struct cds_ja_type *type;
+       unsigned int type_index;
+
+       assert(node != NULL);
+       type_index = ja_node_type(node_flag);
+       type = &ja_types[type_index];
+
+       if (type->type_class == RCU_JA_LINEAR)
+               return ja_linear_node_get_nth(type, node,
+                               node_flag_ptr, n);
+       if (type->type_class == RCU_JA_POOL)
+               return ja_pool_node_get_nth(type, node, node_flag,
+                               node_flag_ptr, n);
+       if (type->type_class == RCU_JA_PIGEON)
+               return ja_pigeon_node_get_nth(type, node,
+                               node_flag_ptr, n);
+
+       /* Unknown node class. */
+       assert(0);
+       return (void *) -1UL;
+}
+
+/*
+ * ja_node_get_direction: find the populated key nearest to n in
+ * direction dir within a node of any class, dispatching on the node
+ * type encoded in node_flag. The per-class helpers store the key they
+ * find in *result_key.
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_direction(struct cds_ja_inode_flag *node_flag,
+               int n, uint8_t *result_key,
+               enum ja_direction dir)
+{
+       struct cds_ja_inode *node = ja_node_ptr(node_flag);
+       const struct cds_ja_type *type;
+       struct cds_ja_inode_flag *found;
+       unsigned int type_index;
+
+       assert(node != NULL);
+       type_index = ja_node_type(node_flag);
+       type = &ja_types[type_index];
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+               found = ja_linear_node_get_direction(type, node, n, result_key, dir);
+               break;
+       case RCU_JA_POOL:
+               found = ja_pool_node_get_direction(type, node, n, result_key, dir);
+               break;
+       case RCU_JA_PIGEON:
+               found = ja_pigeon_node_get_direction(type, node, n, result_key, dir);
+               break;
+       default:
+               /* Unknown node class. */
+               assert(0);
+               found = (void *) -1UL;
+               break;
+       }
+       return found;
+}
+
+/*
+ * ja_node_get_leftright: find the populated key nearest to n in
+ * direction dir. Thin wrapper over ja_node_get_direction() taking the
+ * key as an unsigned int.
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_leftright(struct cds_ja_inode_flag *node_flag,
+               unsigned int n, uint8_t *result_key,
+               enum ja_direction dir)
+{
+       /* ja_node_get_direction() takes the key as a signed int. */
+       int key = (int) n;
+
+       return ja_node_get_direction(node_flag, key, result_key, dir);
+}
+
+/*
+ * ja_node_get_minmax: return the child with the smallest (JA_LEFTMOST)
+ * or largest (JA_RIGHTMOST) populated key of a node, storing that key
+ * in *result_key.
+ *
+ * The leftmost child is found by searching right of -1, the rightmost
+ * child by searching left of JA_ENTRY_PER_NODE.
+ *
+ * Any other direction is a programming error: it trips an assertion
+ * and, when assertions are compiled out, returns NULL rather than
+ * falling off the end of the function.
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_minmax(struct cds_ja_inode_flag *node_flag,
+               uint8_t *result_key,
+               enum ja_direction dir)
+{
+       switch (dir) {
+       case JA_LEFTMOST:
+               return ja_node_get_direction(node_flag,
+                               -1, result_key, JA_RIGHT);
+       case JA_RIGHTMOST:
+               return ja_node_get_direction(node_flag,
+                               JA_ENTRY_PER_NODE, result_key, JA_LEFT);
+       default:
+               assert(0);
+               /* Reached only with NDEBUG: report "no child found". */
+               return NULL;
+       }
+}
+
+/*
+ * ja_linear_node_set_nth: store child_node_flag for key n in a linear
+ * node (or in one linear sub-node of a pool node).
+ *
+ * Returns:
+ *   0        on success; shadow_node->nr_child is incremented.
+ *   -EEXIST  if key n is already populated with a non-NULL pointer.
+ *   -ERANGE  if all slots are in use by other keys but at least one of
+ *            them holds a NULL pointer: the node needs recompaction.
+ *   -ENOSPC  if all slots are in use and populated: this node type
+ *            cannot hold one more child.
+ *
+ * A slot whose key matches n but whose pointer is NULL is reused in
+ * place. Otherwise a new slot is appended and nr_child is published
+ * last.
+ */
+static
+int ja_linear_node_set_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag)
+{
+       uint8_t nr_child;
+       uint8_t *values, *nr_child_ptr;
+       struct cds_ja_inode_flag **pointers;
+       unsigned int i, unused = 0;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+
+       /* Byte 0 of the node data is the slot count. */
+       nr_child_ptr = &node->u.data[0];
+       dbg_printf("linear set nth: n %u, nr_child_ptr %p\n",
+               (unsigned int) n, nr_child_ptr);
+       nr_child = *nr_child_ptr;
+       assert(nr_child <= type->max_linear_child);
+
+       /* Key bytes follow the count; pointers follow the key bytes. */
+       values = &node->u.data[1];
+       pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+       /* Check if node value is already populated */
+       for (i = 0; i < nr_child; i++) {
+               if (values[i] == n) {
+                       if (pointers[i])
+                               return -EEXIST;
+                       else
+                               break;  /* Reuse this slot: same key, NULL pointer */
+               } else {
+                       /* Count slots of other keys holding a NULL pointer */
+                       if (!pointers[i])
+                               unused++;
+               }
+       }
+       /* No slot for n was found and no room is left to append one */
+       if (i == nr_child && nr_child >= type->max_linear_child) {
+               if (unused)
+                       return -ERANGE; /* recompact node */
+               else
+                       return -ENOSPC; /* No space left in this node type */
+       }
+
+       assert(pointers[i] == NULL);
+       rcu_assign_pointer(pointers[i], child_node_flag);
+       /* If we expanded the nr_child, increment it */
+       if (i == nr_child) {
+               CMM_STORE_SHARED(values[nr_child], n);
+               /* write pointer and value before nr_child */
+               cmm_smp_wmb();
+               CMM_STORE_SHARED(*nr_child_ptr, nr_child + 1);
+       }
+       /* Shadow accounting tracks populated children, not slots. */
+       shadow_node->nr_child++;
+       dbg_printf("linear set nth: %u child, shadow: %u child, for node %p shadow %p\n",
+               (unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
+               (unsigned int) shadow_node->nr_child,
+               node, shadow_node);
+
+       return 0;
+}
+
+/*
+ * ja_pool_node_set_nth: store child_node_flag for key n in a pool node.
+ *
+ * One (nr_pool_order == 1) or two (nr_pool_order == 2) bits of the key,
+ * chosen by the bit selectors carried by node_flag, pick the linear
+ * sub-node; the insertion is then delegated to
+ * ja_linear_node_set_nth(), whose return value is passed through.
+ */
+static
+int ja_pool_node_set_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag)
+{
+       struct cds_ja_inode *linear;
+       unsigned long key = (unsigned long) n;
+
+       assert(type->type_class == RCU_JA_POOL);
+
+       switch (type->nr_pool_order) {
+       case 1:
+       {
+               unsigned long sel, pool_idx;
+
+               sel = ja_node_pool_1d_bitsel(node_flag);
+               assert(sel < CHAR_BIT);
+               pool_idx = (key >> sel) & 0x1;
+               linear = (struct cds_ja_inode *)
+                       &node->u.data[pool_idx << type->pool_size_order];
+               break;
+       }
+       case 2:
+       {
+               unsigned long sel[2], high, low, pool_idx;
+
+               ja_node_pool_2d_bitsel(node_flag, sel);
+               assert(sel[0] < CHAR_BIT);
+               assert(sel[1] < CHAR_BIT);
+               /* First selector gives the high index bit, second the low one. */
+               high = ((key >> sel[0]) & 0x1) << 1;
+               low = (key >> sel[1]) & 0x1;
+               pool_idx = high | low;
+               linear = (struct cds_ja_inode *)
+                       &node->u.data[pool_idx << type->pool_size_order];
+               break;
+       }
+       default:
+               linear = NULL;
+               assert(0);
+       }
+
+       return ja_linear_node_set_nth(type, linear, shadow_node,
+                       n, child_node_flag);
+}
+
+/*
+ * ja_pigeon_node_set_nth: store child_node_flag for key n in a pigeon
+ * node, whose data is an array of child pointers indexed by key.
+ *
+ * Returns 0 on success (shadow_node->nr_child is incremented), or
+ * -EEXIST if the slot already holds a non-NULL pointer.
+ */
+static
+int ja_pigeon_node_set_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag)
+{
+       struct cds_ja_inode_flag **slot;
+
+       assert(type->type_class == RCU_JA_PIGEON);
+       slot = (struct cds_ja_inode_flag **) node->u.data + n;
+       if (*slot != NULL)
+               return -EEXIST;
+       rcu_assign_pointer(*slot, child_node_flag);
+       shadow_node->nr_child++;
+       return 0;
+}
+
+/*
+ * _ja_node_set_nth: set the child for key n within a node, dispatching
+ * on the node class. Returns 0 on success or a negative error value:
+ * whatever the per-class helper reports (e.g. when the key is already
+ * there), -ENOSPC for the NULL node class, -EINVAL for an unknown
+ * class.
+ */
+static
+int _ja_node_set_nth(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag)
+{
+       int ret;
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+               ret = ja_linear_node_set_nth(type, node, shadow_node, n,
+                               child_node_flag);
+               break;
+       case RCU_JA_POOL:
+               ret = ja_pool_node_set_nth(type, node, node_flag, shadow_node, n,
+                               child_node_flag);
+               break;
+       case RCU_JA_PIGEON:
+               ret = ja_pigeon_node_set_nth(type, node, shadow_node, n,
+                               child_node_flag);
+               break;
+       case RCU_JA_NULL:
+               /* The NULL node class has no room for any child. */
+               ret = -ENOSPC;
+               break;
+       default:
+               assert(0);
+               ret = -EINVAL;
+               break;
+       }
+       return ret;
+}
+
+/*
+ * ja_linear_node_clear_ptr: remove a child from a linear node (or from
+ * one linear sub-node of a pool node) by setting *node_flag_ptr to
+ * NULL.
+ *
+ * Returns 0 on success, after decrementing shadow_node->nr_child.
+ * For the RCU_JA_LINEAR class only, returns -EFBIG without touching
+ * the node when the shadow child count is already at or below
+ * type->min_child, meaning the caller should try recompacting the node.
+ */
+static
+int ja_linear_node_clear_ptr(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               struct cds_ja_inode_flag **node_flag_ptr)
+{
+       uint8_t nr_child;
+       uint8_t *nr_child_ptr;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+
+       /* Byte 0 of the node data is the slot count (only checked here). */
+       nr_child_ptr = &node->u.data[0];
+       nr_child = *nr_child_ptr;
+       assert(nr_child <= type->max_linear_child);
+
+       if (type->type_class == RCU_JA_LINEAR) {
+               assert(!shadow_node->fallback_removal_count);
+               if (shadow_node->nr_child <= type->min_child) {
+                       /* We need to try recompacting the node */
+                       return -EFBIG;
+               }
+       }
+       dbg_printf("linear clear ptr: nr_child_ptr %p\n", nr_child_ptr);
+       assert(*node_flag_ptr != NULL);
+       rcu_assign_pointer(*node_flag_ptr, NULL);
+       /*
+        * Value and nr_child are never changed (would cause ABA issue).
+        * Instead, we leave the pointer to NULL and recompact the node
+        * once in a while. It is allowed to set a NULL pointer to a new
+        * value without recompaction though.
+        * Only update the shadow node accounting.
+        */
+       shadow_node->nr_child--;
+       dbg_printf("linear clear ptr: %u child, shadow: %u child, for node %p shadow %p\n",
+               (unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
+               (unsigned int) shadow_node->nr_child,
+               node, shadow_node);
+       return 0;
+}
+
+/*
+ * ja_pool_node_clear_ptr: remove the child of key n from a pool node by
+ * clearing *node_flag_ptr.
+ *
+ * If shadow_node->fallback_removal_count is non-zero, it is decremented
+ * and the removal proceeds. Otherwise, -EFBIG is returned without
+ * touching the node when the shadow child count is at or below
+ * type->min_child, meaning the caller should try recompacting the node.
+ *
+ * The linear sub-node holding key n is selected from one
+ * (nr_pool_order == 1) or two (nr_pool_order == 2) bits of the key,
+ * chosen by the bit selectors carried by node_flag, exactly as in
+ * ja_pool_node_get_nth() and ja_pool_node_set_nth(). The removal is
+ * then delegated to ja_linear_node_clear_ptr(), whose return value is
+ * passed through.
+ */
+static
+int ja_pool_node_clear_ptr(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               struct cds_ja_inode_flag **node_flag_ptr,
+               uint8_t n)
+{
+       struct cds_ja_inode *linear;
+
+       assert(type->type_class == RCU_JA_POOL);
+
+       if (shadow_node->fallback_removal_count) {
+               shadow_node->fallback_removal_count--;
+       } else {
+               /* We should try recompacting the node */
+               if (shadow_node->nr_child <= type->min_child)
+                       return -EFBIG;
+       }
+
+       switch (type->nr_pool_order) {
+       case 1:
+       {
+               unsigned long bitsel, index;
+
+               bitsel = ja_node_pool_1d_bitsel(node_flag);
+               assert(bitsel < CHAR_BIT);
+               /*
+                * Extract the single selector bit. Mask with the
+                * literal 0x1 rather than with nr_pool_order, which
+                * only happens to equal 1 in this case.
+                */
+               index = ((unsigned long) n >> bitsel) & 0x1;
+               linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
+               break;
+       }
+       case 2:
+       {
+               unsigned long bitsel[2], index[2], rindex;
+
+               ja_node_pool_2d_bitsel(node_flag, bitsel);
+               assert(bitsel[0] < CHAR_BIT);
+               assert(bitsel[1] < CHAR_BIT);
+               /* First selector gives the high index bit, second the low one. */
+               index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
+               index[0] <<= 1;
+               index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
+               rindex = index[0] | index[1];
+               linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
+               break;
+       }
+       default:
+               linear = NULL;
+               assert(0);
+       }
+
+       return ja_linear_node_clear_ptr(type, linear, shadow_node, node_flag_ptr);
+}
+
+/*
+ * ja_pigeon_node_clear_ptr: remove a child from a pigeon node by
+ * setting *node_flag_ptr to NULL.
+ *
+ * If shadow_node->fallback_removal_count is non-zero, it is decremented
+ * and the removal proceeds. Otherwise, -EFBIG is returned without
+ * touching the node when the shadow child count is at or below
+ * type->min_child, meaning the caller should try recompacting the node.
+ * Returns 0 on success, after decrementing shadow_node->nr_child.
+ */
+static
+int ja_pigeon_node_clear_ptr(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               struct cds_ja_inode_flag **node_flag_ptr)
+{
+       assert(type->type_class == RCU_JA_PIGEON);
+
+       if (!shadow_node->fallback_removal_count) {
+               /* We should try recompacting the node */
+               if (shadow_node->nr_child <= type->min_child)
+                       return -EFBIG;
+       } else {
+               shadow_node->fallback_removal_count--;
+       }
+
+       dbg_printf("ja_pigeon_node_clear_ptr: clearing ptr: %p\n", *node_flag_ptr);
+       rcu_assign_pointer(*node_flag_ptr, NULL);
+       shadow_node->nr_child--;
+       return 0;
+}
+
+/*
+ * _ja_node_clear_ptr: clear the child pointer *node_flag_ptr (key n)
+ * within a node, dispatching on the node class. Returns 0 on success
+ * or a negative error value: whatever the per-class helper reports,
+ * -ENOENT for the NULL node class (nothing to remove), -EINVAL for an
+ * unknown class.
+ */
+static
+int _ja_node_clear_ptr(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               struct cds_ja_inode_flag **node_flag_ptr,
+               uint8_t n)
+{
+       int ret;
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+               ret = ja_linear_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
+               break;
+       case RCU_JA_POOL:
+               /* Only the pool class needs the key, to locate the sub-node. */
+               ret = ja_pool_node_clear_ptr(type, node, node_flag, shadow_node, node_flag_ptr, n);
+               break;
+       case RCU_JA_PIGEON:
+               ret = ja_pigeon_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
+               break;
+       case RCU_JA_NULL:
+               ret = -ENOENT;
+               break;
+       default:
+               assert(0);
+               ret = -EINVAL;
+               break;
+       }
+       return ret;
+}
+
+/*
+ * Account one key in the per-bit population counts: for each bit set
+ * in key, increment the matching nr_one[] counter.
+ */
+static
+void ja_node_sum_distribution_1d_add_key(uint8_t *nr_one, unsigned int key)
+{
+       unsigned int bit_i;
+
+       for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+               if (key & (1U << bit_i))
+                       nr_one[bit_i]++;
+       }
+}
+
+/*
+ * Calculate bit distribution. Returns the bit (0 to 7) that splits the
+ * distribution into two sub-distributions whose sizes are as close to
+ * each other as possible.
+ *
+ * The distribution is made of the keys of all populated children of
+ * node. In JA_RECOMPACT_DEL mode, the child designated by
+ * *nullify_node_flag_ptr is left out. In JA_RECOMPACT_ADD_NEXT and
+ * JA_RECOMPACT_ADD_SAME modes, key n is added to the distribution.
+ *
+ * When several bits are equally good, the lowest one is returned.
+ */
+static
+unsigned int ja_node_sum_distribution_1d(enum ja_recompact mode,
+               struct cds_ja *ja,
+               unsigned int type_index,
+               const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag,
+               struct cds_ja_inode_flag **nullify_node_flag_ptr)
+{
+       /* nr_one[b]: number of keys of the distribution having bit b set */
+       uint8_t nr_one[JA_BITS_PER_BYTE];
+       unsigned int bitsel = 0, bit_i, overall_best_distance = UINT_MAX;
+       unsigned int distrib_nr_child = 0;
+
+       memset(nr_one, 0, sizeof(nr_one));
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+       {
+               uint8_t nr_child =
+                       ja_linear_node_get_nr_child(type, node);
+               unsigned int i;
+
+               for (i = 0; i < nr_child; i++) {
+                       struct cds_ja_inode_flag *iter;
+                       uint8_t v;
+
+                       ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       ja_node_sum_distribution_1d_add_key(nr_one, v);
+                       distrib_nr_child++;
+               }
+               break;
+       }
+       case RCU_JA_POOL:
+       {
+               unsigned int pool_nr;
+
+               for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
+                       struct cds_ja_inode *pool =
+                               ja_pool_node_get_ith_pool(type,
+                                       node, pool_nr);
+                       uint8_t nr_child =
+                               ja_linear_node_get_nr_child(type, pool);
+                       unsigned int j;
+
+                       for (j = 0; j < nr_child; j++) {
+                               struct cds_ja_inode_flag *iter;
+                               uint8_t v;
+
+                               ja_linear_node_get_ith_pos(type, pool,
+                                               j, &v, &iter);
+                               if (!iter)
+                                       continue;
+                               if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                                       continue;
+                               ja_node_sum_distribution_1d_add_key(nr_one, v);
+                               distrib_nr_child++;
+                       }
+               }
+               break;
+       }
+       case RCU_JA_PIGEON:
+       {
+               unsigned int i;
+
+               assert(mode == JA_RECOMPACT_DEL);
+               /* In a pigeon node, the slot index is the key itself. */
+               for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
+                       struct cds_ja_inode_flag *iter;
+
+                       iter = ja_pigeon_node_get_ith_pos(type, node, i);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       ja_node_sum_distribution_1d_add_key(nr_one, i);
+                       distrib_nr_child++;
+               }
+               break;
+       }
+       case RCU_JA_NULL:
+               assert(mode == JA_RECOMPACT_ADD_NEXT);
+               break;
+       default:
+               assert(0);
+               break;
+       }
+
+       /* When adding, the new key takes part in the distribution too. */
+       if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
+               ja_node_sum_distribution_1d_add_key(nr_one, n);
+               distrib_nr_child++;
+       }
+
+       /*
+        * The best bit selector is that for which the number of ones is
+        * closest to half of the number of children in the
+        * distribution. We calculate the distance using the double of
+        * the sub-distribution sizes to eliminate truncation error.
+        */
+       for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+               unsigned int distance_to_best;
+
+               distance_to_best = abs_int(((unsigned int) nr_one[bit_i] << 1U) - distrib_nr_child);
+               if (distance_to_best < overall_best_distance) {
+                       overall_best_distance = distance_to_best;
+                       bitsel = bit_i;
+               }
+       }
+       dbg_printf("1 dimension pool bit selection: (%u)\n", bitsel);
+       return bitsel;
+}
+
+/*
+ * Calculate bit distribution in two dimensions. Returns the two bits
+ * (each 0 to 7) that split the distribution into four sub-distributions
+ * whose sizes are as close to each other as possible.
+ */
+static
+void ja_node_sum_distribution_2d(enum ja_recompact mode,
+               struct cds_ja *ja,
+               unsigned int type_index,
+               const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag,
+               struct cds_ja_inode_flag **nullify_node_flag_ptr,
+               unsigned int *_bitsel)
+{
+       uint8_t nr_2d_11[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
+               nr_2d_10[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
+               nr_2d_01[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
+               nr_2d_00[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE];
+       unsigned int bitsel[2] = { 0, 1 };
+       unsigned int bit_i, bit_j;
+       int overall_best_distance = INT_MAX;
+       unsigned int distrib_nr_child = 0;
+
+       memset(nr_2d_11, 0, sizeof(nr_2d_11));
+       memset(nr_2d_10, 0, sizeof(nr_2d_10));
+       memset(nr_2d_01, 0, sizeof(nr_2d_01));
+       memset(nr_2d_00, 0, sizeof(nr_2d_00));
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+       {
+               uint8_t nr_child =
+                       ja_linear_node_get_nr_child(type, node);
+               unsigned int i;
+
+               for (i = 0; i < nr_child; i++) {
+                       struct cds_ja_inode_flag *iter;
+                       uint8_t v;
+
+                       ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+                               for (bit_j = 0; bit_j < bit_i; bit_j++) {
+                                       if (v & (1U << bit_i)) {
+                                               if (v & (1U << bit_j)) {
+                                                       nr_2d_11[bit_i][bit_j]++;
+                                               } else {
+                                                       nr_2d_10[bit_i][bit_j]++;
+                                               }
+                                       } else {
+                                               if (v & (1U << bit_j)) {
+                                                       nr_2d_01[bit_i][bit_j]++;
+                                               } else {
+                                                       nr_2d_00[bit_i][bit_j]++;
+                                               }
+                                       }
+                               }
+                       }
+                       distrib_nr_child++;
+               }
+               break;
+       }
+       case RCU_JA_POOL:
+       {
+               unsigned int pool_nr;
+
+               for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
+                       struct cds_ja_inode *pool =
+                               ja_pool_node_get_ith_pool(type,
+                                       node, pool_nr);
+                       uint8_t nr_child =
+                               ja_linear_node_get_nr_child(type, pool);
+                       unsigned int j;
+
+                       for (j = 0; j < nr_child; j++) {
+                               struct cds_ja_inode_flag *iter;
+                               uint8_t v;
+
+                               ja_linear_node_get_ith_pos(type, pool,
+                                               j, &v, &iter);
+                               if (!iter)
+                                       continue;
+                               if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                                       continue;
+                               for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+                                       for (bit_j = 0; bit_j < bit_i; bit_j++) {
+                                               if (v & (1U << bit_i)) {
+                                                       if (v & (1U << bit_j)) {
+                                                               nr_2d_11[bit_i][bit_j]++;
+                                                       } else {
+                                                               nr_2d_10[bit_i][bit_j]++;
+                                                       }
+                                               } else {
+                                                       if (v & (1U << bit_j)) {
+                                                               nr_2d_01[bit_i][bit_j]++;
+                                                       } else {
+                                                               nr_2d_00[bit_i][bit_j]++;
+                                                       }
+                                               }
+                                       }
+                               }
+                               distrib_nr_child++;
+                       }
+               }
+               break;
+       }
+       case RCU_JA_PIGEON:
+       {
+               unsigned int i;
+
+               assert(mode == JA_RECOMPACT_DEL);
+               for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
+                       struct cds_ja_inode_flag *iter;
+
+                       iter = ja_pigeon_node_get_ith_pos(type, node, i);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+                               for (bit_j = 0; bit_j < bit_i; bit_j++) {
+                                       if (i & (1U << bit_i)) {
+                                               if (i & (1U << bit_j)) {
+                                                       nr_2d_11[bit_i][bit_j]++;
+                                               } else {
+                                                       nr_2d_10[bit_i][bit_j]++;
+                                               }
+                                       } else {
+                                               if (i & (1U << bit_j)) {
+                                                       nr_2d_01[bit_i][bit_j]++;
+                                               } else {
+                                                       nr_2d_00[bit_i][bit_j]++;
+                                               }
+                                       }
+                               }
+                       }
+                       distrib_nr_child++;
+               }
+               break;
+       }
+       case RCU_JA_NULL:
+               assert(mode == JA_RECOMPACT_ADD_NEXT);
+               break;
+       default:
+               assert(0);
+               break;
+       }
+
+       if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
+               for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+                       for (bit_j = 0; bit_j < bit_i; bit_j++) {
+                               if (n & (1U << bit_i)) {
+                                       if (n & (1U << bit_j)) {
+                                               nr_2d_11[bit_i][bit_j]++;
+                                       } else {
+                                               nr_2d_10[bit_i][bit_j]++;
+                                       }
+                               } else {
+                                       if (n & (1U << bit_j)) {
+                                               nr_2d_01[bit_i][bit_j]++;
+                                       } else {
+                                               nr_2d_00[bit_i][bit_j]++;
+                                       }
+                               }
+                       }
+               }
+               distrib_nr_child++;
+       }
+
+       /*
+        * The best bit selector is that for which the number of nodes
+        * in each sub-class is closest to one-fourth of the number of
+        * children in the distribution. We calculate the distance using
+        * 4 times the size of the sub-distribution to eliminate
+        * truncation error.
+        */
+       for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+               for (bit_j = 0; bit_j < bit_i; bit_j++) {
+                       int distance_to_best[4];
+
+                       distance_to_best[0] = ((unsigned int) nr_2d_11[bit_i][bit_j] << 2U) - distrib_nr_child;
+                       distance_to_best[1] = ((unsigned int) nr_2d_10[bit_i][bit_j] << 2U) - distrib_nr_child;
+                       distance_to_best[2] = ((unsigned int) nr_2d_01[bit_i][bit_j] << 2U) - distrib_nr_child;
+                       distance_to_best[3] = ((unsigned int) nr_2d_00[bit_i][bit_j] << 2U) - distrib_nr_child;
+
+                       /* Consider worse distance above best */
+                       if (distance_to_best[1] > 0 && distance_to_best[1] > distance_to_best[0])
+                               distance_to_best[0] = distance_to_best[1];
+                       if (distance_to_best[2] > 0 && distance_to_best[2] > distance_to_best[0])
+                               distance_to_best[0] = distance_to_best[2];
+                       if (distance_to_best[3] > 0 && distance_to_best[3] > distance_to_best[0])
+                               distance_to_best[0] = distance_to_best[3];
+
+                       /*
+                        * If our worse distance is better than overall,
+                        * we become new best candidate.
+                        */
+                       if (distance_to_best[0] < overall_best_distance) {
+                               overall_best_distance = distance_to_best[0];
+                               bitsel[0] = bit_i;
+                               bitsel[1] = bit_j;
+                       }
+               }
+       }
+
+       dbg_printf("2 dimensions pool bit selection: (%u,%u)\n", bitsel[0], bitsel[1]);
+
+       /* Return our bit selection */
+       _bitsel[0] = bitsel[0];
+       _bitsel[1] = bitsel[1];
+}
+
+/*
+ * find_nearest_type_index: starting from type_index, walk the ja_types
+ * table until reaching an entry whose [min_child, max_child] range
+ * contains nr_nodes, and return the index of that entry.
+ * Returns NODE_INDEX_NULL when nr_nodes is zero.
+ */
+static
+unsigned int find_nearest_type_index(unsigned int type_index,
+               unsigned int nr_nodes)
+{
+       assert(type_index != NODE_INDEX_NULL);
+       if (!nr_nodes)
+               return NODE_INDEX_NULL;
+
+       while (1) {
+               const struct cds_ja_type *candidate = &ja_types[type_index];
+
+               if (nr_nodes >= candidate->min_child
+                               && nr_nodes <= candidate->max_child)
+                       return type_index;
+               /* Below the range: step down. Above the range: step up. */
+               if (nr_nodes < candidate->min_child)
+                       type_index--;
+               else
+                       type_index++;
+       }
+}
+
+/*
+ * ja_node_recompact: rebuild a node into the node type matching its
+ * population after adding or removing one child.
+ *
+ * mode selects the operation:
+ *   - JA_RECOMPACT_ADD_SAME: add child n; the target type is searched
+ *     from old_type_index for nr_child + 1 children.
+ *   - JA_RECOMPACT_ADD_NEXT: add child n; a NULL shadow_node or a
+ *     NODE_INDEX_NULL old type starts from type index 0.
+ *   - JA_RECOMPACT_DEL: skip the child whose flag equals
+ *     *nullify_node_flag_ptr; the target type is searched for
+ *     nr_child - 1 children (NODE_INDEX_NULL when none remains).
+ *
+ * On success, the recompacted node is returned through
+ * old_node_flag_ptr and, if there was an old node, its shadow node is
+ * cleared.
+ *
+ * Return 0 on success, or a negative error value otherwise (-ENOMEM
+ * on allocation failure, -EINVAL on unexpected mode or node type).
+ */
+static
+int ja_node_recompact(enum ja_recompact mode,
+               struct cds_ja *ja,
+               unsigned int old_type_index,
+               const struct cds_ja_type *old_type,
+               struct cds_ja_inode *old_node,
+               struct cds_ja_shadow_node *shadow_node,
+               struct cds_ja_inode_flag **old_node_flag_ptr, uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag,
+               struct cds_ja_inode_flag **nullify_node_flag_ptr,
+               int level)
+{
+       unsigned int new_type_index;
+       struct cds_ja_inode *new_node;
+       struct cds_ja_shadow_node *new_shadow_node = NULL;
+       const struct cds_ja_type *new_type;
+       struct cds_ja_inode_flag *new_node_flag, *old_node_flag;
+       int ret;
+       int fallback = 0;
+
+       old_node_flag = *old_node_flag_ptr;
+
+       /*
+        * Need to find nearest type index even for ADD_SAME, because
+        * this recompaction, when applied to linear nodes, will garbage
+        * collect dummy (NULL) entries, and can therefore cause a few
+        * linear representations to be skipped.
+        */
+       switch (mode) {
+       case JA_RECOMPACT_ADD_SAME:
+               new_type_index = find_nearest_type_index(old_type_index,
+                       shadow_node->nr_child + 1);
+               dbg_printf("Recompact for node with %u children\n",
+                       shadow_node->nr_child + 1);
+               break;
+       case JA_RECOMPACT_ADD_NEXT:
+               if (!shadow_node || old_type_index == NODE_INDEX_NULL) {
+                       new_type_index = 0;
+                       dbg_printf("Recompact for NULL\n");
+               } else {
+                       new_type_index = find_nearest_type_index(old_type_index,
+                               shadow_node->nr_child + 1);
+                       dbg_printf("Recompact for node with %u children\n",
+                               shadow_node->nr_child + 1);
+               }
+               break;
+       case JA_RECOMPACT_DEL:
+               new_type_index = find_nearest_type_index(old_type_index,
+                       shadow_node->nr_child - 1);
+               dbg_printf("Recompact for node with %u children\n",
+                       shadow_node->nr_child - 1);
+               break;
+       default:
+               assert(0);
+               /* Never use new_type_index uninitialized if asserts are off. */
+               return -EINVAL;
+       }
+
+retry:         /* for fallback */
+       dbg_printf("Recompact from type %d to type %d\n",
+                       old_type_index, new_type_index);
+       new_type = &ja_types[new_type_index];
+       if (new_type_index != NODE_INDEX_NULL) {
+               new_node = alloc_cds_ja_node(ja, new_type);
+               if (!new_node)
+                       return -ENOMEM;
+
+               if (new_type->type_class == RCU_JA_POOL) {
+                       /*
+                        * Pool nodes encode the bit(s) used to select a
+                        * pool in the node flag: compute them from the
+                        * distribution of the children to store.
+                        */
+                       switch (new_type->nr_pool_order) {
+                       case 1:
+                       {
+                               unsigned int node_distrib_bitsel;
+
+                               node_distrib_bitsel =
+                                       ja_node_sum_distribution_1d(mode, ja,
+                                               old_type_index, old_type,
+                                               old_node, shadow_node,
+                                               n, child_node_flag,
+                                               nullify_node_flag_ptr);
+                               assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
+                               new_node_flag = ja_node_flag_pool_1d(new_node,
+                                       new_type_index, node_distrib_bitsel);
+                               break;
+                       }
+                       case 2:
+                       {
+                               unsigned int node_distrib_bitsel[2];
+
+                               ja_node_sum_distribution_2d(mode, ja,
+                                       old_type_index, old_type,
+                                       old_node, shadow_node,
+                                       n, child_node_flag,
+                                       nullify_node_flag_ptr,
+                                       node_distrib_bitsel);
+                               assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
+                               assert(!((unsigned long) new_node & JA_POOL_2D_MASK));
+                               new_node_flag = ja_node_flag_pool_2d(new_node,
+                                       new_type_index, node_distrib_bitsel);
+                               break;
+                       }
+                       default:
+                               assert(0);
+                               /*
+                                * No shadow node exists yet: only the
+                                * raw node needs to be released.
+                                */
+                               free_cds_ja_node(ja, new_node);
+                               return -EINVAL;
+                       }
+               } else {
+                       new_node_flag = ja_node_flag(new_node, new_type_index);
+               }
+
+               dbg_printf("Recompact inherit lock from %p\n", shadow_node);
+               new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja, level);
+               if (!new_shadow_node) {
+                       free_cds_ja_node(ja, new_node);
+                       return -ENOMEM;
+               }
+               if (fallback)
+                       new_shadow_node->fallback_removal_count =
+                                               JA_FALLBACK_REMOVAL_COUNT;
+       } else {
+               /* Recompacting to NULL: there is no replacement node. */
+               new_node = NULL;
+               new_node_flag = NULL;
+       }
+
+       assert(mode != JA_RECOMPACT_ADD_NEXT || old_type->type_class != RCU_JA_PIGEON);
+
+       if (new_type_index == NODE_INDEX_NULL)
+               goto skip_copy;
+
+       /* Copy every live child of the old node into the new node. */
+       switch (old_type->type_class) {
+       case RCU_JA_LINEAR:
+       {
+               uint8_t nr_child =
+                       ja_linear_node_get_nr_child(old_type, old_node);
+               unsigned int i;
+
+               for (i = 0; i < nr_child; i++) {
+                       struct cds_ja_inode_flag *iter;
+                       uint8_t v;
+
+                       ja_linear_node_get_ith_pos(old_type, old_node, i, &v, &iter);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
+                                       new_shadow_node,
+                                       v, iter);
+                       if (new_type->type_class == RCU_JA_POOL && ret) {
+                               goto fallback_toosmall;
+                       }
+                       assert(!ret);
+               }
+               break;
+       }
+       case RCU_JA_POOL:
+       {
+               unsigned int pool_nr;
+
+               for (pool_nr = 0; pool_nr < (1U << old_type->nr_pool_order); pool_nr++) {
+                       struct cds_ja_inode *pool =
+                               ja_pool_node_get_ith_pool(old_type,
+                                       old_node, pool_nr);
+                       uint8_t nr_child =
+                               ja_linear_node_get_nr_child(old_type, pool);
+                       unsigned int j;
+
+                       for (j = 0; j < nr_child; j++) {
+                               struct cds_ja_inode_flag *iter;
+                               uint8_t v;
+
+                               ja_linear_node_get_ith_pos(old_type, pool,
+                                               j, &v, &iter);
+                               if (!iter)
+                                       continue;
+                               if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                                       continue;
+                               ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
+                                               new_shadow_node,
+                                               v, iter);
+                               if (new_type->type_class == RCU_JA_POOL
+                                               && ret) {
+                                       goto fallback_toosmall;
+                               }
+                               assert(!ret);
+                       }
+               }
+               break;
+       }
+       case RCU_JA_NULL:
+               assert(mode == JA_RECOMPACT_ADD_NEXT);
+               break;
+       case RCU_JA_PIGEON:
+       {
+               unsigned int i;
+
+               assert(mode == JA_RECOMPACT_DEL);
+               for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
+                       struct cds_ja_inode_flag *iter;
+
+                       iter = ja_pigeon_node_get_ith_pos(old_type, old_node, i);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
+                                       new_shadow_node,
+                                       i, iter);
+                       if (new_type->type_class == RCU_JA_POOL && ret) {
+                               goto fallback_toosmall;
+                       }
+                       assert(!ret);
+               }
+               break;
+       }
+       default:
+               assert(0);
+               /*
+                * Unknown source node class: discard the replacement
+                * built so far (same teardown as fallback_toosmall)
+                * rather than leaking it.
+                */
+               ret = rcuja_shadow_clear(ja->ht, new_node_flag, new_shadow_node,
+                               RCUJA_SHADOW_CLEAR_FREE_NODE);
+               assert(!ret);
+               ret = -EINVAL;
+               goto end;
+       }
+skip_copy:
+
+       if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
+               /* add node */
+               ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
+                               new_shadow_node,
+                               n, child_node_flag);
+               if (new_type->type_class == RCU_JA_POOL && ret) {
+                       goto fallback_toosmall;
+               }
+               assert(!ret);
+       }
+
+       if (fallback) {
+               dbg_printf("Using fallback for %u children, node type index: %u, mode %s\n",
+                       new_shadow_node->nr_child, old_type_index, mode == JA_RECOMPACT_ADD_NEXT ? "add_next" :
+                               (mode == JA_RECOMPACT_DEL ? "del" : "add_same"));
+               if (ja_debug_counters())
+                       uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]);
+       }
+
+       /* Return pointer to new recompacted node through old_node_flag_ptr */
+       *old_node_flag_ptr = new_node_flag;
+       if (old_node) {
+               int flags;
+
+               flags = RCUJA_SHADOW_CLEAR_FREE_NODE;
+               /*
+                * It is OK to free the lock associated with a node
+                * going to NULL, since we are holding the parent lock.
+                * This synchronizes removal with re-add of that node.
+                */
+               if (new_type_index == NODE_INDEX_NULL)
+                       flags |= RCUJA_SHADOW_CLEAR_FREE_LOCK;
+               ret = rcuja_shadow_clear(ja->ht, old_node_flag, shadow_node,
+                               flags);
+               assert(!ret);
+       }
+
+       ret = 0;
+end:
+       return ret;
+
+fallback_toosmall:
+       /* fallback if next pool is too small */
+       assert(new_shadow_node);
+       ret = rcuja_shadow_clear(ja->ht, new_node_flag, new_shadow_node,
+                       RCUJA_SHADOW_CLEAR_FREE_NODE);
+       assert(!ret);
+
+       /* Every case below either jumps back to retry or returns. */
+       switch (mode) {
+       case JA_RECOMPACT_ADD_SAME:
+               /*
+                * JA_RECOMPACT_ADD_SAME is only triggered if a linear
+                * node within a pool has unused entries. It should
+                * therefore _never_ be too small.
+                */
+               assert(0);
+
+               /* Fall-through */
+       case JA_RECOMPACT_ADD_NEXT:
+       {
+               const struct cds_ja_type *next_type;
+
+               /*
+                * Recompaction attempt on add failed. Should only
+                * happen if target node type is pool. Caused by
+                * hard-to-split distribution. Recompact using the next
+                * distribution size.
+                */
+               assert(new_type->type_class == RCU_JA_POOL);
+               next_type = &ja_types[new_type_index + 1];
+               new_type_index++;
+               /*
+                * Going to the next pool size is not flagged as a
+                * fallback when our population fits within its range.
+                */
+               if (shadow_node->nr_child + 1 < next_type->min_child
+                               || shadow_node->nr_child + 1 > next_type->max_child) {
+                       dbg_printf("Add fallback to type %d\n", new_type_index);
+                       if (ja_debug_counters())
+                               uatomic_inc(&ja->nr_fallback);
+                       fallback = 1;
+               }
+               goto retry;
+       }
+       case JA_RECOMPACT_DEL:
+               /*
+                * Recompaction attempt on delete failed. Should only
+                * happen if target node type is pool. This is caused by
+                * a hard-to-split distribution. Recompact on same node
+                * size, but flag current node as "fallback" to ensure
+                * we don't attempt recompaction before some activity
+                * has reshuffled our node.
+                */
+               assert(new_type->type_class == RCU_JA_POOL);
+               new_type_index = old_type_index;
+               dbg_printf("Delete fallback keeping type %d\n", new_type_index);
+               /*
+                * NOTE(review): unlike the add path, this increment is
+                * not guarded by ja_debug_counters() -- confirm intent.
+                */
+               uatomic_inc(&ja->nr_fallback);
+               fallback = 1;
+               goto retry;
+       default:
+               assert(0);
+               return -EINVAL;
+       }
+}
+
+/*
+ * ja_node_set_nth: try to store child_node_flag at position n of the
+ * node designated by *node_flag. When the in-place store reports
+ * -ENOSPC or -ERANGE, the node is recompacted (ADD_NEXT, respectively
+ * ADD_SAME) and the result of the recompaction is returned instead.
+ *
+ * Return 0 on success, -EAGAIN if need to retry, or other negative
+ * error value otherwise.
+ */
+static
+int ja_node_set_nth(struct cds_ja *ja,
+               struct cds_ja_inode_flag **node_flag, uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag,
+               struct cds_ja_shadow_node *shadow_node,
+               int level)
+{
+       struct cds_ja_inode_flag *cur_flag = *node_flag;
+       struct cds_ja_inode *inode = ja_node_ptr(cur_flag);
+       unsigned int idx = ja_node_type(cur_flag);
+       const struct cds_ja_type *node_type = &ja_types[idx];
+       enum ja_recompact recompact_mode;
+       int err;
+
+       dbg_printf("ja_node_set_nth for n=%u, node %p, shadow %p\n",
+               (unsigned int) n, inode, shadow_node);
+
+       err = _ja_node_set_nth(node_type, inode, cur_flag, shadow_node,
+                       n, child_node_flag);
+       if (err == -ENOSPC) {
+               /* Not enough space in node, need to recompact to next type. */
+               recompact_mode = JA_RECOMPACT_ADD_NEXT;
+       } else if (err == -ERANGE) {
+               /* Node needs to be recompacted. */
+               recompact_mode = JA_RECOMPACT_ADD_SAME;
+       } else {
+               /* Success or an error recompaction cannot address. */
+               return err;
+       }
+       return ja_node_recompact(recompact_mode, ja, idx, node_type, inode,
+                       shadow_node, node_flag, n, child_node_flag, NULL, level);
+}
+
+/*
+ * ja_node_clear_ptr: clear the child slot node_flag_ptr within the
+ * parent node designated by *parent_node_flag_ptr. When the in-place
+ * clear reports -EFBIG, the parent is recompacted in JA_RECOMPACT_DEL
+ * mode and the result of the recompaction is returned instead.
+ *
+ * Return 0 on success, -EAGAIN if need to retry, or other negative
+ * error value otherwise.
+ */
+static
+int ja_node_clear_ptr(struct cds_ja *ja,
+               struct cds_ja_inode_flag **node_flag_ptr,       /* Pointer to location to nullify */
+               struct cds_ja_inode_flag **parent_node_flag_ptr,        /* Address of parent ptr in its parent */
+               struct cds_ja_shadow_node *shadow_node,         /* of parent */
+               uint8_t n, int level)
+{
+       struct cds_ja_inode_flag *parent_flag = *parent_node_flag_ptr;
+       struct cds_ja_inode *parent = ja_node_ptr(parent_flag);
+       unsigned int idx = ja_node_type(parent_flag);
+       const struct cds_ja_type *parent_type = &ja_types[idx];
+       int err;
+
+       dbg_printf("ja_node_clear_ptr for node %p, shadow %p, target ptr %p\n",
+               parent, shadow_node, node_flag_ptr);
+
+       err = _ja_node_clear_ptr(parent_type, parent, parent_flag,
+                       shadow_node, node_flag_ptr, n);
+       if (err != -EFBIG)
+               return err;
+
+       /* Should try recompaction. */
+       return ja_node_recompact(JA_RECOMPACT_DEL, ja, idx, parent_type, parent,
+                       shadow_node, parent_node_flag_ptr, n, NULL,
+                       node_flag_ptr, level);
+}
+
+/*
+ * cds_ja_lookup: exact-match lookup of key. Walks from the root, using
+ * one byte of the key per level (most significant byte first).
+ * Returns NULL when key is above ja->key_max, equals UINT64_MAX, or
+ * when any level along the path is unpopulated.
+ */
+struct cds_ja_node *cds_ja_lookup(struct cds_ja *ja, uint64_t key)
+{
+       struct cds_ja_inode_flag *cur;
+       unsigned int depth, level;
+
+       if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
+               return NULL;
+       depth = ja->tree_depth;
+       cur = rcu_dereference(ja->root);
+
+       /* level 0: root node */
+       if (!ja_node_ptr(cur))
+               return NULL;
+
+       level = 1;
+       while (level < depth) {
+               unsigned int shift = JA_BITS_PER_BYTE * (depth - level - 1);
+               uint8_t key_byte = (uint8_t) (key >> shift);
+
+               cur = ja_node_get_nth(cur, NULL, key_byte);
+               dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
+                               (unsigned int) key_byte, cur);
+               if (!ja_node_ptr(cur))
+                       return NULL;
+               level++;
+       }
+
+       /* Every level was populated: what we hold is the actual match. */
+       return (struct cds_ja_node *) cur;
+}
+
+/*
+ * cds_ja_lookup_inequality: find the node matching key or, failing an
+ * exact match, a neighbouring populated key on the side selected by
+ * mode:
+ *   - JA_LOOKUP_BE: look for a sibling to the left, then follow
+ *     rightmost children.
+ *   - JA_LOOKUP_AE: look for a sibling to the right, then follow
+ *     leftmost children.
+ *
+ * When a node is returned and result_key is non-NULL, *result_key
+ * receives the key of that node. Returns NULL for an unknown mode, a
+ * key above ja->key_max or equal to UINT64_MAX, an empty root, or when
+ * no sibling is found all the way up to the root.
+ */
+static
+struct cds_ja_node *cds_ja_lookup_inequality(struct cds_ja *ja, uint64_t key,
+               uint64_t *result_key, enum ja_lookup_inequality mode)
+{
+       int tree_depth, level;
+       struct cds_ja_inode_flag *node_flag, *cur_node_depth[JA_MAX_DEPTH];
+       uint8_t cur_key[JA_MAX_DEPTH];
+       uint64_t _result_key = 0;
+       enum ja_direction sibling_dir, minmax_dir;
+
+       /*
+        * Validate mode and resolve both search directions up front, so
+        * neither can be left uninitialized further down.
+        */
+       switch (mode) {
+       case JA_LOOKUP_BE:
+               sibling_dir = JA_LEFT;
+               minmax_dir = JA_RIGHTMOST;
+               break;
+       case JA_LOOKUP_AE:
+               sibling_dir = JA_RIGHT;
+               minmax_dir = JA_LEFTMOST;
+               break;
+       default:
+               return NULL;
+       }
+       if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
+               return NULL;
+
+       memset(cur_node_depth, 0, sizeof(cur_node_depth));
+       memset(cur_key, 0, sizeof(cur_key));
+       tree_depth = ja->tree_depth;
+       node_flag = rcu_dereference(ja->root);
+       cur_node_depth[0] = node_flag;
+
+       /* level 0: root node */
+       if (!ja_node_ptr(node_flag))
+               return NULL;
+
+       /*
+        * Exact-match descent, remembering the node and key byte
+        * reached at each level for the sibling search below.
+        */
+       for (level = 1; level < tree_depth; level++) {
+               uint8_t iter_key;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
+               node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
+               if (!ja_node_ptr(node_flag))
+                       break;
+               cur_key[level - 1] = iter_key;
+               cur_node_depth[level] = node_flag;
+               dbg_printf("cds_ja_lookup_inequality iter key lookup %u finds node_flag %p\n",
+                               (unsigned int) iter_key, node_flag);
+       }
+
+       if (level == tree_depth) {
+               /* Last level lookup succeeded. We got an equal match. */
+               if (result_key)
+                       *result_key = key;
+               return (struct cds_ja_node *) node_flag;
+       }
+
+       /*
+        * Find highest value left/right of current node.
+        * Current node is cur_node_depth[level].
+        * Start at current level. If we cannot find any key left/right
+        * of ours, go one level up, seek highest value left/right of
+        * current (recursively), and when we find one, get the
+        * rightmost/leftmost child of its rightmost/leftmost child
+        * (recursively).
+        */
+       for (; level > 0; level--) {
+               uint8_t iter_key;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
+               node_flag = ja_node_get_leftright(cur_node_depth[level - 1],
+                               iter_key, &cur_key[level - 1], sibling_dir);
+               dbg_printf("cds_ja_lookup_inequality find sibling from %u at %u finds node_flag %p\n",
+                               (unsigned int) iter_key, (unsigned int) cur_key[level - 1],
+                               node_flag);
+               /* If found left/right sibling, find rightmost/leftmost child. */
+               if (ja_node_ptr(node_flag))
+                       break;
+       }
+
+       if (!level) {
+               /* Reached the root and could not find a left/right sibling. */
+               return NULL;
+       }
+
+       level++;
+
+       /*
+        * From this point, we are guaranteed to be able to find a
+        * "below than"/"above than" match. ja_attach_node() and
+        * ja_detach_node() both guarantee that it is not possible for a
+        * lookup to reach a dead-end.
+        */
+
+       /*
+        * Find rightmost/leftmost child of rightmost/leftmost child
+        * (recursively).
+        */
+       for (; level < tree_depth; level++) {
+               node_flag = ja_node_get_minmax(node_flag, &cur_key[level - 1], minmax_dir);
+               dbg_printf("cds_ja_lookup_inequality find minmax at %u finds node_flag %p\n",
+                               (unsigned int) cur_key[level - 1],
+                               node_flag);
+               if (!ja_node_ptr(node_flag))
+                       break;
+       }
+
+       assert(level == tree_depth);
+
+       if (result_key) {
+               /* Reassemble the matched key from the per-level key bytes. */
+               for (level = 1; level < tree_depth; level++) {
+                       _result_key |= ((uint64_t) cur_key[level - 1])
+                                       << (JA_BITS_PER_BYTE * (tree_depth - level - 1));
+               }
+               *result_key = _result_key;
+       }
+       return (struct cds_ja_node *) node_flag;
+}
+
+/*
+ * cds_ja_lookup_below_equal: inequality lookup of key in JA_LOOKUP_BE
+ * mode. result_key is passed through to cds_ja_lookup_inequality().
+ */
+struct cds_ja_node *cds_ja_lookup_below_equal(struct cds_ja *ja,
+               uint64_t key, uint64_t *result_key)
+{
+       struct cds_ja_node *match;
+
+       dbg_printf("cds_ja_lookup_below_equal key %" PRIu64 "\n", key);
+       match = cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_BE);
+       return match;
+}
+
+/*
+ * cds_ja_lookup_above_equal: inequality lookup of key in JA_LOOKUP_AE
+ * mode. result_key is passed through to cds_ja_lookup_inequality().
+ */
+struct cds_ja_node *cds_ja_lookup_above_equal(struct cds_ja *ja,
+               uint64_t key, uint64_t *result_key)
+{
+       struct cds_ja_node *match;
+
+       dbg_printf("cds_ja_lookup_above_equal key %" PRIu64 "\n", key);
+       match = cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_AE);
+       return match;
+}
+
+/*
+ * We reached an unpopulated node. Create it and the children we need,
+ * and then attach the entire branch to the current node. This may
+ * trigger recompaction of the current node.  Locks needed: node lock
+ * (for add), and, possibly, parent node lock (to update pointer due to
+ * node recompaction).
+ *
+ * First take node lock, check if recompaction is needed, then take
+ * parent lock (if needed).  Then we can proceed to create the new
+ * branch. Publish the new branch, and release locks.
+ * TODO: we currently always take the parent lock even when not needed.
+ *
+ * ja_attach_node() ensures that a lookup will _never_ see a branch that
+ * leads to a dead-end: before attaching a branch, the entire content of
+ * the new branch is populated, thus creating a cluster, before
+ * attaching the cluster to the rest of the tree, thus making it visible
+ * to lookups.
+ */
+static
+int ja_attach_node(struct cds_ja *ja,
+               struct cds_ja_inode_flag **attach_node_flag_ptr,
+               struct cds_ja_inode_flag *attach_node_flag,
+               struct cds_ja_inode_flag *parent_attach_node_flag,
+               struct cds_ja_inode_flag **old_node_flag_ptr,
+               struct cds_ja_inode_flag *old_node_flag,
+               uint64_t key,
+               unsigned int level,
+               struct cds_ja_node *child_node)
+{
+       struct cds_ja_shadow_node *shadow_node = NULL,
+                       *parent_shadow_node = NULL;
+       struct cds_ja_inode_flag *iter_node_flag, *iter_dest_node_flag;
+       int ret, i;
+       struct cds_ja_inode_flag *created_nodes[JA_MAX_DEPTH];
+       int nr_created_nodes = 0;
+
+       dbg_printf("Attach node at level %u (old_node_flag %p, attach_node_flag_ptr %p attach_node_flag %p, parent_attach_node_flag %p)\n",
+               level, old_node_flag, attach_node_flag_ptr, attach_node_flag, parent_attach_node_flag);
+
+       assert(!old_node_flag);
+       if (attach_node_flag) {
+               shadow_node = rcuja_shadow_lookup_lock(ja->ht, attach_node_flag);
+               if (!shadow_node) {
+                       ret = -EAGAIN;
+                       goto end;
+               }
+       }
+       if (parent_attach_node_flag) {
+               parent_shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+                                               parent_attach_node_flag);
+               if (!parent_shadow_node) {
+                       ret = -EAGAIN;
+                       goto unlock_shadow;
+               }
+       }
+
+       if (old_node_flag_ptr && ja_node_ptr(*old_node_flag_ptr)) {
+               /*
+                * Target node has been updated between RCU lookup and
+                * lock acquisition. We need to re-try lookup and
+                * attach.
+                */
+               ret = -EAGAIN;
+               goto unlock_parent;
+       }
+
+       /*
+        * Perform a lookup query to handle the case where
+        * old_node_flag_ptr is NULL. We cannot use it to check if the
+        * node has been populated between RCU lookup and mutex
+        * acquisition.
+        */
+       if (!old_node_flag_ptr) {
+               uint8_t iter_key;
+               struct cds_ja_inode_flag *lookup_node_flag;
+               struct cds_ja_inode_flag **lookup_node_flag_ptr;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
+               lookup_node_flag = ja_node_get_nth(attach_node_flag,
+                       &lookup_node_flag_ptr,
+                       iter_key);
+               if (lookup_node_flag) {
+                       ret = -EEXIST;
+                       goto unlock_parent;
+               }
+       }
+
+       if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) !=
+                       ja_node_ptr(attach_node_flag)) {
+               /*
+                * Target node has been updated between RCU lookup and
+                * lock acquisition. We need to re-try lookup and
+                * attach.
+                */
+               ret = -EAGAIN;
+               goto unlock_parent;
+       }
+
+       /* Create new branch, starting from bottom */
+       iter_node_flag = (struct cds_ja_inode_flag *) child_node;
+
+       for (i = ja->tree_depth - 1; i >= (int) level; i--) {
+               uint8_t iter_key;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - i - 1)));
+               dbg_printf("branch creation level %d, key %u\n",
+                               i, (unsigned int) iter_key);
+               iter_dest_node_flag = NULL;
+               ret = ja_node_set_nth(ja, &iter_dest_node_flag,
+                       iter_key,
+                       iter_node_flag,
+                       NULL, i);
+               if (ret) {
+                       dbg_printf("branch creation error %d\n", ret);
+                       goto check_error;
+               }
+               created_nodes[nr_created_nodes++] = iter_dest_node_flag;
+               iter_node_flag = iter_dest_node_flag;
+       }
+       assert(level > 0);
+
+       /* Publish branch */
+       if (level == 1) {
+               /*
+                * Attaching to root node.
+                */
+               rcu_assign_pointer(ja->root, iter_node_flag);
+       } else {
+               uint8_t iter_key;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
+               dbg_printf("publish branch at level %d, key %u\n",
+                               level - 1, (unsigned int) iter_key);
+               /* We need to use set_nth on the previous level. */
+               iter_dest_node_flag = attach_node_flag;
+               ret = ja_node_set_nth(ja, &iter_dest_node_flag,
+                       iter_key,
+                       iter_node_flag,
+                       shadow_node, level - 1);
+               if (ret) {
+                       dbg_printf("branch publish error %d\n", ret);
+                       goto check_error;
+               }
+               /*
+                * Attach branch
+                */
+               rcu_assign_pointer(*attach_node_flag_ptr, iter_dest_node_flag);
+       }
+
+       /* Success */
+       ret = 0;
+
+check_error:
+       if (ret) {
+               for (i = 0; i < nr_created_nodes; i++) {
+                       int tmpret;
+                       int flags;
+
+                       flags = RCUJA_SHADOW_CLEAR_FREE_LOCK;
+                       if (i)
+                               flags |= RCUJA_SHADOW_CLEAR_FREE_NODE;
+                       tmpret = rcuja_shadow_clear(ja->ht,
+                                       created_nodes[i],
+                                       NULL,
+                                       flags);
+                       assert(!tmpret);
+               }
+       }
+unlock_parent:
+       if (parent_shadow_node)
+               rcuja_shadow_unlock(parent_shadow_node);
+unlock_shadow:
+       if (shadow_node)
+               rcuja_shadow_unlock(shadow_node);
+end:
+       return ret;
+}
+
+/*
+ * Lock the parent containing the pointer to list of duplicates, and add
+ * node to the tail of this list.
+ *
+ * ja: Judy array being updated.
+ * parent_node_flag: key used to look up and lock the parent's shadow node.
+ * node_flag_ptr: location holding the head of the list of duplicates.
+ * node_flag: head of the list as observed by the caller's lookup,
+ *            performed before the lock was taken.
+ * last_node: tail of the list as observed by the caller's lookup.
+ * node: node to append.
+ *
+ * Failure can happen if concurrent update changes the parent or the
+ * list of duplicates before we get the lock. We return -EAGAIN in that
+ * case, so the caller can redo its lookup.
+ * Return 0 on success, negative error value on failure.
+ */
+static
+int ja_chain_node(struct cds_ja *ja,
+               struct cds_ja_inode_flag *parent_node_flag,
+               struct cds_ja_inode_flag **node_flag_ptr,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_node *last_node,
+               struct cds_ja_node *node)
+{
+       struct cds_ja_shadow_node *shadow_node;
+       struct cds_ja_node *iter_node;
+       int ret = 0, found = 0;
+
+       shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
+       if (!shadow_node) {
+               return -EAGAIN;
+       }
+       /*
+        * Ensure that the head of the list has not been replaced since
+        * the caller's lookup.
+        */
+       iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+       if ((struct cds_ja_node *) ja_node_ptr(*node_flag_ptr) != iter_node) {
+               ret = -EAGAIN;
+               goto end;
+       }
+       /*
+        * Ensure that previous node is still there at end of list.
+        */
+       cds_ja_for_each_duplicate(iter_node) {
+               if (iter_node == last_node)
+                       found = 1;
+       }
+       /*
+        * Being present in the list is not sufficient: if another
+        * thread appended a node after last_node between our lookup and
+        * lock acquisition, storing into last_node->next below would
+        * drop that node from the list. last_node is only dereferenced
+        * once we know it is still linked.
+        */
+       if (!found || last_node->next != NULL) {
+               ret = -EAGAIN;
+               goto end;
+       }
+       /*
+        * Add node to tail of list to ensure that RCU traversals will
+        * always see either the prior node or the newly added if
+        * executed concurrently with a sequence of add followed by del
+        * on the same key. Safe against concurrent RCU read traversals.
+        */
+       node->next = NULL;
+       rcu_assign_pointer(last_node->next, node);
+end:
+       rcuja_shadow_unlock(shadow_node);
+       return ret;
+}
+
+/*
+ * Common implementation of cds_ja_add() and cds_ja_add_unique().
+ *
+ * ja: Judy array to add to.
+ * key: key of the node; rejected with -EINVAL when larger than
+ *      ja->key_max or equal to UINT64_MAX.
+ * node: node to insert.
+ * unique_node_ret: when non-NULL and the key is already populated, the
+ *      first node found for this key is stored there and -EEXIST is
+ *      returned without modifying the array. When NULL, node is chained
+ *      after the last duplicate instead.
+ *
+ * The lookup is redone from the root whenever the attach or chain step
+ * reports -EAGAIN or -EEXIST.
+ * NOTE(review): the walk uses rcu_dereference(), so the caller
+ * presumably needs to hold the RCU read-side lock - confirm against the
+ * public header.
+ *
+ * Return 0 on success, negative error value on failure.
+ */
+static
+int _cds_ja_add(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node,
+               struct cds_ja_node **unique_node_ret)
+{
+       unsigned int tree_depth, i;
+       struct cds_ja_inode_flag *attach_node_flag,
+               *parent_node_flag,
+               *parent2_node_flag,
+               *node_flag,
+               *parent_attach_node_flag;
+       struct cds_ja_inode_flag **attach_node_flag_ptr,
+               **parent_node_flag_ptr,
+               **node_flag_ptr;
+       int ret;
+
+       if (caa_unlikely(key > ja->key_max || key == UINT64_MAX)) {
+               return -EINVAL;
+       }
+       tree_depth = ja->tree_depth;
+
+retry:
+       dbg_printf("cds_ja_add attempt: key %" PRIu64 ", node %p\n",
+               key, node);
+       parent2_node_flag = NULL;
+       parent_node_flag =
+               (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
+       parent_node_flag_ptr = NULL;
+       node_flag = rcu_dereference(ja->root);
+       node_flag_ptr = &ja->root;
+
+       /*
+        * Iterate on all internal levels. On exit, i is the level at
+        * which the walk stopped; it is handed to ja_attach_node() below.
+        */
+       for (i = 1; i < tree_depth; i++) {
+               uint8_t iter_key;
+
+               if (!ja_node_ptr(node_flag))
+                       break;
+               dbg_printf("cds_ja_add iter parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
+                               parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+               /* Key byte selecting the child at this level. */
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
+               parent2_node_flag = parent_node_flag;
+               parent_node_flag = node_flag;
+               parent_node_flag_ptr = node_flag_ptr;
+               node_flag = ja_node_get_nth(node_flag,
+                       &node_flag_ptr,
+                       iter_key);
+       }
+
+       /*
+        * We reached either bottom of tree or internal NULL node,
+        * simply add node to last internal level, or chain it if key is
+        * already present.
+        */
+       if (!ja_node_ptr(node_flag)) {
+               dbg_printf("cds_ja_add NULL parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
+                               parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+
+               attach_node_flag = parent_node_flag;
+               attach_node_flag_ptr = parent_node_flag_ptr;
+               parent_attach_node_flag = parent2_node_flag;
+
+               ret = ja_attach_node(ja, attach_node_flag_ptr,
+                               attach_node_flag,
+                               parent_attach_node_flag,
+                               node_flag_ptr,
+                               node_flag,
+                               key, i, node);
+       } else {
+               struct cds_ja_node *iter_node, *last_node = NULL;
+
+               /* Unique add: report the existing node, leave array untouched. */
+               if (unique_node_ret) {
+                       *unique_node_ret = (struct cds_ja_node *) ja_node_ptr(node_flag);
+                       return -EEXIST;
+               }
+
+               /* Find last duplicate */
+               iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+               cds_ja_for_each_duplicate_rcu(iter_node)
+                       last_node = iter_node;
+
+               dbg_printf("cds_ja_add duplicate parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
+                               parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+
+               attach_node_flag = node_flag;
+               attach_node_flag_ptr = node_flag_ptr;
+               parent_attach_node_flag = parent_node_flag;
+
+               ret = ja_chain_node(ja,
+                       parent_attach_node_flag,
+                       attach_node_flag_ptr,
+                       attach_node_flag,
+                       last_node,
+                       node);
+       }
+       /* The tree changed under us: redo lookup and insertion. */
+       if (ret == -EAGAIN || ret == -EEXIST)
+               goto retry;
+
+       return ret;
+}
+
+/*
+ * Add node at key, allowing duplicates: passing a NULL unique_node_ret
+ * to _cds_ja_add() makes it chain node after any node already present
+ * for this key.
+ * Return 0 on success, negative error value on failure.
+ */
+int cds_ja_add(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node)
+{
+       int status;
+
+       status = _cds_ja_add(ja, key, node, NULL);
+       return status;
+}
+
+/*
+ * Add node at key only if the key is not already populated.
+ *
+ * Return the node already present for this key when _cds_ja_add()
+ * reports -EEXIST. Every other outcome returns node itself; note that
+ * this includes the -EINVAL (invalid key) result, which therefore
+ * cannot be told apart from a successful add through the return value.
+ */
+struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node)
+{
+       struct cds_ja_node *existing;
+
+       /* existing is only written (and only read) on -EEXIST. */
+       if (_cds_ja_add(ja, key, node, &existing) != -EEXIST)
+               return node;
+       return existing;
+}
+
+/*
+ * Remove the branch leading to the last node of a key.
+ *
+ * snapshot: node seen at each level by the caller's lookup; entry 0 is
+ *           the address of the root pointer (used for shadow node
+ *           lookup only), the last entry is the leaf.
+ * snapshot_ptr: for each snapshot entry, the location it was read from.
+ * snapshot_n: key byte associated with each level.
+ * nr_snapshot: number of entries; must be ja->tree_depth + 1.
+ * key, node: not referenced by this function's body.
+ *
+ * Return -EAGAIN when a shadow node cannot be looked up and locked,
+ * -ENOENT when a snapshot entry no longer matches the tree, otherwise
+ * the result of ja_node_clear_ptr() (0 on success).
+ *
+ * Note: there is no need to lookup the pointer address associated with
+ * each node's nth item after taking the lock: it's already been done by
+ * cds_ja_del while holding the rcu read-side lock, and our node rules
+ * ensure that when a match value -> pointer is found in a node, it is
+ * _NEVER_ changed for that node without recompaction, and recompaction
+ * reallocates the node.
+ * However, when a child is removed from "linear" nodes, its pointer
+ * is set to NULL. We therefore check, while holding the locks, if this
+ * pointer is NULL, and return -ENOENT to the caller if it is the case.
+ *
+ * ja_detach_node() ensures that a lookup will _never_ see a branch that
+ * leads to a dead-end: when removing branch, it makes sure to perform
+ * the "cut" at the highest node that has only one child, effectively
+ * replacing it with a NULL pointer.
+ */
+static
+int ja_detach_node(struct cds_ja *ja,
+               struct cds_ja_inode_flag **snapshot,
+               struct cds_ja_inode_flag ***snapshot_ptr,
+               uint8_t *snapshot_n,
+               int nr_snapshot,
+               uint64_t key,
+               struct cds_ja_node *node)
+{
+       struct cds_ja_shadow_node *shadow_nodes[JA_MAX_DEPTH];
+       struct cds_ja_inode_flag **node_flag_ptr = NULL,
+                       *parent_node_flag = NULL,
+                       **parent_node_flag_ptr = NULL;
+       struct cds_ja_inode_flag *iter_node_flag;
+       int ret, i, nr_shadow = 0, nr_clear = 0, nr_branch = 0;
+       uint8_t n = 0;
+
+       assert(nr_snapshot == ja->tree_depth + 1);
+
+       /*
+        * From the last internal level node going up, get the node
+        * lock, check if the node has only one child left. If it is the
+        * case, we continue iterating upward. When we reach a node
+        * which has more than one child left, we lock the parent, and
+        * proceed to the node deletion (removing its children too).
+        */
+       for (i = nr_snapshot - 2; i >= 1; i--) {
+               struct cds_ja_shadow_node *shadow_node;
+
+               shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+                                       snapshot[i]);
+               if (!shadow_node) {
+                       ret = -EAGAIN;
+                       goto end;
+               }
+               /* Every lock taken is recorded so "end" can release it. */
+               shadow_nodes[nr_shadow++] = shadow_node;
+
+               /*
+                * Check if node has been removed between RCU
+                * lookup and lock acquisition.
+                */
+               assert(snapshot_ptr[i + 1]);
+               if (ja_node_ptr(*snapshot_ptr[i + 1])
+                               != ja_node_ptr(snapshot[i + 1])) {
+                       ret = -ENOENT;
+                       goto end;
+               }
+
+               assert(shadow_node->nr_child > 0);
+               /*
+                * Single-child nodes below level 1 are cleared below;
+                * nr_branch counts every node visited by this loop.
+                */
+               if (shadow_node->nr_child == 1 && i > 1)
+                       nr_clear++;
+               nr_branch++;
+               if (shadow_node->nr_child > 1 || i == 1) {
+                       /* Lock parent and break */
+                       shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+                                       snapshot[i - 1]);
+                       if (!shadow_node) {
+                               ret = -EAGAIN;
+                               goto end;
+                       }
+                       shadow_nodes[nr_shadow++] = shadow_node;
+
+                       /*
+                        * Check if node has been removed between RCU
+                        * lookup and lock acquisition.
+                        */
+                       assert(snapshot_ptr[i]);
+                       if (ja_node_ptr(*snapshot_ptr[i])
+                                       != ja_node_ptr(snapshot[i])) {
+                               ret = -ENOENT;
+                               goto end;
+                       }
+
+                       /*
+                        * Record where the cut happens: the child slot
+                        * to nullify in node i, and where node i itself
+                        * is referenced from.
+                        */
+                       node_flag_ptr = snapshot_ptr[i + 1];
+                       n = snapshot_n[i + 1];
+                       parent_node_flag_ptr = snapshot_ptr[i];
+                       parent_node_flag = snapshot[i];
+
+                       if (i > 1) {
+                               /*
+                                * Lock parent's parent, in case we need
+                                * to recompact parent.
+                                */
+                               shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+                                               snapshot[i - 2]);
+                               if (!shadow_node) {
+                                       ret = -EAGAIN;
+                                       goto end;
+                               }
+                               shadow_nodes[nr_shadow++] = shadow_node;
+
+                               /*
+                                * Check if node has been removed between RCU
+                                * lookup and lock acquisition.
+                                */
+                               assert(snapshot_ptr[i - 1]);
+                               if (ja_node_ptr(*snapshot_ptr[i - 1])
+                                               != ja_node_ptr(snapshot[i - 1])) {
+                                       ret = -ENOENT;
+                                       goto end;
+                               }
+                       }
+
+                       break;
+               }
+       }
+
+       /*
+        * At this point, we want to delete all nodes that are about to
+        * be removed from shadow_nodes (except the last one, which is
+        * either the root or the parent of the upmost node with 1
+        * child). OK to free lock here, because RCU read lock is held,
+        * and free only performed in call_rcu.
+        */
+
+       for (i = 0; i < nr_clear; i++) {
+               ret = rcuja_shadow_clear(ja->ht,
+                               shadow_nodes[i]->node_flag,
+                               shadow_nodes[i],
+                               RCUJA_SHADOW_CLEAR_FREE_NODE
+                               | RCUJA_SHADOW_CLEAR_FREE_LOCK);
+               assert(!ret);
+       }
+
+       iter_node_flag = parent_node_flag;
+       /* Remove from parent */
+       if (nr_branch < 1)
+               abort();        /* Should never happen. */
+       ret = ja_node_clear_ptr(ja,
+               node_flag_ptr,          /* Pointer to location to nullify */
+               &iter_node_flag,        /* In: current parent ptr; out: parent ptr to publish */
+               shadow_nodes[nr_branch - 1],    /* of parent */
+               n, nr_branch - 1);
+       if (ret)
+               goto end;
+
+       dbg_printf("ja_detach_node: publish %p instead of %p\n",
+               iter_node_flag, *parent_node_flag_ptr);
+       /* Update address of parent ptr in its parent */
+       rcu_assign_pointer(*parent_node_flag_ptr, iter_node_flag);
+
+end:
+       /* Release every shadow node lock acquired above. */
+       for (i = 0; i < nr_shadow; i++)
+               rcuja_shadow_unlock(shadow_nodes[i]);
+       return ret;
+}
+
+/*
+ * Remove node from a list of duplicates that holds more than one node.
+ *
+ * parent_node_flag: key used to look up and lock the parent's shadow node.
+ * node_flag_ptr: location holding the head of the list of duplicates.
+ * node_flag: head of the list as observed by the caller's lookup.
+ * node: node to unlink.
+ *
+ * Return 0 on success. Return -EAGAIN when the parent's shadow node
+ * cannot be locked, when the head of the list changed since the
+ * caller's lookup, when node is no longer in the list, or when node is
+ * the only node left in the list (that case is not handled here).
+ */
+static
+int ja_unchain_node(struct cds_ja *ja,
+               struct cds_ja_inode_flag *parent_node_flag,
+               struct cds_ja_inode_flag **node_flag_ptr,
+               struct cds_ja_inode_flag *node_flag,
+               struct cds_ja_node *node)
+{
+       struct cds_ja_shadow_node *shadow_node;
+       struct cds_ja_node *iter_node, **iter_node_ptr, **prev_node_ptr = NULL;
+       int ret = 0, count = 0, found = 0;
+
+       shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
+       if (!shadow_node)
+               return -EAGAIN;
+       /* Head of the list replaced since the caller's lookup: retry. */
+       if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
+               ret = -EAGAIN;
+               goto end;
+       }
+       /*
+        * Find the previous node's next pointer pointing to our node,
+        * so we can update it. Retry if another thread removed all but
+        * one of duplicates since check (this check was performed
+        * without lock). Ensure that the node we are about to remove is
+        * still in the list (while holding lock). No need for RCU
+        * traversal here since we hold the lock on the parent.
+        */
+       iter_node_ptr = (struct cds_ja_node **) node_flag_ptr;
+       iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+       cds_ja_for_each_duplicate(iter_node) {
+               count++;
+               if (iter_node == node) {
+                       /* iter_node_ptr is the location that points to node. */
+                       prev_node_ptr = iter_node_ptr;
+                       found++;
+               }
+               iter_node_ptr = &iter_node->next;
+       }
+       assert(found <= 1);
+       if (!found || count == 1) {
+               ret = -EAGAIN;
+               goto end;
+       }
+       /* Unlink: make the location that pointed to node point past it. */
+       CMM_STORE_SHARED(*prev_node_ptr, node->next);
+       /*
+        * Validate that we indeed removed the node from linked list.
+        */
+       assert(ja_node_ptr(*node_flag_ptr) != (struct cds_ja_inode *) node);
+end:
+       rcuja_shadow_unlock(shadow_node);
+       return ret;
+}
+
+/*
+ * Remove node, previously added at key, from the Judy array.
+ *
+ * Called with RCU read lock held.
+ *
+ * Return -EINVAL when key is larger than ja->key_max or equal to
+ * UINT64_MAX, -ENOENT when no node is found for key or node is not in
+ * the list of duplicates found for key. Otherwise return the result of
+ * ja_detach_node() (node was the only one for key) or
+ * ja_unchain_node() (node had duplicates), except that -EAGAIN and
+ * -ENOENT from those cause the whole lookup to be redone.
+ */
+int cds_ja_del(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *node)
+{
+       unsigned int tree_depth, i;
+       /*
+        * One entry for the root pointer address (level 0), one per
+        * internal level (1 .. tree_depth - 1), and one for the leaf:
+        * tree_depth + 1 entries, with index tree_depth being written.
+        * tree_depth may be as large as JA_MAX_DEPTH, hence the + 1.
+        */
+       struct cds_ja_inode_flag *snapshot[JA_MAX_DEPTH + 1];
+       struct cds_ja_inode_flag **snapshot_ptr[JA_MAX_DEPTH + 1];
+       uint8_t snapshot_n[JA_MAX_DEPTH + 1];
+       struct cds_ja_inode_flag *node_flag;
+       struct cds_ja_inode_flag **prev_node_flag_ptr,
+               **node_flag_ptr;
+       int nr_snapshot;
+       int ret;
+
+       if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
+               return -EINVAL;
+       tree_depth = ja->tree_depth;
+
+retry:
+       nr_snapshot = 0;
+       dbg_printf("cds_ja_del attempt: key %" PRIu64 ", node %p\n",
+               key, node);
+
+       /* snapshot for level 0 is only for shadow node lookup */
+       snapshot_n[0] = 0;
+       snapshot_n[1] = 0;
+       snapshot_ptr[nr_snapshot] = NULL;
+       snapshot[nr_snapshot++] = (struct cds_ja_inode_flag *) &ja->root;
+       node_flag = rcu_dereference(ja->root);
+       prev_node_flag_ptr = &ja->root;
+       node_flag_ptr = &ja->root;
+
+       /* Iterate on all internal levels */
+       for (i = 1; i < tree_depth; i++) {
+               uint8_t iter_key;
+
+               dbg_printf("cds_ja_del iter node_flag %p\n",
+                               node_flag);
+               if (!ja_node_ptr(node_flag)) {
+                       return -ENOENT;
+               }
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
+               /* The key byte is stored with the child it selects. */
+               snapshot_n[nr_snapshot + 1] = iter_key;
+               snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
+               snapshot[nr_snapshot++] = node_flag;
+               node_flag = ja_node_get_nth(node_flag,
+                       &node_flag_ptr,
+                       iter_key);
+               if (node_flag)
+                       prev_node_flag_ptr = node_flag_ptr;
+               dbg_printf("cds_ja_del iter key lookup %u finds node_flag %p, prev_node_flag_ptr %p\n",
+                               (unsigned int) iter_key, node_flag,
+                               prev_node_flag_ptr);
+       }
+       /*
+        * We reached bottom of tree, try to find the node we are trying
+        * to remove. Fail if we cannot find it.
+        */
+       if (!ja_node_ptr(node_flag)) {
+               dbg_printf("cds_ja_del: no node found for key %" PRIu64 "\n",
+                               key);
+               return -ENOENT;
+       } else {
+               struct cds_ja_node *iter_node, *match = NULL;
+               int count = 0;
+
+               iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+               cds_ja_for_each_duplicate_rcu(iter_node) {
+                       dbg_printf("cds_ja_del: compare %p with iter_node %p\n", node, iter_node);
+                       if (iter_node == node)
+                               match = iter_node;
+                       count++;
+               }
+
+               if (!match) {
+                       dbg_printf("cds_ja_del: no node match for node %p key %" PRIu64 "\n", node, key);
+                       return -ENOENT;
+               }
+               assert(count > 0);
+               if (count == 1) {
+                       /*
+                        * Removing last of duplicates. Last snapshot
+                        * does not have a shadow node (external leafs).
+                        */
+                       snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
+                       snapshot[nr_snapshot++] = node_flag;
+                       ret = ja_detach_node(ja, snapshot, snapshot_ptr,
+                                       snapshot_n, nr_snapshot, key, node);
+               } else {
+                       ret = ja_unchain_node(ja, snapshot[nr_snapshot - 1],
+                               node_flag_ptr, node_flag, match);
+               }
+       }
+       /*
+        * Explanation of -ENOENT handling: caused by concurrent delete
+        * between RCU lookup and actual removal. Need to re-do the
+        * lookup and removal attempt.
+        */
+       if (ret == -EAGAIN || ret == -ENOENT)
+               goto retry;
+       return ret;
+}
+
+/*
+ * Allocate and initialize a Judy array.
+ *
+ * key_bits: number of bits of the keys; must be a multiple of 8
+ *           between 8 and 64 inclusive.
+ * flavor: RCU flavor handed to rcuja_create_ht().
+ *
+ * Return the new Judy array, or NULL when key_bits is not supported,
+ * or when allocating the array, its hash table or the shadow node of
+ * the root pointer fails.
+ */
+struct cds_ja *_cds_ja_new(unsigned int key_bits,
+               const struct rcu_flavor_struct *flavor)
+{
+       struct cds_ja *ja;
+       int ret;
+       struct cds_ja_shadow_node *root_shadow_node;
+
+       /* calloc() takes the number of elements first, then their size. */
+       ja = calloc(1, sizeof(*ja));
+       if (!ja)
+               goto ja_error;
+
+       switch (key_bits) {
+       case 8:
+       case 16:
+       case 24:
+       case 32:
+       case 40:
+       case 48:
+       case 56:
+               ja->key_max = (1ULL << key_bits) - 1;
+               break;
+       case 64:
+               /* Shifting by 64 would be undefined: special-case it. */
+               ja->key_max = UINT64_MAX;
+               break;
+       default:
+               goto check_error;
+       }
+
+       /* ja->root is NULL */
+       /* tree_depth 0 is for pointer to root node */
+       ja->tree_depth = (key_bits >> JA_LOG2_BITS_PER_BYTE) + 1;
+       assert(ja->tree_depth <= JA_MAX_DEPTH);
+       ja->ht = rcuja_create_ht(flavor);
+       if (!ja->ht)
+               goto ht_error;
+
+       /*
+        * The address of the root pointer is used as key for the root
+        * shadow node.
+        * Note: we should not free this node until judy array destroy.
+        */
+       root_shadow_node = rcuja_shadow_set(ja->ht,
+                       (struct cds_ja_inode_flag *) &ja->root,
+                       NULL, ja, 0);
+       if (!root_shadow_node)
+               goto ht_node_error;
+
+       return ja;
+
+ht_node_error:
+       ret = rcuja_delete_ht(ja->ht);
+       assert(!ret);
+ht_error:
+check_error:
+       free(ja);
+ja_error:
+       return NULL;
+}
+
+/*
+ * Print to stderr every non-zero entry of
+ * ja->node_fallback_count_distribution, one line per entry, prefixed
+ * by its index.
+ */
+static
+void print_debug_fallback_distribution(struct cds_ja *ja)
+{
+       unsigned int i; /* unsigned: printed with %u below */
+
+       fprintf(stderr, "Fallback node distribution:\n");
+       for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
+               if (!ja->node_fallback_count_distribution[i])
+                       continue;
+               fprintf(stderr, "       %3u: %4lu\n",
+                       i, ja->node_fallback_count_distribution[i]);
+       }
+}
+
+/*
+ * Report node accounting when the Judy array is destroyed.
+ *
+ * Does nothing and returns 0 unless ja_debug_counters() is true.
+ * Otherwise, warn on stderr when fallback nodes were used, and report
+ * an error when the number of nodes allocated differs from the number
+ * of nodes freed.
+ *
+ * Return 0 when the counters match (or are not checked), -1 on leak.
+ */
+static
+int ja_final_checks(struct cds_ja *ja)
+{
+       unsigned long na, nf, nr_fallback;
+       int ret = 0;
+
+       if (!ja_debug_counters())
+               return 0;
+
+       na = uatomic_read(&ja->nr_nodes_allocated);
+       nf = uatomic_read(&ja->nr_nodes_freed);
+       nr_fallback = uatomic_read(&ja->nr_fallback);
+       if (nr_fallback) {
+               double fallback_ratio;
+
+               /* Only computed when it is going to be printed. */
+               fallback_ratio = (double) nr_fallback / (double) na;
+               fprintf(stderr,
+                       "[warning] RCU Judy Array used %lu fallback node(s) (ratio: %g)\n",
+                       nr_fallback,
+                       fallback_ratio);
+       }
+
+       dbg_printf("Nodes allocated: %lu, Nodes freed: %lu.\n", na, nf);
+       if (nr_fallback)
+               print_debug_fallback_distribution(ja);
+
+       if (na != nf) {
+               /* Both operands are cast so the argument is a long, as %ld expects. */
+               fprintf(stderr, "[error] Judy array leaked %ld nodes. Allocated: %lu, freed: %lu.\n",
+                       (long) na - (long) nf, na, nf);
+               ret = -1;
+       }
+       return ret;
+}
+
+/*
+ * Destroy a Judy array: prune its shadow nodes, delete its hash table,
+ * wait for pending call_rcu callbacks, run the final accounting checks
+ * and free the array.
+ *
+ * There should be no more concurrent add, delete, nor look-up performed
+ * on the Judy array while it is being destroyed (ensured by the
+ * caller).
+ *
+ * Return 0 on success. If rcuja_delete_ht() fails, its error value is
+ * returned and ja is not freed. Otherwise return the result of
+ * ja_final_checks().
+ */
+int cds_ja_destroy(struct cds_ja *ja)
+{
+       const struct rcu_flavor_struct *flavor;
+       int ret;
+
+       flavor = cds_lfht_rcu_flavor(ja->ht);
+       rcuja_shadow_prune(ja->ht,
+               RCUJA_SHADOW_CLEAR_FREE_NODE | RCUJA_SHADOW_CLEAR_FREE_LOCK);
+       flavor->thread_offline();
+       ret = rcuja_delete_ht(ja->ht);
+       if (ret) {
+               /*
+                * Put the calling thread back online before reporting
+                * the error, as on the success path.
+                */
+               flavor->thread_online();
+               return ret;
+       }
+
+       /* Wait for in-flight call_rcu free to complete. */
+       flavor->barrier();
+
+       flavor->thread_online();
+       ret = ja_final_checks(ja);
+       free(ja);
+       return ret;
+}
index aa7a00c2171262fe324c1a4169f9b605018225d0..c5c6198ed255d1af76ac57540a81133b1f98b218 100644 (file)
@@ -11,7 +11,8 @@ noinst_PROGRAMS = test_urcu_fork \
        rcutorture_urcu_signal \
        rcutorture_urcu_mb \
        rcutorture_urcu_bp \
-       rcutorture_urcu_qsbr
+       rcutorture_urcu_qsbr \
+       test_urcu_ja
 
 noinst_HEADERS = rcutorture.h
 
@@ -47,6 +48,10 @@ rcutorture_urcu_bp_SOURCES = urcutorture.c
 rcutorture_urcu_bp_CFLAGS = -DRCU_BP $(AM_CFLAGS)
 rcutorture_urcu_bp_LDADD = $(URCU_BP_LIB) $(TAP_LIB)
 
+test_urcu_ja_SOURCES = test_urcu_ja.c test_urcu_ja.h
+test_urcu_ja_CFLAGS = -DRCU_MEMBARRIER $(AM_CFLAGS)
+test_urcu_ja_LDADD = $(URCU_QSBR_LIB) $(URCU_CDS_LIB) $(TAP_LIB)
+
 urcutorture.c: ../common/api.h
 
 all-local:
diff --git a/tests/regression/test_urcu_ja.c b/tests/regression/test_urcu_ja.c
new file mode 100644 (file)
index 0000000..0141648
--- /dev/null
@@ -0,0 +1,1339 @@
+/*
+ * test_urcu_ja.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright 2009-2012 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "test_urcu_ja.h"
+#include "../common/debug-yield.h"
+#include <inttypes.h>
+#include <stdint.h>
+
+/* Per-thread seed handed to rand_r() by the reader/writer threads. */
+DEFINE_URCU_TLS(unsigned int, rand_lookup);
+/* Per-thread writer counters (successful add / add of an existing key, ...). */
+DEFINE_URCU_TLS(unsigned long, nr_add);
+DEFINE_URCU_TLS(unsigned long, nr_addexist);
+DEFINE_URCU_TLS(unsigned long, nr_del);
+DEFINE_URCU_TLS(unsigned long, nr_delnoent);
+/* Per-thread reader counters: lookups returning NULL / non-NULL. */
+DEFINE_URCU_TLS(unsigned long, lookup_fail);
+DEFINE_URCU_TLS(unsigned long, lookup_ok);
+
+/* Judy array under test; (re)allocated by each sanity test. */
+struct cds_ja *test_ja;
+
+/* Worker threads spin until test_go becomes non-zero. */
+volatile int test_go, test_stop;
+
+/* NOTE(review): presumably the writer delay set by -d; confirm in main(). */
+unsigned long wdelay;
+
+unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+unsigned long rduration;
+
+unsigned long init_populate;
+int add_only;
+
+/*
+ * Key pools: a key is drawn as (rand % pool_size) + pool_offset. The
+ * lookup pool is used by readers, the write pool by writers.
+ */
+unsigned long init_pool_offset, lookup_pool_offset, write_pool_offset;
+unsigned long init_pool_size = DEFAULT_RAND_POOL,
+       lookup_pool_size = DEFAULT_RAND_POOL,
+       write_pool_size = DEFAULT_RAND_POOL;
+/* When set, a reader lookup miss is fatal (the reader calls exit(-1)). */
+int validate_lookup;
+int sanity_test;
+unsigned int key_bits = 32;
+
+int count_pipe[2];
+
+int verbose_mode;
+
+/* CPU list consumed one entry per thread by set_affinity(). */
+unsigned int cpu_affinities[NR_CPUS];
+unsigned int next_aff = 0;
+int use_affinity = 0;
+
+/* Protects next_aff in set_affinity(). */
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+DEFINE_URCU_TLS(unsigned long long, nr_writes);
+/* Per-thread count of reader loop iterations. */
+DEFINE_URCU_TLS(unsigned long long, nr_reads);
+
+unsigned int nr_readers;
+unsigned int nr_writers;
+
+/* Percentage threshold used by is_add() to choose add over remove. */
+static unsigned int add_ratio = 50;
+/* Every randomly drawn key is multiplied by this factor. */
+static uint64_t key_mul = 1ULL;
+
+static int add_unique, add_replace;
+
+/* Taken/released by rcu_copy_mutex_lock()/rcu_copy_mutex_unlock(). */
+static pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* When leak_detection is set, node_alloc()/free_test_node() count nodes. */
+static int leak_detection;
+static unsigned long test_nodes_allocated, test_nodes_freed;
+
+/*
+ * Pin the calling thread to the next CPU listed in cpu_affinities[].
+ *
+ * Does nothing unless use_affinity is set and the build provides
+ * sched_setaffinity(). Exits the process if the affinity mutex cannot be
+ * taken or released.
+ */
+void set_affinity(void)
+{
+       cpu_set_t cpuset;
+       int target_cpu;
+       int err;
+
+       if (!use_affinity)
+               return;
+
+#if HAVE_SCHED_SETAFFINITY
+       /* next_aff is shared between threads: consume one slot under the mutex. */
+       err = pthread_mutex_lock(&affinity_mutex);
+       if (err != 0) {
+               perror("Error in pthread mutex lock");
+               exit(-1);
+       }
+       target_cpu = cpu_affinities[next_aff];
+       next_aff++;
+       err = pthread_mutex_unlock(&affinity_mutex);
+       if (err != 0) {
+               perror("Error in pthread mutex unlock");
+               exit(-1);
+       }
+
+       CPU_ZERO(&cpuset);
+       CPU_SET(target_cpu, &cpuset);
+#if SCHED_SETAFFINITY_ARGS == 2
+       sched_setaffinity(0, &cpuset);
+#else
+       sched_setaffinity(0, sizeof(cpuset), &cpuset);
+#endif
+#endif /* HAVE_SCHED_SETAFFINITY */
+}
+
+/* Take rcu_copy_mutex; print a diagnostic and exit the process on failure. */
+void rcu_copy_mutex_lock(void)
+{
+       int err = pthread_mutex_lock(&rcu_copy_mutex);
+
+       if (err != 0) {
+               perror("Error in pthread mutex lock");
+               exit(-1);
+       }
+}
+
+/* Release rcu_copy_mutex; print a diagnostic and exit the process on failure. */
+void rcu_copy_mutex_unlock(void)
+{
+       int err = pthread_mutex_unlock(&rcu_copy_mutex);
+
+       if (err != 0) {
+               perror("Error in pthread mutex unlock");
+               exit(-1);
+       }
+}
+
+/*
+ * Allocate one zero-initialized test node.
+ *
+ * Returns NULL when the allocation fails. When leak_detection is set,
+ * each successful allocation is counted in test_nodes_allocated.
+ */
+static
+struct ja_test_node *node_alloc(void)
+{
+       struct ja_test_node *node;
+
+       /* calloc() takes (nmemb, size): one element of sizeof(*node) bytes. */
+       node = calloc(1, sizeof(*node));
+       if (leak_detection && node)
+               uatomic_inc(&test_nodes_allocated);
+       return node;
+}
+
+/*
+ * Release a test node through poison_free() and, when leak detection is
+ * enabled, account for it in test_nodes_freed.
+ */
+static
+void free_test_node(struct ja_test_node *node)
+{
+       poison_free(node);
+       if (!leak_detection)
+               return;
+       uatomic_inc(&test_nodes_freed);
+}
+
+/*
+ * call_rcu() callback: recover the test node embedding @head and free it.
+ */
+static
+void free_test_node_cb(struct rcu_head *head)
+{
+       free_test_node(caa_container_of(head, struct ja_test_node, head));
+}
+
+/* Queue @test_node for freeing by free_test_node_cb() through call_rcu(). */
+static
+void rcu_free_test_node(struct ja_test_node *test_node)
+{
+       struct rcu_head *head = &test_node->head;
+
+       call_rcu(head, free_test_node_cb);
+}
+
+/* Immediately free the test node that embeds the Judy array node @node. */
+static
+void free_node(struct cds_ja_node *node)
+{
+       free_test_node(to_test_node(node));
+}
+
+/*
+ * Compiled out.
+ * NOTE(review): this helper uses the cds_lfht API and names (test_ht,
+ * free_node_cb, struct lfht_test_node) that are not defined in the
+ * visible part of this file -- it looks like a leftover from the hash
+ * table test. Confirm before enabling or removing it.
+ */
+#if 0
+static
+void test_delete_all_nodes(struct cds_lfht *ht)
+{
+       struct cds_lfht_iter iter;
+       struct lfht_test_node *node;
+       unsigned long count = 0;
+
+       cds_lfht_for_each_entry(ht, &iter, node, node) {
+               int ret;
+
+               ret = cds_lfht_del(test_ht, cds_lfht_iter_get_node(&iter));
+               assert(!ret);
+               call_rcu(&node->head, free_node_cb);
+               count++;
+       }
+       printf("deleted %lu nodes.\n", count);
+}
+#endif
+
+/*
+ * Print the command-line synopsis on stdout.
+ *
+ * @argc is unused; @argv[0] is printed as the program name.
+ *
+ * NOTE(review): "-r" is listed twice with different meanings (reader
+ * yield under DEBUG_YIELD, and add ratio) -- check against the option
+ * parser.
+ */
+void show_usage(int argc, char **argv)
+{
+       printf("Usage : %s nr_readers nr_writers duration (s)\n", argv[0]);
+#ifdef DEBUG_YIELD
+       printf("        [-r] [-w] (yield reader and/or writer)\n");
+#endif
+       printf("        [-d delay] (writer period (us))\n");
+       printf("        [-c duration] (reader C.S. duration (in loops))\n");
+       printf("        [-v] (verbose output)\n");
+       printf("        [-a cpu#] [-a cpu#]... (affinity)\n");
+       printf("        [-u] Add unique keys.\n");
+       printf("        [-s] Replace existing keys.\n");
+       printf("        [not -u nor -s] Add entries (supports redundant keys).\n");
+       printf("        [-r ratio] Add ratio (in %% of add+removal).\n");
+       printf("        [-k] Populate init nodes.\n");
+       printf("        [-R offset] Lookup pool offset.\n");
+       printf("        [-S offset] Write pool offset.\n");
+       printf("        [-T offset] Init pool offset.\n");
+       printf("        [-M size] Lookup pool size.\n");
+       printf("        [-N size] Write pool size.\n");
+       printf("        [-O size] Init pool size.\n");
+       printf("        [-V] Validate lookups of init values (use with filled init pool, same lookup range, with different write range).\n");
+       printf("        [-t] Do sanity test.\n");
+       printf("        [-B] Key bits for multithread test (default: 32).\n");
+       printf("        [-m factor] Key multiplication factor.\n");
+       printf("        [-l] Memory leak detection.\n");
+       printf("\n\n");
+}
+
+/*
+ * Remove and free every node (including duplicates) still present in @ja.
+ *
+ * The nodes are freed immediately rather than through call_rcu(): the
+ * caller must be the only user of the Judy array.
+ *
+ * Returns 0 on success, or the cds_ja_del() error code of the first
+ * removal that fails.
+ */
+static
+int test_free_all_nodes(struct cds_ja *ja)
+{
+       uint64_t key;
+       struct cds_ja_node *ja_node;
+       int ret = 0;
+
+       rcu_read_lock();
+       /* Operate on the array passed in, not on the global test_ja. */
+       cds_ja_for_each_key_rcu(ja, key, ja_node) {
+               struct cds_ja_node *tmp_node;
+
+               cds_ja_for_each_duplicate_safe_rcu(ja_node, tmp_node) {
+                       ret = cds_ja_del(ja, key, ja_node);
+                       if (ret) {
+                               fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+                               goto end;
+                       }
+                       /* Alone using Judy array, OK to free now */
+                       free_node(ja_node);
+               }
+       }
+end:
+       rcu_read_unlock();
+       return ret;
+}
+
+/*
+ * Sanity test of an 8-bit key Judy array.
+ *
+ * Allocates a new array into the global test_ja, then: adds keys 0..199,
+ * looks each of them up, checks that keys 200..239 are absent, removes
+ * keys 0..199, and finally exercises cds_ja_lookup_below_equal() and
+ * cds_ja_lookup_above_equal() around the keys listed in ka[]. Remaining
+ * nodes are then freed and the array is destroyed.
+ *
+ * Returns 0 on success, -1 if an allocation, the final node cleanup or
+ * the array destruction fails. Failed expectations are reported on
+ * stderr and trip assert(0).
+ */
+static
+int test_8bit_key(void)
+{
+       int ret;
+       size_t i;
+       uint64_t key;
+       uint64_t ka[] = { 5, 17, 100, 222 };
+       uint64_t ka_test_offset = 5;
+       struct cds_ja_node *ja_node;
+
+       /* Test with 8-bit key */
+       test_ja = cds_ja_new(8);
+       if (!test_ja) {
+               printf("Error allocating judy array.\n");
+               return -1;
+       }
+
+       /* Add keys */
+       printf("Test #1: add keys (8-bit).\n");
+       for (key = 0; key < 200; key++) {
+               struct ja_test_node *node = node_alloc();
+
+               if (!node) {
+                       fprintf(stderr, "Error allocating test node.\n");
+                       return -1;
+               }
+               ja_test_node_init(node, key);
+               rcu_read_lock();
+               ret = cds_ja_add(test_ja, key, &node->node);
+               rcu_read_unlock();
+               if (ret) {
+                       fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+                               ret, key);
+                       assert(0);
+               }
+       }
+       printf("OK\n");
+
+       printf("Test #2: successful key lookup (8-bit).\n");
+       for (key = 0; key < 200; key++) {
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+       printf("OK\n");
+       printf("Test #3: unsuccessful key lookup (8-bit).\n");
+       for (key = 200; key < 240; key++) {
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (ja_node) {
+                       fprintf(stderr,
+                               "Error unexpected lookup node %" PRIu64 "\n",
+                               key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+       printf("OK\n");
+       printf("Test #4: remove keys (8-bit).\n");
+       for (key = 0; key < 200; key++) {
+               struct ja_test_node *node;
+
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               ret = cds_ja_del(test_ja, key, &node->node);
+               if (ret) {
+                       fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+                       assert(0);
+               }
+               /* Deferred free: still inside the read-side critical section. */
+               rcu_free_test_node(node);
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (ja_node) {
+                       fprintf(stderr, "Error lookup %" PRIu64 ": %p (after delete) failed. Node is not expected.\n", key, ja_node);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+       printf("OK\n");
+
+       printf("Test #5: lookup below/above equal (8-bit).\n");
+
+       /* Populate the array with only the keys listed in ka[]. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node = node_alloc();
+
+               if (!node) {
+                       fprintf(stderr, "Error allocating test node.\n");
+                       return -1;
+               }
+               key = ka[i];
+               ja_test_node_init(node, key);
+               rcu_read_lock();
+               ret = cds_ja_add(test_ja, key, &node->node);
+               rcu_read_unlock();
+               if (ret) {
+                       fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+                               ret, key);
+                       assert(0);
+               }
+       }
+
+       /* Searching downward from ka[i] + offset must land on ka[i]. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node;
+               uint64_t result_key;
+
+               key = ka[i] + ka_test_offset;
+               rcu_read_lock();
+               ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+
+       /* Searching upward from ka[i] - offset must land on ka[i]. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node;
+               uint64_t result_key;
+
+               key = ka[i] - ka_test_offset;
+               rcu_read_lock();
+               ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+
+       /* Exact key: both directions must return the key itself. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node;
+               uint64_t result_key;
+
+               key = ka[i];    /* without offset */
+               rcu_read_lock();
+               ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+
+               ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+
+       printf("OK\n");
+
+       ret = test_free_all_nodes(test_ja);
+       if (ret) {
+               fprintf(stderr, "Error freeing all nodes\n");
+               return -1;
+       }
+
+       ret = cds_ja_destroy(test_ja);
+       if (ret) {
+               fprintf(stderr, "Error destroying judy array\n");
+               return -1;
+       }
+       return 0;
+}
+
+/*
+ * Sanity test of a 16-bit key Judy array.
+ *
+ * Allocates a new array into the global test_ja, then: adds keys
+ * 0..9999, looks each of them up, checks that keys 11000..11002 are
+ * absent, removes keys 0..9999, and finally exercises
+ * cds_ja_lookup_below_equal() and cds_ja_lookup_above_equal() around the
+ * keys listed in ka[]. Remaining nodes are then freed and the array is
+ * destroyed.
+ *
+ * Returns 0 on success, -1 if an allocation, the final node cleanup or
+ * the array destruction fails. Failed expectations are reported on
+ * stderr and trip assert(0).
+ */
+static
+int test_16bit_key(void)
+{
+       int ret;
+       size_t i;
+       uint64_t key;
+       uint64_t ka[] = { 105, 206, 4000, 4111, 59990, 65435 };
+       uint64_t ka_test_offset = 100;
+       struct cds_ja_node *ja_node;
+
+       /* Test with 16-bit key */
+       test_ja = cds_ja_new(16);
+       if (!test_ja) {
+               printf("Error allocating judy array.\n");
+               return -1;
+       }
+
+       /* Add keys */
+       printf("Test #1: add keys (16-bit).\n");
+       for (key = 0; key < 10000; key++) {
+               struct ja_test_node *node = node_alloc();
+
+               if (!node) {
+                       fprintf(stderr, "Error allocating test node.\n");
+                       return -1;
+               }
+               ja_test_node_init(node, key);
+               rcu_read_lock();
+               ret = cds_ja_add(test_ja, key, &node->node);
+               rcu_read_unlock();
+               if (ret) {
+                       fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+                               ret, key);
+                       assert(0);
+               }
+       }
+       printf("OK\n");
+
+       printf("Test #2: successful key lookup (16-bit).\n");
+       for (key = 0; key < 10000; key++) {
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+       printf("OK\n");
+       printf("Test #3: unsuccessful key lookup (16-bit).\n");
+       for (key = 11000; key <= 11002; key++) {
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (ja_node) {
+                       fprintf(stderr,
+                               "Error unexpected lookup node %" PRIu64 "\n",
+                               key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+       printf("OK\n");
+       printf("Test #4: remove keys (16-bit).\n");
+       for (key = 0; key < 10000; key++) {
+               struct ja_test_node *node;
+
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               ret = cds_ja_del(test_ja, key, &node->node);
+               if (ret) {
+                       fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+                       assert(0);
+               }
+               /* Deferred free: still inside the read-side critical section. */
+               rcu_free_test_node(node);
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (ja_node) {
+                       fprintf(stderr, "Error lookup %" PRIu64 ": %p (after delete) failed. Node is not expected.\n", key, ja_node);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+       printf("OK\n");
+
+       printf("Test #5: lookup below/above equal (16-bit).\n");
+
+       /* Populate the array with only the keys listed in ka[]. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node = node_alloc();
+
+               if (!node) {
+                       fprintf(stderr, "Error allocating test node.\n");
+                       return -1;
+               }
+               key = ka[i];
+               ja_test_node_init(node, key);
+               rcu_read_lock();
+               ret = cds_ja_add(test_ja, key, &node->node);
+               rcu_read_unlock();
+               if (ret) {
+                       fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+                               ret, key);
+                       assert(0);
+               }
+       }
+
+       /* Searching downward from ka[i] + offset must land on ka[i]. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node;
+               uint64_t result_key;
+
+               key = ka[i] + ka_test_offset;
+               rcu_read_lock();
+               ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+
+       /* Searching upward from ka[i] - offset must land on ka[i]. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node;
+               uint64_t result_key;
+
+               key = ka[i] - ka_test_offset;
+               rcu_read_lock();
+               ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+
+       /* Exact key: both directions must return the key itself. */
+       for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+               struct ja_test_node *node;
+               uint64_t result_key;
+
+               key = ka[i];    /* without offset */
+               rcu_read_lock();
+               ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+
+               ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+                               ka[i], key);
+                       assert(0);
+               }
+               node = caa_container_of(ja_node, struct ja_test_node, node);
+               if (node->key != ka[i] || result_key != ka[i]) {
+                       fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+                               ka[i], key, node->key, result_key);
+                       assert(0);
+               }
+               rcu_read_unlock();
+       }
+
+       printf("OK\n");
+
+       ret = test_free_all_nodes(test_ja);
+       if (ret) {
+               fprintf(stderr, "Error freeing all nodes\n");
+               return -1;
+       }
+
+       ret = cds_ja_destroy(test_ja);
+       if (ret) {
+               fprintf(stderr, "Error destroying judy array\n");
+               return -1;
+       }
+       return 0;
+}
+
+/*
+ * Sanity test with keys spread evenly over a @bits-wide key space.
+ *
+ * nr_dup is number of nodes per key.
+ *
+ * Keys are visited with a stride of 1ULL << (bits - 8), starting at 0,
+ * so @bits must be at least 8. For 64-bit keys "key <= max_key" is
+ * always true; the zerocount guard ends the loop once the key wraps
+ * back to 0.
+ *
+ * Allocates a new array into the global test_ja, adds @nr_dup nodes per
+ * key, verifies lookups and the duplicate count, checks that key + 42 is
+ * absent (only when bits > 8), removes every node, then destroys the
+ * array.
+ *
+ * Returns 0 on success, -1 if an allocation, the final node cleanup or
+ * the array destruction fails. Failed expectations are reported on
+ * stderr and trip assert(0).
+ */
+static
+int test_sparse_key(unsigned int bits, int nr_dup)
+{
+       uint64_t key, max_key;
+       int zerocount, i, ret;
+       struct cds_ja_node *ja_node;
+
+       if (bits == 64)
+               max_key = UINT64_MAX;
+       else
+               max_key = (1ULL << bits) - 1;
+
+       printf("Sparse key test begins for %u-bit keys\n", bits);
+       /* Test with keys of the requested width */
+       test_ja = cds_ja_new(bits);
+       if (!test_ja) {
+               printf("Error allocating judy array.\n");
+               return -1;
+       }
+
+       /* Add keys */
+       printf("Test #1: add keys (%u-bit).\n", bits);
+       for (i = 0; i < nr_dup; i++) {
+               zerocount = 0;
+               for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+                       struct ja_test_node *node = node_alloc();
+
+                       if (!node) {
+                               fprintf(stderr, "Error allocating test node.\n");
+                               return -1;
+                       }
+                       ja_test_node_init(node, key);
+                       rcu_read_lock();
+                       ret = cds_ja_add(test_ja, key, &node->node);
+                       rcu_read_unlock();
+                       if (ret) {
+                               fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+                                       ret, key);
+                               assert(0);
+                       }
+                       if (key == 0)
+                               zerocount++;
+               }
+       }
+       printf("OK\n");
+
+       printf("Test #2: successful key lookup (%u-bit).\n", bits);
+       zerocount = 0;
+       for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+               int count = 0;
+
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+                       assert(0);
+               }
+               cds_ja_for_each_duplicate_rcu(ja_node) {
+                       count++;
+               }
+               if (count != nr_dup) {
+                       fprintf(stderr, "Unexpected number of match for key %" PRIu64 ", expected %d, got %d.\n", key, nr_dup, count);
+                       /* A wrong duplicate count is a test failure like any other. */
+                       assert(0);
+               }
+               rcu_read_unlock();
+               if (key == 0)
+                       zerocount++;
+       }
+       printf("OK\n");
+       if (bits > 8) {
+               printf("Test #3: unsuccessful key lookup (%u-bit).\n", bits);
+               zerocount = 0;
+               for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+                       rcu_read_lock();
+                       ja_node = cds_ja_lookup(test_ja, key + 42);
+                       if (ja_node) {
+                               fprintf(stderr,
+                                       "Error unexpected lookup node %" PRIu64 "\n",
+                                       key + 42);
+                               assert(0);
+                       }
+                       rcu_read_unlock();
+                       if (key == 0)
+                               zerocount++;
+               }
+               printf("OK\n");
+       }
+       printf("Test #4: remove keys (%u-bit).\n", bits);
+       zerocount = 0;
+       for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+               int count = 0;
+
+               rcu_read_lock();
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       /* Every key added by Test #1 must still be present. */
+                       fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+                       assert(0);
+               }
+
+               cds_ja_for_each_duplicate_rcu(ja_node) {
+                       struct cds_ja_node *test_ja_node;
+                       struct ja_test_node *node;
+
+                       count++;
+                       node = caa_container_of(ja_node,
+                               struct ja_test_node, node);
+                       ret = cds_ja_del(test_ja, key, &node->node);
+                       if (ret) {
+                               fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+                               assert(0);
+                       }
+                       /* Deferred free: the iteration still reads this node. */
+                       rcu_free_test_node(node);
+                       test_ja_node = cds_ja_lookup(test_ja, key);
+                       if (count < nr_dup && !test_ja_node) {
+                               fprintf(stderr, "Error: no node found after deletion of some nodes of a key\n");
+                               assert(0);
+                       }
+               }
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (ja_node) {
+                       fprintf(stderr, "Error lookup %" PRIu64 ": %p (after delete) failed. Node is not expected.\n", key, ja_node);
+                       assert(0);
+               }
+               rcu_read_unlock();
+               if (key == 0)
+                       zerocount++;
+       }
+       printf("OK\n");
+
+       ret = test_free_all_nodes(test_ja);
+       if (ret) {
+               fprintf(stderr, "Error freeing all nodes\n");
+               return -1;
+       }
+
+       ret = cds_ja_destroy(test_ja);
+       if (ret) {
+               fprintf(stderr, "Error destroying judy array\n");
+               return -1;
+       }
+       printf("Test ends\n");
+
+       return 0;
+}
+
+/*
+ * Run the single-threaded sanity tests: the 8-bit test three times, the
+ * 16-bit test once, then the sparse key test for 8, 16, 32 and 64-bit
+ * keys with 1 to 3 nodes per key. A quiescent state is reported after
+ * each test.
+ *
+ * Returns 0 when everything passes, otherwise the first non-zero test
+ * result.
+ */
+static
+int do_sanity_test(void)
+{
+       int pass, nbits, dup_count, err;
+
+       printf("Sanity test start.\n");
+
+       for (pass = 0; pass < 3; pass++) {
+               err = test_8bit_key();
+               if (err)
+                       return err;
+               rcu_quiescent_state();
+       }
+
+       err = test_16bit_key();
+       if (err)
+               return err;
+       rcu_quiescent_state();
+
+       /* Outer loop: key width in bits. Inner loop: nodes per key. */
+       for (nbits = 8; nbits <= 64; nbits *= 2) {
+               for (dup_count = 1; dup_count <= 3; dup_count++) {
+                       err = test_sparse_key(nbits, dup_count);
+                       if (err)
+                               return err;
+                       rcu_quiescent_state();
+               }
+       }
+       printf("Sanity test end.\n");
+
+       return 0;
+}
+
+/*
+ * Writer operation mode. test_ja_rw_sigusr1_handler() cycles through the
+ * modes: add only -> random -> remove only -> add only.
+ */
+enum urcu_ja_addremove {
+       AR_RANDOM = 0,
+       AR_ADD = 1,
+       AR_REMOVE = -1,
+};     /* 1: add, -1 remove, 0: random */
+
+/* Current mode; starts as AR_RANDOM (0) because it is a zero-initialized static. */
+static enum urcu_ja_addremove addremove; /* 1: add, -1 remove, 0: random */
+
+/*
+ * Signal handler: advance addremove to the next mode
+ * (add only -> random -> remove only -> add only) and print the new mode.
+ * @signo is unused.
+ */
+static
+void test_ja_rw_sigusr1_handler(int signo)
+{
+       const enum urcu_ja_addremove current = addremove;
+
+       if (current == AR_ADD) {
+               printf("Add/Remove: random.\n");
+               addremove = AR_RANDOM;
+       } else if (current == AR_RANDOM) {
+               printf("Add/Remove: remove only.\n");
+               addremove = AR_REMOVE;
+       } else if (current == AR_REMOVE) {
+               printf("Add/Remove: add only.\n");
+               addremove = AR_ADD;
+       }
+}
+
+/*
+ * Reader thread body.
+ *
+ * @_count points to an unsigned long long that receives this thread's
+ * number of loop iterations (nr_reads) when the thread ends.
+ *
+ * Each iteration looks up one pseudo-random key in test_ja inside an RCU
+ * read-side critical section and counts the outcome in lookup_ok or
+ * lookup_fail. The loop ends when test_duration_read() returns 0.
+ *
+ * Always returns (void *) 1.
+ */
+static
+void *test_ja_rw_thr_reader(void *_count)
+{
+       unsigned long long *count = _count;
+       struct cds_ja_node *ja_node;
+       uint64_t key;
+
+       printf_verbose("thread_begin %s, tid %lu\n",
+                       "reader", urcu_get_thread_id());
+
+       /* Per-thread rand_r() seed derived from the thread id and the time. */
+       URCU_TLS(rand_lookup) = urcu_get_thread_id() ^ time(NULL);
+
+       set_affinity();
+
+       rcu_register_thread();
+
+       /* Busy-wait until test_go is set. */
+       while (!test_go)
+       {
+       }
+       cmm_smp_mb();
+
+       for (;;) {
+               rcu_read_lock();
+
+               /* note: only looking up ulong keys */
+               key = ((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % lookup_pool_size) + lookup_pool_offset;
+               key *= key_mul;
+               ja_node = cds_ja_lookup(test_ja, key);
+               if (!ja_node) {
+                       /* With validate_lookup set, any lookup miss is fatal. */
+                       if (validate_lookup) {
+                               printf("[ERROR] Lookup cannot find initial node.\n");
+                               exit(-1);
+                       }
+                       URCU_TLS(lookup_fail)++;
+               } else {
+                       URCU_TLS(lookup_ok)++;
+               }
+               rcu_debug_yield_read();
+               /* Optionally lengthen the read-side critical section. */
+               if (caa_unlikely(rduration))
+                       loop_sleep(rduration);
+               rcu_read_unlock();
+               URCU_TLS(nr_reads)++;
+               if (caa_unlikely(!test_duration_read()))
+                       break;
+               /* Report a quiescent state once every 1024 reads. */
+               if (caa_unlikely((URCU_TLS(nr_reads) & ((1 << 10) - 1)) == 0))
+                       rcu_quiescent_state();
+       }
+
+       rcu_unregister_thread();
+
+       *count = URCU_TLS(nr_reads);
+       printf_verbose("thread_end %s, tid %lu\n",
+                       "reader", urcu_get_thread_id());
+       printf_verbose("readid : %lx, lookupfail %lu, lookupok %lu\n",
+                       pthread_self(), URCU_TLS(lookup_fail),
+                       URCU_TLS(lookup_ok));
+       return ((void*)1);
+}
+
+/*
+ * Draw a pseudo-random percentage in [0, 100) from the per-thread
+ * rand_lookup seed and report whether it falls below add_ratio.
+ *
+ * Returns nonzero when the next random operation should be an add.
+ */
+static
+int is_add(void)
+{
+       unsigned int percent;
+
+       percent = (unsigned int) rand_r(&URCU_TLS(rand_lookup)) % 100;
+       return percent < add_ratio;
+}
+
+/*
+ * Writer thread body: repeatedly adds or removes a node keyed by a
+ * pseudo-random value until the test is stopped (test_duration_write()
+ * returns 0).
+ *
+ * The operation is an add when addremove is AR_ADD, or when it is
+ * AR_RANDOM and is_add() says so; otherwise it is a lookup followed by a
+ * removal of the node found.
+ *
+ * @_count: points to a struct wr_count that receives this thread's
+ *          update, add, add-exist and remove counters.
+ *
+ * Returns (void *) 2.
+ */
+static
+void *test_ja_rw_thr_writer(void *_count)
+{
+       struct wr_count *count = _count;
+       uint64_t key;
+       int ret;
+
+       printf_verbose("thread_begin %s, tid %lu\n",
+                       "writer", urcu_get_thread_id());
+
+       /* Per-thread seed for rand_r(). */
+       URCU_TLS(rand_lookup) = urcu_get_thread_id() ^ time(NULL);
+
+       set_affinity();
+
+       rcu_register_thread();
+
+       /* Spin until test_go becomes nonzero. */
+       while (!test_go)
+       {
+       }
+       cmm_smp_mb();
+
+       for (;;) {
+               if ((addremove == AR_ADD)
+                               || (addremove == AR_RANDOM && is_add())) {
+                       struct ja_test_node *node = node_alloc();
+                       struct cds_ja_node *ret_node;
+
+                       /* note: only inserting ulong keys */
+                       key = ((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % write_pool_size) + write_pool_offset;
+                       key *= key_mul;
+                       ja_test_node_init(node, key);
+                       rcu_read_lock();
+                       if (add_unique) {
+                               /*
+                                * Getting back a node other than ours
+                                * is counted as "already exists"; our
+                                * node is freed immediately.
+                                */
+                               ret_node = cds_ja_add_unique(test_ja, key, &node->node);
+                               if (ret_node != &node->node) {
+                                       free_test_node(node);
+                                       URCU_TLS(nr_addexist)++;
+                               } else {
+                                       URCU_TLS(nr_add)++;
+                               }
+                       } else if (add_replace) {
+                               assert(0);      /* not implemented yet. */
+                       } else {
+                               ret = cds_ja_add(test_ja, key, &node->node);
+                               if (ret) {
+                                       fprintf(stderr, "Error in cds_ja_add: %d\n", ret);
+                                       free_test_node(node);
+                               } else {
+                                       URCU_TLS(nr_add)++;
+                               }
+                       }
+                       rcu_read_unlock();
+               } else {
+                       struct cds_ja_node *ja_node;
+                       struct ja_test_node *node;
+
+                       /* May delete */
+                       /* note: only deleting ulong keys */
+                       key = ((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % write_pool_size) + write_pool_offset;
+                       key *= key_mul;
+
+                       rcu_read_lock();
+
+                       ja_node = cds_ja_lookup(test_ja, key);
+                       /* Remove first entry */
+                       if (ja_node) {
+                               node = caa_container_of(ja_node,
+                                       struct ja_test_node, node);
+                               ret = cds_ja_del(test_ja, key, &node->node);
+                               if (!ret) {
+                                       /* Removed by us: hand the node to rcu_free_test_node(). */
+                                       rcu_free_test_node(node);
+                                       URCU_TLS(nr_del)++;
+                               } else {
+                                       URCU_TLS(nr_delnoent)++;
+                               }
+                       } else {
+                               URCU_TLS(nr_delnoent)++;
+                       }
+                       rcu_read_unlock();
+               }
+
+               URCU_TLS(nr_writes)++;
+               if (caa_unlikely(!test_duration_write()))
+                       break;
+               /* Optional busy-wait between two updates. */
+               if (caa_unlikely(wdelay))
+                       loop_sleep(wdelay);
+               /* Report a quiescent state once every 1024 updates. */
+               if (caa_unlikely((URCU_TLS(nr_writes) & ((1 << 10) - 1)) == 0))
+                       rcu_quiescent_state();
+       }
+
+       rcu_unregister_thread();
+
+       printf_verbose("thread_end %s, tid %lu\n",
+                       "writer", urcu_get_thread_id());
+       printf_verbose("info id %lx: nr_add %lu, nr_addexist %lu, nr_del %lu, "
+                       "nr_delnoent %lu\n", pthread_self(), URCU_TLS(nr_add),
+                       URCU_TLS(nr_addexist), URCU_TLS(nr_del),
+                       URCU_TLS(nr_delnoent));
+       /* Publish the per-thread counters to the caller. */
+       count->update_ops = URCU_TLS(nr_writes);
+       count->add = URCU_TLS(nr_add);
+       count->add_exist = URCU_TLS(nr_addexist);
+       count->remove = URCU_TLS(nr_del);
+       return ((void*)2);
+}
+
+/*
+ * Pre-populate test_ja before the worker threads start.
+ *
+ * Does nothing unless init_populate is set. Otherwise inserts one node for
+ * each value of [init_pool_offset, init_pool_offset + init_pool_size),
+ * each scaled by key_mul.
+ *
+ * Returns 0 on success (or when population is disabled). On the first
+ * failed insertion the error is reported, the unpublished node is
+ * released, and the nonzero cds_ja_add() error code is returned (after
+ * assert(0) in builds where assertions are enabled).
+ */
+static
+int do_mt_populate_ja(void)
+{
+       uint64_t iter;
+       int ret;
+
+       if (!init_populate)
+               return 0;
+
+       printf("Starting rw test\n");
+
+       for (iter = init_pool_offset; iter < init_pool_offset + init_pool_size; iter++) {
+               struct ja_test_node *node = node_alloc();
+               uint64_t key;
+
+               /* note: only inserting ulong keys */
+               key = (unsigned long) iter;
+               key *= key_mul;
+               ja_test_node_init(node, key);
+               rcu_read_lock();
+               ret = cds_ja_add(test_ja, key, &node->node);
+               rcu_read_unlock();
+               if (ret) {
+                       fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+                               ret, key);
+                       /* The node was never inserted: do not leak it. */
+                       free_test_node(node);
+                       assert(0);
+                       return ret;
+               }
+               /* Only successful insertions are accounted for. */
+               URCU_TLS(nr_add)++;
+               URCU_TLS(nr_writes)++;
+               /* Report a quiescent state once every 100 keys. */
+               if (!(iter % 100))
+                       rcu_quiescent_state();
+       }
+       return 0;
+}
+
+/*
+ * Run the multi-threaded stress test.
+ *
+ * Allocates the Judy array, optionally pre-populates it, starts
+ * nr_readers reader threads and nr_writers writer threads, lets them run
+ * for "duration" seconds, joins them, then frees every node and destroys
+ * the array.
+ *
+ * Returns 0 on success, nonzero on error. The per-thread bookkeeping
+ * arrays are released on every return path. Thread creation or join
+ * failures terminate the process with exit(1).
+ */
+static
+int do_mt_test(void)
+{
+       pthread_t *tid_reader, *tid_writer;
+       void *tret;
+       int ret = -1, err;
+       unsigned int i;
+       unsigned long long *count_reader;
+       struct wr_count *count_writer;
+       unsigned long long tot_reads = 0, tot_writes = 0,
+               tot_add = 0, tot_add_exist = 0, tot_remove = 0;
+       unsigned int remain;
+
+       tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+       tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+       count_reader = malloc(sizeof(*count_reader) * nr_readers);
+       count_writer = malloc(sizeof(*count_writer) * nr_writers);
+       /*
+        * A zero-sized request may legitimately return NULL, so only
+        * treat NULL as a failure when the matching thread count is
+        * nonzero.
+        */
+       if ((nr_readers && (!tid_reader || !count_reader))
+                       || (nr_writers && (!tid_writer || !count_writer))) {
+               fprintf(stderr, "Error allocating thread bookkeeping.\n");
+               goto end;
+       }
+
+       printf("Allocating Judy Array for %u-bit keys\n", key_bits);
+       test_ja = cds_ja_new(key_bits);
+       if (!test_ja) {
+               printf("Error allocating judy array.\n");
+               goto end;
+       }
+
+       ret = do_mt_populate_ja();
+       if (ret) {
+               fprintf(stderr, "Error populating judy array\n");
+               goto destroy;
+       }
+
+       next_aff = 0;
+
+       for (i = 0; i < nr_readers; i++) {
+               err = pthread_create(&tid_reader[i],
+                                    NULL, test_ja_rw_thr_reader,
+                                    &count_reader[i]);
+               if (err != 0)
+                       exit(1);
+       }
+       for (i = 0; i < nr_writers; i++) {
+               err = pthread_create(&tid_writer[i],
+                                    NULL, test_ja_rw_thr_writer,
+                                    &count_writer[i]);
+               if (err != 0)
+                       exit(1);
+       }
+
+       cmm_smp_mb();
+
+       /* Release the worker threads spinning on test_go. */
+       test_go = 1;
+
+       rcu_thread_offline_qsbr();
+
+       /* sleep() returns the unslept time when interrupted: retry. */
+       remain = duration;
+       do {
+               remain = sleep(remain);
+       } while (remain > 0);
+
+       test_stop = 1;
+
+       for (i = 0; i < nr_readers; i++) {
+               err = pthread_join(tid_reader[i], &tret);
+               if (err != 0)
+                       exit(1);
+               tot_reads += count_reader[i];
+       }
+       for (i = 0; i < nr_writers; i++) {
+               err = pthread_join(tid_writer[i], &tret);
+               if (err != 0)
+                       exit(1);
+               tot_writes += count_writer[i].update_ops;
+               tot_add += count_writer[i].add;
+               tot_add_exist += count_writer[i].add_exist;
+               tot_remove += count_writer[i].remove;
+       }
+       rcu_thread_online_qsbr();
+       ret = 0;
+
+destroy:
+       err = test_free_all_nodes(test_ja);
+       if (err) {
+               fprintf(stderr, "Error freeing all nodes\n");
+               ret = -1;
+               goto end;
+       }
+
+       err = cds_ja_destroy(test_ja);
+       if (err) {
+               fprintf(stderr, "Error destroying judy array\n");
+               /* Keep an earlier error code if there is one. */
+               if (!ret)
+                       ret = err;
+       }
+
+end:
+       /* free(NULL) is a no-op, so this is safe on every path. */
+       free(tid_reader);
+       free(tid_writer);
+       free(count_reader);
+       free(count_writer);
+       return ret;
+}
+
+/*
+ * Compare the number of test nodes allocated with the number freed.
+ *
+ * Returns 0 when both counters match, -1 (after printing a diagnostic on
+ * stderr) otherwise.
+ */
+static
+int check_memory_leaks(void)
+{
+       unsigned long na, nf;
+
+       na = uatomic_read(&test_nodes_allocated);
+       nf = uatomic_read(&test_nodes_freed);
+       if (na != nf) {
+               /*
+                * %ld expects a signed long: convert the unsigned
+                * difference explicitly so the argument type matches
+                * the conversion specifier.
+                */
+               fprintf(stderr, "Memory leak of %ld test nodes detected. Allocated: %lu, freed: %lu\n",
+                       (long) (na - nf), na, nf);
+               return -1;
+       }
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       int i, j, a, ret, err;
+       uint64_t key;
+       struct sigaction act;
+
+       if (argc < 4) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       err = sscanf(argv[1], "%u", &nr_readers);
+       if (err != 1) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       err = sscanf(argv[2], "%u", &nr_writers);
+       if (err != 1) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       err = sscanf(argv[3], "%lu", &duration);
+       if (err != 1) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       for (i = 4; i < argc; i++) {
+               if (argv[i][0] != '-')
+                       continue;
+               switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+               case 'r':
+                       yield_active |= YIELD_READ;
+                       break;
+               case 'w':
+                       yield_active |= YIELD_WRITE;
+                       break;
+#endif
+               case 'a':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       a = atoi(argv[++i]);
+                       cpu_affinities[next_aff++] = a;
+                       use_affinity = 1;
+                       printf_verbose("Adding CPU %d affinity\n", a);
+                       break;
+               case 'c':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       rduration = atol(argv[++i]);
+                       break;
+               case 'd':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       wdelay = atol(argv[++i]);
+                       break;
+               case 'v':
+                       verbose_mode = 1;
+                       break;
+               case 'r':
+                       add_ratio = atoi(argv[++i]);
+                       break;
+               case 'k':
+                       init_populate = 1;
+                       break;
+               case 'R':
+                       lookup_pool_offset = atol(argv[++i]);
+                       break;
+               case 'S':
+                       write_pool_offset = atol(argv[++i]);
+                       break;
+               case 'T':
+                       init_pool_offset = atol(argv[++i]);
+                       break;
+               case 'M':
+                       lookup_pool_size = atol(argv[++i]);
+                       break;
+               case 'N':
+                       write_pool_size = atol(argv[++i]);
+                       break;
+               case 'O':
+                       init_pool_size = atol(argv[++i]);
+                       break;
+               case 'V':
+                       validate_lookup = 1;
+                       break;
+               case 't':
+                       sanity_test = 1;
+                       break;
+               case 'B':
+                       key_bits = atol(argv[++i]);
+                       break;
+               case 'm':
+                       key_mul = atoll(argv[++i]);
+                       break;
+               case 'u':
+                       add_unique = 1;
+                       break;
+               case 's':
+                       add_replace = 1;
+                       break;
+               case 'l':
+                       leak_detection = 1;
+                       break;
+               }
+       }
+
+       printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+               duration, nr_readers, nr_writers);
+       printf_verbose("Writer delay : %lu loops.\n", wdelay);
+       printf_verbose("Reader duration : %lu loops.\n", rduration);
+       printf_verbose("Add ratio: %u%%.\n", add_ratio);
+       printf_verbose("Mode:%s%s.\n",
+               " add/remove",
+               add_unique ? " uniquify" : ( add_replace ? " replace" : " insert"));
+       printf_verbose("Key multiplication factor: %" PRIu64 ".\n", key_mul);
+       printf_verbose("Init pool size offset %lu size %lu.\n",
+               init_pool_offset, init_pool_size);
+       printf_verbose("Lookup pool size offset %lu size %lu.\n",
+               lookup_pool_offset, lookup_pool_size);
+       printf_verbose("Update pool size offset %lu size %lu.\n",
+               write_pool_offset, write_pool_size);
+       if (validate_lookup)
+               printf_verbose("Validating lookups.\n");
+       if (leak_detection)
+               printf_verbose("Memory leak dection activated.\n");
+       printf_verbose("thread %-6s, tid %lu\n",
+                       "main", urcu_get_thread_id());
+
+       memset(&act, 0, sizeof(act));
+       ret = sigemptyset(&act.sa_mask);
+       if (ret == -1) {
+               perror("sigemptyset");
+               return -1;
+       }
+       act.sa_handler = test_ja_rw_sigusr1_handler;
+       act.sa_flags = SA_RESTART;
+       ret = sigaction(SIGUSR1, &act, NULL);
+       if (ret == -1) {
+               perror("sigaction");
+               return -1;
+       }
+
+       err = create_all_cpu_call_rcu_data(0);
+       if (err) {
+               printf("Per-CPU call_rcu() worker threads unavailable. Using default global worker thread.\n");
+       }
+
+       rcu_register_thread();
+
+       if (sanity_test) {
+               ret = do_sanity_test();
+       } else {
+               ret = do_mt_test();
+       }
+
+       /* Wait for in-flight call_rcu free to complete for leak detection */
+       rcu_barrier();
+
+       ret |= check_memory_leaks();
+
+       rcu_unregister_thread();
+       free_all_cpu_call_rcu_data();
+
+       if (ret) {
+               printf("Test ended with error: %d\n", ret);
+       }
+       return ret;
+}
diff --git a/tests/regression/test_urcu_ja.h b/tests/regression/test_urcu_ja.h
new file mode 100644 (file)
index 0000000..092945d
--- /dev/null
@@ -0,0 +1,175 @@
+#ifndef _TEST_URCU_JA_H
+#define _TEST_URCU_JA_H
+
+/*
+ * test_urcu_ja.h
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright 2009-2012 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sched.h>
+#include <errno.h>
+#include <signal.h>
+
+#include <urcu/tls-compat.h>
+#include "thread-id.h"
+
+#define DEFAULT_RAND_POOL      1000000
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/*
+ * poison_free(ptr): release @ptr with free().
+ *
+ * When POISON_FREE is defined, the pointed-to object (sizeof(*(ptr))
+ * bytes) is first overwritten with 0x42. In that configuration @ptr is
+ * evaluated more than once and must not be NULL.
+ */
+#ifdef POISON_FREE
+#define poison_free(ptr)                               \
+       do {                                            \
+               memset(ptr, 0x42, sizeof(*(ptr)));      \
+               free(ptr);                              \
+       } while (0)
+#else
+#define poison_free(ptr)       free(ptr)
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include <urcu-qsbr.h>
+#include <urcu/rcuja.h>
+#include <urcu-call-rcu.h>
+
+/* Per-writer-thread counters, filled in by the writer when it exits. */
+struct wr_count {
+       unsigned long update_ops;       /* total update operations attempted */
+       unsigned long add;              /* successful additions */
+       unsigned long add_exist;        /* unique additions that found an existing node */
+       unsigned long remove;           /* successful removals */
+};
+
+extern DECLARE_URCU_TLS(unsigned int, rand_lookup);
+extern DECLARE_URCU_TLS(unsigned long, nr_add);
+extern DECLARE_URCU_TLS(unsigned long, nr_addexist);
+extern DECLARE_URCU_TLS(unsigned long, nr_del);
+extern DECLARE_URCU_TLS(unsigned long, nr_delnoent);
+extern DECLARE_URCU_TLS(unsigned long, lookup_fail);
+extern DECLARE_URCU_TLS(unsigned long, lookup_ok);
+
+extern struct cds_ja *test_ja;
+
+/*
+ * Test object stored in the Judy array. The embedded cds_ja_node is what
+ * gets linked into the array; to_test_node() converts it back to the
+ * enclosing structure.
+ */
+struct ja_test_node {
+       struct cds_ja_node node;
+       uint64_t key;           /* for testing */
+       struct rcu_head head;   /* delayed reclaim */
+};
+
+/*
+ * Map a cds_ja_node embedded in a struct ja_test_node back to the
+ * enclosing test node.
+ */
+static inline struct ja_test_node *
+to_test_node(struct cds_ja_node *node)
+{
+       struct ja_test_node *test_node;
+
+       test_node = caa_container_of(node, struct ja_test_node, node);
+       return test_node;
+}
+
+/*
+ * Prepare a test node for insertion: record @key in the node and
+ * initialize its embedded cds_ja_node.
+ */
+static inline
+void ja_test_node_init(struct ja_test_node *node, uint64_t key)
+{
+       struct cds_ja_node *ja_node = &node->node;
+
+       node->key = key;
+       cds_ja_node_init(ja_node);
+}
+
+/*
+ * Test start/stop flags: worker threads spin until test_go is nonzero and
+ * leave their main loop once test_stop is nonzero.
+ */
+extern volatile int test_go, test_stop;
+
+/* delay between two writer updates, in loops */
+extern unsigned long wdelay;
+
+/* test duration, in seconds */
+extern unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+extern unsigned long rduration;
+
+/* nonzero: pre-populate the array from the init pool before the test */
+extern unsigned long init_populate;
+/* NOTE(review): no user of add_only is visible here - confirm it is still needed */
+extern int add_only;
+
+/* Offset and size of the key ranges used for init, lookup and update. */
+extern unsigned long init_pool_offset, lookup_pool_offset, write_pool_offset;
+extern unsigned long init_pool_size,
+       lookup_pool_size,
+       write_pool_size;
+/* nonzero: a reader lookup that finds nothing is a fatal error */
+extern int validate_lookup;
+
+/* NOTE(review): no user of count_pipe is visible here - confirm it is still needed */
+extern int count_pipe[2];
+
+/* Busy-wait for @l iterations, calling caa_cpu_relax() on each one. */
+static inline void loop_sleep(unsigned long l)
+{
+       unsigned long remaining;
+
+       for (remaining = l; remaining != 0; remaining--)
+               caa_cpu_relax();
+}
+
+extern int verbose_mode;
+
+/*
+ * printf_verbose(fmt, ...): printf() that only produces output when
+ * verbose_mode is nonzero. The arguments are not evaluated otherwise.
+ */
+#define printf_verbose(fmt, args...)           \
+       do {                                    \
+               if (verbose_mode)               \
+                       printf(fmt, ## args);   \
+       } while (0)
+
+extern unsigned int cpu_affinities[NR_CPUS];
+extern unsigned int next_aff;
+extern int use_affinity;
+
+extern pthread_mutex_t affinity_mutex;
+
+/*
+ * Fallback used when HAVE_CPU_SET_T is not defined: a CPU set is a single
+ * unsigned long bitmask, one bit per CPU.
+ * NOTE(review): CPU_SET() shifts 1UL by the CPU number, so this fallback
+ * presumably only supports CPU numbers below the width of unsigned long
+ * - confirm callers respect that.
+ */
+#ifndef HAVE_CPU_SET_T
+typedef unsigned long cpu_set_t;
+# define CPU_ZERO(cpuset) do { *(cpuset) = 0; } while(0)
+# define CPU_SET(cpu, cpuset) do { *(cpuset) |= (1UL << (cpu)); } while(0)
+#endif
+
+void set_affinity(void);
+
+/*
+ * Writer-side continuation check.
+ *
+ * Returns 1 while the test should keep running, 0 once test_stop has been
+ * set and the writer should end.
+ */
+static inline int test_duration_write(void)
+{
+       return test_stop == 0;
+}
+
+/*
+ * Reader-side continuation check.
+ *
+ * Returns 1 while the test should keep running, 0 once test_stop has been
+ * set and the reader should end.
+ */
+static inline int test_duration_read(void)
+{
+       return test_stop == 0;
+}
+
+extern DECLARE_URCU_TLS(unsigned long long, nr_writes);
+extern DECLARE_URCU_TLS(unsigned long long, nr_reads);
+
+extern unsigned int nr_readers;
+extern unsigned int nr_writers;
+
+void rcu_copy_mutex_lock(void);
+void rcu_copy_mutex_unlock(void);
+
+#endif /* _TEST_URCU_JA_H */
index 440ac382a8412e738971869bf5dfdc0ef06f98d9..a26b0385734720cddec34b1f57341f79630f7100 100644 (file)
@@ -3,6 +3,9 @@ AM_CFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/sr
 noinst_LIBRARIES = libtap.a
 libtap_a_SOURCES = tap.c tap.h
 
+noinst_PROGRAMS = rcuja-testpop
+rcuja_testpop_SOURCES = rcuja-testpop.c
+
 SCRIPT_LIST = tap.sh
 
 dist_noinst_SCRIPTS = $(SCRIPT_LIST)
diff --git a/tests/utils/rcuja-testpop.c b/tests/utils/rcuja-testpop.c
new file mode 100644 (file)
index 0000000..76dbb03
--- /dev/null
@@ -0,0 +1,333 @@
+/*
+ * rcuja-testpop.c
+ *
+ * Userspace RCU library - RCU Judy Array population size test
+ *
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This program generates random populations, and shows the largest
+ * sub-class generated, as well as the distribution of sub-class size
+ * for the largest sub-class of each population.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* Number of distinct byte values drawn for each generated pool. */
+static int sel_pool_len = 50;  /* default */
+/* Number of sub-classes the pool is split into: 2 (one bit) or 4 (two bits). */
+static int nr_distrib = 2;     /* default */
+//#define SEL_POOL_LEN 100
+//#define NR_POOLS     10000000ULL
+
+/* Current pool; only the first sel_pool_len entries are meaningful. */
+static uint8_t pool[256];
+/* nr_one[b]: number of pool values with bit b set. */
+static uint8_t nr_one[8];
+/*
+ * nr_2d_XY[i][j] (j < i): number of pool values whose bit i equals X and
+ * whose bit j equals Y.
+ */
+static uint8_t nr_2d_11[8][8];
+static uint8_t nr_2d_10[8][8];
+static uint8_t nr_2d_01[8][8];
+static uint8_t nr_2d_00[8][8];
+/* Largest per-pool sub-class length recorded so far. */
+static int global_max_minsubclass_len = 0;
+
+/* Histogram: number of pools for each recorded sub-class length. */
+static unsigned int subclass_len_distrib[256];
+
+/* Nonzero: print each pool and its bit distribution. */
+static int verbose;
+
+/* Return the next random() value truncated to 8 bits. */
+static
+uint8_t random_char(void)
+{
+       long value = random();
+
+       return (uint8_t) value;
+}
+
+/* Print the first sel_pool_len pool values on one line, as decimals. */
+static
+void print_pool(void)
+{
+       int idx = 0;
+
+       printf("pool: ");
+       while (idx < sel_pool_len) {
+               printf("%d ", (int) pool[idx]);
+               idx++;
+       }
+       printf("\n");
+}
+
+/*
+ * Fill pool[] with sel_pool_len distinct byte values.
+ *
+ * Values are drawn without replacement from the 256 candidates: each
+ * picked candidate is replaced by the last remaining one, and the
+ * candidate set shrinks by one.
+ */
+static
+void gen_pool(void)
+{
+       uint8_t candidates[256];
+       int remaining, slot, value;
+
+       memset(pool, 0, sizeof(pool));
+       for (value = 0; value < 256; value++)
+               candidates[value] = (uint8_t) value;
+
+       remaining = 256;
+       for (slot = 0; slot < sel_pool_len; slot++, remaining--) {
+               int pick = random_char() % remaining;
+
+               pool[slot] = candidates[pick];
+               /* Move the last candidate into the freed position. */
+               candidates[pick] = candidates[remaining - 1];
+       }
+}
+
+/*
+ * Recompute the bit statistics of the current pool.
+ *
+ * All counters are reset first. With nr_distrib == 2, nr_one[b] counts
+ * the values having bit b set. With nr_distrib == 4, each pair of bits
+ * (hi, lo) with lo < hi gets its values split into the four nr_2d_XY
+ * counters according to the state of both bits.
+ */
+static
+void count_pool(void)
+{
+       int idx;
+
+       memset(nr_one, 0, sizeof(nr_one));
+       memset(nr_2d_11, 0, sizeof(nr_2d_11));
+       memset(nr_2d_10, 0, sizeof(nr_2d_10));
+       memset(nr_2d_01, 0, sizeof(nr_2d_01));
+       memset(nr_2d_00, 0, sizeof(nr_2d_00));
+
+       for (idx = 0; idx < sel_pool_len; idx++) {
+               const unsigned int value = pool[idx];
+               int hi, lo;
+
+               if (nr_distrib == 2) {
+                       for (hi = 0; hi < 8; hi++)
+                               nr_one[hi] += (value >> hi) & 1U;
+               }
+
+               if (nr_distrib != 4)
+                       continue;
+
+               for (hi = 0; hi < 8; hi++) {
+                       const int hi_set = (value >> hi) & 1U;
+
+                       for (lo = 0; lo < hi; lo++) {
+                               const int lo_set = (value >> lo) & 1U;
+
+                               if (hi_set && lo_set)
+                                       nr_2d_11[hi][lo]++;
+                               else if (hi_set)
+                                       nr_2d_10[hi][lo]++;
+                               else if (lo_set)
+                                       nr_2d_01[hi][lo]++;
+                               else
+                                       nr_2d_00[hi][lo]++;
+                       }
+               }
+       }
+}
+
+/*
+ * Print, for each of the 8 bit positions, how many pool values have the
+ * bit cleared and how many have it set. Only the 2-way distribution is
+ * printed; the 4-way one is not implemented yet.
+ */
+static
+void print_count(void)
+{
+       int bit;
+
+       printf("pool distribution:\n");
+
+       switch (nr_distrib) {
+       case 2:
+               printf("  0      1\n");
+               printf("----------\n");
+               for (bit = 0; bit < 8; bit++) {
+                       int ones = nr_one[bit];
+                       int zeros = sel_pool_len - ones;
+
+                       printf("%3d    %3d\n", zeros, ones);
+               }
+               break;
+       case 4:
+               /* TODO */
+               break;
+       default:
+               break;
+       }
+       printf("\n");
+}
+
+/*
+ * Record the statistics of the current pool for a 2-way (single bit)
+ * split.
+ *
+ * For each bit, 2 * nr_one[bit] - sel_pool_len is the difference between
+ * the number of values with the bit set and with the bit cleared. The bit
+ * with the smallest absolute difference gives the most balanced split;
+ * (difference + sel_pool_len) / 2 is then the size of the larger of its
+ * two sub-classes. That size updates global_max_minsubclass_len and the
+ * subclass_len_distrib histogram.
+ *
+ * All arithmetic is done on int: every operand fits easily (counts are at
+ * most 255, sel_pool_len at most 256), which avoids the unsigned
+ * wrap-around followed by an implementation-defined conversion back to
+ * int, and the signed/unsigned comparisons.
+ */
+static
+void stat_count_1d(void)
+{
+       int overall_best_distance = INT_MAX;
+       int overall_minsubclass_len;
+       int i;
+
+       for (i = 0; i < 8; i++) {
+               int distance_to_best;
+
+               distance_to_best = 2 * (int) nr_one[i] - sel_pool_len;
+               if (distance_to_best < 0)
+                       distance_to_best = -distance_to_best;
+               if (distance_to_best < overall_best_distance)
+                       overall_best_distance = distance_to_best;
+       }
+       overall_minsubclass_len = (overall_best_distance + sel_pool_len) / 2;
+       if (overall_minsubclass_len > global_max_minsubclass_len)
+               global_max_minsubclass_len = overall_minsubclass_len;
+       subclass_len_distrib[overall_minsubclass_len]++;
+}
+
+/*
+ * Record the statistics of the current pool for a 4-way (two bits) split.
+ *
+ * For each bit pair (hi, lo) with lo < hi, the four sub-classes are
+ * scored with 4 * count - sel_pool_len. The pair's score starts as the
+ * score of the "11" sub-class and is replaced by any strictly larger
+ * positive score among the other three, together with that sub-class's
+ * length. The pair with the smallest score wins, and its retained length
+ * updates global_max_minsubclass_len and the subclass_len_distrib
+ * histogram.
+ */
+static
+void stat_count_2d(void)
+{
+       int overall_best_distance = INT_MAX;
+       unsigned int overall_minsubclass_len = 0;
+       int hi, lo, k;
+
+       for (hi = 0; hi < 8; hi++) {
+               for (lo = 0; lo < hi; lo++) {
+                       /* Sub-class populations, in 11, 10, 01, 00 order. */
+                       const unsigned int count[4] = {
+                               nr_2d_11[hi][lo],
+                               nr_2d_10[hi][lo],
+                               nr_2d_01[hi][lo],
+                               nr_2d_00[hi][lo],
+                       };
+                       int worst_distance = (count[0] << 2U) - sel_pool_len;
+                       int worst_len = count[0];
+
+                       /* Consider worse distance above best */
+                       for (k = 1; k < 4; k++) {
+                               int distance = (count[k] << 2U) - sel_pool_len;
+
+                               if (distance > 0 && distance > worst_distance) {
+                                       worst_distance = distance;
+                                       worst_len = count[k];
+                               }
+                       }
+
+                       /*
+                        * If our worse distance is better than overall,
+                        * we become new best candidate.
+                        */
+                       if (worst_distance < overall_best_distance) {
+                               overall_best_distance = worst_distance;
+                               overall_minsubclass_len = worst_len;
+                       }
+               }
+       }
+       if (overall_minsubclass_len > global_max_minsubclass_len) {
+               global_max_minsubclass_len = overall_minsubclass_len;
+       }
+       subclass_len_distrib[overall_minsubclass_len]++;
+}
+
+/*
+ * Dispatch to the statistics routine matching nr_distrib: 2 selects the
+ * single-bit split, 4 the two-bit split. Any other value trips an
+ * assertion.
+ */
+static
+void stat_count(void)
+{
+       if (nr_distrib == 2) {
+               stat_count_1d();
+       } else if (nr_distrib == 4) {
+               stat_count_2d();
+       } else {
+               assert(0);
+       }
+}
+
+/*
+ * Print the histogram of recorded sub-class lengths as a list of
+ * "(length, number of pools, percentage of all pools)" tuples, skipping
+ * lengths that never occurred. Prints nothing when no pool has been
+ * recorded yet.
+ */
+static
+void print_distrib(void)
+{
+       /* Unsigned index: it is printed with %u below. */
+       unsigned int i;
+       unsigned long long tot = 0;
+
+       for (i = 0; i < 256; i++) {
+               tot += subclass_len_distrib[i];
+       }
+       /* Also guards the division below. */
+       if (tot == 0)
+               return;
+       printf("Distribution:\n");
+       for (i = 0; i < 256; i++) {
+               if (!subclass_len_distrib[i])
+                       continue;
+               printf("(%u, %u, %llu%%) ",
+                       i, subclass_len_distrib[i],
+                       100 * (unsigned long long) subclass_len_distrib[i] / tot);
+       }
+       printf("\n");
+}
+
+/*
+ * Print a progress report: the number of pools processed so far (@i), the
+ * largest recorded sub-class length, and the length histogram.
+ */
+static
+void print_stat(uint64_t i)
+{
+       unsigned long long nr_pools = (unsigned long long) i;
+
+       printf("after %llu pools, global_max_minsubclass_len: %d\n",
+               nr_pools, global_max_minsubclass_len);
+       print_distrib();
+}
+
+/*
+ * Usage: rcuja-testpop [pool_len [nr_distrib [-v]]]
+ *
+ * pool_len must be within [1, 256] and nr_distrib must be 2 or 4. The
+ * program then generates random pools forever, printing the accumulated
+ * statistics once every 100000 pools.
+ *
+ * Returns -1 on invalid arguments.
+ */
+int main(int argc, char **argv)
+{
+       uint64_t nr_pools = 0;
+
+       srandom(time(NULL));
+
+       if (argc > 1) {
+               sel_pool_len = atoi(argv[1]);
+               if (sel_pool_len < 1 || sel_pool_len > 256) {
+                       printf("Wrong pool len\n");
+                       return -1;
+               }
+       }
+       printf("pool len: %d\n", sel_pool_len);
+
+       if (argc > 2) {
+               nr_distrib = atoi(argv[2]);
+               if (nr_distrib < 1 || nr_distrib > 256) {
+                       printf("Wrong number of distributions\n");
+                       return -1;
+               }
+       }
+
+       if (argc > 3 && !strcmp(argv[3], "-v"))
+               verbose = 1;
+
+       printf("pool distributions: %d\n", nr_distrib);
+
+       if (!(nr_distrib == 2 || nr_distrib == 4)) {
+               printf("Wrong number of distributions. Only 2 and 4 supported.\n");
+               return -1;
+       }
+
+       /* Runs until the process is interrupted. */
+       for (;;) {
+               gen_pool();
+               count_pool();
+               if (verbose) {
+                       print_pool();
+                       print_count();
+               }
+               stat_count();
+               if (nr_pools % 100000ULL == 0)
+                       print_stat(nr_pools);
+               nr_pools++;
+       }
+       /* Not reached while the loop above has no exit condition. */
+       print_stat(nr_pools);
+       print_distrib();
+
+       return 0;
+}
This page took 0.085457 seconds and 4 git commands to generate.