dist_doc_DATA = rcu-api.md \
cds-api.md \
solaris-build.md \
- uatomic-api.md
+ uatomic-api.md \
+ rcuja-design.txt
--- /dev/null
+RCU Judy Array Design
+Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+March 8, 2012
+
+Initial ideas based on the released Judy Shop Manual
+(http://judy.sourceforge.net/). Judy was invented by Doug Baskins and
+implemented by Hewlett-Packard.
+
+Thresholds and RCU-specific analysis is introduced in this document.
+
+Advantages of using Judy Array (compressed nodes) for RCU tree:
+- no rebalancing
+- no transplant
+- RCU-friendly!
+- favor cache-line alignment of structures
+
+Disadvantage:
+- updates that need to reallocate nodes are slower than, e.g., non-RCU
+ red-black trees.
+
+Choice: Using 256 entries intermediate nodes (index can be represented
+on 8 bits): 4 levels on 32-bit, 8 levels on 64-bit
+
+
+* Node types (from less dense node to most dense)
+
+
+- empty node:
+
+Parent pointer is NULL.
+
+
+- Type A: sequential search in value and pointer arrays
+
++ Add/removal just needs to update value and pointer array, single-entry
+ (non-RCU...). For RCU, we might need to update the entire node anyway.
+- Requires sequential search through all value array for lookup fail
+ test.
+
+Filled at 3 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+64-bit: 1 byte + 3 bytes + 4 bytes pad + 3*8 = 32 bytes
+
+-> up to this point on 64-bit, sequential lookup and pointer read fit in
+a 32-byte cache line.
+ - lookup fail&success: 1 cache-line.
+
+Filled at 6 entries max 32-bit, 7 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 6 bytes + 1 byte pad + 6*4bytes = 32 bytes
+64-bit: 1 byte + 7 bytes + 7*8 = 64 bytes
+
+-> up to this point on 32-bit, sequential lookup and pointer read fit in
+a 32-byte cache line.
+ - lookup fail&success: 1 cache-line.
+
+Filled at 12 entries max 32-bit, 14 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 12 bytes + 3 bytes pad + 12*4bytes = 64 bytes
+64-bit: 1 byte + 14 bytes + 1 byte pad + 14*8 = 128 bytes
+
+Filled at 25 entries max 32-bit, 28 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 25 bytes + 2 bytes pad + 25*4bytes = 128 bytes
+64-bit: 1 byte + 28 bytes + 3 bytes pad + 28*8 = 256 bytes
+
+---> up to this point, on both 32-bit and 64-bit, the sequential lookup
+in values array fits in a 32-byte cache line.
+ - lookup failure: 1 cache line.
+ - lookup success: 2 cache lines.
+
+The two below are listed for completeness sake, but because they require
+2 32-byte cache lines for lookup, these are deemed inappropriate.
+
+Filled at 51 entries max 32-bit, 56 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 51 bytes + 51*4bytes = 256 bytes
+64-bit: 1 byte + 56 bytes + 7 bytes pad + 56*8 = 512 bytes
+
+Filled at 102 entries max 32-bit, 113 entries max 64-bit
+8 bits indicating number of children
+Array of 8-bit values followed by array of associated pointers.
+32-bit: 1 byte + 102 bytes + 1 byte pad + 102*4bytes = 512 bytes
+64-bit: 1 byte + 113 bytes + 6 bytes pad + 113*8 = 1024 bytes
+
+
+- Type B: pools of values and pointers arrays
+
+Pools of values and pointers arrays. Each pool values array is 32-bytes
+in size (so it fits in a L1 cacheline). Each pool begins with an 8-bit
+integer, which is the number of children in this pool, followed by an
+array of 8-bit values, padding, and an array of pointers. Values and
+pointer arrays are associated as in Type A.
+
+The entries of a node are associated to their respective pool based
+on their index position.
+
++ Allows lookup failure to use 1 32-byte cache-line only. (1 cacheline)
+ lookup success: 2 cache lines.
+
++ Allows in-place updates without reallocation, except when a pool is
+ full. (this was not possible with bitmap-based nodes)
+- If one pool exhausts its space, we need to increase the node size.
+ Therefore, for very dense populations, we will end up using the
+ pigeon-hole node type sooner, thus consuming more space.
+
+Pool configuration:
+
+Per pool, filled at 25 entries (32-bit), 28 entries (64-bit)
+32-bit: 1 byte + 25 bytes + 2 bytes pad + 25*4bytes = 128 bytes
+64-bit: 1 byte + 28 bytes + 3 bytes pad + 28*8 = 256 bytes
+
+Total up to 50 entries (32-bit), 56 entries (64-bit)
+2 pools: 32-bit = 256 bytes
+2 pools: 64-bit = 512 bytes
+
+Total up to 100 entries (32-bit), 112 entries (64-bit)
+4 pools: 32-bit = 512 bytes
+4 pools: 64-bit = 1024 bytes
+
+
+* Choice of pool configuration distribution:
+
+We have pools of either 2 or 4 linear arrays. Their total size is
+between 256 bytes (32-bit 2 arrays) and 1024 bytes (64-bit 4 arrays).
+
+Alignment on 256 bytes means that we can spare the 8 least significant
+bits of the pointers. Given that the type selection already uses 3 bits,
+we have 5 bits left.
+
+Alignment on 512 bytes -> 6 bits left.
+
+We can therefore encode which bit, or which two bits, are used as
+distribution selection. We can use this technique to reequilibrate pools
+if they become unbalanced (e.g. all children are within one of the two
+linear arrays).
+
+Assuming that finding the exact sub-pool usage maximum for any given
+distribution is NP complete (not proven).
+
+Taking into account sub-class size unbalance (tested programmatically by
+randomly taking N entries from 256, calculating the distribution for
+each bit (number of nodes for which bit is one/zero), and calculating
+the difference in number of nodes for each bit, choosing the minimum
+difference -- for millions of runs).
+
+We start from the "ideal" population size (with no unbalance), and do a
+fixed point to find the appropriate population size.
+
+tot entries subclass extra items largest linear array (stat. approx.)
+---------------------------------------------------------------------
+48 entries: 1 (98%) 24+1=25 (target ~50/2=25)
+54 entries: 1 (97%) 27+1=28 (target ~56/2=28)
+
+Note: there exist rare worse cases where the unbalance is larger, but
+this happens _very_ rarely. We need to provide a fallback if the
+subclass does not fit, but the fallback does not need to be efficient.
+
+
+For pool of size 4, we need to approximate what is the maximum unbalance
+we can get for choice of distributions grouped by pairs of bits.
+
+tot entries subclass extra items largest linear array (stat. approx.)
+---------------------------------------------------------------------
+92 entries: 2 (99%) 23+2=25 (target: ~100/4=25)
+104 entries: 2 (99%) 26+2=28 (target: ~112/4=28)
+
+
+Note: there exist rare worse cases where the unbalance is larger, but
+this happens _very_ rarely. We need to provide a fallback if the
+subclass does not fit, but the fallback does not need to be efficient.
+
+
+* Population "does not fit" and distribution fallback
+
+When adding a child to a distribution node, if the child does not fit,
+we recalculate the best distribution. If it does not fit in that
+distribution either, we need to expand the node type.
+
+When removing a child, if the node child count is brought to the number
+of entries expected to statistically fit in the lower order node, we try
+to shrink. However, if we notice that the distribution does not actually
+fit in that shrunk node, we abort the shrink operation. If shrink
+fails, we keep a counter of insertion/removal operations on the node
+before we allow the shrink to be attempted again.
+
+
+- Type C: pigeon-hole array
+
+Filled at 47.2%/48.8% or more (32-bit: 121 entries+, 64-bit: 125 entries+)
+Array of children node pointers. Pointers NULL if no child at index.
+32-bit: 4*256 = 1024 bytes
+64-bit: 8*256 = 2048 bytes
+
+
+* Analysis of the thresholds:
+
+Analysis of number of cache-lines touched for each node, per-node-type,
+depending on the number of children per node, as we increment the number
+of children from 0 to 256. Through this, we choose number of children
+thresholds at which it is worthwhile to use a different node type.
+
+Per node:
+
+- ALWAYS 1 cache line hit for lookup failure (all cases)
+
+32-bit
+
+- Unexisting
+
+0 children
+
+- Type A: sequential search in value and pointer arrays
+- 1 cache line hit for lookup success
+- 32 bytes storage
+
+up to 6 children
+
+- 2 cache line hit for lookup success
+- 64 bytes storage
+
+up to 12 children
+
+- 128 bytes storage
+
+up to 25 children
+
+- Type B: pool
+
+- 256 bytes storage
+
+up to 50 children
+
+- 512 bytes storage
+up to 100 children
+
+- Type C: pigeon-hole array
+- 1 cache line hit for lookup success
+- 1024 bytes storage
+
+up to 256 children
+
+
+64-bit
+
+- Unexisting
+
+0 children
+
+- Type A: sequential search in value and pointer arrays
+- 1 cache line hit for lookup success
+- 32 bytes storage
+
+up to 3 children
+
+- 2 cache line hit for lookup success
+- 64 bytes storage
+
+up to 7 children
+
+- 128 bytes storage
+
+up to 14 children
+
+- 256 bytes storage
+
+up to 28 children
+
+- Type B: pool
+
+- 512 bytes storage
+up to 56 children
+
+- 1024 bytes storage
+up to 112 children
+
+- Type C: pigeon-hole array
+- 1 cache line hit for lookup success
+- 2048 bytes storage
+
+up to 256 children
+
+
+* Analysis of node type encoding and node pointers:
+
+Lookups are _always_ from the top of the tree going down. This
+facilitates RCU replacement as we only keep track of pointers going
+downward.
+
+Type of node encoded in the parent's pointer. Need to reserve 3
+least-significant bits.
+
+Types of children:
+
+enum child_type {
+ RCU_JA_LINEAR = 0, /* Type A */
+ /* 32-bit: 1 to 25 children, 8 to 128 bytes */
+ /* 64-bit: 1 to 28 children, 16 to 256 bytes */
+ RCU_JA_POOL = 1, /* Type B */
+ /* 32-bit: 26 to 100 children, 256 to 512 bytes */
+ /* 64-bit: 29 to 112 children, 512 to 1024 bytes */
+ RCU_JA_PIGEON = 2, /* Type C */
+ /* 32-bit: 101 to 256 children, 1024 bytes */
+ /* 64-bit: 113 to 256 children, 2048 bytes */
+ /* Leaf nodes are implicit from their height in the tree */
+};
+
+If entire pointer is NULL, children is empty.
+
+
+* Lookup and Update Algorithms
+
+Let's propose a quite simple scheme that uses a mutex on nodes to manage
+update concurrency. It's certainly not optimal in terms of concurrency
+management within a node, but it has the advantage of being simple to
+implement and understand.
+
+We need to keep a count of the number of children nodes (for each node),
+to keep track of when the node type thresholds are reached. It would be
+important to put an hysteresis loop so we don't change between node
+types too often for a loop on add/removal of the same node.
+
+We acquire locks from child to parent, nested. We take all locks
+required to perform a given update in the tree (but no more) to keep it
+consistent with respect to number of children per node.
+
+If check for node being gc'd (always under node lock) fails, we simply
+need to release the lock and lookup the node again.
+
+
+- Leaf lookup
+
+rcu_read_lock()
+
+RCU-lookup each level of the tree. If level is not populated, fail.
+Until we reach the leaf node.
+
+rcu_read_unlock()
+
+
+- Leaf insertion
+
+A) Lookup
+
+rcu_read_lock()
+RCU-lookup insert position. Find location in tree where nodes are
+missing for this insertion. If leaf is already present, insert fails,
+releasing the rcu read lock. The insert location consists of a parent
+node to which we want to attach a new node.
+
+B) Lock
+
+RCU-lookup parent node. Take the parent lock. If the parent needs to be
+reallocated to make room for this insertion, RCU-lookup parent-parent
+node and take the parent-parent lock. For each lock taken, check if
+node is being gc'd. If gc'd, release lock, re-RCU-lookup this node, and
+retry.
+
+C) Create
+
+Construct the whole branch from the new topmost intermediate node down
+to the new leaf node we are inserting.
+
+D) Populate:
+ - If parent node reallocation is needed:
+ Reallocate the parent node, adding the new branch to it, and
+ increment its node count.
+ set gc flag in old nodes.
+ call_rcu free for all old nodes.
+ Populate new parent node with rcu_assign_pointer.
+ - Else:
+ Increment parent node count.
+ Use rcu_assign_pointer to populate this new branch into the parent
+ node.
+
+E) Locks
+
+Release parent and (if taken) parent-parent locks.
+rcu_read_unlock()
+
+
+- Leaf removal
+
+A) Lookup
+
+rcu_read_lock()
+RCU-lookup leaf to remove. If leaf is missing, fail and release rcu
+read lock.
+
+B) Lock
+
+RCU-lookup parent. Take the parent lock. If the parent needs to be
+reallocated because it would be too large for the decremented number of
+children, RCU-lookup parent-parent and take the parent-parent lock. Do
+so recursively until no node reallocation is needed, or until root is
+reached.
+
+For each lock taken, check if node is being gc'd. If gc'd, release lock,
+re-RCU-lookup this node, and retry.
+
+C) Create
+
+The branch (or portion of branch) consisting of taken locks necessarily
+has a simple node removal or update as operation to do on its top node.
+
+If the operation is a node removal, then, necessarily, the entire branch
+under the node removal operation will simply disappear. No node
+allocation is needed.
+
+Else, if the operation is a child node reallocation, the child node will
+necessarily do a node removal. So _its_ entire child branch will
+disappear. So reallocate this child node without the removed branch
+(remember to decrement its nr children count).
+
+D) Populate
+
+No reallocation case: simply set the appropriate child pointer in the
+topmost locked node to NULL. Decrement its nr children count.
+
+Reallocation case: set the child pointer in the topmost locked node to
+the newly allocated node.
+set old nodes gc flag.
+call_rcu free for all old nodes.
+
+E) Locks
+
+Release all locks.
+rcu_read_unlock()
+
+
+For the various types of nodes:
+
+- sequential search (type A)
+ - RCU replacement: mutex
+ - Entry update: mutex
+
+- pools of values and pointers arrays (type B)
+ - RCU replacement: mutex
+ - Entry update: mutex
+
+- pigeon hole array (type C)
+ - RCU replacement: mutex
+ - Entry update: mutex
urcu/static/urcu.h urcu/static/urcu-pointer.h \
urcu/static/urcu-qsbr.h urcu/static/wfcqueue.h \
urcu/static/wfqueue.h urcu/static/wfstack.h \
- urcu/tls-compat.h urcu/debug.h
+ urcu/tls-compat.h urcu/debug.h urcu/rcuja.h
# Don't distribute generated headers
nobase_nodist_include_HEADERS = urcu/arch.h urcu/uatomic.h urcu/config.h
--- /dev/null
+#ifndef _URCU_RCUJA_H
+#define _URCU_RCUJA_H
+
+/*
+ * urcu/rcuja.h
+ *
+ * Userspace RCU library - RCU Judy Array
+ *
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Include this file _after_ including your URCU flavor.
+ */
+
+#include <urcu/compiler.h>
+#include <urcu-call-rcu.h>
+#include <urcu-flavor.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Duplicate nodes with the same key are chained into a singly-linked
+ * list. The last item of this list has a NULL next pointer.
+ */
+struct cds_ja_node {
+ struct cds_ja_node *next;
+};
+
+struct cds_ja;
+
+/*
+ * cds_ja_node_init - initialize a judy array node
+ * @node: the node to initialize.
+ *
+ * This function is kept to be eventually used for debugging purposes
+ * (detection of memory corruption).
+ */
+static inline
+void cds_ja_node_init(struct cds_ja_node *node)
+{
+}
+
+/*
+ * Note: key UINT64_MAX is reserved internally for iteration.
+ */
+
+/*
+ * cds_ja_lookup - look up by key.
+ * @ja: the Judy array.
+ * @key: key to look up.
+ *
+ * Returns the first node of a duplicate chain if a match is found, else
+ * returns NULL.
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_lookup(struct cds_ja *ja, uint64_t key);
+
+/*
+ * cds_ja_lookup_below_equal - look up first node with key <= @key.
+ * @ja: the Judy array.
+ * @key: key to look up.
+ * @result_key: key found.
+ *
+ * Returns the first node of a duplicate chain if a node is present in
+ * the tree which has a key below or equal to @key, else returns NULL.
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_lookup_below_equal(struct cds_ja *ja,
+ uint64_t key, uint64_t *result_key);
+
+/*
+ * cds_ja_lookup_above_equal - look up first node with key >= @key.
+ * @ja: the Judy array.
+ * @key: key to look up.
+ * @result_key: key found.
+ *
+ * Returns the first node of a duplicate chain if a node is present in
+ * the tree which has a key above or equal to @key, else returns NULL.
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_lookup_above_equal(struct cds_ja *ja,
+ uint64_t key, uint64_t *result_key);
+
+/*
+ * cds_ja_add - Add @node at @key, allowing duplicates.
+ * @ja: the Judy array.
+ * @key: key at which @node should be added.
+ * @node: node to add.
+ *
+ * Returns 0 on success, negative error value on error.
+ * A RCU read-side lock should be held across call to this function.
+ */
+int cds_ja_add(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node);
+
+/*
+ * cds_ja_add_unique - Add @node at @key, without duplicates.
+ * @ja: the Judy array.
+ * @key: key at which @node should be added.
+ * @node: node to add.
+ *
+ * Returns @node if successfully added, else returns the already
+ * existing node (acts as a RCU lookup).
+ * A RCU read-side lock should be held across call to this function and
+ * use of its return value.
+ */
+struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node);
+
+/*
+ * cds_ja_del - Remove @node at @key.
+ * @ja: the Judy array.
+ * @key: key at which @node is expected.
+ * @node: node to remove.
+ *
+ * Returns 0 on success, negative error value on error.
+ * A RCU read-side lock should be held across call to this function.
+ */
+int cds_ja_del(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node);
+
+struct cds_ja *_cds_ja_new(unsigned int key_bits,
+ const struct rcu_flavor_struct *flavor);
+
+/*
+ * cds_ja_new - Create a Judy array.
+ * @key_bits: Number of bits for key.
+ *
+ * Returns non-NULL pointer on success, else NULL on error. @key_bits
+ * needs to be multiple of 8, either: 8, 16, 24, 32, 40, 48, 56, or 64.
+ */
+static inline
+struct cds_ja *cds_ja_new(unsigned int key_bits)
+{
+ return _cds_ja_new(key_bits, &rcu_flavor);
+}
+
+/*
+ * cds_ja_destroy - Destroy a Judy array.
+ * @ja: the Judy array.
+ *
+ * Returns 0 on success, negative error value on error.
+ * There should be no more concurrent add, delete, nor look-up performed
+ * on the Judy array while it is being destroyed (ensured by the caller).
+ * RCU read-side lock should _not_ be held when calling this function,
+ * however, QSBR threads need to be online.
+ */
+int cds_ja_destroy(struct cds_ja *ja);
+
+/*
+ * cds_ja_for_each_duplicate_rcu: Iterate through duplicates.
+ * @pos: struct cds_ja_node *, start of duplicate list and loop cursor.
+ *
+ * Iterate through duplicates returned by cds_ja_lookup*()
+ * This must be done while rcu_read_lock() is held.
+ * Receives a struct cds_ja_node * as parameter, which is used as start
+ * of duplicate list and loop cursor.
+ * _NOT_ safe against node removal within iteration.
+ */
+#define cds_ja_for_each_duplicate_rcu(pos) \
+ for (; (pos) != NULL; (pos) = rcu_dereference((pos)->next))
+
+/*
+ * cds_ja_for_each_duplicate_safe_rcu: Iterate through duplicates.
+ * @pos: struct cds_ja_node *, start of duplicate list and loop cursor.
+ * @p: struct cds_ja_node *, temporary pointer to next.
+ *
+ * Iterate through duplicates returned by cds_ja_lookup*().
+ * Safe against node removal within iteration.
+ * This must be done while rcu_read_lock() is held.
+ */
+#define cds_ja_for_each_duplicate_safe_rcu(pos, p) \
+ for (; (pos) != NULL ? \
+ ((p) = rcu_dereference((pos)->next), 1) : 0; \
+ (pos) = (p))
+
+/*
+ * cds_ja_for_each_key_rcu: Iterate over all keys in ascending order.
+ * @ja: Judy array on which iteration should be done.
+ * @key: Key cursor, needs to be a uint64_t.
+ * @pos: struct cds_ja_node *, used as loop cursor.
+ *
+ * Iterate over all keys of a RCU Judy array (_not_ duplicates) in
+ * ascending order.
+ * This must be done while rcu_read_lock() is held.
+ * Safe against node removal during iteration.
+ */
+#define cds_ja_for_each_key_rcu(ja, key, pos) \
+ for ((key) = 0; \
+ ((key) != UINT64_MAX ? \
+ ((pos) = cds_ja_lookup_above_equal(ja, key, &(key))) : 0); \
+ (key)++)
+
+/*
+ * cds_ja_for_each_key_prev_rcu: Iterate over all keys in descending order.
+ * @ja: Judy array on which iteration should be done.
+ * @key: Key cursor, needs to be a uint64_t.
+ * @pos: struct cds_ja_node *, used as loop cursor.
+ *
+ * Iterate over all keys of a RCU Judy array (_not_ duplicates) in
+ * descending order.
+ * This must be done while rcu_read_lock() is held.
+ * Safe against node removal during iteration.
+ */
+#define cds_ja_for_each_key_prev_rcu(ja, key, pos) \
+ for ((key) = UINT64_MAX - 1; \
+ ((key) != UINT64_MAX ? \
+ ((pos) = cds_ja_lookup_below_equal(ja, key, &(key))) : 0); \
+ (key)--)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_RCUJA_H */
RCULFHASH = rculfhash.c rculfhash-mm-order.c rculfhash-mm-chunk.c \
rculfhash-mm-mmap.c
+RCUJA = rcuja.c rcuja-shadow-nodes.c rcuja-internal.h
+
lib_LTLIBRARIES = liburcu-common.la \
liburcu.la liburcu-qsbr.la \
liburcu-mb.la liburcu-signal.la liburcu-bp.la \
liburcu_bp_la_LIBADD = liburcu-common.la
liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c lfstack.c \
- $(RCULFHASH) $(COMPAT)
+ $(RCULFHASH) $(RCUJA) $(COMPAT)
liburcu_cds_la_LIBADD = liburcu-common.la
pkgconfigdir = $(libdir)/pkgconfig
--- /dev/null
+#ifndef _URCU_RCUJA_INTERNAL_H
+#define _URCU_RCUJA_INTERNAL_H
+
+/*
+ * rcuja/rcuja-internal.h
+ *
+ * Userspace RCU library - RCU Judy Array Internal Header
+ *
+ * Copyright (C) 2000 - 2002 Hewlett-Packard Company
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <urcu/rculfhash.h>
+
+/*
+ * Number of least significant pointer bits reserved to represent the
+ * child type.
+ */
+#define JA_TYPE_BITS 3
+#define JA_TYPE_MAX_NR (1UL << JA_TYPE_BITS)
+#define JA_TYPE_MASK (JA_TYPE_MAX_NR - 1)
+#define JA_PTR_MASK (~JA_TYPE_MASK)
+
+#define JA_ENTRY_PER_NODE 256UL
+#define JA_LOG2_BITS_PER_BYTE 3U
+#define JA_BITS_PER_BYTE (1U << JA_LOG2_BITS_PER_BYTE)
+
+#define JA_POOL_1D_MASK ((JA_BITS_PER_BYTE - 1) << JA_TYPE_BITS)
+#define JA_POOL_2D_MASK (JA_POOL_1D_MASK << JA_LOG2_BITS_PER_BYTE)
+
+#define JA_MAX_DEPTH 9 /* Maximum depth, including leaves */
+
+/*
+ * Entry for NULL node is at index 8 of the table. It is never encoded
+ * in flags.
+ */
+#define NODE_INDEX_NULL 8
+
+/*
+ * Number of removals needed on a fallback node before we try to shrink
+ * it.
+ */
+#define JA_FALLBACK_REMOVAL_COUNT 8
+
+/* Never declared. Opaque type used to store flagged node pointers. */
+struct cds_ja_inode_flag;
+struct cds_ja_inode;
+
+/*
+ * Shadow node contains mutex and call_rcu head associated with a node.
+ */
+struct cds_ja_shadow_node {
+ struct cds_lfht_node ht_node; /* hash table node */
+ struct cds_ja_inode_flag *node_flag; /* reverse mapping and hash table key */
+ /*
+ * mutual exclusion on all nodes belonging to the same tree
+ * position (e.g. both nodes before and after recompaction
+ * use the same lock).
+ */
+ pthread_mutex_t *lock;
+ unsigned int nr_child; /* number of children in node */
+ struct rcu_head head; /* for deferred node and shadow node reclaim */
+ int fallback_removal_count; /* removals left keeping fallback */
+ int level; /* level in the tree */
+ struct cds_ja *ja; /* toplevel judy array */
+};
+
+struct cds_ja {
+ struct cds_ja_inode_flag *root;
+ unsigned int tree_depth;
+ uint64_t key_max;
+ /*
+ * We use a hash table to associate node keys to their
+ * respective shadow node. This helps reducing lookup hot path
+ * cache footprint, especially for very small nodes.
+ */
+ struct cds_lfht *ht;
+ unsigned long nr_fallback; /* Number of fallback nodes used */
+
+ /* For debugging */
+ unsigned long node_fallback_count_distribution[JA_ENTRY_PER_NODE];
+ unsigned long nr_nodes_allocated, nr_nodes_freed;
+};
+
+static inline
+struct cds_ja_inode_flag *ja_node_flag(struct cds_ja_inode *node,
+ unsigned long type)
+{
+ assert(type < (1UL << JA_TYPE_BITS));
+ return (struct cds_ja_inode_flag *) (((unsigned long) node) | type);
+}
+
+static inline
+struct cds_ja_inode_flag *ja_node_flag_pool_1d(struct cds_ja_inode *node,
+ unsigned long type, unsigned long bitsel)
+{
+ assert(type < (1UL << JA_TYPE_BITS));
+ assert(bitsel < JA_BITS_PER_BYTE);
+ return (struct cds_ja_inode_flag *) (((unsigned long) node) | (bitsel << JA_TYPE_BITS) | type);
+}
+
+static inline
+struct cds_ja_inode_flag *ja_node_flag_pool_2d(struct cds_ja_inode *node,
+ unsigned long type, unsigned int bitsel[2])
+{
+ assert(type < (1UL << JA_TYPE_BITS));
+ assert(bitsel[0] < JA_BITS_PER_BYTE);
+ assert(bitsel[1] < JA_BITS_PER_BYTE);
+ return (struct cds_ja_inode_flag *) (((unsigned long) node) | (bitsel[0] << (JA_TYPE_BITS + JA_LOG2_BITS_PER_BYTE)) | (bitsel[1] << JA_TYPE_BITS) | type);
+}
+
+static inline
+unsigned long ja_node_pool_1d_bitsel(struct cds_ja_inode_flag *node)
+{
+ return ((unsigned long) node & JA_POOL_1D_MASK) >> JA_TYPE_BITS;
+}
+
+static inline
+void ja_node_pool_2d_bitsel(struct cds_ja_inode_flag *node, unsigned long *bits)
+{
+ bits[0] = ((unsigned long) node & JA_POOL_2D_MASK) >> (JA_TYPE_BITS + JA_LOG2_BITS_PER_BYTE);
+ bits[1] = ((unsigned long) node & JA_POOL_1D_MASK) >> JA_TYPE_BITS;
+}
+
+/* Hardcoded pool indexes for fast path */
+#define RCU_JA_POOL_IDX_5 5
+#define RCU_JA_POOL_IDX_6 6
+static inline
+struct cds_ja_inode *ja_node_ptr(struct cds_ja_inode_flag *node)
+{
+ unsigned long v, type_idx;
+
+ if (!node)
+ return NULL; /* RCU_JA_NULL */
+ v = (unsigned long) node;
+ type_idx = v & JA_TYPE_MASK;
+
+ switch (type_idx) {
+ case RCU_JA_POOL_IDX_5:
+ v &= ~(JA_POOL_1D_MASK | JA_TYPE_MASK);
+ break;
+ case RCU_JA_POOL_IDX_6:
+ v &= ~(JA_POOL_2D_MASK | JA_POOL_1D_MASK | JA_TYPE_MASK);
+ break;
+ default:
+ /* RCU_JA_LINEAR or RCU_JA_PIGEON */
+ v &= JA_PTR_MASK;
+ break;
+ }
+ return (struct cds_ja_inode *) v;
+}
+
+__attribute__((visibility("protected")))
+unsigned long ja_node_type(struct cds_ja_inode_flag *node);
+
+__attribute__((visibility("protected")))
+void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node,
+ struct cds_ja_inode_flag *node_flag);
+
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_lookup_lock(struct cds_lfht *ht,
+ struct cds_ja_inode_flag *node_flag);
+
+__attribute__((visibility("protected")))
+void rcuja_shadow_unlock(struct cds_ja_shadow_node *shadow_node);
+
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_set(struct cds_lfht *ht,
+ struct cds_ja_inode_flag *new_node_flag,
+ struct cds_ja_shadow_node *inherit_from,
+ struct cds_ja *ja, int level);
+
+/* rcuja_shadow_clear flags */
+enum {
+ RCUJA_SHADOW_CLEAR_FREE_NODE = (1U << 0),
+ RCUJA_SHADOW_CLEAR_FREE_LOCK = (1U << 1),
+};
+
+__attribute__((visibility("protected")))
+int rcuja_shadow_clear(struct cds_lfht *ht,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_shadow_node *shadow_node,
+ unsigned int flags);
+
+__attribute__((visibility("protected")))
+void rcuja_shadow_prune(struct cds_lfht *ht,
+ unsigned int flags);
+
+__attribute__((visibility("protected")))
+struct cds_lfht *rcuja_create_ht(const struct rcu_flavor_struct *flavor);
+
+__attribute__((visibility("protected")))
+int rcuja_delete_ht(struct cds_lfht *ht);
+
+__attribute__((visibility("protected")))
+void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node);
+
+/*
+ * Iterate through duplicates returned by cds_ja_lookup*()
+ * Receives a struct cds_ja_node * as parameter, which is used as start
+ * of duplicate list and loop cursor.
+ */
+#define cds_ja_for_each_duplicate(pos) \
+ for (; (pos) != NULL; (pos) = (pos)->next)
+
+//#define DEBUG
+//#define DEBUG_COUNTERS
+
+#ifdef __linux__
+#include <syscall.h>
+#endif
+
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+ return getpid();
+}
+#endif
+
+#ifdef DEBUG
+#define dbg_printf(fmt, args...) \
+ fprintf(stderr, "[debug rcuja %lu %s()@%s:%u] " fmt, \
+ (unsigned long) gettid(), __func__, \
+ __FILE__, __LINE__, ## args)
+
+#else
+#define dbg_printf(fmt, args...) \
+do { \
+ /* do nothing but check printf format */ \
+ if (0) \
+ fprintf(stderr, "[debug rcuja %lu %s()@%s:%u] " fmt, \
+ (unsigned long) gettid(), __func__, \
+ __FILE__, __LINE__, ## args); \
+} while (0)
+#endif
+
+#ifdef DEBUG_COUNTERS
+static inline
+int ja_debug_counters(void)
+{
+ return 1;
+}
+#else
+static inline
+int ja_debug_counters(void)
+{
+ return 0;
+}
+#endif
+
+#endif /* _URCU_RCUJA_INTERNAL_H */
--- /dev/null
+/*
+ * rcuja/rcuja-hashtable.c
+ *
+ * Userspace RCU library - RCU Judy Array Shadow Node Hash Table
+ *
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define _LGPL_SOURCE
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+#include <urcu/rcuja.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+#include <urcu-pointer.h>
+
+#include "rcuja-internal.h"
+
+static unsigned long hash_seed;
+
+/*
+ * Hash function
+ * Source: http://burtleburtle.net/bob/c/lookup3.c
+ * Originally Public Domain
+ */
+
+/* 32-bit left rotation; k must be in 1..31 (shift by 32 would be UB). */
+#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+/* Reversible mixing of three 32-bit state words (lookup3 mix()). */
+#define mix(a, b, c) \
+do { \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c, 16); c += b; \
+ b -= a; b ^= rot(a, 19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define final(a, b, c) \
+{ \
+ c ^= b; c -= rot(b, 14); \
+ a ^= c; a -= rot(c, 11); \
+ b ^= a; b -= rot(a, 25); \
+ c ^= b; c -= rot(b, 16); \
+ a ^= c; a -= rot(c, 4);\
+ b ^= a; b -= rot(a, 14); \
+ c ^= b; c -= rot(b, 24); \
+}
+
+/*
+ * Hash an array of uint32_t (Bob Jenkins' lookup3 hashword), returning
+ * the primary 32-bit hash. Marked unused because only one of
+ * hash_u32/hashword2 is needed depending on pointer size.
+ */
+static inline __attribute__((unused))
+uint32_t hash_u32(
+ const uint32_t *k, /* the key, an array of uint32_t values */
+ size_t length, /* the length of the key, in uint32_ts */
+ uint32_t initval) /* the previous hash, or an arbitrary value */
+{
+ uint32_t a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t) length) << 2) + initval;
+
+ /*----------------------------------------- handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*----------------------------------- handle the last 3 uint32_t's */
+ switch (length) { /* all the case statements fall through */
+ case 3: c += k[2]; /* fallthrough */
+ case 2: b += k[1]; /* fallthrough */
+ case 1: a += k[0];
+ final(a, b, c);
+ /* fallthrough */
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*---------------------------------------------- report the result */
+ return c;
+}
+
+/*
+ * Same as hash_u32, but produces two 32-bit hash values (used to build
+ * a 64-bit hash). *pc/*pb carry the seed in and the primary/secondary
+ * hash out.
+ */
+static inline
+void hashword2(
+ const uint32_t *k, /* the key, an array of uint32_t values */
+ size_t length, /* the length of the key, in uint32_ts */
+ uint32_t *pc, /* IN: seed OUT: primary hash value */
+ uint32_t *pb) /* IN: more seed OUT: secondary hash value */
+{
+ uint32_t a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t) (length << 2)) + *pc;
+ c += *pb;
+
+ /*----------------------------------------- handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*----------------------------------- handle the last 3 uint32_t's */
+ switch (length) { /* all the case statements fall through */
+ case 3: c += k[2]; /* fallthrough */
+ case 2: b += k[1]; /* fallthrough */
+ case 1: a += k[0];
+ final(a, b, c);
+ /* fallthrough */
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*---------------------------------------------- report the result */
+ *pc = c;
+ *pb = b;
+}
+
+/*
+ * Hash a pointer value with a seed. On 32-bit the pointer is hashed as
+ * a single uint32_t; on 64-bit it is split into two uint32_t words and
+ * a 64-bit hash is assembled from the primary/secondary results.
+ */
+#if (CAA_BITS_PER_LONG == 32)
+static
+unsigned long hash_pointer(const void *_key, unsigned long seed)
+{
+ unsigned int key = (unsigned int) _key;
+
+ return hash_u32(&key, 1, seed);
+}
+#else
+static
+unsigned long hash_pointer(const void *_key, unsigned long seed)
+{
+ /* Unions split the 64-bit values into uint32_t halves for hashword2. */
+ union {
+ uint64_t v64;
+ uint32_t v32[2];
+ } v;
+ union {
+ uint64_t v64;
+ uint32_t v32[2];
+ } key;
+
+ v.v64 = (uint64_t) seed;
+ /* NOTE(review): direct pointer-to-uint64_t cast; some compilers
+ * prefer an (unsigned long) intermediate — confirm it is warning-free
+ * on all supported targets. */
+ key.v64 = (uint64_t) _key;
+ hashword2(key.v32, 2, &v.v32[0], &v.v32[1]);
+ return v.v64;
+}
+#endif
+
+static
+int match_pointer(struct cds_lfht_node *node, const void *key)
+{
+ struct cds_ja_shadow_node *shadow;
+
+ /* Shadow nodes are keyed by the value of their node_flag pointer. */
+ shadow = caa_container_of(node, struct cds_ja_shadow_node, ht_node);
+ return shadow->node_flag == key;
+}
+
+/*
+ * Look up the shadow node for node_flag and return it with its mutex
+ * held; returns NULL if absent or concurrently deleted. The mutex is
+ * taken inside the read-side critical section, and deletion is
+ * re-checked under the mutex, so a node being removed is never
+ * returned locked.
+ */
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_lookup_lock(struct cds_lfht *ht,
+ struct cds_ja_inode_flag *node_flag)
+{
+ struct cds_lfht_iter iter;
+ struct cds_lfht_node *lookup_node;
+ struct cds_ja_shadow_node *shadow_node;
+ const struct rcu_flavor_struct *flavor;
+ int ret;
+
+ flavor = cds_lfht_rcu_flavor(ht);
+ flavor->read_lock();
+ cds_lfht_lookup(ht, hash_pointer(node_flag, hash_seed),
+ match_pointer, node_flag, &iter);
+
+ lookup_node = cds_lfht_iter_get_node(&iter);
+ if (!lookup_node) {
+ shadow_node = NULL;
+ goto rcu_unlock;
+ }
+ shadow_node = caa_container_of(lookup_node,
+ struct cds_ja_shadow_node, ht_node);
+ dbg_printf("Lock %p\n", shadow_node->lock);
+ ret = pthread_mutex_lock(shadow_node->lock);
+ assert(!ret);
+ /* Re-check deletion with the mutex held: racing deleters hold it too. */
+ if (cds_lfht_is_node_deleted(lookup_node)) {
+ ret = pthread_mutex_unlock(shadow_node->lock);
+ assert(!ret);
+ shadow_node = NULL;
+ }
+rcu_unlock:
+ flavor->read_unlock();
+ return shadow_node;
+}
+
+__attribute__((visibility("protected")))
+void rcuja_shadow_unlock(struct cds_ja_shadow_node *shadow_node)
+{
+ int lockret;
+
+ /* Release the mutex taken by rcuja_shadow_lookup_lock(). */
+ dbg_printf("Unlock %p\n", shadow_node->lock);
+ lockret = pthread_mutex_unlock(shadow_node->lock);
+ assert(!lockret);
+}
+
+/*
+ * Insert a shadow node for new_node_flag into the shadow node hash
+ * table. The lock and level can be inherited from the previous node
+ * at the same tree position; otherwise a fresh mutex is allocated and
+ * initialized. Returns the new shadow node, or NULL on allocation
+ * failure or if a shadow node already exists for this node.
+ */
+__attribute__((visibility("protected")))
+struct cds_ja_shadow_node *rcuja_shadow_set(struct cds_lfht *ht,
+ struct cds_ja_inode_flag *new_node_flag,
+ struct cds_ja_shadow_node *inherit_from,
+ struct cds_ja *ja, int level)
+{
+ struct cds_ja_shadow_node *shadow_node;
+ struct cds_lfht_node *ret_node;
+ const struct rcu_flavor_struct *flavor;
+
+ shadow_node = calloc(1, sizeof(*shadow_node));
+ if (!shadow_node)
+ return NULL;
+
+ shadow_node->node_flag = new_node_flag;
+ shadow_node->ja = ja;
+ /*
+ * Lock can be inherited from previous node at this position.
+ */
+ if (inherit_from) {
+ shadow_node->lock = inherit_from->lock;
+ shadow_node->level = inherit_from->level;
+ } else {
+ shadow_node->lock = calloc(1, sizeof(*shadow_node->lock));
+ if (!shadow_node->lock) {
+ free(shadow_node);
+ return NULL;
+ }
+ pthread_mutex_init(shadow_node->lock, NULL);
+ shadow_node->level = level;
+ }
+
+ flavor = cds_lfht_rcu_flavor(ht);
+ flavor->read_lock();
+ ret_node = cds_lfht_add_unique(ht,
+ hash_pointer(new_node_flag, hash_seed),
+ match_pointer,
+ new_node_flag,
+ &shadow_node->ht_node);
+ flavor->read_unlock();
+
+ if (ret_node != &shadow_node->ht_node) {
+ /*
+ * A shadow node already exists for this node: don't leak
+ * the mutex we just allocated (an inherited lock still
+ * belongs to its original owner and must not be freed).
+ */
+ if (!inherit_from) {
+ (void) pthread_mutex_destroy(shadow_node->lock);
+ free(shadow_node->lock);
+ }
+ free(shadow_node);
+ return NULL;
+ }
+ return shadow_node;
+}
+
+static
+void free_shadow_node(struct rcu_head *head)
+{
+ struct cds_ja_shadow_node *s;
+
+ /* RCU callback: reclaim the shadow node only. */
+ s = caa_container_of(head, struct cds_ja_shadow_node, head);
+ free(s);
+}
+
+static
+void free_shadow_node_and_node(struct rcu_head *head)
+{
+ struct cds_ja_shadow_node *s;
+
+ /* RCU callback: reclaim the shadow node and its Judy array node. */
+ s = caa_container_of(head, struct cds_ja_shadow_node, head);
+ free_cds_ja_node(s->ja, ja_node_ptr(s->node_flag));
+ free(s);
+}
+
+static
+void free_shadow_node_and_lock(struct rcu_head *head)
+{
+ struct cds_ja_shadow_node *s;
+
+ /* RCU callback: reclaim the shadow node and its (owned) mutex. */
+ s = caa_container_of(head, struct cds_ja_shadow_node, head);
+ free(s->lock);
+ free(s);
+}
+
+static
+void free_shadow_node_and_node_and_lock(struct rcu_head *head)
+{
+ struct cds_ja_shadow_node *s;
+
+ /* RCU callback: reclaim shadow node, Judy array node and mutex. */
+ s = caa_container_of(head, struct cds_ja_shadow_node, head);
+ assert(s->level);
+ free_cds_ja_node(s->ja, ja_node_ptr(s->node_flag));
+ free(s->lock);
+ free(s);
+}
+
+/*
+ * Remove the shadow node for node_flag from the hash table and defer
+ * its reclaim through the flavor's call_rcu. shadow_node may be passed
+ * in pre-locked by the caller; when NULL it is looked up and locked
+ * here. flags select whether the underlying node (only when
+ * shadow_node->level is non-zero) and/or the mutex are freed together
+ * with the shadow node. Returns 0 on success, -ENOENT if no shadow
+ * node exists for node_flag, or the cds_lfht_del() error.
+ */
+__attribute__((visibility("protected")))
+int rcuja_shadow_clear(struct cds_lfht *ht,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_shadow_node *shadow_node,
+ unsigned int flags)
+{
+ struct cds_lfht_iter iter;
+ struct cds_lfht_node *lookup_node;
+ const struct rcu_flavor_struct *flavor;
+ int ret, lockret;
+ int lookup_shadow = 0;
+
+ flavor = cds_lfht_rcu_flavor(ht);
+ flavor->read_lock();
+
+ cds_lfht_lookup(ht, hash_pointer(node_flag, hash_seed),
+ match_pointer, node_flag, &iter);
+ lookup_node = cds_lfht_iter_get_node(&iter);
+ if (!lookup_node) {
+ ret = -ENOENT;
+ goto rcu_unlock;
+ }
+
+ /* Caller did not provide a locked shadow node: lock it ourselves. */
+ if (!shadow_node) {
+ shadow_node = caa_container_of(lookup_node,
+ struct cds_ja_shadow_node, ht_node);
+ lockret = pthread_mutex_lock(shadow_node->lock);
+ assert(!lockret);
+ lookup_shadow = 1;
+ }
+
+ /*
+ * Holding the mutex across deletion, and by also re-checking if
+ * the node is deleted with mutex held at lookup ensure that we
+ * don't let RCU JA use a node being removed.
+ */
+ ret = cds_lfht_del(ht, lookup_node);
+ if (ret)
+ goto unlock;
+ if ((flags & RCUJA_SHADOW_CLEAR_FREE_NODE)
+ && shadow_node->level) {
+ if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node_and_node_and_lock);
+ } else {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node_and_node);
+ }
+ } else {
+ if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node_and_lock);
+ } else {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node);
+ }
+ }
+unlock:
+ if (lookup_shadow) {
+ lockret = pthread_mutex_unlock(shadow_node->lock);
+ assert(!lockret);
+ }
+rcu_unlock:
+ flavor->read_unlock();
+
+ return ret;
+}
+
+/*
+ * Delete all shadow nodes and nodes from hash table, along with their
+ * associated lock. flags have the same meaning as in
+ * rcuja_shadow_clear(); reclaim is deferred through call_rcu.
+ */
+__attribute__((visibility("protected")))
+void rcuja_shadow_prune(struct cds_lfht *ht,
+ unsigned int flags)
+{
+ const struct rcu_flavor_struct *flavor;
+ struct cds_ja_shadow_node *shadow_node;
+ struct cds_lfht_iter iter;
+ int ret, lockret;
+
+ flavor = cds_lfht_rcu_flavor(ht);
+ /*
+ * Read-side lock is needed to ensure hash table node existence
+ * vs concurrent resize.
+ */
+ flavor->read_lock();
+ cds_lfht_for_each_entry(ht, &iter, shadow_node, ht_node) {
+ lockret = pthread_mutex_lock(shadow_node->lock);
+ assert(!lockret);
+
+ /* Already deleted by a concurrent clear: just drop the lock. */
+ ret = cds_lfht_del(ht, &shadow_node->ht_node);
+ if (ret)
+ goto unlock;
+ if ((flags & RCUJA_SHADOW_CLEAR_FREE_NODE)
+ && shadow_node->level) {
+ if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node_and_node_and_lock);
+ } else {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node_and_node);
+ }
+ } else {
+ if (flags & RCUJA_SHADOW_CLEAR_FREE_LOCK) {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node_and_lock);
+ } else {
+ flavor->update_call_rcu(&shadow_node->head,
+ free_shadow_node);
+ }
+ }
+ unlock:
+ lockret = pthread_mutex_unlock(shadow_node->lock);
+ assert(!lockret);
+ }
+ flavor->read_unlock();
+}
+
+__attribute__((visibility("protected")))
+struct cds_lfht *rcuja_create_ht(const struct rcu_flavor_struct *flavor)
+{
+ struct cds_lfht *ht;
+
+ /* Auto-resizing shadow node hash table with node accounting. */
+ ht = _cds_lfht_new(1, 1, 0,
+ CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING,
+ NULL, flavor, NULL);
+ return ht;
+}
+
+__attribute__((visibility("protected")))
+int rcuja_delete_ht(struct cds_lfht *ht)
+{
+ int ret;
+
+ /* NULL: no per-node free function is passed to destroy. */
+ ret = cds_lfht_destroy(ht, NULL);
+ return ret;
+}
+
+/*
+ * Library constructor: seed the pointer hash with the current time.
+ * NOTE(review): seconds-granularity time(NULL) is a weak seed, but it
+ * only perturbs hash distribution, not correctness.
+ */
+__attribute__((constructor))
+void rcuja_ht_init(void)
+{
+ hash_seed = (unsigned long) time(NULL);
+}
--- /dev/null
+/*
+ * rcuja/rcuja.c
+ *
+ * Userspace RCU library - RCU Judy Array
+ *
+ * Copyright (C) 2000 - 2002 Hewlett-Packard Company
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define _LGPL_SOURCE
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <assert.h>
+#include <urcu/rcuja.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+#include <urcu-pointer.h>
+#include <urcu/uatomic.h>
+
+#include "rcuja-internal.h"
+
+/*
+ * Absolute value of an int-convertible expression.
+ * NOTE(review): the guard tests "abs" but defines "abs_int" —
+ * presumably to avoid clashing with <stdlib.h> abs(); confirm the
+ * guard/name mismatch is intentional.
+ */
+#ifndef abs
+#define abs_int(a) ((int) (a) > 0 ? (int) (a) : -((int) (a)))
+#endif
+
+/* Node type classes, from least dense (linear) to most dense (pigeon). */
+enum cds_ja_type_class {
+ RCU_JA_LINEAR = 0, /* Type A */
+ /* 32-bit: 1 to 25 children, 8 to 128 bytes */
+ /* 64-bit: 1 to 28 children, 16 to 256 bytes */
+ RCU_JA_POOL = 1, /* Type B */
+ /* 32-bit: 26 to 100 children, 256 to 512 bytes */
+ /* 64-bit: 29 to 112 children, 512 to 1024 bytes */
+ RCU_JA_PIGEON = 2, /* Type C */
+ /* 32-bit: 101 to 256 children, 1024 bytes */
+ /* 64-bit: 113 to 256 children, 2048 bytes */
+ /* Leaf nodes are implicit from their height in the tree */
+ RCU_JA_NR_TYPES,
+
+ RCU_JA_NULL, /* not an encoded type, but keeps code regular */
+};
+
+/* Per-type parameters; instances live in the ja_types[] table below. */
+struct cds_ja_type {
+ enum cds_ja_type_class type_class;
+ uint16_t min_child; /* minimum number of children: 1 to 256 */
+ uint16_t max_child; /* maximum number of children: 1 to 256 */
+ uint16_t max_linear_child; /* per-pool max nr. children: 1 to 256 */
+ uint16_t order; /* node size is (1 << order), in bytes */
+ uint16_t nr_pool_order; /* number of pools */
+ uint16_t pool_size_order; /* pool size */
+};
+
+/*
+ * Iteration on the array to find the right node size for the number of
+ * children stops when it reaches .max_child == 256 (this is the largest
+ * possible node size, which contains 256 children).
+ * The min_child overlaps with the previous max_child to provide an
+ * hysteresis loop to reallocation for patterns of cyclic add/removal
+ * within the same node.
+ * The index of a node within the following arrays is represented on 3
+ * bits. It identifies the node type, min/max number of children, and
+ * the size order.
+ * The max_child values for the RCU_JA_POOL below result from
+ * statistical approximation: over a million populations, the max_child
+ * covers between 97% and 99% of the populations generated. Therefore, a
+ * fallback should exist to cover the rare extreme population unbalance
+ * cases, but it will not have a major impact on speed nor space
+ * consumption, since those are rare cases.
+ */
+
+#if (CAA_BITS_PER_LONG < 64)
+/* 32-bit pointers */
+/* Per-type maximum child counts; index 8 encodes the NULL type. */
+enum {
+ ja_type_0_max_child = 1,
+ ja_type_1_max_child = 3,
+ ja_type_2_max_child = 6,
+ ja_type_3_max_child = 12,
+ ja_type_4_max_child = 25,
+ ja_type_5_max_child = 48,
+ ja_type_6_max_child = 92,
+ ja_type_7_max_child = 256,
+ ja_type_8_max_child = 0, /* NULL */
+};
+
+/* Maximum children per linear array (per pool, for pool types 5-6). */
+enum {
+ ja_type_0_max_linear_child = 1,
+ ja_type_1_max_linear_child = 3,
+ ja_type_2_max_linear_child = 6,
+ ja_type_3_max_linear_child = 12,
+ ja_type_4_max_linear_child = 25,
+ ja_type_5_max_linear_child = 24,
+ ja_type_6_max_linear_child = 23,
+};
+
+/* log2 of the number of linear pools in each pool-type node. */
+enum {
+ ja_type_5_nr_pool_order = 1,
+ ja_type_6_nr_pool_order = 2,
+};
+
+/* Node type parameter table, 32-bit pointers. Indexed by type number. */
+const struct cds_ja_type ja_types[] = {
+ { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 3, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 4, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 5, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 4, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 6, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 7, },
+
+ /* Pools may fill sooner than max_child */
+ /* This pool is hardcoded at index 5. See ja_node_ptr(). */
+ { .type_class = RCU_JA_POOL, .min_child = 20, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 8, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 7, },
+ /* This pool is hardcoded at index 6. See ja_node_ptr(). */
+ { .type_class = RCU_JA_POOL, .min_child = 45, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 9, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 7, },
+
+ /*
+ * Upon node removal below min_child, if child pool is filled
+ * beyond capacity, we roll back to pigeon.
+ */
+ { .type_class = RCU_JA_PIGEON, .min_child = 83, .max_child = ja_type_7_max_child, .order = 10, },
+
+ { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
+};
+#else /* !(CAA_BITS_PER_LONG < 64) */
+/* 64-bit pointers */
+/* Per-type maximum child counts (64-bit layouts). */
+enum {
+ ja_type_0_max_child = 1,
+ ja_type_1_max_child = 3,
+ ja_type_2_max_child = 7,
+ ja_type_3_max_child = 14,
+ ja_type_4_max_child = 28,
+ ja_type_5_max_child = 54,
+ ja_type_6_max_child = 104,
+ ja_type_7_max_child = 256,
+ ja_type_8_max_child = 256,
+};
+
+/* Maximum children per linear array (per pool, for pool types 5-6). */
+enum {
+ ja_type_0_max_linear_child = 1,
+ ja_type_1_max_linear_child = 3,
+ ja_type_2_max_linear_child = 7,
+ ja_type_3_max_linear_child = 14,
+ ja_type_4_max_linear_child = 28,
+ ja_type_5_max_linear_child = 27,
+ ja_type_6_max_linear_child = 26,
+};
+
+/* log2 of the number of linear pools in each pool-type node. */
+enum {
+ ja_type_5_nr_pool_order = 1,
+ ja_type_6_nr_pool_order = 2,
+};
+
+/* Node type parameter table, 64-bit pointers. Indexed by type number. */
+const struct cds_ja_type ja_types[] = {
+ { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 4, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 5, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 6, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 5, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 7, },
+ { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 8, },
+
+ /* Pools may fill sooner than max_child. */
+ /* This pool is hardcoded at index 5. See ja_node_ptr(). */
+ { .type_class = RCU_JA_POOL, .min_child = 22, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 9, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 8, },
+ /* This pool is hardcoded at index 6. See ja_node_ptr(). */
+ { .type_class = RCU_JA_POOL, .min_child = 51, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 10, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 8, },
+
+ /*
+ * Upon node removal below min_child, if child pool is filled
+ * beyond capacity, we roll back to pigeon.
+ */
+ { .type_class = RCU_JA_PIGEON, .min_child = 95, .max_child = ja_type_7_max_child, .order = 11, },
+
+ { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
+};
+#endif /* !(CAA_BITS_PER_LONG < 64) */
+
+/* Compile-time check: ja_types[] must cover every encodable type index. */
+static inline __attribute__((unused))
+void static_array_size_check(void)
+{
+ CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) < JA_TYPE_MAX_NR);
+}
+
+/*
+ * The cds_ja_node contains the compressed node data needed for
+ * read-side. For linear and pool node configurations, it starts with a
+ * byte counting the number of children in the node. Then, the
+ * node-specific data is placed.
+ * The node mutex, if any is needed, protecting concurrent updated of
+ * each node is placed in a separate hash table indexed by node address.
+ * For the pigeon configuration, the number of children is also kept in
+ * a separate hash table, indexed by node address, because it is only
+ * required for updates.
+ */
+
+/*
+ * Linear node layout: child count byte, then the value array, then the
+ * matching pointer array (pointer array alignment handled at access
+ * time via align_ptr_size()).
+ */
+#define DECLARE_LINEAR_NODE(index) \
+ struct { \
+ uint8_t nr_child; \
+ uint8_t child_value[ja_type_## index ##_max_linear_child]; \
+ struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
+ }
+
+/* Pool node layout: 2^nr_pool_order independent linear sub-nodes. */
+#define DECLARE_POOL_NODE(index) \
+ struct { \
+ struct { \
+ uint8_t nr_child; \
+ uint8_t child_value[ja_type_## index ##_max_linear_child]; \
+ struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
+ } linear[1U << ja_type_## index ##_nr_pool_order]; \
+ }
+
+/* In-memory node: one union member per node type configuration. */
+struct cds_ja_inode {
+ union {
+ /* Linear configuration */
+ DECLARE_LINEAR_NODE(0) conf_0;
+ DECLARE_LINEAR_NODE(1) conf_1;
+ DECLARE_LINEAR_NODE(2) conf_2;
+ DECLARE_LINEAR_NODE(3) conf_3;
+ DECLARE_LINEAR_NODE(4) conf_4;
+
+ /* Pool configuration */
+ DECLARE_POOL_NODE(5) conf_5;
+ DECLARE_POOL_NODE(6) conf_6;
+
+ /* Pigeon configuration */
+ struct {
+ struct cds_ja_inode_flag *child[ja_type_7_max_child];
+ } conf_7;
+ /* data aliasing nodes for computed accesses */
+ uint8_t data[sizeof(struct cds_ja_inode_flag *) * ja_type_7_max_child];
+ } u;
+};
+
+/* Reason a node is being recompacted (same size, grow, or shrink). */
+enum ja_recompact {
+ JA_RECOMPACT_ADD_SAME,
+ JA_RECOMPACT_ADD_NEXT,
+ JA_RECOMPACT_DEL,
+};
+
+/* Inequality lookups: below-or-equal vs above-or-equal. */
+enum ja_lookup_inequality {
+ JA_LOOKUP_BE,
+ JA_LOOKUP_AE,
+};
+
+/* Neighbor/extremum search direction within a node. */
+enum ja_direction {
+ JA_LEFT,
+ JA_RIGHT,
+ JA_LEFTMOST,
+ JA_RIGHTMOST,
+};
+
+static
+struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node)
+{
+ unsigned long bits = (unsigned long) node;
+
+ /* Strip the low type bits, keeping only the node address. */
+ return (struct cds_ja_inode *) (bits & JA_PTR_MASK);
+}
+
+/*
+ * Extract the node type index encoded in the low bits of a node
+ * pointer. Returns NODE_INDEX_NULL when the masked pointer is NULL.
+ */
+unsigned long ja_node_type(struct cds_ja_inode_flag *node)
+{
+ unsigned long type;
+
+ if (_ja_node_mask_ptr(node) == NULL) {
+ return NODE_INDEX_NULL;
+ }
+ type = (unsigned int) ((unsigned long) node & JA_TYPE_MASK);
+ assert(type < (1UL << JA_TYPE_BITS));
+ return type;
+}
+
+/*
+ * Allocate a zeroed node of (1 << order) bytes, aligned on its own
+ * size (presumably so the pointer's low bits stay free for type flags
+ * — see ja_node_type()). Returns NULL on allocation failure.
+ */
+static
+struct cds_ja_inode *alloc_cds_ja_node(struct cds_ja *ja,
+ const struct cds_ja_type *ja_type)
+{
+ size_t len = 1U << ja_type->order;
+ void *p;
+ int ret;
+
+ ret = posix_memalign(&p, len, len);
+ if (ret || !p) {
+ return NULL;
+ }
+ memset(p, 0, len);
+ if (ja_debug_counters())
+ uatomic_inc(&ja->nr_nodes_allocated);
+ return p;
+}
+
+/*
+ * Free a node and account it in the debug counters. The NULL test and
+ * counter update happen before free(): reading a pointer's value after
+ * it has been freed is undefined behavior (the value is indeterminate,
+ * C11 6.2.4), even without dereferencing it.
+ */
+void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node)
+{
+ if (ja_debug_counters() && node)
+ uatomic_inc(&ja->nr_nodes_freed);
+ free(node);
+}
+
+/* Round v up (JA_ALIGN) or down (JA_FLOOR) to a power-of-2 alignment. */
+#define __JA_ALIGN_MASK(v, mask) (((v) + (mask)) & ~(mask))
+#define JA_ALIGN(v, align) __JA_ALIGN_MASK(v, (typeof(v)) (align) - 1)
+#define __JA_FLOOR_MASK(v, mask) ((v) & ~(mask))
+#define JA_FLOOR(v, align) __JA_FLOOR_MASK(v, (typeof(v)) (align) - 1)
+
+/* Align ptr up to the next pointer-size boundary. */
+static
+uint8_t *align_ptr_size(uint8_t *ptr)
+{
+ return (uint8_t *) JA_ALIGN((unsigned long) ptr, sizeof(void *));
+}
+
+/*
+ * Read the child count (first data byte) of a linear or pool-linear
+ * node. The rcu_dereference pairs with the ordered publication in
+ * ja_linear_node_set_nth().
+ */
+static
+uint8_t ja_linear_node_get_nr_child(const struct cds_ja_type *type,
+ struct cds_ja_inode *node)
+{
+ assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+ return rcu_dereference(node->u.data[0]);
+}
+
+/*
+ * Look up key n in a linear node; returns the child pointer, or NULL
+ * when absent. The order in which values and pointers are read does
+ * not matter: if a value is missing, we return NULL. If a value is
+ * there, but its associated pointer is still NULL, we return NULL too.
+ * When node_flag_ptr is non-NULL, it receives the address of the
+ * matched pointer slot (or NULL on lookup failure).
+ */
+static
+struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_inode_flag ***node_flag_ptr,
+ uint8_t n)
+{
+ uint8_t nr_child;
+ uint8_t *values;
+ struct cds_ja_inode_flag **pointers;
+ struct cds_ja_inode_flag *ptr;
+ unsigned int i;
+
+ assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+
+ nr_child = ja_linear_node_get_nr_child(type, node);
+ cmm_smp_rmb(); /* read nr_child before values and pointers */
+ assert(nr_child <= type->max_linear_child);
+ assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
+
+ /* Sequential scan of the value array for key n. */
+ values = &node->u.data[1];
+ for (i = 0; i < nr_child; i++) {
+ if (CMM_LOAD_SHARED(values[i]) == n)
+ break;
+ }
+ if (i >= nr_child) {
+ if (caa_unlikely(node_flag_ptr))
+ *node_flag_ptr = NULL;
+ return NULL;
+ }
+ pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+ ptr = rcu_dereference(pointers[i]);
+ if (caa_unlikely(node_flag_ptr))
+ *node_flag_ptr = &pointers[i];
+ return ptr;
+}
+
+/*
+ * Find the closest populated entry strictly left (dir == JA_LEFT) or
+ * right (dir == JA_RIGHT) of key n in a linear node. On success,
+ * stores the found key in *result_key and returns its child pointer;
+ * returns NULL when no populated entry exists in that direction.
+ */
+static
+struct cds_ja_inode_flag *ja_linear_node_get_direction(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
+{
+ uint8_t nr_child;
+ uint8_t *values;
+ struct cds_ja_inode_flag **pointers;
+ struct cds_ja_inode_flag *ptr, *match_ptr = NULL;
+ unsigned int i;
+ int match_v;
+
+ assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+ assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+ /* Sentinel outside the valid 0..255 key range for "no match yet". */
+ if (dir == JA_LEFT) {
+ match_v = -1;
+ } else {
+ match_v = JA_ENTRY_PER_NODE;
+ }
+
+ nr_child = ja_linear_node_get_nr_child(type, node);
+ cmm_smp_rmb(); /* read nr_child before values and pointers */
+ assert(nr_child <= type->max_linear_child);
+ assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
+
+ values = &node->u.data[1];
+ pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+ for (i = 0; i < nr_child; i++) {
+ unsigned int v;
+
+ v = CMM_LOAD_SHARED(values[i]);
+ ptr = CMM_LOAD_SHARED(pointers[i]);
+ if (!ptr)
+ continue;
+ if (dir == JA_LEFT) {
+ if ((int) v < n && (int) v > match_v) {
+ match_v = v;
+ match_ptr = ptr;
+ }
+ } else {
+ if ((int) v > n && (int) v < match_v) {
+ match_v = v;
+ match_ptr = ptr;
+ }
+ }
+ }
+
+ if (!match_ptr) {
+ return NULL;
+ }
+ assert(match_v >= 0 && match_v < JA_ENTRY_PER_NODE);
+
+ *result_key = (uint8_t) match_v;
+ return rcu_dereference(match_ptr);
+}
+
+/*
+ * Fetch the (value, pointer) pair stored at array position i of a
+ * linear node. i must be below the node's current child count.
+ */
+static
+void ja_linear_node_get_ith_pos(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ uint8_t i,
+ uint8_t *v,
+ struct cds_ja_inode_flag **iter)
+{
+ uint8_t *values;
+ struct cds_ja_inode_flag **pointers;
+
+ assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+ assert(i < ja_linear_node_get_nr_child(type, node));
+
+ values = &node->u.data[1];
+ *v = values[i];
+ pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+ *iter = pointers[i];
+}
+
+/*
+ * Look up key n in a pool node: select the linear sub-node from one or
+ * two key bits (bit selectors encoded in node_flag), then delegate to
+ * the linear lookup.
+ */
+static
+struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_inode_flag ***node_flag_ptr,
+ uint8_t n)
+{
+ struct cds_ja_inode *linear;
+
+ assert(type->type_class == RCU_JA_POOL);
+
+ switch (type->nr_pool_order) {
+ case 1:
+ {
+ unsigned long bitsel, index;
+
+ /* One selector bit of n picks one of 2 pools. */
+ bitsel = ja_node_pool_1d_bitsel(node_flag);
+ assert(bitsel < CHAR_BIT);
+ index = ((unsigned long) n >> bitsel) & 0x1;
+ linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
+ break;
+ }
+ case 2:
+ {
+ unsigned long bitsel[2], index[2], rindex;
+
+ /* Two selector bits of n pick one of 4 pools. */
+ ja_node_pool_2d_bitsel(node_flag, bitsel);
+ assert(bitsel[0] < CHAR_BIT);
+ assert(bitsel[1] < CHAR_BIT);
+ index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
+ index[0] <<= 1;
+ index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
+ rindex = index[0] | index[1];
+ linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
+ break;
+ }
+ default:
+ linear = NULL;
+ assert(0);
+ }
+ return ja_linear_node_get_nth(type, linear, node_flag_ptr, n);
+}
+
+static
+struct cds_ja_inode *ja_pool_node_get_ith_pool(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ uint8_t i)
+{
+ unsigned int offset;
+
+ /* Pool i starts at byte offset i << pool_size_order. */
+ assert(type->type_class == RCU_JA_POOL);
+ offset = (unsigned int) i << type->pool_size_order;
+ return (struct cds_ja_inode *) &node->u.data[offset];
+}
+
+/*
+ * Find the closest populated entry strictly left or right of key n in
+ * a pool node by scanning every linear sub-pool (the sought key may
+ * live in any of them). Stores the found key in *result_key and
+ * returns the child pointer, or NULL when none exists.
+ */
+static
+struct cds_ja_inode_flag *ja_pool_node_get_direction(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
+{
+ unsigned int pool_nr;
+ int match_v;
+ struct cds_ja_inode_flag *match_node_flag = NULL;
+
+ assert(type->type_class == RCU_JA_POOL);
+ assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+ /* Sentinel outside the valid key range for "no match yet". */
+ if (dir == JA_LEFT) {
+ match_v = -1;
+ } else {
+ match_v = JA_ENTRY_PER_NODE;
+ }
+
+ for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
+ struct cds_ja_inode *pool =
+ ja_pool_node_get_ith_pool(type,
+ node, pool_nr);
+ uint8_t nr_child =
+ ja_linear_node_get_nr_child(type, pool);
+ unsigned int j;
+
+ for (j = 0; j < nr_child; j++) {
+ struct cds_ja_inode_flag *iter;
+ uint8_t v;
+
+ ja_linear_node_get_ith_pos(type, pool,
+ j, &v, &iter);
+ if (!iter)
+ continue;
+ if (dir == JA_LEFT) {
+ if ((int) v < n && (int) v > match_v) {
+ match_v = v;
+ match_node_flag = iter;
+ }
+ } else {
+ if ((int) v > n && (int) v < match_v) {
+ match_v = v;
+ match_node_flag = iter;
+ }
+ }
+ }
+ }
+ if (match_node_flag)
+ *result_key = (uint8_t) match_v;
+ return match_node_flag;
+}
+
+/*
+ * Look up key n in a pigeon node: direct O(1) index into the 256-slot
+ * child pointer array. When node_flag_ptr is non-NULL, it receives
+ * the address of the slot.
+ */
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_inode_flag ***node_flag_ptr,
+ uint8_t n)
+{
+ struct cds_ja_inode_flag **child_node_flag_ptr;
+ struct cds_ja_inode_flag *child_node_flag;
+
+ assert(type->type_class == RCU_JA_PIGEON);
+ child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[n];
+ child_node_flag = rcu_dereference(*child_node_flag_ptr);
+ dbg_printf("ja_pigeon_node_get_nth child_node_flag_ptr %p\n",
+ child_node_flag_ptr);
+ if (caa_unlikely(node_flag_ptr))
+ *node_flag_ptr = child_node_flag_ptr;
+ return child_node_flag;
+}
+
+/*
+ * Find the first populated slot strictly left or right of key n in a
+ * pigeon node by linear scan from n-1 down (or n+1 up). Stores the
+ * found key in *result_key and returns the child pointer, or NULL.
+ */
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_direction(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
+{
+ struct cds_ja_inode_flag **child_node_flag_ptr;
+ struct cds_ja_inode_flag *child_node_flag;
+ int i;
+
+ assert(type->type_class == RCU_JA_PIGEON);
+ assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+ if (dir == JA_LEFT) {
+ /* n - 1 is first value left of n */
+ for (i = n - 1; i >= 0; i--) {
+ child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
+ child_node_flag = rcu_dereference(*child_node_flag_ptr);
+ if (child_node_flag) {
+ dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n",
+ child_node_flag);
+ *result_key = (uint8_t) i;
+ return child_node_flag;
+ }
+ }
+ } else {
+ /* n + 1 is first value right of n */
+ for (i = n + 1; i < JA_ENTRY_PER_NODE; i++) {
+ child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
+ child_node_flag = rcu_dereference(*child_node_flag_ptr);
+ if (child_node_flag) {
+ dbg_printf("ja_pigeon_node_get_right child_node_flag %p\n",
+ child_node_flag);
+ *result_key = (uint8_t) i;
+ return child_node_flag;
+ }
+ }
+ }
+ return NULL;
+}
+
+/* In a pigeon node, position i and key i coincide: delegate to get_nth. */
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ uint8_t i)
+{
+ return ja_pigeon_node_get_nth(type, node, NULL, i);
+}
+
+/*
+ * ja_node_get_nth: get nth item from a node.
+ * node_flag is already rcu_dereference'd.
+ * Dispatches on the type encoded in node_flag's low bits; returns the
+ * child pointer, NULL when absent, or (void *) -1UL on an impossible
+ * type (assert-protected).
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_inode_flag ***node_flag_ptr,
+ uint8_t n)
+{
+ unsigned int type_index;
+ struct cds_ja_inode *node;
+ const struct cds_ja_type *type;
+
+ node = ja_node_ptr(node_flag);
+ assert(node != NULL);
+ type_index = ja_node_type(node_flag);
+ type = &ja_types[type_index];
+
+ switch (type->type_class) {
+ case RCU_JA_LINEAR:
+ return ja_linear_node_get_nth(type, node,
+ node_flag_ptr, n);
+ case RCU_JA_POOL:
+ return ja_pool_node_get_nth(type, node, node_flag,
+ node_flag_ptr, n);
+ case RCU_JA_PIGEON:
+ return ja_pigeon_node_get_nth(type, node,
+ node_flag_ptr, n);
+ default:
+ assert(0);
+ return (void *) -1UL;
+ }
+}
+
+/*
+ * Find the closest populated entry left/right of key n, dispatching on
+ * the node type encoded in node_flag. Same return convention as
+ * ja_node_get_nth().
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_direction(struct cds_ja_inode_flag *node_flag,
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
+{
+ unsigned int type_index;
+ struct cds_ja_inode *node;
+ const struct cds_ja_type *type;
+
+ node = ja_node_ptr(node_flag);
+ assert(node != NULL);
+ type_index = ja_node_type(node_flag);
+ type = &ja_types[type_index];
+
+ switch (type->type_class) {
+ case RCU_JA_LINEAR:
+ return ja_linear_node_get_direction(type, node, n, result_key, dir);
+ case RCU_JA_POOL:
+ return ja_pool_node_get_direction(type, node, n, result_key, dir);
+ case RCU_JA_PIGEON:
+ return ja_pigeon_node_get_direction(type, node, n, result_key, dir);
+ default:
+ assert(0);
+ return (void *) -1UL;
+ }
+}
+
+/*
+ * Get the first populated entry strictly left or right of key n.
+ * Thin wrapper over ja_node_get_direction taking an unsigned key.
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_leftright(struct cds_ja_inode_flag *node_flag,
+ unsigned int n, uint8_t *result_key,
+ enum ja_direction dir)
+{
+ return ja_node_get_direction(node_flag, n, result_key, dir);
+}
+
+/*
+ * Get the leftmost (minimum key) or rightmost (maximum key) populated
+ * entry of a node, expressed as a directional search from just outside
+ * the valid key range. Returns NULL when the node is empty, or on an
+ * invalid direction (assert-protected).
+ */
+static
+struct cds_ja_inode_flag *ja_node_get_minmax(struct cds_ja_inode_flag *node_flag,
+ uint8_t *result_key,
+ enum ja_direction dir)
+{
+ switch (dir) {
+ case JA_LEFTMOST:
+ return ja_node_get_direction(node_flag,
+ -1, result_key, JA_RIGHT);
+ case JA_RIGHTMOST:
+ return ja_node_get_direction(node_flag,
+ JA_ENTRY_PER_NODE, result_key, JA_LEFT);
+ default:
+ assert(0);
+ /*
+ * Unreachable; without this return, NDEBUG builds would
+ * fall off the end of a non-void function (UB).
+ */
+ return NULL;
+ }
+}
+
/*
 * ja_linear_node_set_nth: publish child @child_node_flag under key byte
 * @n inside a linear node (or a linear sub-node of a pool node).
 *
 * Returns 0 on success, -EEXIST if slot @n is already populated,
 * -ERANGE if the node is full but holds NULLed (reusable) entries so
 * the caller should recompact it, or -ENOSPC if this node type has no
 * space left. Caller holds the node lock (shadow node accounting is
 * not atomic).
 */
static
int ja_linear_node_set_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag)
{
	uint8_t nr_child;
	uint8_t *values, *nr_child_ptr;
	struct cds_ja_inode_flag **pointers;
	unsigned int i, unused = 0;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);

	/* Layout: data[0] is the child count, then the key-byte array. */
	nr_child_ptr = &node->u.data[0];
	dbg_printf("linear set nth: n %u, nr_child_ptr %p\n",
		(unsigned int) n, nr_child_ptr);
	nr_child = *nr_child_ptr;
	assert(nr_child <= type->max_linear_child);

	values = &node->u.data[1];
	/* Pointer array is pointer-size aligned after the value array. */
	pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
	/* Check if node value is already populated */
	for (i = 0; i < nr_child; i++) {
		if (values[i] == n) {
			if (pointers[i])
				return -EEXIST;
			else
				break;	/* Reuse the NULLed slot for @n. */
		} else {
			if (!pointers[i])
				unused++;
		}
	}
	if (i == nr_child && nr_child >= type->max_linear_child) {
		if (unused)
			return -ERANGE;	/* recompact node */
		else
			return -ENOSPC;	/* No space left in this node type */
	}

	assert(pointers[i] == NULL);
	rcu_assign_pointer(pointers[i], child_node_flag);
	/* If we expanded the nr_child, increment it */
	if (i == nr_child) {
		CMM_STORE_SHARED(values[nr_child], n);
		/*
		 * write pointer and value before nr_child, so concurrent
		 * RCU readers iterating [0, nr_child) never observe an
		 * uninitialized entry.
		 */
		cmm_smp_wmb();
		CMM_STORE_SHARED(*nr_child_ptr, nr_child + 1);
	}
	shadow_node->nr_child++;
	dbg_printf("linear set nth: %u child, shadow: %u child, for node %p shadow %p\n",
		(unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
		(unsigned int) shadow_node->nr_child,
		node, shadow_node);

	return 0;
}
+
/*
 * ja_pool_node_set_nth: select the linear sub-node of a pool node that
 * covers key byte @n — using the distribution bit(s) encoded in
 * @node_flag — then delegate the insertion to ja_linear_node_set_nth().
 */
static
int ja_pool_node_set_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag *node_flag,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag)
{
	struct cds_ja_inode *linear;

	assert(type->type_class == RCU_JA_POOL);

	switch (type->nr_pool_order) {
	case 1:		/* 2 sub-nodes, selected by one bit of @n. */
	{
		unsigned long bitsel, index;

		bitsel = ja_node_pool_1d_bitsel(node_flag);
		assert(bitsel < CHAR_BIT);
		index = ((unsigned long) n >> bitsel) & 0x1;
		linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
		break;
	}
	case 2:		/* 4 sub-nodes, selected by two bits of @n. */
	{
		unsigned long bitsel[2], index[2], rindex;

		ja_node_pool_2d_bitsel(node_flag, bitsel);
		assert(bitsel[0] < CHAR_BIT);
		assert(bitsel[1] < CHAR_BIT);
		/* bitsel[0] selects the high bit of the sub-node index. */
		index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
		index[0] <<= 1;
		index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
		rindex = index[0] | index[1];
		linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
		break;
	}
	default:
		linear = NULL;
		assert(0);
	}

	return ja_linear_node_set_nth(type, linear, shadow_node,
			n, child_node_flag);
}
+
+static
+int ja_pigeon_node_set_nth(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_shadow_node *shadow_node,
+ uint8_t n,
+ struct cds_ja_inode_flag *child_node_flag)
+{
+ struct cds_ja_inode_flag **ptr;
+
+ assert(type->type_class == RCU_JA_PIGEON);
+ ptr = &((struct cds_ja_inode_flag **) node->u.data)[n];
+ if (*ptr)
+ return -EEXIST;
+ rcu_assign_pointer(*ptr, child_node_flag);
+ shadow_node->nr_child++;
+ return 0;
+}
+
+/*
+ * _ja_node_set_nth: set nth item within a node. Return an error
+ * (negative error value) if it is already there.
+ */
+static
+int _ja_node_set_nth(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_shadow_node *shadow_node,
+ uint8_t n,
+ struct cds_ja_inode_flag *child_node_flag)
+{
+ switch (type->type_class) {
+ case RCU_JA_LINEAR:
+ return ja_linear_node_set_nth(type, node, shadow_node, n,
+ child_node_flag);
+ case RCU_JA_POOL:
+ return ja_pool_node_set_nth(type, node, node_flag, shadow_node, n,
+ child_node_flag);
+ case RCU_JA_PIGEON:
+ return ja_pigeon_node_set_nth(type, node, shadow_node, n,
+ child_node_flag);
+ case RCU_JA_NULL:
+ return -ENOSPC;
+ default:
+ assert(0);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
 * ja_linear_node_clear_ptr: logically remove the child referenced by
 * @node_flag_ptr from a linear node (or pool sub-node) by NULLing its
 * pointer slot.
 *
 * Returns 0 on success, or -EFBIG when the population dropped low
 * enough that the caller should attempt recompaction to a smaller node
 * type. Caller holds the node lock.
 */
static
int ja_linear_node_clear_ptr(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **node_flag_ptr)
{
	uint8_t nr_child;
	uint8_t *nr_child_ptr;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);

	nr_child_ptr = &node->u.data[0];
	nr_child = *nr_child_ptr;
	assert(nr_child <= type->max_linear_child);

	if (type->type_class == RCU_JA_LINEAR) {
		/* Fallback accounting only applies to pool/pigeon parents. */
		assert(!shadow_node->fallback_removal_count);
		if (shadow_node->nr_child <= type->min_child) {
			/* We need to try recompacting the node */
			return -EFBIG;
		}
	}
	dbg_printf("linear clear ptr: nr_child_ptr %p\n", nr_child_ptr);
	assert(*node_flag_ptr != NULL);
	rcu_assign_pointer(*node_flag_ptr, NULL);
	/*
	 * Value and nr_child are never changed (would cause ABA issue).
	 * Instead, we leave the pointer to NULL and recompact the node
	 * once in a while. It is allowed to set a NULL pointer to a new
	 * value without recompaction though.
	 * Only update the shadow node accounting.
	 */
	shadow_node->nr_child--;
	dbg_printf("linear clear ptr: %u child, shadow: %u child, for node %p shadow %p\n",
		(unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
		(unsigned int) shadow_node->nr_child,
		node, shadow_node);
	return 0;
}
+
+static
+int ja_pool_node_clear_ptr(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_shadow_node *shadow_node,
+ struct cds_ja_inode_flag **node_flag_ptr,
+ uint8_t n)
+{
+ struct cds_ja_inode *linear;
+
+ assert(type->type_class == RCU_JA_POOL);
+
+ if (shadow_node->fallback_removal_count) {
+ shadow_node->fallback_removal_count--;
+ } else {
+ /* We should try recompacting the node */
+ if (shadow_node->nr_child <= type->min_child)
+ return -EFBIG;
+ }
+
+ switch (type->nr_pool_order) {
+ case 1:
+ {
+ unsigned long bitsel, index;
+
+ bitsel = ja_node_pool_1d_bitsel(node_flag);
+ assert(bitsel < CHAR_BIT);
+ index = ((unsigned long) n >> bitsel) & type->nr_pool_order;
+ linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
+ break;
+ }
+ case 2:
+ {
+ unsigned long bitsel[2], index[2], rindex;
+
+ ja_node_pool_2d_bitsel(node_flag, bitsel);
+ assert(bitsel[0] < CHAR_BIT);
+ assert(bitsel[1] < CHAR_BIT);
+ index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
+ index[0] <<= 1;
+ index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
+ rindex = index[0] | index[1];
+ linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
+ break;
+ }
+ default:
+ linear = NULL;
+ assert(0);
+ }
+
+ return ja_linear_node_clear_ptr(type, linear, shadow_node, node_flag_ptr);
+}
+
+static
+int ja_pigeon_node_clear_ptr(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_shadow_node *shadow_node,
+ struct cds_ja_inode_flag **node_flag_ptr)
+{
+ assert(type->type_class == RCU_JA_PIGEON);
+
+ if (shadow_node->fallback_removal_count) {
+ shadow_node->fallback_removal_count--;
+ } else {
+ /* We should try recompacting the node */
+ if (shadow_node->nr_child <= type->min_child)
+ return -EFBIG;
+ }
+ dbg_printf("ja_pigeon_node_clear_ptr: clearing ptr: %p\n", *node_flag_ptr);
+ rcu_assign_pointer(*node_flag_ptr, NULL);
+ shadow_node->nr_child--;
+ return 0;
+}
+
+/*
+ * _ja_node_clear_ptr: clear ptr item within a node. Return an error
+ * (negative error value) if it is not found (-ENOENT).
+ */
+static
+int _ja_node_clear_ptr(const struct cds_ja_type *type,
+ struct cds_ja_inode *node,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_shadow_node *shadow_node,
+ struct cds_ja_inode_flag **node_flag_ptr,
+ uint8_t n)
+{
+ switch (type->type_class) {
+ case RCU_JA_LINEAR:
+ return ja_linear_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
+ case RCU_JA_POOL:
+ return ja_pool_node_clear_ptr(type, node, node_flag, shadow_node, node_flag_ptr, n);
+ case RCU_JA_PIGEON:
+ return ja_pigeon_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
+ case RCU_JA_NULL:
+ return -ENOENT;
+ default:
+ assert(0);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
 * ja_node_sum_distribution_1d: calculate bit distribution. Returns the
 * bit (0 to 7) that splits the key-byte distribution into two
 * sub-distributions containing as close as possible to the same number
 * of elements, i.e. the best 1-D pool selector.
 *
 * For ADD modes, the key byte being inserted (@n) is counted in; for
 * JA_RECOMPACT_DEL, the entry being removed (*nullify_node_flag_ptr)
 * is skipped. Unused parameters (@ja, @type_index, @shadow_node,
 * @child_node_flag) are kept for signature symmetry with the 2-D
 * variant — presumably intentional; confirm before pruning.
 */
static
unsigned int ja_node_sum_distribution_1d(enum ja_recompact mode,
		struct cds_ja *ja,
		unsigned int type_index,
		const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag,
		struct cds_ja_inode_flag **nullify_node_flag_ptr)
{
	/* nr_one[b]: number of counted key bytes with bit b set. */
	uint8_t nr_one[JA_BITS_PER_BYTE];
	unsigned int bitsel = 0, bit_i, overall_best_distance = UINT_MAX;
	unsigned int distrib_nr_child = 0;

	memset(nr_one, 0, sizeof(nr_one));

	/* Tally per-bit population over the node's live children. */
	switch (type->type_class) {
	case RCU_JA_LINEAR:
	{
		uint8_t nr_child =
			ja_linear_node_get_nr_child(type, node);
		unsigned int i;

		for (i = 0; i < nr_child; i++) {
			struct cds_ja_inode_flag *iter;
			uint8_t v;

			ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
			if (!iter)
				continue;	/* NULLed (removed) slot. */
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;	/* Entry being deleted. */
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				if (v & (1U << bit_i))
					nr_one[bit_i]++;
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_POOL:
	{
		unsigned int pool_nr;

		/* Walk every linear sub-node of the pool. */
		for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
			struct cds_ja_inode *pool =
				ja_pool_node_get_ith_pool(type,
					node, pool_nr);
			uint8_t nr_child =
				ja_linear_node_get_nr_child(type, pool);
			unsigned int j;

			for (j = 0; j < nr_child; j++) {
				struct cds_ja_inode_flag *iter;
				uint8_t v;

				ja_linear_node_get_ith_pos(type, pool,
					j, &v, &iter);
				if (!iter)
					continue;
				if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
					continue;
				for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
					if (v & (1U << bit_i))
						nr_one[bit_i]++;
				}
				distrib_nr_child++;
			}
		}
		break;
	}
	case RCU_JA_PIGEON:
	{
		unsigned int i;

		/* Only shrinking a pigeon node reaches a pool target. */
		assert(mode == JA_RECOMPACT_DEL);
		for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
			struct cds_ja_inode_flag *iter;

			iter = ja_pigeon_node_get_ith_pos(type, node, i);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			/* In a pigeon node, the slot index is the key byte. */
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				if (i & (1U << bit_i))
					nr_one[bit_i]++;
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_NULL:
		assert(mode == JA_RECOMPACT_ADD_NEXT);
		break;
	default:
		assert(0);
		break;
	}

	/* Count the key byte being added, if any. */
	if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
		for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
			if (n & (1U << bit_i))
				nr_one[bit_i]++;
		}
		distrib_nr_child++;
	}

	/*
	 * The best bit selector is that for which the number of ones is
	 * closest to half of the number of children in the
	 * distribution. We calculate the distance using the double of
	 * the sub-distribution sizes to eliminate truncation error.
	 */
	for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
		unsigned int distance_to_best;

		distance_to_best = abs_int(((unsigned int) nr_one[bit_i] << 1U) - distrib_nr_child);
		if (distance_to_best < overall_best_distance) {
			overall_best_distance = distance_to_best;
			bitsel = bit_i;
		}
	}
	dbg_printf("1 dimension pool bit selection: (%u)\n", bitsel);
	return bitsel;
}
+
/*
 * ja_node_sum_distribution_2d: calculate bit distribution in two
 * dimensions. Returns (via @_bitsel) the pair of bits (each 0 to 7)
 * that splits the key-byte distribution into four sub-distributions
 * containing as close as possible to the same number of elements.
 *
 * nr_2d_XY[i][j] counts the children whose key byte has bit i == X and
 * bit j == Y, for every pair j < i. For ADD modes the inserted key
 * byte @n is counted in; for JA_RECOMPACT_DEL the entry being removed
 * (*nullify_node_flag_ptr) is skipped.
 */
static
void ja_node_sum_distribution_2d(enum ja_recompact mode,
		struct cds_ja *ja,
		unsigned int type_index,
		const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag,
		struct cds_ja_inode_flag **nullify_node_flag_ptr,
		unsigned int *_bitsel)
{
	uint8_t nr_2d_11[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
		nr_2d_10[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
		nr_2d_01[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
		nr_2d_00[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE];
	unsigned int bitsel[2] = { 0, 1 };
	unsigned int bit_i, bit_j;
	int overall_best_distance = INT_MAX;
	unsigned int distrib_nr_child = 0;

	memset(nr_2d_11, 0, sizeof(nr_2d_11));
	memset(nr_2d_10, 0, sizeof(nr_2d_10));
	memset(nr_2d_01, 0, sizeof(nr_2d_01));
	memset(nr_2d_00, 0, sizeof(nr_2d_00));

	/* Tally the 2-bit quadrant populations over live children. */
	switch (type->type_class) {
	case RCU_JA_LINEAR:
	{
		uint8_t nr_child =
			ja_linear_node_get_nr_child(type, node);
		unsigned int i;

		for (i = 0; i < nr_child; i++) {
			struct cds_ja_inode_flag *iter;
			uint8_t v;

			ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
			if (!iter)
				continue;	/* NULLed (removed) slot. */
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;	/* Entry being deleted. */
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				for (bit_j = 0; bit_j < bit_i; bit_j++) {
					if (v & (1U << bit_i)) {
						if (v & (1U << bit_j)) {
							nr_2d_11[bit_i][bit_j]++;
						} else {
							nr_2d_10[bit_i][bit_j]++;
						}
					} else {
						if (v & (1U << bit_j)) {
							nr_2d_01[bit_i][bit_j]++;
						} else {
							nr_2d_00[bit_i][bit_j]++;
						}
					}
				}
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_POOL:
	{
		unsigned int pool_nr;

		/* Walk every linear sub-node of the pool. */
		for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
			struct cds_ja_inode *pool =
				ja_pool_node_get_ith_pool(type,
					node, pool_nr);
			uint8_t nr_child =
				ja_linear_node_get_nr_child(type, pool);
			unsigned int j;

			for (j = 0; j < nr_child; j++) {
				struct cds_ja_inode_flag *iter;
				uint8_t v;

				ja_linear_node_get_ith_pos(type, pool,
					j, &v, &iter);
				if (!iter)
					continue;
				if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
					continue;
				for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
					for (bit_j = 0; bit_j < bit_i; bit_j++) {
						if (v & (1U << bit_i)) {
							if (v & (1U << bit_j)) {
								nr_2d_11[bit_i][bit_j]++;
							} else {
								nr_2d_10[bit_i][bit_j]++;
							}
						} else {
							if (v & (1U << bit_j)) {
								nr_2d_01[bit_i][bit_j]++;
							} else {
								nr_2d_00[bit_i][bit_j]++;
							}
						}
					}
				}
				distrib_nr_child++;
			}
		}
		break;
	}
	case RCU_JA_PIGEON:
	{
		unsigned int i;

		/* Only shrinking a pigeon node reaches a pool target. */
		assert(mode == JA_RECOMPACT_DEL);
		for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
			struct cds_ja_inode_flag *iter;

			iter = ja_pigeon_node_get_ith_pos(type, node, i);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			/* In a pigeon node, the slot index is the key byte. */
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				for (bit_j = 0; bit_j < bit_i; bit_j++) {
					if (i & (1U << bit_i)) {
						if (i & (1U << bit_j)) {
							nr_2d_11[bit_i][bit_j]++;
						} else {
							nr_2d_10[bit_i][bit_j]++;
						}
					} else {
						if (i & (1U << bit_j)) {
							nr_2d_01[bit_i][bit_j]++;
						} else {
							nr_2d_00[bit_i][bit_j]++;
						}
					}
				}
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_NULL:
		assert(mode == JA_RECOMPACT_ADD_NEXT);
		break;
	default:
		assert(0);
		break;
	}

	/* Count the key byte being added, if any. */
	if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
		for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
			for (bit_j = 0; bit_j < bit_i; bit_j++) {
				if (n & (1U << bit_i)) {
					if (n & (1U << bit_j)) {
						nr_2d_11[bit_i][bit_j]++;
					} else {
						nr_2d_10[bit_i][bit_j]++;
					}
				} else {
					if (n & (1U << bit_j)) {
						nr_2d_01[bit_i][bit_j]++;
					} else {
						nr_2d_00[bit_i][bit_j]++;
					}
				}
			}
		}
		distrib_nr_child++;
	}

	/*
	 * The best bit selector is that for which the number of nodes
	 * in each sub-class is closest to one-fourth of the number of
	 * children in the distribution. We calculate the distance using
	 * 4 times the size of the sub-distribution to eliminate
	 * truncation error.
	 */
	for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
		for (bit_j = 0; bit_j < bit_i; bit_j++) {
			int distance_to_best[4];

			distance_to_best[0] = ((unsigned int) nr_2d_11[bit_i][bit_j] << 2U) - distrib_nr_child;
			distance_to_best[1] = ((unsigned int) nr_2d_10[bit_i][bit_j] << 2U) - distrib_nr_child;
			distance_to_best[2] = ((unsigned int) nr_2d_01[bit_i][bit_j] << 2U) - distrib_nr_child;
			distance_to_best[3] = ((unsigned int) nr_2d_00[bit_i][bit_j] << 2U) - distrib_nr_child;

			/* Consider worse distance above best */
			if (distance_to_best[1] > 0 && distance_to_best[1] > distance_to_best[0])
				distance_to_best[0] = distance_to_best[1];
			if (distance_to_best[2] > 0 && distance_to_best[2] > distance_to_best[0])
				distance_to_best[0] = distance_to_best[2];
			if (distance_to_best[3] > 0 && distance_to_best[3] > distance_to_best[0])
				distance_to_best[0] = distance_to_best[3];

			/*
			 * If our worse distance is better than overall,
			 * we become new best candidate.
			 */
			if (distance_to_best[0] < overall_best_distance) {
				overall_best_distance = distance_to_best[0];
				bitsel[0] = bit_i;
				bitsel[1] = bit_j;
			}
		}
	}

	dbg_printf("2 dimensions pool bit selection: (%u,%u)\n", bitsel[0], bitsel[1]);

	/* Return our bit selection */
	_bitsel[0] = bitsel[0];
	_bitsel[1] = bitsel[1];
}
+
+static
+unsigned int find_nearest_type_index(unsigned int type_index,
+ unsigned int nr_nodes)
+{
+ const struct cds_ja_type *type;
+
+ assert(type_index != NODE_INDEX_NULL);
+ if (nr_nodes == 0)
+ return NODE_INDEX_NULL;
+ for (;;) {
+ type = &ja_types[type_index];
+ if (nr_nodes < type->min_child)
+ type_index--;
+ else if (nr_nodes > type->max_child)
+ type_index++;
+ else
+ break;
+ }
+ return type_index;
+}
+
/*
 * ja_node_recompact: recompact a node into a new node of a possibly
 * different type, either to make room for a new child
 * (JA_RECOMPACT_ADD_NEXT), to garbage-collect NULLed linear entries
 * while staying in the same size range (JA_RECOMPACT_ADD_SAME), or to
 * shrink after a removal (JA_RECOMPACT_DEL). The new node replaces the
 * old one at *old_node_flag_ptr; the old node and its shadow are freed.
 * Return 0 on success, -EAGAIN if need to retry, or other negative
 * error value otherwise.
 */
static
int ja_node_recompact(enum ja_recompact mode,
		struct cds_ja *ja,
		unsigned int old_type_index,
		const struct cds_ja_type *old_type,
		struct cds_ja_inode *old_node,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **old_node_flag_ptr, uint8_t n,
		struct cds_ja_inode_flag *child_node_flag,
		struct cds_ja_inode_flag **nullify_node_flag_ptr,
		int level)
{
	unsigned int new_type_index;
	struct cds_ja_inode *new_node;
	struct cds_ja_shadow_node *new_shadow_node = NULL;
	const struct cds_ja_type *new_type;
	struct cds_ja_inode_flag *new_node_flag, *old_node_flag;
	int ret;
	int fallback = 0;

	old_node_flag = *old_node_flag_ptr;

	/*
	 * Need to find nearest type index even for ADD_SAME, because
	 * this recompaction, when applied to linear nodes, will garbage
	 * collect dummy (NULL) entries, and can therefore cause a few
	 * linear representations to be skipped.
	 */
	switch (mode) {
	case JA_RECOMPACT_ADD_SAME:
		new_type_index = find_nearest_type_index(old_type_index,
			shadow_node->nr_child + 1);
		dbg_printf("Recompact for node with %u children\n",
			shadow_node->nr_child + 1);
		break;
	case JA_RECOMPACT_ADD_NEXT:
		if (!shadow_node || old_type_index == NODE_INDEX_NULL) {
			/* Growing from an empty node: start at smallest type. */
			new_type_index = 0;
			dbg_printf("Recompact for NULL\n");
		} else {
			new_type_index = find_nearest_type_index(old_type_index,
				shadow_node->nr_child + 1);
			dbg_printf("Recompact for node with %u children\n",
				shadow_node->nr_child + 1);
		}
		break;
	case JA_RECOMPACT_DEL:
		new_type_index = find_nearest_type_index(old_type_index,
			shadow_node->nr_child - 1);
		dbg_printf("Recompact for node with %u children\n",
			shadow_node->nr_child - 1);
		break;
	default:
		assert(0);
	}

retry:		/* for fallback */
	dbg_printf("Recompact from type %d to type %d\n",
			old_type_index, new_type_index);
	new_type = &ja_types[new_type_index];
	if (new_type_index != NODE_INDEX_NULL) {
		new_node = alloc_cds_ja_node(ja, new_type);
		if (!new_node)
			return -ENOMEM;

		if (new_type->type_class == RCU_JA_POOL) {
			/*
			 * Pool targets need a distribution bit selector,
			 * which is encoded inside the flagged pointer.
			 */
			switch (new_type->nr_pool_order) {
			case 1:
			{
				unsigned int node_distrib_bitsel;

				node_distrib_bitsel =
					ja_node_sum_distribution_1d(mode, ja,
						old_type_index, old_type,
						old_node, shadow_node,
						n, child_node_flag,
						nullify_node_flag_ptr);
				assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
				new_node_flag = ja_node_flag_pool_1d(new_node,
					new_type_index, node_distrib_bitsel);
				break;
			}
			case 2:
			{
				unsigned int node_distrib_bitsel[2];

				ja_node_sum_distribution_2d(mode, ja,
					old_type_index, old_type,
					old_node, shadow_node,
					n, child_node_flag,
					nullify_node_flag_ptr,
					node_distrib_bitsel);
				assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
				assert(!((unsigned long) new_node & JA_POOL_2D_MASK));
				new_node_flag = ja_node_flag_pool_2d(new_node,
					new_type_index, node_distrib_bitsel);
				break;
			}
			default:
				assert(0);
			}
		} else {
			new_node_flag = ja_node_flag(new_node, new_type_index);
		}

		dbg_printf("Recompact inherit lock from %p\n", shadow_node);
		new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja, level);
		if (!new_shadow_node) {
			free_cds_ja_node(ja, new_node);
			return -ENOMEM;
		}
		if (fallback)
			new_shadow_node->fallback_removal_count =
						JA_FALLBACK_REMOVAL_COUNT;
	} else {
		/* Recompacting to NULL: the node becomes empty. */
		new_node = NULL;
		new_node_flag = NULL;
	}

	/* A pigeon node can always take one more child without growing. */
	assert(mode != JA_RECOMPACT_ADD_NEXT || old_type->type_class != RCU_JA_PIGEON);

	if (new_type_index == NODE_INDEX_NULL)
		goto skip_copy;

	/* Copy the old node's live children into the new node. */
	switch (old_type->type_class) {
	case RCU_JA_LINEAR:
	{
		uint8_t nr_child =
			ja_linear_node_get_nr_child(old_type, old_node);
		unsigned int i;

		for (i = 0; i < nr_child; i++) {
			struct cds_ja_inode_flag *iter;
			uint8_t v;

			ja_linear_node_get_ith_pos(old_type, old_node, i, &v, &iter);
			if (!iter)
				continue;	/* NULLed (removed) slot. */
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;	/* Entry being deleted. */
			ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
					new_shadow_node,
					v, iter);
			if (new_type->type_class == RCU_JA_POOL && ret) {
				goto fallback_toosmall;
			}
			assert(!ret);
		}
		break;
	}
	case RCU_JA_POOL:
	{
		unsigned int pool_nr;

		for (pool_nr = 0; pool_nr < (1U << old_type->nr_pool_order); pool_nr++) {
			struct cds_ja_inode *pool =
				ja_pool_node_get_ith_pool(old_type,
					old_node, pool_nr);
			uint8_t nr_child =
				ja_linear_node_get_nr_child(old_type, pool);
			unsigned int j;

			for (j = 0; j < nr_child; j++) {
				struct cds_ja_inode_flag *iter;
				uint8_t v;

				ja_linear_node_get_ith_pos(old_type, pool,
					j, &v, &iter);
				if (!iter)
					continue;
				if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
					continue;
				ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
						new_shadow_node,
						v, iter);
				if (new_type->type_class == RCU_JA_POOL
						&& ret) {
					goto fallback_toosmall;
				}
				assert(!ret);
			}
		}
		break;
	}
	case RCU_JA_NULL:
		assert(mode == JA_RECOMPACT_ADD_NEXT);
		break;
	case RCU_JA_PIGEON:
	{
		unsigned int i;

		assert(mode == JA_RECOMPACT_DEL);
		for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
			struct cds_ja_inode_flag *iter;

			iter = ja_pigeon_node_get_ith_pos(old_type, old_node, i);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
					new_shadow_node,
					i, iter);
			if (new_type->type_class == RCU_JA_POOL && ret) {
				goto fallback_toosmall;
			}
			assert(!ret);
		}
		break;
	}
	default:
		assert(0);
		ret = -EINVAL;
		goto end;
	}
skip_copy:

	if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
		/* add node */
		ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
				new_shadow_node,
				n, child_node_flag);
		if (new_type->type_class == RCU_JA_POOL && ret) {
			goto fallback_toosmall;
		}
		assert(!ret);
	}

	if (fallback) {
		dbg_printf("Using fallback for %u children, node type index: %u, mode %s\n",
			new_shadow_node->nr_child, old_type_index, mode == JA_RECOMPACT_ADD_NEXT ? "add_next" :
				(mode == JA_RECOMPACT_DEL ? "del" : "add_same"));
		if (ja_debug_counters())
			uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]);
	}

	/* Return pointer to new recompacted node through old_node_flag_ptr */
	*old_node_flag_ptr = new_node_flag;
	if (old_node) {
		int flags;

		flags = RCUJA_SHADOW_CLEAR_FREE_NODE;
		/*
		 * It is OK to free the lock associated with a node
		 * going to NULL, since we are holding the parent lock.
		 * This synchronizes removal with re-add of that node.
		 */
		if (new_type_index == NODE_INDEX_NULL)
			flags |= RCUJA_SHADOW_CLEAR_FREE_LOCK;
		ret = rcuja_shadow_clear(ja->ht, old_node_flag, shadow_node,
				flags);
		assert(!ret);
	}

	ret = 0;
end:
	return ret;

fallback_toosmall:
	/* fallback if next pool is too small */
	assert(new_shadow_node);
	ret = rcuja_shadow_clear(ja->ht, new_node_flag, new_shadow_node,
			RCUJA_SHADOW_CLEAR_FREE_NODE);
	assert(!ret);

	switch (mode) {
	case JA_RECOMPACT_ADD_SAME:
		/*
		 * JA_RECOMPACT_ADD_SAME is only triggered if a linear
		 * node within a pool has unused entries. It should
		 * therefore _never_ be too small.
		 */
		assert(0);

		/* Fall-through */
	case JA_RECOMPACT_ADD_NEXT:
	{
		const struct cds_ja_type *next_type;

		/*
		 * Recompaction attempt on add failed. Should only
		 * happen if target node type is pool. Caused by
		 * hard-to-split distribution. Recompact using the next
		 * distribution size.
		 */
		assert(new_type->type_class == RCU_JA_POOL);
		next_type = &ja_types[new_type_index + 1];
		/*
		 * Try going to the next pool size if our population
		 * fits within its range. This is not flagged as a
		 * fallback.
		 */
		if (shadow_node->nr_child + 1 >= next_type->min_child
				&& shadow_node->nr_child + 1 <= next_type->max_child) {
			new_type_index++;
			goto retry;
		} else {
			new_type_index++;
			dbg_printf("Add fallback to type %d\n", new_type_index);
			if (ja_debug_counters())
				uatomic_inc(&ja->nr_fallback);
			fallback = 1;
			goto retry;
		}
		break;	/* NOTE(review): unreachable, both branches goto retry. */
	}
	case JA_RECOMPACT_DEL:
		/*
		 * Recompaction attempt on delete failed. Should only
		 * happen if target node type is pool. This is caused by
		 * a hard-to-split distribution. Recompact on same node
		 * size, but flag current node as "fallback" to ensure
		 * we don't attempt recompaction before some activity
		 * has reshuffled our node.
		 */
		assert(new_type->type_class == RCU_JA_POOL);
		new_type_index = old_type_index;
		dbg_printf("Delete fallback keeping type %d\n", new_type_index);
		uatomic_inc(&ja->nr_fallback);
		fallback = 1;
		goto retry;
	default:
		assert(0);
		return -EINVAL;
	}

	/*
	 * Last resort fallback: pigeon.
	 * NOTE(review): every case above either returns or jumps to
	 * retry, so this block appears unreachable — confirm intent.
	 */
	new_type_index = (1UL << JA_TYPE_BITS) - 1;
	dbg_printf("Fallback to type %d\n", new_type_index);
	uatomic_inc(&ja->nr_fallback);
	fallback = 1;
	goto retry;
}
+
+/*
+ * Return 0 on success, -EAGAIN if need to retry, or other negative
+ * error value otherwise.
+ */
+static
+int ja_node_set_nth(struct cds_ja *ja,
+ struct cds_ja_inode_flag **node_flag, uint8_t n,
+ struct cds_ja_inode_flag *child_node_flag,
+ struct cds_ja_shadow_node *shadow_node,
+ int level)
+{
+ int ret;
+ unsigned int type_index;
+ const struct cds_ja_type *type;
+ struct cds_ja_inode *node;
+
+ dbg_printf("ja_node_set_nth for n=%u, node %p, shadow %p\n",
+ (unsigned int) n, ja_node_ptr(*node_flag), shadow_node);
+
+ node = ja_node_ptr(*node_flag);
+ type_index = ja_node_type(*node_flag);
+ type = &ja_types[type_index];
+ ret = _ja_node_set_nth(type, node, *node_flag, shadow_node,
+ n, child_node_flag);
+ switch (ret) {
+ case -ENOSPC:
+ /* Not enough space in node, need to recompact to next type. */
+ ret = ja_node_recompact(JA_RECOMPACT_ADD_NEXT, ja, type_index, type, node,
+ shadow_node, node_flag, n, child_node_flag, NULL, level);
+ break;
+ case -ERANGE:
+ /* Node needs to be recompacted. */
+ ret = ja_node_recompact(JA_RECOMPACT_ADD_SAME, ja, type_index, type, node,
+ shadow_node, node_flag, n, child_node_flag, NULL, level);
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Return 0 on success, -EAGAIN if need to retry, or other negative
+ * error value otherwise.
+ */
+static
+int ja_node_clear_ptr(struct cds_ja *ja,
+ struct cds_ja_inode_flag **node_flag_ptr, /* Pointer to location to nullify */
+ struct cds_ja_inode_flag **parent_node_flag_ptr, /* Address of parent ptr in its parent */
+ struct cds_ja_shadow_node *shadow_node, /* of parent */
+ uint8_t n, int level)
+{
+ int ret;
+ unsigned int type_index;
+ const struct cds_ja_type *type;
+ struct cds_ja_inode *node;
+
+ dbg_printf("ja_node_clear_ptr for node %p, shadow %p, target ptr %p\n",
+ ja_node_ptr(*parent_node_flag_ptr), shadow_node, node_flag_ptr);
+
+ node = ja_node_ptr(*parent_node_flag_ptr);
+ type_index = ja_node_type(*parent_node_flag_ptr);
+ type = &ja_types[type_index];
+ ret = _ja_node_clear_ptr(type, node, *parent_node_flag_ptr, shadow_node, node_flag_ptr, n);
+ if (ret == -EFBIG) {
+ /* Should try recompaction. */
+ ret = ja_node_recompact(JA_RECOMPACT_DEL, ja, type_index, type, node,
+ shadow_node, parent_node_flag_ptr, n, NULL,
+ node_flag_ptr, level);
+ }
+ return ret;
+}
+
+struct cds_ja_node *cds_ja_lookup(struct cds_ja *ja, uint64_t key)
+{
+ unsigned int tree_depth, i;
+ struct cds_ja_inode_flag *node_flag;
+
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
+ return NULL;
+ tree_depth = ja->tree_depth;
+ node_flag = rcu_dereference(ja->root);
+
+ /* level 0: root node */
+ if (!ja_node_ptr(node_flag))
+ return NULL;
+
+ for (i = 1; i < tree_depth; i++) {
+ uint8_t iter_key;
+
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
+ node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
+ dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
+ (unsigned int) iter_key, node_flag);
+ if (!ja_node_ptr(node_flag))
+ return NULL;
+ }
+
+ /* Last level lookup succeded. We got an actual match. */
+ return (struct cds_ja_node *) node_flag;
+}
+
/*
 * cds_ja_lookup_inequality: find the node whose key is the closest
 * below-or-equal (JA_LOOKUP_BE) or above-or-equal (JA_LOOKUP_AE) to
 * @key, storing the matched key into *result_key when non-NULL.
 *
 * Three phases: (1) descend as far as the exact key allows, recording
 * the path; (2) climb back up looking for the nearest populated
 * sibling left/right of the path; (3) descend again taking the
 * rightmost/leftmost child at each level.
 */
static
struct cds_ja_node *cds_ja_lookup_inequality(struct cds_ja *ja, uint64_t key,
		uint64_t *result_key, enum ja_lookup_inequality mode)
{
	int tree_depth, level;
	/* Path recorded during descent, one node and key byte per level. */
	struct cds_ja_inode_flag *node_flag, *cur_node_depth[JA_MAX_DEPTH];
	uint8_t cur_key[JA_MAX_DEPTH];
	uint64_t _result_key = 0;
	enum ja_direction dir;

	switch (mode) {
	case JA_LOOKUP_BE:
	case JA_LOOKUP_AE:
		if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
			return NULL;
		break;
	default:
		return NULL;
	}

	memset(cur_node_depth, 0, sizeof(cur_node_depth));
	memset(cur_key, 0, sizeof(cur_key));
	tree_depth = ja->tree_depth;
	node_flag = rcu_dereference(ja->root);
	cur_node_depth[0] = node_flag;

	/* level 0: root node */
	if (!ja_node_ptr(node_flag))
		return NULL;

	/* Phase 1: descend along the exact key, recording the path. */
	for (level = 1; level < tree_depth; level++) {
		uint8_t iter_key;

		iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
		node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
		if (!ja_node_ptr(node_flag))
			break;
		cur_key[level - 1] = iter_key;
		cur_node_depth[level] = node_flag;
		dbg_printf("cds_ja_lookup_inequality iter key lookup %u finds node_flag %p\n",
			(unsigned int) iter_key, node_flag);
	}

	if (level == tree_depth) {
		/* Last level lookup succeded. We got an equal match. */
		if (result_key)
			*result_key = key;
		return (struct cds_ja_node *) node_flag;
	}

	/*
	 * Find highest value left/right of current node.
	 * Current node is cur_node_depth[level].
	 * Start at current level. If we cannot find any key left/right
	 * of ours, go one level up, seek highest value left/right of
	 * current (recursively), and when we find one, get the
	 * rightmost/leftmost child of its rightmost/leftmost child
	 * (recursively).
	 */
	switch (mode) {
	case JA_LOOKUP_BE:
		dir = JA_LEFT;
		break;
	case JA_LOOKUP_AE:
		dir = JA_RIGHT;
		break;
	default:
		assert(0);
	}
	/* Phase 2: climb until a populated sibling is found. */
	for (; level > 0; level--) {
		uint8_t iter_key;

		iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
		node_flag = ja_node_get_leftright(cur_node_depth[level - 1],
				iter_key, &cur_key[level - 1], dir);
		dbg_printf("cds_ja_lookup_inequality find sibling from %u at %u finds node_flag %p\n",
			(unsigned int) iter_key, (unsigned int) cur_key[level - 1],
			node_flag);
		/* If found left/right sibling, find rightmost/leftmost child. */
		if (ja_node_ptr(node_flag))
			break;
	}

	if (!level) {
		/* Reached the root and could not find a left/right sibling. */
		return NULL;
	}

	level++;

	/*
	 * From this point, we are guaranteed to be able to find a
	 * "below than"/"above than" match. ja_attach_node() and
	 * ja_detach_node() both guarantee that it is not possible for a
	 * lookup to reach a dead-end.
	 */

	/*
	 * Find rightmost/leftmost child of rightmost/leftmost child
	 * (recursively).
	 */
	switch (mode) {
	case JA_LOOKUP_BE:
		dir = JA_RIGHTMOST;
		break;
	case JA_LOOKUP_AE:
		dir = JA_LEFTMOST;
		break;
	default:
		assert(0);
	}
	/* Phase 3: descend along the extreme edge down to the leaves. */
	for (; level < tree_depth; level++) {
		node_flag = ja_node_get_minmax(node_flag, &cur_key[level - 1], dir);
		dbg_printf("cds_ja_lookup_inequality find minmax at %u finds node_flag %p\n",
			(unsigned int) cur_key[level - 1],
			node_flag);
		if (!ja_node_ptr(node_flag))
			break;
	}

	assert(level == tree_depth);

	if (result_key) {
		/* Reassemble the matched key from the per-level key bytes. */
		for (level = 1; level < tree_depth; level++) {
			_result_key |= ((uint64_t) cur_key[level - 1])
					<< (JA_BITS_PER_BYTE * (tree_depth - level - 1));
		}
		*result_key = _result_key;
	}
	return (struct cds_ja_node *) node_flag;
}
+
+/*
+ * Find the node with the largest key below or equal to @key.
+ * On success, the matching key is stored into *result_key when
+ * result_key is non-NULL.
+ */
+struct cds_ja_node *cds_ja_lookup_below_equal(struct cds_ja *ja,
+ uint64_t key, uint64_t *result_key)
+{
+ struct cds_ja_node *match;
+
+ dbg_printf("cds_ja_lookup_below_equal key %" PRIu64 "\n", key);
+ match = cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_BE);
+ return match;
+}
+
+/*
+ * Find the node with the smallest key above or equal to @key.
+ * On success, the matching key is stored into *result_key when
+ * result_key is non-NULL.
+ */
+struct cds_ja_node *cds_ja_lookup_above_equal(struct cds_ja *ja,
+ uint64_t key, uint64_t *result_key)
+{
+ struct cds_ja_node *match;
+
+ dbg_printf("cds_ja_lookup_above_equal key %" PRIu64 "\n", key);
+ match = cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_AE);
+ return match;
+}
+
+/*
+ * We reached an unpopulated node. Create it and the children we need,
+ * and then attach the entire branch to the current node. This may
+ * trigger recompaction of the current node. Locks needed: node lock
+ * (for add), and, possibly, parent node lock (to update pointer due to
+ * node recompaction).
+ *
+ * First take node lock, check if recompaction is needed, then take
+ * parent lock (if needed). Then we can proceed to create the new
+ * branch. Publish the new branch, and release locks.
+ * TODO: we currently always take the parent lock even when not needed.
+ *
+ * ja_attach_node() ensures that a lookup will _never_ see a branch that
+ * leads to a dead-end: before attaching a branch, the entire content of
+ * the new branch is populated, thus creating a cluster, before
+ * attaching the cluster to the rest of the tree, thus making it visible
+ * to lookups.
+ *
+ * Returns 0 on success, -EAGAIN if the tree changed between the RCU
+ * lookup and the lock acquisitions (caller should retry), -EEXIST if
+ * the key was populated concurrently, or a negative error code from
+ * node creation.
+ */
+static
+int ja_attach_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag **attach_node_flag_ptr,
+ struct cds_ja_inode_flag *attach_node_flag,
+ struct cds_ja_inode_flag *parent_attach_node_flag,
+ struct cds_ja_inode_flag **old_node_flag_ptr,
+ struct cds_ja_inode_flag *old_node_flag,
+ uint64_t key,
+ unsigned int level,
+ struct cds_ja_node *child_node)
+{
+ struct cds_ja_shadow_node *shadow_node = NULL,
+ *parent_shadow_node = NULL;
+ struct cds_ja_inode_flag *iter_node_flag, *iter_dest_node_flag;
+ int ret, i;
+ struct cds_ja_inode_flag *created_nodes[JA_MAX_DEPTH];
+ int nr_created_nodes = 0;
+
+ dbg_printf("Attach node at level %u (old_node_flag %p, attach_node_flag_ptr %p attach_node_flag %p, parent_attach_node_flag %p)\n",
+ level, old_node_flag, attach_node_flag_ptr, attach_node_flag, parent_attach_node_flag);
+
+ assert(!old_node_flag);
+ /* Lock the node we will attach the new branch to. */
+ if (attach_node_flag) {
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht, attach_node_flag);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ }
+ /* Lock its parent too, in case attaching triggers recompaction. */
+ if (parent_attach_node_flag) {
+ parent_shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ parent_attach_node_flag);
+ if (!parent_shadow_node) {
+ ret = -EAGAIN;
+ goto unlock_shadow;
+ }
+ }
+
+ if (old_node_flag_ptr && ja_node_ptr(*old_node_flag_ptr)) {
+ /*
+ * Target node has been updated between RCU lookup and
+ * lock acquisition. We need to re-try lookup and
+ * attach.
+ */
+ ret = -EAGAIN;
+ goto unlock_parent;
+ }
+
+ /*
+ * Perform a lookup query to handle the case where
+ * old_node_flag_ptr is NULL. We cannot use it to check if the
+ * node has been populated between RCU lookup and mutex
+ * acquisition.
+ */
+ if (!old_node_flag_ptr) {
+ uint8_t iter_key;
+ struct cds_ja_inode_flag *lookup_node_flag;
+ struct cds_ja_inode_flag **lookup_node_flag_ptr;
+
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
+ lookup_node_flag = ja_node_get_nth(attach_node_flag,
+ &lookup_node_flag_ptr,
+ iter_key);
+ if (lookup_node_flag) {
+ ret = -EEXIST;
+ goto unlock_parent;
+ }
+ }
+
+ if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) !=
+ ja_node_ptr(attach_node_flag)) {
+ /*
+ * Target node has been updated between RCU lookup and
+ * lock acquisition. We need to re-try lookup and
+ * attach.
+ */
+ ret = -EAGAIN;
+ goto unlock_parent;
+ }
+
+ /* Create new branch, starting from bottom */
+ iter_node_flag = (struct cds_ja_inode_flag *) child_node;
+
+ for (i = ja->tree_depth - 1; i >= (int) level; i--) {
+ uint8_t iter_key;
+
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - i - 1)));
+ dbg_printf("branch creation level %d, key %u\n",
+ i, (unsigned int) iter_key);
+ iter_dest_node_flag = NULL;
+ ret = ja_node_set_nth(ja, &iter_dest_node_flag,
+ iter_key,
+ iter_node_flag,
+ NULL, i);
+ if (ret) {
+ dbg_printf("branch creation error %d\n", ret);
+ goto check_error;
+ }
+ /* Remember created nodes so they can be torn down on error. */
+ created_nodes[nr_created_nodes++] = iter_dest_node_flag;
+ iter_node_flag = iter_dest_node_flag;
+ }
+ assert(level > 0);
+
+ /* Publish branch */
+ if (level == 1) {
+ /*
+ * Attaching to root node.
+ */
+ rcu_assign_pointer(ja->root, iter_node_flag);
+ } else {
+ uint8_t iter_key;
+
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
+ dbg_printf("publish branch at level %d, key %u\n",
+ level - 1, (unsigned int) iter_key);
+ /* We need to use set_nth on the previous level. */
+ iter_dest_node_flag = attach_node_flag;
+ ret = ja_node_set_nth(ja, &iter_dest_node_flag,
+ iter_key,
+ iter_node_flag,
+ shadow_node, level - 1);
+ if (ret) {
+ dbg_printf("branch publish error %d\n", ret);
+ goto check_error;
+ }
+ /*
+ * Attach branch
+ */
+ rcu_assign_pointer(*attach_node_flag_ptr, iter_dest_node_flag);
+ }
+
+ /* Success */
+ ret = 0;
+
+check_error:
+ if (ret) {
+ /*
+ * Tear down the partially-created branch: free the
+ * shadow-node lock for every created node, and the node
+ * memory for all but the first created node.
+ * NOTE(review): the first node's memory is deliberately
+ * not freed here -- presumably reclaimed elsewhere;
+ * confirm.
+ */
+ for (i = 0; i < nr_created_nodes; i++) {
+ int tmpret;
+ int flags;
+
+ flags = RCUJA_SHADOW_CLEAR_FREE_LOCK;
+ if (i)
+ flags |= RCUJA_SHADOW_CLEAR_FREE_NODE;
+ tmpret = rcuja_shadow_clear(ja->ht,
+ created_nodes[i],
+ NULL,
+ flags);
+ assert(!tmpret);
+ }
+ }
+unlock_parent:
+ if (parent_shadow_node)
+ rcuja_shadow_unlock(parent_shadow_node);
+unlock_shadow:
+ if (shadow_node)
+ rcuja_shadow_unlock(shadow_node);
+end:
+ return ret;
+}
+
+/*
+ * Lock the parent containing the pointer to list of duplicates, and add
+ * node to this list. Failure can happen if concurrent update changes
+ * the parent before we get the lock. We return -EAGAIN in that case.
+ * Return 0 on success, negative error value on failure.
+ *
+ * @last_node is the tail of the duplicate list as observed by the
+ * caller under RCU; it is re-validated under the lock before @node is
+ * linked after it.
+ */
+static
+int ja_chain_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag *parent_node_flag,
+ struct cds_ja_inode_flag **node_flag_ptr,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_node *last_node,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_shadow_node *shadow_node;
+ struct cds_ja_node *iter_node;
+ int ret = 0, found = 0;
+
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
+ if (!shadow_node) {
+ return -EAGAIN;
+ }
+ /*
+ * Ensure that previous node is still there at end of list.
+ */
+ iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ if ((struct cds_ja_node *) ja_node_ptr(*node_flag_ptr) != iter_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ /*
+ * Re-check, under the lock, that last_node is still a member of
+ * the duplicate list.
+ * NOTE(review): membership alone is verified, not that last_node
+ * is still the tail; if a concurrent add appended after it before
+ * we took the lock, the assignment below would overwrite
+ * last_node->next -- confirm whether last_node->next == NULL
+ * should also be required here.
+ */
+ cds_ja_for_each_duplicate(iter_node) {
+ if (iter_node == last_node)
+ found = 1;
+ }
+ if (!found) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ /*
+ * Add node to tail of list to ensure that RCU traversals will
+ * always see either the prior node or the newly added if
+ * executed concurrently with a sequence of add followed by del
+ * on the same key. Safe against concurrent RCU read traversals.
+ */
+ node->next = NULL;
+ rcu_assign_pointer(last_node->next, node);
+end:
+ rcuja_shadow_unlock(shadow_node);
+ return ret;
+}
+
+/*
+ * Core add implementation, shared by cds_ja_add() and
+ * cds_ja_add_unique(). Walks the tree down for @key: when the walk
+ * hits a NULL child pointer, a new branch holding @node is attached;
+ * when the key already exists, @node is chained at the tail of the
+ * duplicate list -- unless @unique_node_ret is non-NULL, in which case
+ * -EEXIST is returned and *unique_node_ret is set to the existing
+ * node. Internally retries on attach/chain races (-EAGAIN/-EEXIST
+ * from the attach path).
+ * Returns 0 on success, -EINVAL for an out-of-range key, -EEXIST in
+ * unique mode, or a negative error from branch creation.
+ * NOTE(review): relies on rcu_dereference(), so presumably called with
+ * the RCU read-side lock held -- confirm with callers.
+ */
+static
+int _cds_ja_add(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node,
+ struct cds_ja_node **unique_node_ret)
+{
+ unsigned int tree_depth, i;
+ struct cds_ja_inode_flag *attach_node_flag,
+ *parent_node_flag,
+ *parent2_node_flag,
+ *node_flag,
+ *parent_attach_node_flag;
+ struct cds_ja_inode_flag **attach_node_flag_ptr,
+ **parent_node_flag_ptr,
+ **node_flag_ptr;
+ int ret;
+
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX)) {
+ return -EINVAL;
+ }
+ tree_depth = ja->tree_depth;
+
+retry:
+ dbg_printf("cds_ja_add attempt: key %" PRIu64 ", node %p\n",
+ key, node);
+ parent2_node_flag = NULL;
+ parent_node_flag =
+ (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
+ parent_node_flag_ptr = NULL;
+ node_flag = rcu_dereference(ja->root);
+ node_flag_ptr = &ja->root;
+
+ /* Iterate on all internal levels */
+ for (i = 1; i < tree_depth; i++) {
+ uint8_t iter_key;
+
+ if (!ja_node_ptr(node_flag))
+ break;
+ dbg_printf("cds_ja_add iter parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
+ parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
+ /* Keep grandparent/parent pointers for ja_attach_node(). */
+ parent2_node_flag = parent_node_flag;
+ parent_node_flag = node_flag;
+ parent_node_flag_ptr = node_flag_ptr;
+ node_flag = ja_node_get_nth(node_flag,
+ &node_flag_ptr,
+ iter_key);
+ }
+
+ /*
+ * We reached either bottom of tree or internal NULL node,
+ * simply add node to last internal level, or chain it if key is
+ * already present.
+ */
+ if (!ja_node_ptr(node_flag)) {
+ dbg_printf("cds_ja_add NULL parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
+ parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+
+ attach_node_flag = parent_node_flag;
+ attach_node_flag_ptr = parent_node_flag_ptr;
+ parent_attach_node_flag = parent2_node_flag;
+
+ ret = ja_attach_node(ja, attach_node_flag_ptr,
+ attach_node_flag,
+ parent_attach_node_flag,
+ node_flag_ptr,
+ node_flag,
+ key, i, node);
+ } else {
+ struct cds_ja_node *iter_node, *last_node = NULL;
+
+ if (unique_node_ret) {
+ *unique_node_ret = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ return -EEXIST;
+ }
+
+ /* Find last duplicate */
+ iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ cds_ja_for_each_duplicate_rcu(iter_node)
+ last_node = iter_node;
+
+ dbg_printf("cds_ja_add duplicate parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
+ parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+
+ attach_node_flag = node_flag;
+ attach_node_flag_ptr = node_flag_ptr;
+ parent_attach_node_flag = parent_node_flag;
+
+ ret = ja_chain_node(ja,
+ parent_attach_node_flag,
+ attach_node_flag_ptr,
+ attach_node_flag,
+ last_node,
+ node);
+ }
+ /* Conflicts with concurrent updates: redo lookup and attempt. */
+ if (ret == -EAGAIN || ret == -EEXIST)
+ goto retry;
+
+ return ret;
+}
+
+/* Add @node under @key; duplicate keys are allowed (chained). */
+int cds_ja_add(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node)
+{
+ return _cds_ja_add(ja, key, node, NULL);
+}
+
+/*
+ * Add @node under @key only if the key is not already present.
+ * Returns @node when it was inserted (or on non-EEXIST errors, matching
+ * the original contract), or the already-present node for that key.
+ */
+struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_node *existing_node;
+
+ if (_cds_ja_add(ja, key, node, &existing_node) == -EEXIST)
+ return existing_node;
+ return node;
+}
+
+/*
+ * Note: there is no need to lookup the pointer address associated with
+ * each node's nth item after taking the lock: it's already been done by
+ * cds_ja_del while holding the rcu read-side lock, and our node rules
+ * ensure that when a match value -> pointer is found in a node, it is
+ * _NEVER_ changed for that node without recompaction, and recompaction
+ * reallocates the node.
+ * However, when a child is removed from "linear" nodes, its pointer
+ * is set to NULL. We therefore check, while holding the locks, if this
+ * pointer is NULL, and return -ENOENT to the caller if it is the case.
+ *
+ * ja_detach_node() ensures that a lookup will _never_ see a branch that
+ * leads to a dead-end: when removing branch, it makes sure to perform
+ * the "cut" at the highest node that has only one child, effectively
+ * replacing it with a NULL pointer.
+ *
+ * @snapshot, @snapshot_ptr and @snapshot_n are filled by cds_ja_del()
+ * during its RCU lookup: the node flags on the path from root to leaf,
+ * the locations holding them, and the key byte selecting each child
+ * (snapshot_n[i + 1] selects the child within snapshot[i]).
+ * @nr_snapshot is the number of recorded entries (tree_depth + 1).
+ * Returns 0 on success, -EAGAIN/-ENOENT on races with concurrent
+ * updates (caller retries).
+ */
+static
+int ja_detach_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag **snapshot,
+ struct cds_ja_inode_flag ***snapshot_ptr,
+ uint8_t *snapshot_n,
+ int nr_snapshot,
+ uint64_t key,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_shadow_node *shadow_nodes[JA_MAX_DEPTH];
+ struct cds_ja_inode_flag **node_flag_ptr = NULL,
+ *parent_node_flag = NULL,
+ **parent_node_flag_ptr = NULL;
+ struct cds_ja_inode_flag *iter_node_flag;
+ int ret, i, nr_shadow = 0, nr_clear = 0, nr_branch = 0;
+ uint8_t n = 0;
+
+ assert(nr_snapshot == ja->tree_depth + 1);
+
+ /*
+ * From the last internal level node going up, get the node
+ * lock, check if the node has only one child left. If it is the
+ * case, we continue iterating upward. When we reach a node
+ * which has more that one child left, we lock the parent, and
+ * proceed to the node deletion (removing its children too).
+ */
+ for (i = nr_snapshot - 2; i >= 1; i--) {
+ struct cds_ja_shadow_node *shadow_node;
+
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ snapshot[i]);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ shadow_nodes[nr_shadow++] = shadow_node;
+
+ /*
+ * Check if node has been removed between RCU
+ * lookup and lock acquisition.
+ */
+ assert(snapshot_ptr[i + 1]);
+ if (ja_node_ptr(*snapshot_ptr[i + 1])
+ != ja_node_ptr(snapshot[i + 1])) {
+ ret = -ENOENT;
+ goto end;
+ }
+
+ assert(shadow_node->nr_child > 0);
+ /*
+ * nr_clear: number of single-child nodes to free
+ * entirely below. nr_branch: number of branch nodes
+ * locked on the way up.
+ */
+ if (shadow_node->nr_child == 1 && i > 1)
+ nr_clear++;
+ nr_branch++;
+ if (shadow_node->nr_child > 1 || i == 1) {
+ /* Lock parent and break */
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ snapshot[i - 1]);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ shadow_nodes[nr_shadow++] = shadow_node;
+
+ /*
+ * Check if node has been removed between RCU
+ * lookup and lock acquisition.
+ */
+ assert(snapshot_ptr[i]);
+ if (ja_node_ptr(*snapshot_ptr[i])
+ != ja_node_ptr(snapshot[i])) {
+ ret = -ENOENT;
+ goto end;
+ }
+
+ node_flag_ptr = snapshot_ptr[i + 1];
+ n = snapshot_n[i + 1];
+ parent_node_flag_ptr = snapshot_ptr[i];
+ parent_node_flag = snapshot[i];
+
+ if (i > 1) {
+ /*
+ * Lock parent's parent, in case we need
+ * to recompact parent.
+ */
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ snapshot[i - 2]);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ shadow_nodes[nr_shadow++] = shadow_node;
+
+ /*
+ * Check if node has been removed between RCU
+ * lookup and lock acquisition.
+ */
+ assert(snapshot_ptr[i - 1]);
+ if (ja_node_ptr(*snapshot_ptr[i - 1])
+ != ja_node_ptr(snapshot[i - 1])) {
+ ret = -ENOENT;
+ goto end;
+ }
+ }
+
+ break;
+ }
+ }
+
+ /*
+ * At this point, we want to delete all nodes that are about to
+ * be removed from shadow_nodes (except the last one, which is
+ * either the root or the parent of the upmost node with 1
+ * child). OK to free lock here, because RCU read lock is held,
+ * and free only performed in call_rcu.
+ */
+
+ for (i = 0; i < nr_clear; i++) {
+ ret = rcuja_shadow_clear(ja->ht,
+ shadow_nodes[i]->node_flag,
+ shadow_nodes[i],
+ RCUJA_SHADOW_CLEAR_FREE_NODE
+ | RCUJA_SHADOW_CLEAR_FREE_LOCK);
+ assert(!ret);
+ }
+
+ iter_node_flag = parent_node_flag;
+ /* Remove from parent */
+ if (nr_branch < 1)
+ abort(); /* Should never happen. */
+ ret = ja_node_clear_ptr(ja,
+ node_flag_ptr, /* Pointer to location to nullify */
+ &iter_node_flag, /* in: parent's current node flag; out: node flag to publish in its parent */
+ shadow_nodes[nr_branch - 1], /* of parent */
+ n, nr_branch - 1);
+ if (ret)
+ goto end;
+
+ dbg_printf("ja_detach_node: publish %p instead of %p\n",
+ iter_node_flag, *parent_node_flag_ptr);
+ /* Update address of parent ptr in its parent */
+ rcu_assign_pointer(*parent_node_flag_ptr, iter_node_flag);
+
+end:
+ for (i = 0; i < nr_shadow; i++)
+ rcuja_shadow_unlock(shadow_nodes[i]);
+ return ret;
+}
+
+/*
+ * Remove @node from the duplicate list of its key when at least one
+ * other duplicate remains. Takes the parent node lock; the caller
+ * holds the RCU read-side lock. Returns 0 on success, -EAGAIN if the
+ * list changed concurrently or if @node is the last duplicate left --
+ * that case must go through ja_detach_node() instead (cds_ja_del()
+ * takes that path on retry).
+ */
+static
+int ja_unchain_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag *parent_node_flag,
+ struct cds_ja_inode_flag **node_flag_ptr,
+ struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_shadow_node *shadow_node;
+ struct cds_ja_node *iter_node, **iter_node_ptr, **prev_node_ptr = NULL;
+ int ret = 0, count = 0, found = 0;
+
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
+ if (!shadow_node)
+ return -EAGAIN;
+ /* List head moved between RCU lookup and lock acquisition. */
+ if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ /*
+ * Find the previous node's next pointer pointing to our node,
+ * so we can update it. Retry if another thread removed all but
+ * one of duplicates since check (this check was performed
+ * without lock). Ensure that the node we are about to remove is
+ * still in the list (while holding lock). No need for RCU
+ * traversal here since we hold the lock on the parent.
+ */
+ iter_node_ptr = (struct cds_ja_node **) node_flag_ptr;
+ iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ cds_ja_for_each_duplicate(iter_node) {
+ count++;
+ if (iter_node == node) {
+ prev_node_ptr = iter_node_ptr;
+ found++;
+ }
+ iter_node_ptr = &iter_node->next;
+ }
+ assert(found <= 1);
+ if (!found || count == 1) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ /*
+ * Unlink @node. NOTE(review): plain CMM_STORE_SHARED rather than
+ * rcu_assign_pointer -- removal publishes no newly-initialized
+ * memory, so no publication barrier should be needed; confirm.
+ */
+ CMM_STORE_SHARED(*prev_node_ptr, node->next);
+ /*
+ * Validate that we indeed removed the node from linked list.
+ */
+ assert(ja_node_ptr(*node_flag_ptr) != (struct cds_ja_inode *) node);
+end:
+ rcuja_shadow_unlock(shadow_node);
+ return ret;
+}
+
+/*
+ * Called with RCU read lock held.
+ *
+ * Remove @node (matched by pointer identity, not only by key) from the
+ * Judy array. Records, during the downward walk, a snapshot of the
+ * traversed nodes, the locations holding them, and the key byte used
+ * at each level, so that ja_detach_node() can prune the branch when
+ * the last duplicate of @key is removed.
+ * Returns 0 on success, -EINVAL for an out-of-range key, -ENOENT when
+ * @node is not present. Retries internally on races with concurrent
+ * updates.
+ */
+int cds_ja_del(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *node)
+{
+ unsigned int tree_depth, i;
+ struct cds_ja_inode_flag *snapshot[JA_MAX_DEPTH];
+ struct cds_ja_inode_flag **snapshot_ptr[JA_MAX_DEPTH];
+ uint8_t snapshot_n[JA_MAX_DEPTH];
+ struct cds_ja_inode_flag *node_flag;
+ struct cds_ja_inode_flag **prev_node_flag_ptr,
+ **node_flag_ptr;
+ int nr_snapshot;
+ int ret;
+
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
+ return -EINVAL;
+ tree_depth = ja->tree_depth;
+
+retry:
+ nr_snapshot = 0;
+ dbg_printf("cds_ja_del attempt: key %" PRIu64 ", node %p\n",
+ key, node);
+
+ /* snapshot for level 0 is only for shadow node lookup */
+ snapshot_n[0] = 0;
+ snapshot_n[1] = 0;
+ snapshot_ptr[nr_snapshot] = NULL;
+ snapshot[nr_snapshot++] = (struct cds_ja_inode_flag *) &ja->root;
+ node_flag = rcu_dereference(ja->root);
+ prev_node_flag_ptr = &ja->root;
+ node_flag_ptr = &ja->root;
+
+ /* Iterate on all internal levels */
+ for (i = 1; i < tree_depth; i++) {
+ uint8_t iter_key;
+
+ dbg_printf("cds_ja_del iter node_flag %p\n",
+ node_flag);
+ if (!ja_node_ptr(node_flag)) {
+ return -ENOENT;
+ }
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
+ /* snapshot_n[i + 1] selects the child within snapshot[i]. */
+ snapshot_n[nr_snapshot + 1] = iter_key;
+ snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
+ snapshot[nr_snapshot++] = node_flag;
+ node_flag = ja_node_get_nth(node_flag,
+ &node_flag_ptr,
+ iter_key);
+ if (node_flag)
+ prev_node_flag_ptr = node_flag_ptr;
+ dbg_printf("cds_ja_del iter key lookup %u finds node_flag %p, prev_node_flag_ptr %p\n",
+ (unsigned int) iter_key, node_flag,
+ prev_node_flag_ptr);
+ }
+ /*
+ * We reached bottom of tree, try to find the node we are trying
+ * to remove. Fail if we cannot find it.
+ */
+ if (!ja_node_ptr(node_flag)) {
+ dbg_printf("cds_ja_del: no node found for key %" PRIu64 "\n",
+ key);
+ return -ENOENT;
+ } else {
+ struct ja_test_node /* see note */ *unused_doc_anchor;
+ struct cds_ja_node *iter_node, *match = NULL;
+ int count = 0;
+
+ iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ cds_ja_for_each_duplicate_rcu(iter_node) {
+ dbg_printf("cds_ja_del: compare %p with iter_node %p\n", node, iter_node);
+ if (iter_node == node)
+ match = iter_node;
+ count++;
+ }
+
+ if (!match) {
+ dbg_printf("cds_ja_del: no node match for node %p key %" PRIu64 "\n", node, key);
+ return -ENOENT;
+ }
+ assert(count > 0);
+ if (count == 1) {
+ /*
+ * Removing last of duplicates. Last snapshot
+ * does not have a shadow node (external leafs).
+ */
+ snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
+ snapshot[nr_snapshot++] = node_flag;
+ ret = ja_detach_node(ja, snapshot, snapshot_ptr,
+ snapshot_n, nr_snapshot, key, node);
+ } else {
+ ret = ja_unchain_node(ja, snapshot[nr_snapshot - 1],
+ node_flag_ptr, node_flag, match);
+ }
+ }
+ /*
+ * Explanation of -ENOENT handling: caused by concurrent delete
+ * between RCU lookup and actual removal. Need to re-do the
+ * lookup and removal attempt.
+ */
+ if (ret == -EAGAIN || ret == -ENOENT)
+ goto retry;
+ return ret;
+}
+
+/*
+ * Allocate and initialize a Judy array for keys of @key_bits bits
+ * (a multiple of 8, between 8 and 64), using the given RCU @flavor.
+ * Returns the new array, or NULL on error (invalid key_bits or
+ * allocation failure).
+ */
+struct cds_ja *_cds_ja_new(unsigned int key_bits,
+ const struct rcu_flavor_struct *flavor)
+{
+ struct cds_ja *ja;
+ int ret;
+ struct cds_ja_shadow_node *root_shadow_node;
+
+ /* calloc(nmemb, size): one zero-initialized struct. */
+ ja = calloc(1, sizeof(*ja));
+ if (!ja)
+ goto ja_error;
+
+ switch (key_bits) {
+ case 8:
+ case 16:
+ case 24:
+ case 32:
+ case 40:
+ case 48:
+ case 56:
+ ja->key_max = (1ULL << key_bits) - 1;
+ break;
+ case 64:
+ /* 1ULL << 64 would be undefined; handle separately. */
+ ja->key_max = UINT64_MAX;
+ break;
+ default:
+ goto check_error;
+ }
+
+ /* ja->root is NULL */
+ /* tree_depth 0 is for pointer to root node */
+ ja->tree_depth = (key_bits >> JA_LOG2_BITS_PER_BYTE) + 1;
+ assert(ja->tree_depth <= JA_MAX_DEPTH);
+ ja->ht = rcuja_create_ht(flavor);
+ if (!ja->ht)
+ goto ht_error;
+
+ /*
+ * Note: we should not free this node until judy array destroy.
+ */
+ root_shadow_node = rcuja_shadow_set(ja->ht,
+ (struct cds_ja_inode_flag *) &ja->root,
+ NULL, ja, 0);
+ if (!root_shadow_node)
+ goto ht_node_error;
+
+ return ja;
+
+ht_node_error:
+ ret = rcuja_delete_ht(ja->ht);
+ assert(!ret);
+ht_error:
+check_error:
+ free(ja);
+ja_error:
+ return NULL;
+}
+
+/*
+ * Debug aid: dump to stderr the histogram of fallback-node usage,
+ * indexed by node entry count. Buckets with a zero count are skipped.
+ */
+static
+void print_debug_fallback_distribution(struct cds_ja *ja)
+{
+ unsigned int i;	/* unsigned: matches the %u conversions below */
+
+ fprintf(stderr, "Fallback node distribution:\n");
+ for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
+ if (!ja->node_fallback_count_distribution[i])
+ continue;
+ fprintf(stderr, " %3u: %4lu\n",
+ i, ja->node_fallback_count_distribution[i]);
+ }
+}
+
+/*
+ * Consistency checks run at destroy time when debug counters are
+ * enabled: warn about fallback-node usage and report node leaks.
+ * Returns 0 when consistent, -1 when allocated/freed counts differ.
+ */
+static
+int ja_final_checks(struct cds_ja *ja)
+{
+ double fallback_ratio;
+ unsigned long na, nf, nr_fallback;
+ int ret = 0;
+
+ if (!ja_debug_counters())
+ return 0;
+
+ /* Read each counter once so the report is self-consistent. */
+ nr_fallback = uatomic_read(&ja->nr_fallback);
+ na = uatomic_read(&ja->nr_nodes_allocated);
+ nf = uatomic_read(&ja->nr_nodes_freed);
+ /* Guard the ratio against division by zero (nothing allocated). */
+ fallback_ratio = na ? (double) nr_fallback / (double) na : 0.0;
+ if (nr_fallback)
+ fprintf(stderr,
+ "[warning] RCU Judy Array used %lu fallback node(s) (ratio: %g)\n",
+ nr_fallback,
+ fallback_ratio);
+
+ dbg_printf("Nodes allocated: %lu, Nodes freed: %lu.\n", na, nf);
+ if (nr_fallback)
+ print_debug_fallback_distribution(ja);
+
+ if (na != nf) {
+ /* Parenthesize: compute the difference, then cast to long. */
+ fprintf(stderr, "[error] Judy array leaked %ld nodes. Allocated: %lu, freed: %lu.\n",
+ (long) (na - nf), na, nf);
+ ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * There should be no more concurrent add, delete, nor look-up performed
+ * on the Judy array while it is being destroyed (ensured by the
+ * caller).
+ *
+ * Returns 0 on success, the error from rcuja_delete_ht(), or -1 when
+ * the final leak/consistency checks fail (see ja_final_checks()).
+ */
+int cds_ja_destroy(struct cds_ja *ja)
+{
+ const struct rcu_flavor_struct *flavor;
+ int ret;
+
+ flavor = cds_lfht_rcu_flavor(ja->ht);
+ /* Free all remaining shadow nodes and their Judy array nodes. */
+ rcuja_shadow_prune(ja->ht,
+ RCUJA_SHADOW_CLEAR_FREE_NODE | RCUJA_SHADOW_CLEAR_FREE_LOCK);
+ /*
+ * NOTE(review): thread_offline presumably marks this thread
+ * quiescent so grace periods can progress during hash table
+ * teardown -- confirm against the flavor documentation.
+ */
+ flavor->thread_offline();
+ ret = rcuja_delete_ht(ja->ht);
+ if (ret)
+ /*
+ * NOTE(review): this early return leaves the thread
+ * marked offline and does not free @ja -- confirm
+ * intended on this error path.
+ */
+ return ret;
+
+ /* Wait for in-flight call_rcu free to complete. */
+ flavor->barrier();
+
+ flavor->thread_online();
+ ret = ja_final_checks(ja);
+ free(ja);
+ return ret;
+}
rcutorture_urcu_signal \
rcutorture_urcu_mb \
rcutorture_urcu_bp \
- rcutorture_urcu_qsbr
+ rcutorture_urcu_qsbr \
+ test_urcu_ja
noinst_HEADERS = rcutorture.h
rcutorture_urcu_bp_CFLAGS = -DRCU_BP $(AM_CFLAGS)
rcutorture_urcu_bp_LDADD = $(URCU_BP_LIB) $(TAP_LIB)
+test_urcu_ja_SOURCES = test_urcu_ja.c test_urcu_ja.h
+test_urcu_ja_CFLAGS = -DRCU_MEMBARRIER $(AM_CFLAGS)
+# NOTE(review): compiled with -DRCU_MEMBARRIER but linked against the
+# QSBR flavor library; confirm $(URCU_QSBR_LIB) is the intended flavor.
+test_urcu_ja_LDADD = $(URCU_QSBR_LIB) $(URCU_CDS_LIB) $(TAP_LIB)
+
urcutorture.c: ../common/api.h
all-local:
--- /dev/null
+/*
+ * test_urcu_ja.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright 2009-2012 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "test_urcu_ja.h"
+#include "../common/debug-yield.h"
+#include <errno.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+/*
+ * Per-thread state: rand_lookup is presumably a rand_r() seed --
+ * confirm with thread functions; the rest are operation counters.
+ */
+DEFINE_URCU_TLS(unsigned int, rand_lookup);
+DEFINE_URCU_TLS(unsigned long, nr_add);
+DEFINE_URCU_TLS(unsigned long, nr_addexist);
+DEFINE_URCU_TLS(unsigned long, nr_del);
+DEFINE_URCU_TLS(unsigned long, nr_delnoent);
+DEFINE_URCU_TLS(unsigned long, lookup_fail);
+DEFINE_URCU_TLS(unsigned long, lookup_ok);
+
+/* Judy array under test. */
+struct cds_ja *test_ja;
+
+/* Presumably polled by reader/writer threads to start/stop -- confirm. */
+volatile int test_go, test_stop;
+
+/* Writer period in us (see -d option in show_usage()). */
+unsigned long wdelay;
+
+/* Test duration in seconds (positional argument). */
+unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+unsigned long rduration;
+
+/* Initial population of nodes (see -k option). */
+unsigned long init_populate;
+int add_only;
+
+/* Random key pool offsets/sizes for init, lookup and write phases. */
+unsigned long init_pool_offset, lookup_pool_offset, write_pool_offset;
+unsigned long init_pool_size = DEFAULT_RAND_POOL,
+ lookup_pool_size = DEFAULT_RAND_POOL,
+ write_pool_size = DEFAULT_RAND_POOL;
+int validate_lookup;	/* -V: validate lookups of init values */
+int sanity_test;	/* -t: run sanity test */
+unsigned int key_bits = 32;	/* -B: key bits for multithread test */
+
+int count_pipe[2];
+
+int verbose_mode;
+
+/* Optional CPU affinity setup for test threads (see set_affinity()). */
+unsigned int cpu_affinities[NR_CPUS];
+unsigned int next_aff = 0;
+int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+DEFINE_URCU_TLS(unsigned long long, nr_writes);
+DEFINE_URCU_TLS(unsigned long long, nr_reads);
+
+unsigned int nr_readers;
+unsigned int nr_writers;
+
+/* Percentage of adds among add+removal operations (-r option). */
+static unsigned int add_ratio = 50;
+/* Key multiplication factor (-m option). */
+static uint64_t key_mul = 1ULL;
+
+/* Insertion modes: -u (add unique) and -s (add replace). */
+static int add_unique, add_replace;
+
+static pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Leak-detection accounting (see node_alloc()/free_test_node()). */
+static int leak_detection;
+static unsigned long test_nodes_allocated, test_nodes_freed;
+
+/*
+ * Pin the calling thread to the next CPU of the cpu_affinities list
+ * (round-robin via next_aff, protected by affinity_mutex). No-op when
+ * affinity was not requested or sched_setaffinity is unavailable.
+ */
+void set_affinity(void)
+{
+ cpu_set_t mask;
+ int cpu;
+ int ret;
+
+ if (!use_affinity)
+ return;
+
+#if HAVE_SCHED_SETAFFINITY
+ ret = pthread_mutex_lock(&affinity_mutex);
+ if (ret) {
+ /*
+ * NOTE(review): pthread_mutex_lock() returns the error
+ * code without setting errno, so perror() may print an
+ * unrelated message here.
+ */
+ perror("Error in pthread mutex lock");
+ exit(-1);
+ }
+ cpu = cpu_affinities[next_aff++];
+ ret = pthread_mutex_unlock(&affinity_mutex);
+ if (ret) {
+ perror("Error in pthread mutex unlock");
+ exit(-1);
+ }
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+#if SCHED_SETAFFINITY_ARGS == 2
+ sched_setaffinity(0, &mask);
+#else
+ sched_setaffinity(0, sizeof(mask), &mask);
+#endif
+#endif /* HAVE_SCHED_SETAFFINITY */
+}
+
+/*
+ * Acquire the test-wide copy mutex; exits the process on failure.
+ */
+void rcu_copy_mutex_lock(void)
+{
+ int ret;
+ ret = pthread_mutex_lock(&rcu_copy_mutex);
+ if (ret) {
+ /* pthread calls return the error; they do not set errno. */
+ errno = ret;
+ perror("Error in pthread mutex lock");
+ exit(-1);
+ }
+}
+
+/*
+ * Release the test-wide copy mutex; exits the process on failure.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&rcu_copy_mutex);
+ if (ret) {
+ /* pthread calls return the error; they do not set errno. */
+ errno = ret;
+ perror("Error in pthread mutex unlock");
+ exit(-1);
+ }
+}
+
+/*
+ * Allocate a zero-initialized test node; returns NULL on allocation
+ * failure. Successful allocations are accounted when leak detection
+ * is enabled.
+ */
+static
+struct ja_test_node *node_alloc(void)
+{
+ struct ja_test_node *node;
+
+ /* calloc(nmemb, size): one zeroed node. */
+ node = calloc(1, sizeof(*node));
+ if (leak_detection && node)
+ uatomic_inc(&test_nodes_allocated);
+ return node;
+}
+
+/*
+ * Immediately reclaim @node and account it for leak detection.
+ * NOTE(review): poison_free() is a project helper, presumably
+ * scribbling over the memory before freeing -- confirm.
+ */
+static
+void free_test_node(struct ja_test_node *node)
+{
+ poison_free(node);
+ if (leak_detection)
+ uatomic_inc(&test_nodes_freed);
+}
+
+/* call_rcu callback: reclaim a test node after its grace period. */
+static
+void free_test_node_cb(struct rcu_head *head)
+{
+ struct ja_test_node *test_node;
+
+ test_node = caa_container_of(head, struct ja_test_node, head);
+ free_test_node(test_node);
+}
+
+/* Defer freeing of @test_node until after an RCU grace period. */
+static
+void rcu_free_test_node(struct ja_test_node *test_node)
+{
+ call_rcu(&test_node->head, free_test_node_cb);
+}
+
+/* Immediate (non-deferred) reclaim of a Judy array test node. */
+static
+void free_node(struct cds_ja_node *node)
+{
+ free_test_node(to_test_node(node));
+}
+
+#if 0
+static
+void test_delete_all_nodes(struct cds_lfht *ht)
+{
+ struct cds_lfht_iter iter;
+ struct lfht_test_node *node;
+ unsigned long count = 0;
+
+ cds_lfht_for_each_entry(ht, &iter, node, node) {
+ int ret;
+
+ ret = cds_lfht_del(test_ht, cds_lfht_iter_get_node(&iter));
+ assert(!ret);
+ call_rcu(&node->head, free_node_cb);
+ count++;
+ }
+ printf("deleted %lu nodes.\n", count);
+}
+#endif
+
+/*
+ * Print command-line usage for the test program to stdout.
+ * NOTE(review): -r is used both for the DEBUG_YIELD reader-yield
+ * option and the add ratio -- possible option clash; confirm.
+ */
+void show_usage(int argc, char **argv)
+{
+ printf("Usage : %s nr_readers nr_writers duration (s)\n", argv[0]);
+#ifdef DEBUG_YIELD
+ printf(" [-r] [-w] (yield reader and/or writer)\n");
+#endif
+ printf(" [-d delay] (writer period (us))\n");
+ printf(" [-c duration] (reader C.S. duration (in loops))\n");
+ printf(" [-v] (verbose output)\n");
+ printf(" [-a cpu#] [-a cpu#]... (affinity)\n");
+ printf(" [-u] Add unique keys.\n");
+ printf(" [-s] Replace existing keys.\n");
+ printf(" [not -u nor -s] Add entries (supports redundant keys).\n");
+ printf(" [-r ratio] Add ratio (in %% of add+removal).\n");
+ printf(" [-k] Populate init nodes.\n");
+ printf(" [-R offset] Lookup pool offset.\n");
+ printf(" [-S offset] Write pool offset.\n");
+ printf(" [-T offset] Init pool offset.\n");
+ printf(" [-M size] Lookup pool size.\n");
+ printf(" [-N size] Write pool size.\n");
+ printf(" [-O size] Init pool size.\n");
+ printf(" [-V] Validate lookups of init values (use with filled init pool, same lookup range, with different write range).\n");
+ printf(" [-t] Do sanity test.\n");
+ printf(" [-B] Key bits for multithread test (default: 32).\n");
+ printf(" [-m factor] Key multiplication factor.\n");
+ printf(" [-l] Memory leak detection.\n");
+ printf("\n\n");
+}
+
+/*
+ * Remove and immediately free every node of @ja. The caller must be
+ * the only user of the Judy array at this point (nodes are freed
+ * directly, not through call_rcu). Returns 0 on success, negative
+ * error value on failure.
+ * Fix: operate on the @ja parameter instead of the global test_ja,
+ * which left the parameter dead and the function tied to one array.
+ */
+static
+int test_free_all_nodes(struct cds_ja *ja)
+{
+ uint64_t key;
+ struct cds_ja_node *ja_node;
+ int ret = 0;
+
+ rcu_read_lock();
+ cds_ja_for_each_key_rcu(ja, key, ja_node) {
+ struct cds_ja_node *tmp_node;
+
+ cds_ja_for_each_duplicate_safe_rcu(ja_node, tmp_node) {
+ ret = cds_ja_del(ja, key, ja_node);
+ if (ret) {
+ fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+ goto end;
+ }
+ /* Alone using Judy array, OK to free now */
+ free_node(ja_node);
+ }
+ }
+end:
+ rcu_read_unlock();
+ return ret;
+}
+
+/*
+ * Sanity test for 8-bit keys: insert keys 0..199, verify successful
+ * and failed lookups, remove everything, then exercise the
+ * below-equal/above-equal lookup API around probe keys in ka[].
+ * Returns 0 on success, -1 on setup/teardown failure; data-path
+ * failures abort via assert().
+ */
+static
+int test_8bit_key(void)
+{
+	int ret, i;
+	uint64_t key;
+	/* Probe keys for the below/above-equal tests (all < 256). */
+	uint64_t ka[] = { 5, 17, 100, 222 };
+	uint64_t ka_test_offset = 5;
+	struct cds_ja_node *ja_node;
+
+	/* Test with 8-bit key */
+	test_ja = cds_ja_new(8);
+	if (!test_ja) {
+		printf("Error allocating judy array.\n");
+		return -1;
+	}
+
+	/* Add keys */
+	printf("Test #1: add keys (8-bit).\n");
+	for (key = 0; key < 200; key++) {
+		struct ja_test_node *node = node_alloc();
+
+		ja_test_node_init(node, key);
+		rcu_read_lock();
+		ret = cds_ja_add(test_ja, key, &node->node);
+		rcu_read_unlock();
+		if (ret) {
+			fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+				ret, key);
+			assert(0);
+		}
+	}
+	printf("OK\n");
+
+	printf("Test #2: successful key lookup (8-bit).\n");
+	for (key = 0; key < 200; key++) {
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+	printf("OK\n");
+	printf("Test #3: unsuccessful key lookup (8-bit).\n");
+	for (key = 200; key < 240; key++) {
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (ja_node) {
+			fprintf(stderr,
+				"Error unexpected lookup node %" PRIu64 "\n",
+				key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+	printf("OK\n");
+	printf("Test #4: remove keys (8-bit).\n");
+	for (key = 0; key < 200; key++) {
+		struct ja_test_node *node;
+
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		ret = cds_ja_del(test_ja, key, &node->node);
+		if (ret) {
+			fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+			assert(0);
+		}
+		rcu_free_test_node(node);
+		/* Deleted key must no longer be found. */
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (ja_node) {
+			fprintf(stderr, "Error lookup %" PRIu64 ": %p (after delete) failed. Node is not expected.\n", key, ja_node);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+	printf("OK\n");
+
+	printf("Test #5: lookup below/above equal (8-bit).\n");
+
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node = node_alloc();
+
+		key = ka[i];
+		ja_test_node_init(node, key);
+		rcu_read_lock();
+		ret = cds_ja_add(test_ja, key, &node->node);
+		rcu_read_unlock();
+		if (ret) {
+			fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+				ret, key);
+			assert(0);
+		}
+	}
+
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node;
+		uint64_t result_key;
+
+		key = ka[i] + ka_test_offset;
+		rcu_read_lock();
+		ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node;
+		uint64_t result_key;
+
+		key = ka[i] - ka_test_offset;
+		rcu_read_lock();
+		ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			/* Fix: message said "below or equal" for an above-equal lookup. */
+			fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			/* Fix: message said "below or equal" for an above-equal lookup. */
+			fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node;
+		uint64_t result_key;
+
+		key = ka[i];	/* without offset */
+		rcu_read_lock();
+		ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+
+		ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			/* Fix: message said "below or equal" for an above-equal lookup. */
+			fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			/* Fix: message said "below or equal" for an above-equal lookup. */
+			fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+
+	printf("OK\n");
+
+	ret = test_free_all_nodes(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error freeing all nodes\n");
+		return -1;
+	}
+
+	ret = cds_ja_destroy(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error destroying judy array\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Sanity test for 16-bit keys: insert keys 0..9999, verify successful
+ * and failed lookups, remove everything, then exercise the
+ * below-equal/above-equal lookup API around probe keys in ka[].
+ * Returns 0 on success, -1 on setup/teardown failure; data-path
+ * failures abort via assert().
+ */
+static
+int test_16bit_key(void)
+{
+	int ret, i;
+	uint64_t key;
+	/* Probe keys for the below/above-equal tests (all < 65536). */
+	uint64_t ka[] = { 105, 206, 4000, 4111, 59990, 65435 };
+	uint64_t ka_test_offset = 100;
+	struct cds_ja_node *ja_node;
+
+	/* Test with 16-bit key */
+	test_ja = cds_ja_new(16);
+	if (!test_ja) {
+		printf("Error allocating judy array.\n");
+		return -1;
+	}
+
+	/* Add keys */
+	printf("Test #1: add keys (16-bit).\n");
+	for (key = 0; key < 10000; key++) {
+		/* Alternate (full key space) range kept for reference: */
+		//for (key = 0; key < 65536; key+=256) {
+		struct ja_test_node *node = node_alloc();
+
+		ja_test_node_init(node, key);
+		rcu_read_lock();
+		ret = cds_ja_add(test_ja, key, &node->node);
+		rcu_read_unlock();
+		if (ret) {
+			fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+				ret, key);
+			assert(0);
+		}
+	}
+	printf("OK\n");
+
+	printf("Test #2: successful key lookup (16-bit).\n");
+	for (key = 0; key < 10000; key++) {
+		//for (key = 0; key < 65536; key+=256) {
+		/* NOTE(review): shadows the outer ja_node declaration. */
+		struct cds_ja_node *ja_node;
+
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+	printf("OK\n");
+	printf("Test #3: unsuccessful key lookup (16-bit).\n");
+	for (key = 11000; key <= 11002; key++) {
+		struct cds_ja_node *ja_node;
+
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (ja_node) {
+			fprintf(stderr,
+				"Error unexpected lookup node %" PRIu64 "\n",
+				key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+	printf("OK\n");
+	printf("Test #4: remove keys (16-bit).\n");
+	for (key = 0; key < 10000; key++) {
+		//for (key = 0; key < 65536; key+=256) {
+		struct ja_test_node *node;
+
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		ret = cds_ja_del(test_ja, key, &node->node);
+		if (ret) {
+			fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+			assert(0);
+		}
+		rcu_free_test_node(node);
+		/* Deleted key must no longer be found. */
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (ja_node) {
+			fprintf(stderr, "Error lookup %" PRIu64 ": %p (after delete) failed. Node is not expected.\n", key, ja_node);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+	printf("OK\n");
+
+	printf("Test #5: lookup below/above equal (16-bit).\n");
+
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node = node_alloc();
+
+		key = ka[i];
+		ja_test_node_init(node, key);
+		rcu_read_lock();
+		ret = cds_ja_add(test_ja, key, &node->node);
+		rcu_read_unlock();
+		if (ret) {
+			fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+				ret, key);
+			assert(0);
+		}
+	}
+
+	/* Probe above each key: below-equal must come back to ka[i]. */
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node;
+		uint64_t result_key;
+
+		key = ka[i] + ka_test_offset;
+		rcu_read_lock();
+		ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+
+	/* Probe below each key: above-equal must come back to ka[i]. */
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node;
+		uint64_t result_key;
+
+		key = ka[i] - ka_test_offset;
+		rcu_read_lock();
+		ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+
+	/* Probe exactly on each key: both directions must match it. */
+	for (i = 0; i < CAA_ARRAY_SIZE(ka); i++) {
+		struct ja_test_node *node;
+		uint64_t result_key;
+
+		key = ka[i];	/* without offset */
+		rcu_read_lock();
+		ja_node = cds_ja_lookup_below_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup below equal. Cannot find expected key %" PRIu64" below or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			fprintf(stderr, "Error lookup below equal. Expecting key %" PRIu64 " below or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+
+		ja_node = cds_ja_lookup_above_equal(test_ja, key, &result_key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup above equal. Cannot find expected key %" PRIu64" above or equal to %" PRIu64 ".\n",
+				ka[i], key);
+			assert(0);
+		}
+		node = caa_container_of(ja_node, struct ja_test_node, node);
+		if (node->key != ka[i] || result_key != ka[i]) {
+			fprintf(stderr, "Error lookup above equal. Expecting key %" PRIu64 " above or equal to %" PRIu64 ", but found %" PRIu64 "/%" PRIu64" instead.\n",
+				ka[i], key, node->key, result_key);
+			assert(0);
+		}
+		rcu_read_unlock();
+	}
+
+	printf("OK\n");
+
+	ret = test_free_all_nodes(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error freeing all nodes\n");
+		return -1;
+	}
+
+	ret = cds_ja_destroy(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error destroying judy array\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * nr_dup is number of nodes per key.
+ */
+/*
+ * Sparse-key sanity test: walks the whole bits-wide key space with a
+ * stride of 2^(bits-8), i.e. 256 evenly spaced keys (plus key 0 once).
+ * The "zerocount" guard terminates the loop when key wraps back to 0,
+ * which is how the 64-bit case (where key <= max_key is always true)
+ * avoids looping forever. Inserts nr_dup duplicates per key, checks
+ * lookups, then removes everything.
+ */
+static
+int test_sparse_key(unsigned int bits, int nr_dup)
+{
+	uint64_t key, max_key;
+	int zerocount, i, ret;
+	struct cds_ja_node *ja_node;
+
+	/* 1ULL << 64 would be undefined; special-case full-width keys. */
+	if (bits == 64)
+		max_key = UINT64_MAX;
+	else
+		max_key = (1ULL << bits) - 1;
+
+	printf("Sparse key test begins for %u-bit keys\n", bits);
+	/* NOTE(review): stale comment — key width is the "bits" argument. */
+	/* Test with 16-bit key */
+	test_ja = cds_ja_new(bits);
+	if (!test_ja) {
+		printf("Error allocating judy array.\n");
+		return -1;
+	}
+
+	/* Add keys */
+	printf("Test #1: add keys (%u-bit).\n", bits);
+	for (i = 0; i < nr_dup; i++) {
+		zerocount = 0;
+		for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+			struct ja_test_node *node = node_alloc();
+
+			ja_test_node_init(node, key);
+			rcu_read_lock();
+			ret = cds_ja_add(test_ja, key, &node->node);
+			rcu_read_unlock();
+			if (ret) {
+				fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+					ret, key);
+				assert(0);
+			}
+			if (key == 0)
+				zerocount++;
+		}
+	}
+	printf("OK\n");
+
+	printf("Test #2: successful key lookup (%u-bit).\n", bits);
+	zerocount = 0;
+	for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+		struct ja_test_node *node;
+		int count = 0;
+
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (!ja_node) {
+			fprintf(stderr, "Error lookup node %" PRIu64 "\n", key);
+			assert(0);
+		}
+		cds_ja_for_each_duplicate_rcu(ja_node) {
+			count++;
+		}
+		/* NOTE(review): mismatch only warns here — no assert(0). */
+		if (count != nr_dup) {
+			fprintf(stderr, "Unexpected number of match for key %" PRIu64 ", expected %d, got %d.\n", key, nr_dup, count);
+		}
+		rcu_read_unlock();
+		if (key == 0)
+			zerocount++;
+	}
+	printf("OK\n");
+	/* key+42 would collide with a populated key when bits == 8. */
+	if (bits > 8) {
+		printf("Test #3: unsuccessful key lookup (%u-bit).\n", bits);
+		zerocount = 0;
+		for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+			rcu_read_lock();
+			ja_node = cds_ja_lookup(test_ja, key + 42);
+			if (ja_node) {
+				fprintf(stderr,
+					"Error unexpected lookup node %" PRIu64 "\n",
+					key + 42);
+				assert(0);
+			}
+			rcu_read_unlock();
+			if (key == 0)
+				zerocount++;
+		}
+		printf("OK\n");
+	}
+	printf("Test #4: remove keys (%u-bit).\n", bits);
+	zerocount = 0;
+	for (key = 0; key <= max_key && (key != 0 || zerocount < 1); key += 1ULL << (bits - 8)) {
+		int count = 0;
+
+		rcu_read_lock();
+		ja_node = cds_ja_lookup(test_ja, key);
+
+		/* Iterating a NULL head is a no-op for the duplicate walker. */
+		cds_ja_for_each_duplicate_rcu(ja_node) {
+			struct cds_ja_node *test_ja_node;
+			struct ja_test_node *node;
+
+			count++;
+			node = caa_container_of(ja_node,
+				struct ja_test_node, node);
+			ret = cds_ja_del(test_ja, key, &node->node);
+			if (ret) {
+				fprintf(stderr, "Error (%d) removing node %" PRIu64 "\n", ret, key);
+				assert(0);
+			}
+			rcu_free_test_node(node);
+			/* While duplicates remain, the key must still be found. */
+			test_ja_node = cds_ja_lookup(test_ja, key);
+			if (count < nr_dup && !test_ja_node) {
+				fprintf(stderr, "Error: no node found after deletion of some nodes of a key\n");
+				assert(0);
+			}
+		}
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (ja_node) {
+			fprintf(stderr, "Error lookup %" PRIu64 ": %p (after delete) failed. Node is not expected.\n", key, ja_node);
+			assert(0);
+		}
+		rcu_read_unlock();
+		if (key == 0)
+			zerocount++;
+	}
+	printf("OK\n");
+
+	ret = test_free_all_nodes(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error freeing all nodes\n");
+		return -1;
+	}
+
+	ret = cds_ja_destroy(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error destroying judy array\n");
+		return -1;
+	}
+	printf("Test ends\n");
+
+	return 0;
+}
+
+/*
+ * Run the whole single-threaded sanity suite: repeated 8-bit test,
+ * one 16-bit test, then sparse tests for every key width and 1..3
+ * duplicates per key. Returns the first non-zero sub-test result,
+ * or 0 when everything passed.
+ */
+static
+int do_sanity_test(void)
+{
+	int trial, bits, dup, ret;
+
+	printf("Sanity test start.\n");
+
+	/* Exercise creation/teardown by running the 8-bit test 3 times. */
+	for (trial = 0; trial < 3; trial++) {
+		ret = test_8bit_key();
+		if (ret)
+			return ret;
+		rcu_quiescent_state();
+	}
+
+	ret = test_16bit_key();
+	if (ret)
+		return ret;
+	rcu_quiescent_state();
+
+	/* Sparse tests over key widths 8, 16, 32, 64 bits. */
+	for (bits = 8; bits <= 64; bits *= 2) {
+		/* 1 to 3 duplicate nodes per key. */
+		for (dup = 1; dup < 4; dup++) {
+			ret = test_sparse_key(bits, dup);
+			if (ret)
+				return ret;
+			rcu_quiescent_state();
+		}
+	}
+	printf("Sanity test end.\n");
+
+	return 0;
+}
+
+/*
+ * Writer-thread operating mode; cycled at runtime by the SIGUSR1
+ * handler below.
+ */
+enum urcu_ja_addremove {
+	AR_RANDOM = 0,
+	AR_ADD = 1,
+	AR_REMOVE = -1,
+};	/* 1: add, -1 remove, 0: random */
+
+/* Current mode, shared by all writer threads (starts as AR_RANDOM). */
+static enum urcu_ja_addremove addremove;	/* 1: add, -1 remove, 0: random */
+
+/*
+ * SIGUSR1 cycles the writer mode: add-only -> random -> remove-only
+ * -> add-only.
+ * NOTE(review): printf() is not async-signal-safe, and addremove is
+ * read by writer threads without synchronization — acceptable for a
+ * test program, but worth confirming.
+ */
+static
+void test_ja_rw_sigusr1_handler(int signo)
+{
+	switch (addremove) {
+	case AR_ADD:
+		printf("Add/Remove: random.\n");
+		addremove = AR_RANDOM;
+		break;
+	case AR_RANDOM:
+		printf("Add/Remove: remove only.\n");
+		addremove = AR_REMOVE;
+		break;
+	case AR_REMOVE:
+		printf("Add/Remove: add only.\n");
+		addremove = AR_ADD;
+		break;
+	}
+}
+
+/*
+ * Reader thread body: repeatedly look up random keys from the lookup
+ * pool under the RCU read lock until the main thread sets test_stop.
+ * _count points to the caller's slot where the total read count is
+ * stored on exit.
+ */
+static
+void *test_ja_rw_thr_reader(void *_count)
+{
+	unsigned long long *count = _count;
+	struct cds_ja_node *ja_node;
+	uint64_t key;
+
+	printf_verbose("thread_begin %s, tid %lu\n",
+		"reader", urcu_get_thread_id());
+
+	/* Per-thread PRNG seed; mixed with time so threads differ. */
+	URCU_TLS(rand_lookup) = urcu_get_thread_id() ^ time(NULL);
+
+	set_affinity();
+
+	rcu_register_thread();
+
+	/* Busy-wait for the main thread's start signal. */
+	while (!test_go)
+	{
+	}
+	cmm_smp_mb();
+
+	for (;;) {
+		rcu_read_lock();
+
+		/* note: only looking up ulong keys */
+		key = ((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % lookup_pool_size) + lookup_pool_offset;
+		key *= key_mul;
+		ja_node = cds_ja_lookup(test_ja, key);
+		if (!ja_node) {
+			/* With -V, every lookup must hit an init-pool key. */
+			if (validate_lookup) {
+				printf("[ERROR] Lookup cannot find initial node.\n");
+				exit(-1);
+			}
+			URCU_TLS(lookup_fail)++;
+		} else {
+			URCU_TLS(lookup_ok)++;
+		}
+		rcu_debug_yield_read();
+		if (caa_unlikely(rduration))
+			loop_sleep(rduration);
+		rcu_read_unlock();
+		URCU_TLS(nr_reads)++;
+		if (caa_unlikely(!test_duration_read()))
+			break;
+		/* QSBR: report a quiescent state every 1024 reads. */
+		if (caa_unlikely((URCU_TLS(nr_reads) & ((1 << 10) - 1)) == 0))
+			rcu_quiescent_state();
+	}
+
+	rcu_unregister_thread();
+
+	*count = URCU_TLS(nr_reads);
+	printf_verbose("thread_end %s, tid %lu\n",
+		"reader", urcu_get_thread_id());
+	/* NOTE(review): pthread_self() printed with %lx — pthread_t is
+	 * opaque and not guaranteed to be an unsigned long; confirm. */
+	printf_verbose("readid : %lx, lookupfail %lu, lookupok %lu\n",
+		pthread_self(), URCU_TLS(lookup_fail),
+		URCU_TLS(lookup_ok));
+	return ((void*)1);
+}
+
+/*
+ * Decide whether the next writer operation is an add: true with
+ * probability add_ratio percent.
+ */
+static
+int is_add(void)
+{
+	unsigned int draw;
+
+	/* Draw a value in [0, 100) and compare to the configured ratio. */
+	draw = (unsigned int) rand_r(&URCU_TLS(rand_lookup)) % 100;
+	return draw < add_ratio;
+}
+
+/*
+ * Writer thread body: depending on the current addremove mode, either
+ * insert a fresh node at a random write-pool key or delete one
+ * existing node for such a key, until test_stop is set. _count points
+ * to the caller's wr_count slot filled in on exit.
+ */
+static
+void *test_ja_rw_thr_writer(void *_count)
+{
+	struct wr_count *count = _count;
+	uint64_t key;
+	int ret;
+
+	printf_verbose("thread_begin %s, tid %lu\n",
+		"writer", urcu_get_thread_id());
+
+	URCU_TLS(rand_lookup) = urcu_get_thread_id() ^ time(NULL);
+
+	set_affinity();
+
+	rcu_register_thread();
+
+	/* Busy-wait for the main thread's start signal. */
+	while (!test_go)
+	{
+	}
+	cmm_smp_mb();
+
+	for (;;) {
+		if ((addremove == AR_ADD)
+				|| (addremove == AR_RANDOM && is_add())) {
+			struct ja_test_node *node = node_alloc();
+			struct cds_ja_node *ret_node;
+
+			/* note: only inserting ulong keys */
+			key = ((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % write_pool_size) + write_pool_offset;
+			key *= key_mul;
+			ja_test_node_init(node, key);
+			rcu_read_lock();
+			if (add_unique) {
+				/* Returns the pre-existing node when the key is taken. */
+				ret_node = cds_ja_add_unique(test_ja, key, &node->node);
+				if (ret_node != &node->node) {
+					free_test_node(node);
+					URCU_TLS(nr_addexist)++;
+				} else {
+					URCU_TLS(nr_add)++;
+				}
+			} else if (add_replace) {
+				assert(0);	/* not implemented yet. */
+			} else {
+				ret = cds_ja_add(test_ja, key, &node->node);
+				if (ret) {
+					fprintf(stderr, "Error in cds_ja_add: %d\n", ret);
+					free_test_node(node);
+				} else {
+					URCU_TLS(nr_add)++;
+				}
+			}
+			rcu_read_unlock();
+		} else {
+			struct cds_ja_node *ja_node;
+			struct ja_test_node *node;
+
+			/* May delete */
+			/* note: only deleting ulong keys */
+			key = ((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % write_pool_size) + write_pool_offset;
+			key *= key_mul;
+
+			rcu_read_lock();
+
+			ja_node = cds_ja_lookup(test_ja, key);
+			/* Remove first entry */
+			if (ja_node) {
+				node = caa_container_of(ja_node,
+					struct ja_test_node, node);
+				ret = cds_ja_del(test_ja, key, &node->node);
+				if (!ret) {
+					/* Deferred free: readers may still hold a reference. */
+					rcu_free_test_node(node);
+					URCU_TLS(nr_del)++;
+				} else {
+					URCU_TLS(nr_delnoent)++;
+				}
+			} else {
+				URCU_TLS(nr_delnoent)++;
+			}
+			rcu_read_unlock();
+		}
+
+		URCU_TLS(nr_writes)++;
+		if (caa_unlikely(!test_duration_write()))
+			break;
+		if (caa_unlikely(wdelay))
+			loop_sleep(wdelay);
+		/* QSBR: report a quiescent state every 1024 writes. */
+		if (caa_unlikely((URCU_TLS(nr_writes) & ((1 << 10) - 1)) == 0))
+			rcu_quiescent_state();
+	}
+
+	rcu_unregister_thread();
+
+	printf_verbose("thread_end %s, tid %lu\n",
+		"writer", urcu_get_thread_id());
+	printf_verbose("info id %lx: nr_add %lu, nr_addexist %lu, nr_del %lu, "
+		"nr_delnoent %lu\n", pthread_self(), URCU_TLS(nr_add),
+		URCU_TLS(nr_addexist), URCU_TLS(nr_del),
+		URCU_TLS(nr_delnoent));
+	count->update_ops = URCU_TLS(nr_writes);
+	count->add = URCU_TLS(nr_add);
+	count->add_exist = URCU_TLS(nr_addexist);
+	count->remove = URCU_TLS(nr_del);
+	return ((void*)2);
+}
+
+/*
+ * Pre-populate the Judy array with init_pool_size sequential keys
+ * (times key_mul) when -k was given; no-op otherwise. Always returns
+ * 0 — per-key add failures abort via assert().
+ */
+static
+int do_mt_populate_ja(void)
+{
+	uint64_t iter;
+	int ret;
+
+	if (!init_populate)
+		return 0;
+
+	/* NOTE(review): message is misleading — this is the population
+	 * phase, the rw test itself starts later in do_mt_test(). */
+	printf("Starting rw test\n");
+
+	for (iter = init_pool_offset; iter < init_pool_offset + init_pool_size; iter++) {
+		struct ja_test_node *node = node_alloc();
+		uint64_t key;
+
+		/* note: only inserting ulong keys */
+		key = (unsigned long) iter;
+		key *= key_mul;
+		ja_test_node_init(node, key);
+		rcu_read_lock();
+		ret = cds_ja_add(test_ja, key, &node->node);
+		URCU_TLS(nr_add)++;
+		URCU_TLS(nr_writes)++;
+		rcu_read_unlock();
+		/* QSBR: report quiescent state periodically so grace periods
+		 * (and deferred frees) can progress during population. */
+		if (!(iter % 100))
+			rcu_quiescent_state();
+		if (ret) {
+			fprintf(stderr, "Error (%d) adding node %" PRIu64 "\n",
+				ret, key);
+			assert(0);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Multithreaded stress test: spawn nr_readers lookup threads and
+ * nr_writers add/remove threads on a fresh Judy array, run them for
+ * "duration" seconds, then join, drain and destroy the array.
+ * Returns 0 on success, -1 on error.
+ */
+static
+int do_mt_test(void)
+{
+	pthread_t *tid_reader = NULL, *tid_writer = NULL;
+	void *tret;
+	int ret, i, err;
+	unsigned long long *count_reader = NULL;
+	struct wr_count *count_writer = NULL;
+	unsigned long long tot_reads = 0, tot_writes = 0,
+		tot_add = 0, tot_add_exist = 0, tot_remove = 0;
+	unsigned int remain;
+
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	count_reader = malloc(sizeof(*count_reader) * nr_readers);
+	count_writer = malloc(sizeof(*count_writer) * nr_writers);
+	/* Fix: check allocations instead of dereferencing NULL below. */
+	if (!tid_reader || !tid_writer || !count_reader || !count_writer) {
+		fprintf(stderr, "Error allocating thread bookkeeping arrays\n");
+		ret = -1;
+		goto end;
+	}
+
+	printf("Allocating Judy Array for %u-bit keys\n", key_bits);
+	test_ja = cds_ja_new(key_bits);
+	if (!test_ja) {
+		printf("Error allocating judy array.\n");
+		ret = -1;
+		goto end;
+	}
+
+	do_mt_populate_ja();
+
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i],
+			NULL, test_ja_rw_thr_reader,
+			&count_reader[i]);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i],
+			NULL, test_ja_rw_thr_writer,
+			&count_writer[i]);
+		if (err != 0)
+			exit(1);
+	}
+
+	cmm_smp_mb();
+
+	test_go = 1;
+
+	/* Main thread performs no read-side C.S.; mark it offline for QSBR. */
+	rcu_thread_offline_qsbr();
+
+	remain = duration;
+	do {
+		remain = sleep(remain);
+	} while (remain > 0);
+
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += count_reader[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += count_writer[i].update_ops;
+		tot_add += count_writer[i].add;
+		tot_add_exist += count_writer[i].add_exist;
+		tot_remove += count_writer[i].remove;
+	}
+	rcu_thread_online_qsbr();
+
+	ret = test_free_all_nodes(test_ja);
+	if (ret) {
+		/* Fix: go through cleanup instead of leaking the arrays. */
+		fprintf(stderr, "Error freeing all nodes\n");
+		ret = -1;
+		goto end;
+	}
+
+	ret = cds_ja_destroy(test_ja);
+	if (ret) {
+		fprintf(stderr, "Error destroying judy array\n");
+		goto end;
+	}
+	ret = 0;
+end:
+	/* free(NULL) is a no-op, so this is safe on every path. */
+	free(tid_reader);
+	free(tid_writer);
+	free(count_reader);
+	free(count_writer);
+	return ret;
+}
+
+/*
+ * Compare the global allocation/free counters updated by the node
+ * alloc/free helpers. Returns 0 when balanced, -1 when nodes leaked.
+ */
+static
+int check_memory_leaks(void)
+{
+	unsigned long na, nf;
+
+	na = uatomic_read(&test_nodes_allocated);
+	nf = uatomic_read(&test_nodes_freed);
+	if (na != nf) {
+		/* Fix: na - nf is unsigned long, so print it with %lu (it was
+		 * %ld, which is undefined for values above LONG_MAX). */
+		fprintf(stderr, "Memory leak of %lu test nodes detected. Allocated: %lu, freed: %lu\n",
+			na - nf, na, nf);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Entry point: parse "nr_readers nr_writers duration [options]",
+ * install the SIGUSR1 mode-cycling handler, then run either the
+ * single-threaded sanity suite (-t) or the multithreaded stress test,
+ * followed by a leak check.
+ */
+int main(int argc, char **argv)
+{
+	/* NOTE(review): j and key are declared but never used. */
+	int i, j, a, ret, err;
+	uint64_t key;
+	struct sigaction act;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	/*
+	 * NOTE(review): only -a, -c and -d bounds-check their argument;
+	 * every other option below reads argv[++i] unchecked and will
+	 * pass NULL to atol()/atoi() when the argument is missing.
+	 */
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		/*
+		 * NOTE(review): duplicate case label — when DEBUG_YIELD is
+		 * defined, this 'r' clashes with the YIELD_READ case above
+		 * and the file will not compile.
+		 */
+		case 'r':
+			add_ratio = atoi(argv[++i]);
+			break;
+		case 'k':
+			init_populate = 1;
+			break;
+		case 'R':
+			lookup_pool_offset = atol(argv[++i]);
+			break;
+		case 'S':
+			write_pool_offset = atol(argv[++i]);
+			break;
+		case 'T':
+			init_pool_offset = atol(argv[++i]);
+			break;
+		case 'M':
+			lookup_pool_size = atol(argv[++i]);
+			break;
+		case 'N':
+			write_pool_size = atol(argv[++i]);
+			break;
+		case 'O':
+			init_pool_size = atol(argv[++i]);
+			break;
+		case 'V':
+			validate_lookup = 1;
+			break;
+		case 't':
+			sanity_test = 1;
+			break;
+		case 'B':
+			key_bits = atol(argv[++i]);
+			break;
+		case 'm':
+			key_mul = atoll(argv[++i]);
+			break;
+		case 'u':
+			add_unique = 1;
+			break;
+		case 's':
+			add_replace = 1;
+			break;
+		case 'l':
+			leak_detection = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("Add ratio: %u%%.\n", add_ratio);
+	printf_verbose("Mode:%s%s.\n",
+		" add/remove",
+		add_unique ? " uniquify" : ( add_replace ? " replace" : " insert"));
+	printf_verbose("Key multiplication factor: %" PRIu64 ".\n", key_mul);
+	printf_verbose("Init pool size offset %lu size %lu.\n",
+		init_pool_offset, init_pool_size);
+	printf_verbose("Lookup pool size offset %lu size %lu.\n",
+		lookup_pool_offset, lookup_pool_size);
+	printf_verbose("Update pool size offset %lu size %lu.\n",
+		write_pool_offset, write_pool_size);
+	if (validate_lookup)
+		printf_verbose("Validating lookups.\n");
+	/* NOTE(review): "dection" typo in the runtime message below. */
+	if (leak_detection)
+		printf_verbose("Memory leak dection activated.\n");
+	printf_verbose("thread %-6s, tid %lu\n",
+		"main", urcu_get_thread_id());
+
+	memset(&act, 0, sizeof(act));
+	ret = sigemptyset(&act.sa_mask);
+	if (ret == -1) {
+		perror("sigemptyset");
+		return -1;
+	}
+	act.sa_handler = test_ja_rw_sigusr1_handler;
+	act.sa_flags = SA_RESTART;
+	ret = sigaction(SIGUSR1, &act, NULL);
+	if (ret == -1) {
+		perror("sigaction");
+		return -1;
+	}
+
+	err = create_all_cpu_call_rcu_data(0);
+	if (err) {
+		printf("Per-CPU call_rcu() worker threads unavailable. Using default global worker thread.\n");
+	}
+
+	rcu_register_thread();
+
+	if (sanity_test) {
+		ret = do_sanity_test();
+	} else {
+		ret = do_mt_test();
+	}
+
+	/* Wait for in-flight call_rcu free to complete for leak detection */
+	rcu_barrier();
+
+	ret |= check_memory_leaks();
+
+	rcu_unregister_thread();
+	free_all_cpu_call_rcu_data();
+
+	if (ret) {
+		printf("Test ended with error: %d\n", ret);
+	}
+	return ret;
+}
--- /dev/null
+#ifndef _TEST_URCU_JA_H
+#define _TEST_URCU_JA_H
+
+/*
+ * test_urcu_ja.h
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright 2009-2012 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sched.h>
+#include <errno.h>
+#include <signal.h>
+
+#include <urcu/tls-compat.h>
+#include "thread-id.h"
+
+#define DEFAULT_RAND_POOL 1000000
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+#ifdef POISON_FREE
+#define poison_free(ptr) \
+ do { \
+ memset(ptr, 0x42, sizeof(*(ptr))); \
+ free(ptr); \
+ } while (0)
+#else
+#define poison_free(ptr) free(ptr)
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include <urcu-qsbr.h>
+#include <urcu/rcuja.h>
+#include <urcu-call-rcu.h>
+
+struct wr_count {
+ unsigned long update_ops;
+ unsigned long add;
+ unsigned long add_exist;
+ unsigned long remove;
+};
+
+extern DECLARE_URCU_TLS(unsigned int, rand_lookup);
+extern DECLARE_URCU_TLS(unsigned long, nr_add);
+extern DECLARE_URCU_TLS(unsigned long, nr_addexist);
+extern DECLARE_URCU_TLS(unsigned long, nr_del);
+extern DECLARE_URCU_TLS(unsigned long, nr_delnoent);
+extern DECLARE_URCU_TLS(unsigned long, lookup_fail);
+extern DECLARE_URCU_TLS(unsigned long, lookup_ok);
+
+extern struct cds_ja *test_ja;
+
+struct ja_test_node {
+ struct cds_ja_node node;
+ uint64_t key; /* for testing */
+ struct rcu_head head; /* delayed reclaim */
+};
+
+/*
+ * Convert an embedded cds_ja_node pointer back to its enclosing
+ * ja_test_node (container_of pattern).
+ */
+static inline struct ja_test_node *
+to_test_node(struct cds_ja_node *node)
+{
+	return caa_container_of(node, struct ja_test_node, node);
+}
+
+/*
+ * Initialize a test node: prepare the embedded Judy-array linkage and
+ * record the key (kept in the node purely for test verification).
+ */
+static inline
+void ja_test_node_init(struct ja_test_node *node, uint64_t key)
+{
+	cds_ja_node_init(&node->node);
+	node->key = key;
+}
+
+extern volatile int test_go, test_stop;
+
+extern unsigned long wdelay;
+
+extern unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+extern unsigned long rduration;
+
+extern unsigned long init_populate;
+extern int add_only;
+
+extern unsigned long init_pool_offset, lookup_pool_offset, write_pool_offset;
+extern unsigned long init_pool_size,
+ lookup_pool_size,
+ write_pool_size;
+extern int validate_lookup;
+
+extern int count_pipe[2];
+
+/*
+ * Busy-wait for roughly "loops" iterations, hinting the CPU that we
+ * are spinning. Used to simulate read-side/writer work durations.
+ */
+static inline void loop_sleep(unsigned long loops)
+{
+	for (; loops != 0; loops--)
+		caa_cpu_relax();
+}
+
+extern int verbose_mode;
+
+#define printf_verbose(fmt, args...) \
+ do { \
+ if (verbose_mode) \
+ printf(fmt, ## args); \
+ } while (0)
+
+extern unsigned int cpu_affinities[NR_CPUS];
+extern unsigned int next_aff;
+extern int use_affinity;
+
+extern pthread_mutex_t affinity_mutex;
+
+#ifndef HAVE_CPU_SET_T
+typedef unsigned long cpu_set_t;
+# define CPU_ZERO(cpuset) do { *(cpuset) = 0; } while(0)
+# define CPU_SET(cpu, cpuset) do { *(cpuset) |= (1UL << (cpu)); } while(0)
+#endif
+
+void set_affinity(void);
+
+/*
+ * returns 0 if test should end.
+ */
+static inline int test_duration_write(void)
+{
+	/* test_stop is set by the main thread once the run duration elapses. */
+	return !test_stop;
+}
+
+static inline int test_duration_read(void)
+{
+	return !test_stop;
+}
+
+extern DECLARE_URCU_TLS(unsigned long long, nr_writes);
+extern DECLARE_URCU_TLS(unsigned long long, nr_reads);
+
+extern unsigned int nr_readers;
+extern unsigned int nr_writers;
+
+void rcu_copy_mutex_lock(void);
+void rcu_copy_mutex_unlock(void);
+
+#endif /* _TEST_URCU_JA_H */
noinst_LIBRARIES = libtap.a
libtap_a_SOURCES = tap.c tap.h
+noinst_PROGRAMS = rcuja-testpop
+rcuja_testpop_SOURCES = rcuja-testpop.c
+
SCRIPT_LIST = tap.sh
dist_noinst_SCRIPTS = $(SCRIPT_LIST)
--- /dev/null
+/*
+ * rcuja/testpop.c
+ *
+ * Userspace RCU library - RCU Judy Array population size test
+ *
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This program generates random populations, and shows the largest
+ * sub-class generated, as well as the distribution of sub-class size
+ * for the largest sub-class of each population.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+static int sel_pool_len = 50; /* default */
+static int nr_distrib = 2; /* default */
+//#define SEL_POOL_LEN 100
+//#define NR_POOLS 10000000ULL
+
+static uint8_t pool[256];
+static uint8_t nr_one[8];
+static uint8_t nr_2d_11[8][8];
+static uint8_t nr_2d_10[8][8];
+static uint8_t nr_2d_01[8][8];
+static uint8_t nr_2d_00[8][8];
+static int global_max_minsubclass_len = 0;
+
+static unsigned int subclass_len_distrib[256];
+
+static int verbose;
+
+/*
+ * Draw one pseudo-random byte by truncating libc random() output.
+ */
+static
+uint8_t random_char(void)
+{
+	long r = random();
+
+	return (uint8_t) r;
+}
+
+/*
+ * Dump the first sel_pool_len entries of the current population to
+ * stdout (debug aid, used in verbose mode).
+ */
+static
+void print_pool(void)
+{
+	int i;
+
+	printf("pool: ");
+	for (i = 0; i < sel_pool_len; i++) {
+		printf("%d ", (int) pool[i]);
+	}
+	printf("\n");
+}
+
+/*
+ * Fill pool[] with sel_pool_len distinct byte values drawn from
+ * 0..255: sampling without replacement from a scratch candidate
+ * array (selected slot is overwritten by the last live candidate).
+ */
+static
+void gen_pool(void)
+{
+	uint8_t candidates[256];
+	int remaining = 256;
+	int i;
+
+	memset(pool, 0, sizeof(pool));
+	for (i = 0; i < 256; i++)
+		candidates[i] = (uint8_t) i;
+	for (i = 0; i < sel_pool_len; i++) {
+		int pick = random_char() % remaining;
+
+		pool[i] = candidates[pick];
+		candidates[pick] = candidates[remaining - 1];
+		remaining--;
+	}
+}
+
+/*
+ * Tally bit statistics over the current pool.
+ *
+ * nr_distrib == 2: nr_one[b] counts pool entries with bit b set.
+ * nr_distrib == 4: for every bit pair (hi, lo), lo < hi, count the
+ * entries falling into each of the four joint-bit sub-classes.
+ */
+static
+void count_pool(void)
+{
+	int i;
+
+	memset(nr_one, 0, sizeof(nr_one));
+	memset(nr_2d_11, 0, sizeof(nr_2d_11));
+	memset(nr_2d_10, 0, sizeof(nr_2d_10));
+	memset(nr_2d_01, 0, sizeof(nr_2d_01));
+	memset(nr_2d_00, 0, sizeof(nr_2d_00));
+
+	for (i = 0; i < sel_pool_len; i++) {
+		uint8_t v = pool[i];
+
+		if (nr_distrib == 2) {
+			int bit;
+
+			for (bit = 0; bit < 8; bit++) {
+				if (v & (1U << bit))
+					nr_one[bit]++;
+			}
+		} else if (nr_distrib == 4) {
+			int hi, lo;
+
+			for (hi = 0; hi < 8; hi++) {
+				for (lo = 0; lo < hi; lo++) {
+					int hi_set = !!(v & (1U << hi));
+					int lo_set = !!(v & (1U << lo));
+
+					if (hi_set && lo_set)
+						nr_2d_11[hi][lo]++;
+					else if (hi_set)
+						nr_2d_10[hi][lo]++;
+					else if (lo_set)
+						nr_2d_01[hi][lo]++;
+					else
+						nr_2d_00[hi][lo]++;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Print the per-bit 0/1 distribution table for the current pool.
+ * Only the nr_distrib == 2 table is implemented; 4 is still a TODO.
+ */
+static
+void print_count(void)
+{
+	int bit;
+
+	printf("pool distribution:\n");
+
+	if (nr_distrib == 2) {
+		printf(" 0 1\n");
+		printf("----------\n");
+		for (bit = 0; bit < 8; bit++) {
+			int ones = nr_one[bit];
+
+			printf("%3d %3d\n", sel_pool_len - ones, ones);
+		}
+	}
+
+	if (nr_distrib == 4) {
+		/* TODO */
+	}
+	printf("\n");
+}
+
+/*
+ * 1-bit split statistics: each of the 8 bit positions splits the pool
+ * into entries with the bit set (nr_one[i]) and clear (len - nr_one[i]).
+ * Find the most balanced split, then record the size of its larger
+ * side into the global distribution and running maximum.
+ *
+ * Fix vs. original: the distance was computed in unsigned arithmetic
+ * as ((unsigned) nr_one[i] << 1) - sel_pool_len, wrapping for splits
+ * below the midpoint and relying on implementation-defined
+ * unsigned-to-int conversion to recover the negative value.  Use
+ * plain signed arithmetic (values are at most 512, no overflow), and
+ * use int/INT_MAX for the best distance, consistent with
+ * stat_count_2d().
+ */
+static
+void stat_count_1d(void)
+{
+	int overall_best_distance = INT_MAX;
+	unsigned int overall_minsubclass_len;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		/* |2 * ones - len|: 0 means a perfectly balanced split. */
+		int distance_to_best = 2 * (int) nr_one[i] - sel_pool_len;
+
+		if (distance_to_best < 0)
+			distance_to_best = -distance_to_best;
+		if (distance_to_best < overall_best_distance)
+			overall_best_distance = distance_to_best;
+	}
+	/* Larger side of best split: (|d| + len) / 2 == max(ones, zeroes). */
+	overall_minsubclass_len = (overall_best_distance + sel_pool_len) >> 1;
+	if ((int) overall_minsubclass_len > global_max_minsubclass_len)
+		global_max_minsubclass_len = overall_minsubclass_len;
+	subclass_len_distrib[overall_minsubclass_len]++;
+}
+
+/*
+ * 2-bit split statistics: every bit pair (bit_i > bit_j) splits the
+ * pool into four sub-classes (11/10/01/00); a perfectly balanced pair
+ * puts sel_pool_len/4 entries in each.  For every pair, keep the
+ * worst positive deviation (most over-populated sub-class); across
+ * all pairs, pick the pair whose worst sub-class is smallest and
+ * record that sub-class length into the global distribution and
+ * running maximum.
+ *
+ * Fix vs. original: distances were computed as
+ * ((unsigned) count << 2) - sel_pool_len, wrapping for under-populated
+ * sub-classes and relying on implementation-defined unsigned-to-int
+ * conversion to produce the negative values the "> 0" tests below
+ * depend on.  Use plain signed arithmetic (counts are at most 256,
+ * no overflow possible).
+ */
+static
+void stat_count_2d(void)
+{
+	int overall_best_distance = INT_MAX;
+	unsigned int overall_minsubclass_len = 0;
+	int bit_i, bit_j;
+
+	for (bit_i = 0; bit_i < 8; bit_i++) {
+		for (bit_j = 0; bit_j < bit_i; bit_j++) {
+			int distance_to_best[4], subclass_len[4];
+
+			/* 4 * count - len: positive when over-populated. */
+			distance_to_best[0] = 4 * (int) nr_2d_11[bit_i][bit_j] - sel_pool_len;
+			distance_to_best[1] = 4 * (int) nr_2d_10[bit_i][bit_j] - sel_pool_len;
+			distance_to_best[2] = 4 * (int) nr_2d_01[bit_i][bit_j] - sel_pool_len;
+			distance_to_best[3] = 4 * (int) nr_2d_00[bit_i][bit_j] - sel_pool_len;
+
+			subclass_len[0] = nr_2d_11[bit_i][bit_j];
+			subclass_len[1] = nr_2d_10[bit_i][bit_j];
+			subclass_len[2] = nr_2d_01[bit_i][bit_j];
+			subclass_len[3] = nr_2d_00[bit_i][bit_j];
+
+			/* Consider worst distance above best */
+			if (distance_to_best[1] > 0 && distance_to_best[1] > distance_to_best[0]) {
+				distance_to_best[0] = distance_to_best[1];
+				subclass_len[0] = subclass_len[1];
+			}
+			if (distance_to_best[2] > 0 && distance_to_best[2] > distance_to_best[0]) {
+				distance_to_best[0] = distance_to_best[2];
+				subclass_len[0] = subclass_len[2];
+			}
+			if (distance_to_best[3] > 0 && distance_to_best[3] > distance_to_best[0]) {
+				distance_to_best[0] = distance_to_best[3];
+				subclass_len[0] = subclass_len[3];
+			}
+
+			/*
+			 * If our worst distance is better than overall,
+			 * we become new best candidate.
+			 */
+			if (distance_to_best[0] < overall_best_distance) {
+				overall_best_distance = distance_to_best[0];
+				overall_minsubclass_len = subclass_len[0];
+			}
+		}
+	}
+	if ((int) overall_minsubclass_len > global_max_minsubclass_len)
+		global_max_minsubclass_len = overall_minsubclass_len;
+	subclass_len_distrib[overall_minsubclass_len]++;
+}
+
+/*
+ * Dispatch to the statistics pass matching the configured number of
+ * distributions; any other value is a programming error.
+ */
+static
+void stat_count(void)
+{
+	if (nr_distrib == 2) {
+		stat_count_1d();
+	} else if (nr_distrib == 4) {
+		stat_count_2d();
+	} else {
+		assert(0);
+	}
+}
+
+/*
+ * Print the histogram of best sub-class lengths accumulated so far,
+ * as (length, count, percentage) tuples.  Silent while still empty.
+ */
+static
+void print_distrib(void)
+{
+	unsigned long long total = 0;
+	int len;
+
+	for (len = 0; len < 256; len++)
+		total += subclass_len_distrib[len];
+	if (!total)
+		return;
+	printf("Distribution:\n");
+	for (len = 0; len < 256; len++) {
+		unsigned int count = subclass_len_distrib[len];
+
+		if (!count)
+			continue;
+		printf("(%u, %u, %llu%%) ",
+			len, count,
+			100 * (unsigned long long) count / total);
+	}
+	printf("\n");
+}
+
+/*
+ * Print progress: number of pools generated so far and the largest
+ * "best sub-class" length seen, followed by the length histogram.
+ */
+static
+void print_stat(uint64_t nr_pools)
+{
+	printf("after %llu pools, global_max_minsubclass_len: %d\n",
+		(unsigned long long) nr_pools, global_max_minsubclass_len);
+	print_distrib();
+}
+
+/*
+ * Usage: testpop [pool_len (1..256)] [nr_distrib (2 or 4)] [-v]
+ *
+ * Generates random populations forever, accumulating the best
+ * achievable sub-class split per pool and printing a progress report
+ * every 100000 pools.  Interrupt to stop.
+ */
+int main(int argc, char **argv)
+{
+	uint64_t i = 0;
+
+	srandom(time(NULL));
+
+	/* NOTE(review): atoi() reports unparsable input as 0, which the
+	 * range checks below happen to reject; strtol() would be stricter. */
+	if (argc > 1) {
+		sel_pool_len = atoi(argv[1]);
+		if (sel_pool_len > 256 || sel_pool_len < 1) {
+			printf("Wrong pool len\n");
+			return -1;
+		}
+	}
+	printf("pool len: %d\n", sel_pool_len);
+
+	if (argc > 2) {
+		nr_distrib = atoi(argv[2]);
+		/* Coarse range check only; the exact 2-or-4 check is below. */
+		if (nr_distrib > 256 || nr_distrib < 1) {
+			printf("Wrong number of distributions\n");
+			return -1;
+		}
+	}
+
+	if (argc > 3) {
+		if (!strcmp(argv[3], "-v")) {
+			verbose = 1;
+		}
+	}
+
+	printf("pool distributions: %d\n", nr_distrib);
+
+	if (nr_distrib != 2 && nr_distrib != 4) {
+		printf("Wrong number of distributions. Only 2 and 4 supported.\n");
+		return -1;
+	}
+
+	//for (i = 0; i < NR_POOLS; i++) {
+	/* Deliberately infinite: progress is printed every 100000 pools. */
+	while (1) {
+		gen_pool();
+		count_pool();
+		if (verbose) {
+			print_pool();
+			print_count();
+		}
+		stat_count();
+		if (!(i % 100000ULL))
+			print_stat(i);
+		i++;
+	}
+	/* NOTE(review): unreachable while the loop above is infinite;
+	 * kept for the commented-out bounded NR_POOLS variant. */
+	print_stat(i);
+	print_distrib();
+
+	return 0;
+}