/*
 * Userspace RCU library - RCU Judy Array Range Support
 *
 * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <pthread.h>
#include <assert.h>
#include <errno.h>

#include <urcu/rcuja.h>
#include <urcu/compiler.h>
#include <urcu/arch.h>
#include <urcu-pointer.h>
#include <urcu/uatomic.h>
#include <urcu/rcuja-range.h>
#include <urcu-flavor.h>

#include "rcuja-internal.h"
/*
 * Discussion about order of lookup/lock vs allocated node deletion.
 *
 * - If node deletion returns before call to
 *   cds_ja_range_lookup(), the node will not be found by lookup.
 * - If node deletion is called after cds_ja_range_lock() returns a
 *   non-NULL range, the deletion will wait until the lock is released
 *   before it takes place.
 * - If node deletion call/return overlaps with the call to
 *   cds_ja_range_lookup() and return from cds_ja_range_lock(), the node
 *   may or may not be found by each of cds_ja_range_lookup() and
 *   cds_ja_range_lock().
 */
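
/*
 * Reader-side usage sketch (illustration only; ja, key and use_range()
 * are hypothetical): the lookup and the lock are both performed under
 * the RCU read-side lock of the flavor used by the Judy array, and each
 * step checks for NULL, since the range may be removed concurrently at
 * any point before cds_ja_range_lock() succeeds.
 *
 *      rcu_read_lock();
 *      range = cds_ja_range_lookup(ja, key);
 *      if (range) {
 *              range = cds_ja_range_lock(range);
 *              if (range) {
 *                      use_range(range);
 *                      cds_ja_range_unlock(range);
 *              }
 *      }
 *      rcu_read_unlock();
 */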
/*
 * Discussion about order of lookup/lock vs allocated node add. Assuming
 * no concurrent delete.
 *
 * - If node add returns before call to
 *   cds_ja_range_lookup(), the node will be found by lookup.
 * - If node add is called after cds_ja_range_lookup() returns, the node
 *   will not be found by lookup.
 * - If node add call/return overlaps with the call to and return from
 *   cds_ja_range_lookup(), the node may or may not be found.
 * - If node add call/return overlaps with call to cds_ja_range_lookup()
 *   and return from cds_ja_range_lock(), in the specific case where
 *   cds_ja_range_lookup() _does_ succeed, then cds_ja_range_lock() will
 *   succeed (still assuming no concurrent deletion).
 */
/*
 * Discussion: concurrent deletion of contiguous allocated ranges.
 *
 * To ensure that merging of contiguous free ranges is always performed,
 * we need to serialize concurrent removal of contiguous allocated
 * ranges with respect to one another. This is done by locking the
 * ranges immediately prior to and after the range to remove, even if
 * those ranges are allocated. The only cases for which there is no
 * range to lock are when removing an allocated range starting at 0,
 * and/or ending at the end of the key space.
 */
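
/*
 * Worked example (hypothetical key values): with ranges [0,4] free,
 * [5,9] allocated and [10,19] allocated, deleting [5,9] locks [0,4],
 * [5,9] and [10,19] in increasing key order, merges [0,4] and [5,9]
 * into a single free range [0,9], then removes the two old ranges. A
 * concurrent deletion of [10,19] contends on the [10,19] lock, so the
 * two removals are serialized.
 */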
/*
 * Discussion: concurrent lookup vs add
 *
 * When executed concurrently with node add, the inequality lookup can
 * see no node for the looked-up range, because a range can be shrunk.
 * This can happen if, for instance, we lookup key 2 between addition of
 * a "free" range for values [1,2], and removal of the old "free" range
 * for values [0,2]. We would then fail to observe any range for key 2.
 * Given that the lookup is performed during a range transition, we can
 * safely return that there is no allocated range for key 2.
 */
/*
 * Discussion: concurrent lookup vs del
 *
 * There is no special case for lookups performed concurrently with node
 * del, because node del either replaces the node with the exact same
 * start key (see duplicates guarantees), or replaces it with a larger
 * range containing the prior range. Therefore, we are sure that
 * inequality lookups will see the larger range before the old range is
 * deleted, in whichever direction the lookup is performed.
 */
/*
 * Discussion of the type state transitions.
 *
 * State transitions of "type" always go from either:
 *
 * CDS_JA_RANGE_FREE -> CDS_JA_RANGE_REMOVED
 * or
 * CDS_JA_RANGE_ALLOCATED -> CDS_JA_RANGE_REMOVED
 *
 * A range type never changes otherwise.
 */
//#define RANGE_DEBUG

#ifdef RANGE_DEBUG
#define dbg_printf(fmt, args...)	\
	fprintf(stderr, "[debug rcuja-range %lu %s()@%s:%u] " fmt, \
		(unsigned long) gettid(), __func__, \
		__FILE__, __LINE__, ## args)
#else
#define dbg_printf(fmt, args...)	\
do {	\
	/* do nothing but check printf format */	\
	if (0)	\
		fprintf(stderr, "[debug rcuja-range %lu %s()@%s:%u] " fmt, \
			(unsigned long) gettid(), __func__, \
			__FILE__, __LINE__, ## args);	\
} while (0)
#endif
enum cds_ja_range_type {
        CDS_JA_RANGE_ALLOCATED,
        CDS_JA_RANGE_FREE,
        CDS_JA_RANGE_REMOVED,
};
/*
 * Range goes from start (inclusive) to end (inclusive).
 * Range start is used as node key in the Judy array.
 */
struct cds_ja_range {
        uint64_t start;                 /* inclusive */
        struct cds_ja_node ja_node;
        pthread_mutex_t lock;
        uint64_t end;                   /* inclusive */
        enum cds_ja_range_type type;

        /* not required on lookup fast-path */
        void *priv;
        struct rcu_head head;
};
struct cds_ja_range *cds_ja_range_lookup(struct cds_ja *ja, uint64_t key)
{
        struct cds_ja_node *node, *last_node;
        struct cds_ja_range *range;

        dbg_printf("key: %" PRIu64 "\n", key);
        node = cds_ja_lookup_below_equal(ja, key, NULL);
        if (!node)
                return NULL;
        /*
         * Get the last of duplicate chain. Adding a node to Judy array
         * duplicates inserts them at the end of the chain.
         */
        cds_ja_for_each_duplicate_rcu(node)
                last_node = node;
        range = caa_container_of(last_node, struct cds_ja_range, ja_node);

        /* Check if range is currently hidden by concurrent add */
        if (range->end < key)
                return NULL;

        /*
         * If last node in the duplicates is removed or free, we can
         * consider that either a removal or add operation is in
         * progress, or removal is the last completed operation to
         * update this range. We can therefore consider that this area
         * is not allocated.
         */
        if (range->type != CDS_JA_RANGE_ALLOCATED)
                return NULL;

        /*
         * We found an allocated range. We can return it for use with
         * RCU read-side protection for existence. However, we have no
         * mutual exclusion against removal at this point.
         */
        return range;
}
/*
 * Provide mutual exclusion against removal.
 */
struct cds_ja_range *cds_ja_range_lock(struct cds_ja_range *range)
{
        pthread_mutex_lock(&range->lock);

        if (range->type == CDS_JA_RANGE_REMOVED)
                goto removed;
        return range;

removed:
        pthread_mutex_unlock(&range->lock);
        return NULL;
}

void cds_ja_range_unlock(struct cds_ja_range *range)
{
        pthread_mutex_unlock(&range->lock);
}
void cds_ja_range_get_values(const struct cds_ja_range *range,
                uint64_t *start, uint64_t *end, void **priv)
{
        *start = range->start;
        *end = range->end;
        *priv = range->priv;
}
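
/*
 * Note: start, end and priv are never modified once a range has been
 * created (updates replace ranges rather than mutate them), so these
 * values can be read under RCU read-side protection alone; holding the
 * range lock additionally guarantees the range has not been removed.
 * Usage sketch (variable names hypothetical):
 *
 *      uint64_t start, end;
 *      void *priv;
 *
 *      cds_ja_range_get_values(range, &start, &end, &priv);
 */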
static
struct cds_ja_range *range_create(
                uint64_t start,         /* inclusive */
                uint64_t end,           /* inclusive */
                void *priv,
                enum cds_ja_range_type type)
{
        struct cds_ja_range *range;

        range = calloc(sizeof(*range), 1);
        if (!range)
                return NULL;
        range->start = start;
        range->end = end;
        range->priv = priv;
        range->type = type;
        pthread_mutex_init(&range->lock, NULL);
        return range;
}
static
void free_range(struct cds_ja_range *range);

static
void free_range_cb(struct rcu_head *head)
{
        struct cds_ja_range *range =
                caa_container_of(head, struct cds_ja_range, head);

        free_range(range);
}

static
void free_range(struct cds_ja_range *range)
{
        free(range);
}

static
void rcu_free_range(struct cds_ja *ja, struct cds_ja_range *range)
{
        cds_lfht_rcu_flavor(ja->ht)->update_call_rcu(&range->head,
                free_range_cb);
}
int cds_ja_range_add(struct cds_ja *ja,
                uint64_t start,         /* inclusive */
                uint64_t end,           /* inclusive */
                void *priv)
{
        struct cds_ja_node *old_node;
        struct cds_ja_range *old_range, *new_range, *ranges[3];
        unsigned int nr_ranges, i;
        int ret;

        if (start > end || end == UINT64_MAX || end > ja->key_max)
                return -EINVAL;

retry:
        dbg_printf("start: %" PRIu64 ", end: %" PRIu64 ", priv %p\n",
                start, end, priv);

        /*
         * Find if requested range is entirely contained within a single
         * free range.
         */
        old_node = cds_ja_lookup_below_equal(ja, start, NULL);
        /* Range hidden by concurrent add */
        if (!old_node)
                goto retry;

        old_range = caa_container_of(old_node, struct cds_ja_range, ja_node);

        /* Range hidden by concurrent add */
        if (old_range->end < start)
                goto retry;

        /* We now know that old_range overlaps with our range */
        switch (CMM_LOAD_SHARED(old_range->type)) {
        case CDS_JA_RANGE_ALLOCATED:
                return -EEXIST;
        case CDS_JA_RANGE_FREE:
                break;
        case CDS_JA_RANGE_REMOVED:
                goto retry;
        }

        /* We do not fit entirely within the range */
        if (old_range->end < end)
                return -EEXIST;

        pthread_mutex_lock(&old_range->lock);

        if (old_range->type == CDS_JA_RANGE_REMOVED) {
                pthread_mutex_unlock(&old_range->lock);
                goto retry;
        }
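
        /*
         * Example of the split performed below (hypothetical values):
         * allocating [10,19] out of the free range [0,99] creates the
         * free range [0,9], the allocated range [10,19] and the free
         * range [20,99] (the "2 free and 1 allocated" worst case). The
         * new ranges are added to the Judy array before the old [0,99]
         * free range is removed.
         */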
        /* Create replacement ranges: at most 2 free and 1 allocated */
        if (start == old_range->start) {
                if (end == old_range->end) {
                        ranges[0] = new_range = range_create(start, end,
                                priv, CDS_JA_RANGE_ALLOCATED);
                        nr_ranges = 1;
                } else {
                        assert(old_range->end > end);
                        ranges[0] = new_range = range_create(start, end,
                                priv, CDS_JA_RANGE_ALLOCATED);
                        ranges[1] = range_create(end + 1, old_range->end,
                                NULL, CDS_JA_RANGE_FREE);
                        nr_ranges = 2;
                }
        } else {
                if (end == old_range->end) {
                        assert(old_range->start < start);
                        ranges[0] = range_create(old_range->start, start - 1,
                                NULL, CDS_JA_RANGE_FREE);
                        ranges[1] = new_range = range_create(start, end,
                                priv, CDS_JA_RANGE_ALLOCATED);
                        nr_ranges = 2;
                } else {
                        assert(old_range->start < start);
                        assert(old_range->end > end);
                        ranges[0] = range_create(old_range->start, start - 1,
                                NULL, CDS_JA_RANGE_FREE);
                        ranges[1] = new_range = range_create(start, end,
                                priv, CDS_JA_RANGE_ALLOCATED);
                        ranges[2] = range_create(end + 1, old_range->end,
                                NULL, CDS_JA_RANGE_FREE);
                        nr_ranges = 3;
                }
        }
        /* Add replacement ranges to Judy array */
        for (i = 0; i < nr_ranges; i++) {
                dbg_printf("ADD RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
                        ranges[i]->start, ranges[i]->end,
                        ranges[i]->type == CDS_JA_RANGE_ALLOCATED ?
                                "allocated" : "free");
                pthread_mutex_lock(&ranges[i]->lock);
                ret = cds_ja_add(ja, ranges[i]->start, &ranges[i]->ja_node);
                assert(!ret);
        }

        /*
         * We add replacement ranges _before_ removing old ranges, so
         * concurrent traversals will always see one or the other. This
         * is OK because we temporarily have a duplicate key, and Judy
         * arrays provide key existence guarantee for lookups performed
         * concurrently with add followed by del of duplicate keys.
         */
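
        /*
         * For instance, while splitting the free range [0,99] above
         * (hypothetical values), both the old [0,99] node and the new
         * [0,9] node are briefly present as duplicates under key 0;
         * lookups see one or the other, never neither.
         */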
        dbg_printf("REM RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
                old_range->start, old_range->end,
                old_range->type == CDS_JA_RANGE_ALLOCATED ?
                        "allocated" : "free");
        /* Remove old free range */
        ret = cds_ja_del(ja, old_range->start, &old_range->ja_node);
        assert(!ret);
        old_range->type = CDS_JA_RANGE_REMOVED;
        pthread_mutex_unlock(&old_range->lock);
        for (i = 0; i < nr_ranges; i++)
                pthread_mutex_unlock(&ranges[i]->lock);

        rcu_free_range(ja, old_range);

        dbg_printf("<SUCCEED>\n");

        return 0;
}
int cds_ja_range_del(struct cds_ja *ja, struct cds_ja_range *range)
{
        struct cds_ja_node *prev_node, *next_node;
        struct cds_ja_range *new_range;
        struct cds_ja_range *merge_ranges[3], *lock_ranges[3];
        unsigned int nr_merge, nr_lock, i;
        uint64_t start, end;
        int ret;

retry:
        dbg_printf("start: %" PRIu64 ", end %" PRIu64 ", priv: %p\n",
                range->start, range->end, range->priv);

        nr_merge = 0;
        nr_lock = 0;

        /*
         * Range has been concurrently updated.
         */
        if (range->type != CDS_JA_RANGE_ALLOCATED)
                return -ENOENT;

        if (range->start > 0) {
                struct cds_ja_range *prev_range;

                prev_node = cds_ja_lookup_below_equal(ja, range->start - 1,
                        NULL);
                if (!prev_node)
                        goto retry;

                prev_range = caa_container_of(prev_node,
                        struct cds_ja_range, ja_node);
                /* Prev range temporarily hidden due to concurrent add. */
                if (prev_range->end != range->start - 1)
                        goto retry;

                lock_ranges[nr_lock++] = prev_range;
                if (prev_range->type != CDS_JA_RANGE_ALLOCATED)
                        merge_ranges[nr_merge++] = prev_range;
        }

        lock_ranges[nr_lock++] = range;
        merge_ranges[nr_merge++] = range;

        if (range->end < UINT64_MAX - 1) {
                struct cds_ja_range *next_range;

                next_node = cds_ja_lookup_below_equal(ja, range->end + 1,
                        NULL);
                /* Next range temporarily hidden due to concurrent add. */
                if (!next_node)
                        goto retry;

                next_range = caa_container_of(next_node,
                        struct cds_ja_range, ja_node);
                if (next_range->start != range->end + 1)
                        goto retry;

                lock_ranges[nr_lock++] = next_range;
                if (next_range->type != CDS_JA_RANGE_ALLOCATED)
                        merge_ranges[nr_merge++] = next_range;
        }

        /* Acquire locks in increasing key order for range merge */
        for (i = 0; i < nr_lock; i++)
                pthread_mutex_lock(&lock_ranges[i]->lock);
        if (range->type != CDS_JA_RANGE_ALLOCATED) {
                ret = -ENOENT;
                goto unlock_error;
        }
        /* Ensure they are valid */
        for (i = 0; i < nr_lock; i++) {
                if (lock_ranges[i]->type == CDS_JA_RANGE_REMOVED)
                        goto unlock_retry;
        }

        /* Create new free range */
        start = merge_ranges[0]->start;
        end = merge_ranges[nr_merge - 1]->end;
        new_range = range_create(start, end, NULL, CDS_JA_RANGE_FREE);
        pthread_mutex_lock(&new_range->lock);

        dbg_printf("ADD RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
                new_range->start, new_range->end,
                new_range->type == CDS_JA_RANGE_ALLOCATED ?
                        "allocated" : "free");

        ret = cds_ja_add(ja, start, &new_range->ja_node);
        assert(!ret);

        /* Remove old ranges */
        for (i = 0; i < nr_merge; i++) {
                dbg_printf("REM RANGE: %" PRIu64 "-%" PRIu64 " %s.\n",
                        merge_ranges[i]->start, merge_ranges[i]->end,
                        merge_ranges[i]->type == CDS_JA_RANGE_ALLOCATED ?
                                "allocated" : "free");
                ret = cds_ja_del(ja, merge_ranges[i]->start,
                        &merge_ranges[i]->ja_node);
                assert(!ret);
                merge_ranges[i]->type = CDS_JA_RANGE_REMOVED;
        }
        for (i = 0; i < nr_lock; i++)
                pthread_mutex_unlock(&lock_ranges[i]->lock);
        pthread_mutex_unlock(&new_range->lock);
        /* Free old merged ranges */
        for (i = 0; i < nr_merge; i++)
                rcu_free_range(ja, merge_ranges[i]);

        dbg_printf("<SUCCEED>\n");

        return 0;

unlock_retry:
        for (i = 0; i < nr_lock; i++)
                pthread_mutex_unlock(&lock_ranges[i]->lock);
        goto retry;

unlock_error:
        for (i = 0; i < nr_lock; i++)
                pthread_mutex_unlock(&lock_ranges[i]->lock);
        return ret;
}
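
/*
 * Usage sketch for the update side (illustration only; ja, my_priv and
 * the key values are hypothetical, and the caller is assumed to hold
 * the RCU read-side lock of the flavor used by the Judy array, as for
 * the other operations):
 *
 *      rcu_read_lock();
 *      if (!cds_ja_range_add(ja, 10, 19, my_priv)) {
 *              struct cds_ja_range *range;
 *
 *              range = cds_ja_range_lookup(ja, 15);
 *              if (range)
 *                      (void) cds_ja_range_del(ja, range);
 *      }
 *      rcu_read_unlock();
 */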
struct cds_ja *_cds_ja_range_new(unsigned int key_bits,
                const struct rcu_flavor_struct *flavor)
{
        struct cds_ja_range *range;
        struct cds_ja *ja;
        int ret;

        ja = _cds_ja_new(key_bits, flavor);
        if (!ja)
                return NULL;
        range = range_create(0, UINT64_MAX - 1, NULL, CDS_JA_RANGE_FREE);
        if (!range)
                goto error_ja;
        cds_lfht_rcu_flavor(ja->ht)->read_lock();
        ret = cds_ja_add(ja, 0, &range->ja_node);
        cds_lfht_rcu_flavor(ja->ht)->read_unlock();
        if (ret)
                goto error_free_range;
        return ja;

error_free_range:
        free_range(range);
error_ja:
        ret = cds_ja_destroy(ja);
        assert(!ret);
        return NULL;
}
int cds_ja_range_validate(struct cds_ja *ja)
{
        uint64_t iter_key, start, end, last_end = UINT64_MAX;
        struct cds_ja_node *ja_node, *last_node;
        int ret = 0;

        cds_lfht_rcu_flavor(ja->ht)->read_lock();
        cds_ja_for_each_key_rcu(ja, iter_key, ja_node) {
                struct cds_ja_range *range;
                struct cds_ja_node *first_node;

                first_node = ja_node;
                cds_ja_for_each_duplicate_rcu(ja_node)
                        last_node = ja_node;
                if (last_node != first_node) {
                        struct cds_ja_range *first_range = caa_container_of(first_node,
                                struct cds_ja_range, ja_node);
                        struct cds_ja_range *last_range = caa_container_of(last_node,
                                struct cds_ja_range, ja_node);
                        fprintf(stderr, "found duplicate node: first %" PRIu64 "-%" PRIu64 " last %" PRIu64 "-%" PRIu64 "\n",
                                first_range->start, first_range->end, last_range->start, last_range->end);
                        ret = -1;
                }
                range = caa_container_of(last_node,
                        struct cds_ja_range, ja_node);
                start = range->start;
                end = range->end;
                if (last_end != UINT64_MAX) {
                        if (start != last_end + 1) {
                                fprintf(stderr, "ja range discrepancy: last end: %" PRIu64 ", start: %" PRIu64 "\n",
                                        last_end, start);
                                ret = -1;
                        }
                }
                last_end = end;
        }
        if (last_end != UINT64_MAX - 1) {
                fprintf(stderr, "ja range error: end of last range is: %" PRIu64 "\n",
                        last_end);
                ret = -1;
        }
        cds_lfht_rcu_flavor(ja->ht)->read_unlock();
        return ret;
}
int cds_ja_range_destroy(struct cds_ja *ja,
                void (*free_priv)(void *ptr))
{
        uint64_t key;
        struct cds_ja_node *ja_node;
        int ret = 0;

        cds_lfht_rcu_flavor(ja->ht)->read_lock();
        cds_ja_for_each_key_rcu(ja, key, ja_node) {
                struct cds_ja_node *tmp_node;

                cds_ja_for_each_duplicate_safe_rcu(ja_node, tmp_node) {
                        struct cds_ja_range *range;

                        range = caa_container_of(ja_node,
                                struct cds_ja_range, ja_node);
                        ret = cds_ja_del(ja, key, &range->ja_node);
                        if (ret)
                                goto error;
                        if (free_priv)
                                free_priv(range->priv);
                        /* Alone using Judy array, OK to free now */
                        free_range(range);
                }
        }
        cds_lfht_rcu_flavor(ja->ht)->read_unlock();
        return cds_ja_destroy(ja);

error:
        cds_lfht_rcu_flavor(ja->ht)->read_unlock();
        return ret;
}