/*
 * TODO: keys are currently assumed <= sizeof(void *). Key target never freed.
 */
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include <pthread.h>

#include <urcu.h>
#include <arch_atomic.h>
#include <urcu-defer.h>
#include <urcu/jhash.h>
typedef uint32_t (*ht_hash_fct)(void *key, uint32_t length, uint32_t initval);

struct rcu_ht_node {
	void *key;
	void *data;
	struct rcu_ht_node *next;
};

struct rcu_ht {
	struct rcu_ht_node **tbl;
	ht_hash_fct hash_fct;
	void (*free_fct)(void *data);		/* fct to free data */
	uint32_t keylen;
	uint32_t hashseed;
	unsigned long size;
	pthread_mutex_t resize_mutex;		/* resize mutex: add/del mutex */
	int resize_ongoing;			/* fast-path resize check */
};
struct rcu_ht *ht_new(ht_hash_fct hash_fct, void (*free_fct)(void *data),
		      unsigned long init_size, uint32_t keylen,
		      uint32_t hashseed)
{
	struct rcu_ht *ht;

	ht = calloc(1, sizeof(struct rcu_ht));
	ht->hash_fct = hash_fct;
	ht->free_fct = free_fct;
	ht->keylen = keylen;
	ht->hashseed = hashseed;
	/* this mutex should not nest in read-side C.S. */
	pthread_mutex_init(&ht->resize_mutex, NULL);
	ht->resize_ongoing = 0;
	ht->size = init_size;
	ht->tbl = calloc(init_size, sizeof(struct rcu_ht_node *));
	return ht;
}
void *ht_lookup(struct rcu_ht *ht, void *key)
{
	unsigned long hash;
	struct rcu_ht_node *node;
	void *data = NULL;

	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size;
	smp_read_barrier_depends();	/* read size before links */

	rcu_read_lock();
	node = rcu_dereference(ht->tbl[hash]);
	while (node) {
		if (node->key == key) {
			data = node->data;
			break;
		}
		node = rcu_dereference(node->next);
	}
	rcu_read_unlock();

	return data;
}
/*
 * Will re-try until either:
 * - The key is already there (-EEXIST)
 * - We successfully add the key at the head of a table bucket.
 */
int ht_add(struct rcu_ht *ht, void *key, void *data)
{
	struct rcu_ht_node *node, *old_head, *new_head;
	unsigned long hash;
	int ret = 0;

	new_head = calloc(1, sizeof(struct rcu_ht_node));
	new_head->key = key;
	new_head->data = data;
	/* here comes the fun and tricky part.
	 * Add at the beginning with a cmpxchg.
	 * Hold a read lock between the moment the first element is read
	 * and the nodes traversal (to find duplicates). This ensures
	 * the head pointer has not been reclaimed when cmpxchg is done.
	 * Always adding at the head ensures that we would have to
	 * re-try if a new item has been added concurrently. So we ensure that
	 * we never add duplicates. */
retry:
	rcu_read_lock();

	if (unlikely(ht->resize_ongoing)) {
		rcu_read_unlock();
		/*
		 * Wait for resize to complete before continuing.
		 */
		ret = pthread_mutex_lock(&ht->resize_mutex);
		assert(!ret);
		ret = pthread_mutex_unlock(&ht->resize_mutex);
		assert(!ret);
		goto retry;
	}

	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size;

	old_head = node = rcu_dereference(ht->tbl[hash]);
	while (node) {
		if (node->key == key) {
			ret = -EEXIST;
			free(new_head);	/* never published: plain free is safe */
			goto end;
		}
		node = rcu_dereference(node->next);
	}
	new_head->next = old_head;
	if (rcu_cmpxchg_pointer(&ht->tbl[hash], old_head, new_head) != old_head)
		goto restart;
end:
	rcu_read_unlock();
	return ret;

	/* restart loop, release and re-take the read lock to be kind to GP */
restart:
	rcu_read_unlock();
	goto retry;
}
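
/*
 * Illustrative sketch (not part of the original file) of the add-at-head
 * retry pattern described in the comment inside ht_add() above: link the
 * new node in front of the head that was just read, then publish it with a
 * single cmpxchg on the bucket head. If another writer changed the head in
 * the meantime, the cmpxchg fails and the whole operation is retried. The
 * real ht_add() additionally walks the bucket under the read lock to return
 * -EEXIST on duplicates; this sketch only shows the publication skeleton.
 */
static void example_push_head(struct rcu_ht_node **head,
			      struct rcu_ht_node *new_node)
{
	struct rcu_ht_node *old_head;

	for (;;) {
		rcu_read_lock();
		old_head = rcu_dereference(*head);
		new_node->next = old_head;
		if (rcu_cmpxchg_pointer(head, old_head, new_node) == old_head) {
			rcu_read_unlock();
			return;	/* published */
		}
		/* Lost the race: drop the read lock (be kind to GP) and retry. */
		rcu_read_unlock();
	}
}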
/*
 * Restart until we successfully remove the entry, or no entry is left
 * ((void *)(unsigned long)-ENOENT).
 * Deal with concurrent stealers by verifying that there are no elements
 * in the list still pointing to the element stolen. (del_node)
 */
void *ht_steal(struct rcu_ht *ht, void *key)
{
	struct rcu_ht_node **prev, *node, *del_node = NULL;
	unsigned long hash;
	void *data;
	int ret;

retry:
	rcu_read_lock();

	if (unlikely(ht->resize_ongoing)) {
		rcu_read_unlock();
		/*
		 * Wait for resize to complete before continuing.
		 */
		ret = pthread_mutex_lock(&ht->resize_mutex);
		assert(!ret);
		ret = pthread_mutex_unlock(&ht->resize_mutex);
		assert(!ret);
		goto retry;
	}

	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size;

	prev = &ht->tbl[hash];
	node = rcu_dereference(*prev);
	for (;;) {
		if (!node) {
			if (del_node) {
				/* Stolen node is not reachable anymore. */
				data = del_node->data;
				goto end;
			}
			data = (void *)(unsigned long)-ENOENT;
			goto end;
		}
		/* On verification passes, look for the stolen node itself. */
		if (del_node ? node == del_node : node->key == key)
			break;
		prev = &node->next;
		node = rcu_dereference(*prev);
	}
	/* Found it ! pointer to object is in "prev" */
	if (rcu_cmpxchg_pointer(prev, node, node->next) != node)
		goto restart;
	/*
	 * From that point, we own node. Note that there can still be concurrent
	 * RCU readers using it. We can free it outside of read lock after a GP.
	 */
	del_node = node;
	goto restart;	/* verify no element still points to the stolen node */

end:
	rcu_read_unlock();
	if (del_node)
		call_rcu(free, del_node);
	return data;

	/* restart loop, release and re-take the read lock to be kind to GP */
restart:
	rcu_read_unlock();
	goto retry;
}
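
/*
 * Illustrative sketch (not part of the original file) of the resize
 * handshake used by ht_add() and ht_steal() above: writers peek at the
 * resize_ongoing flag on their fast path; if a resize is in flight, they
 * drop the read lock first (the resize mutex must not nest in a read-side
 * C.S.), then take and release the mutex, which simply blocks until
 * ht_resize() is done, and finally retry their whole operation so the hash
 * is recomputed against the new size.
 */
static void example_wait_for_resize(struct rcu_ht *ht)
{
	int ret;

	if (unlikely(ht->resize_ongoing)) {
		ret = pthread_mutex_lock(&ht->resize_mutex);
		assert(!ret);
		ret = pthread_mutex_unlock(&ht->resize_mutex);
		assert(!ret);
		/* the caller restarts its add/steal from the top here */
	}
}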
int ht_delete(struct rcu_ht *ht, void *key)
{
	void *data;

	data = ht_steal(ht, key);
	if (data && data != (void *)(unsigned long)-ENOENT) {
		call_rcu(ht->free_fct, data);
		return 0;
	}
	return -ENOENT;
}
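
/*
 * Illustrative caller-side check (not part of the original file): ht_steal()
 * reports "key not found" by returning -ENOENT encoded in the pointer value,
 * so callers must compare against that sentinel (as ht_delete() does above)
 * before treating the result as data they now own.
 */
static void *example_steal_or_null(struct rcu_ht *ht, void *key)
{
	void *data;

	data = ht_steal(ht, key);
	if (data == (void *)(unsigned long)-ENOENT)
		return NULL;	/* key was not present */
	return data;		/* caller now owns the stolen data */
}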
/* Delete all old elements. Allow concurrent writer accesses. */
int ht_delete_all(struct rcu_ht *ht)
{
	unsigned long i;
	struct rcu_ht_node **prev, *node, *inext;
	int ret;

	/*
	 * Mutual exclusion with resize operations, but let add/steal execute
	 * concurrently. This is OK because we operate only on the heads.
	 */
	ret = pthread_mutex_lock(&ht->resize_mutex);
	assert(!ret);

	for (i = 0; i < ht->size; i++) {
		prev = &ht->tbl[i];
		/*
		 * Cut the head. After that, we own the first element.
		 */
		node = rcu_xchg_pointer(prev, NULL);
		/*
		 * We manage a list shared with concurrent writers and readers.
		 * Note that a concurrent add may or may not be deleted by us,
		 * depending on whether it arrives before or after the head is cut.
		 * "node" points to our first node. Remove first elements
		 * one by one.
		 */
		while (node) {
			prev = &node->next;
			inext = rcu_xchg_pointer(prev, NULL);
			/*
			 * "node" is the first element of the list we have cut.
			 * We therefore own it, no concurrent writer may delete
			 * it. There can only be concurrent lookups. Concurrent
			 * add can only be done on a bucket head, but we've cut
			 * it already. inext is also owned by us, because we
			 * have exchanged it for "NULL". It will therefore be
			 * safe to use it after a G.P.
			 */
			call_rcu(ht->free_fct, node->data);
			call_rcu(free, node);
			node = inext;
		}
	}

	ret = pthread_mutex_unlock(&ht->resize_mutex);
	assert(!ret);
	return 0;
}
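
/*
 * Illustrative sketch (not part of the original file) condensing the
 * head-cutting scheme ht_delete_all() relies on: exchanging a bucket head
 * with NULL detaches the whole chain from concurrent adders in one step,
 * and exchanging each ->next link before freeing keeps the dismantling
 * safe with respect to concurrent stealers. The actual frees are deferred
 * with call_rcu() so pre-existing readers can finish traversing. Unlike
 * the real function, this sketch does not take the resize mutex.
 */
static void example_cut_and_free_bucket(struct rcu_ht *ht,
					struct rcu_ht_node **head)
{
	struct rcu_ht_node **prev, *node, *inext;

	prev = head;
	node = rcu_xchg_pointer(prev, NULL);	/* we now own the first element */
	while (node) {
		prev = &node->next;
		inext = rcu_xchg_pointer(prev, NULL);	/* detach the rest too */
		call_rcu(ht->free_fct, node->data);
		call_rcu(free, node);
		node = inext;
	}
}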
/*
 * Should only be called when no more concurrent readers nor writers can
 * possibly access the table.
 */
int ht_destroy(struct rcu_ht *ht)
{
	int ret;

	ret = ht_delete_all(ht);
	free(ht->tbl);
	free(ht);
	return ret;
}
static void ht_resize_grow(struct rcu_ht *ht)
{
	unsigned long i, new_size, old_size;
	unsigned long hash;
	struct rcu_ht_node **new_tbl, **old_tbl;
	struct rcu_ht_node *node, *new_node, *tmp;

	old_size = ht->size;
	old_tbl = ht->tbl;
	new_size = old_size << 1;
	new_tbl = calloc(new_size, sizeof(struct rcu_ht_node *));

	for (i = 0; i < old_size; i++) {
		/*
		 * Re-hash each entry, insert in new table.
		 * It's important that a reader looking for a key _will_ find it
		 * if it's in the table.
		 * Copy each node. (just the node, not ->data)
		 */
		node = old_tbl[i];
		while (node) {
			hash = ht->hash_fct(node->key, ht->keylen, ht->hashseed)
				% new_size;
			new_node = malloc(sizeof(struct rcu_ht_node));
			new_node->key = node->key;
			new_node->data = node->data;
			new_node->next = new_tbl[hash];	/* add to head */
			new_tbl[hash] = new_node;
			node = node->next;
		}
	}

	ht->tbl = new_tbl;
	smp_wmb();	/* write links and table before changing size */
	ht->size = new_size;

	/* Ensure all concurrent lookups use new size and table */
	synchronize_rcu();

	for (i = 0; i < old_size; i++) {
		node = old_tbl[i];
		while (node) {
			tmp = node->next;
			free(node);
			node = tmp;
		}
	}
	free(old_tbl);
}
static void ht_resize_shrink(struct rcu_ht *ht)
{
	unsigned long i, new_size;
	struct rcu_ht_node **new_tbl;
	struct rcu_ht_node **prev, *node;

	if (ht->size == 1)
		return;

	new_size = ht->size >> 1;

	for (i = 0; i < new_size; i++) {
		/* Link end of bucket i with first entry of bucket new_size + i */
		prev = &ht->tbl[i];
		node = *prev;
		while (node) {
			prev = &node->next;
			node = *prev;
		}
		*prev = ht->tbl[new_size + i];
	}
	smp_wmb();	/* write links before changing size */
	ht->size = new_size;

	/* Ensure all concurrent lookups use new size */
	synchronize_rcu();

	new_tbl = realloc(ht->tbl, new_size * sizeof(struct rcu_ht_node *));
	/* shrinking, pointers should not move */
	assert(new_tbl == ht->tbl);
}
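
/*
 * Small illustration (not from the original file) of the bucket arithmetic
 * behind the two resize paths: with the "hash % size" placement used by
 * ht_lookup(), doubling the table can move an entry from bucket h % size to
 * bucket (h % size) + size, and halving it folds bucket new_size + i back
 * into bucket i, which is why grow re-hashes every node and shrink appends
 * each upper bucket to its lower counterpart.
 */
static unsigned long example_bucket_after_doubling(uint32_t h,
						   unsigned long old_size)
{
	/* e.g. h = 13, old_size = 8: bucket 5 before, bucket 13 afterwards */
	return h % (old_size << 1);
}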
/*
 * growth: >0: *2, <0: /2
 */
void ht_resize(struct rcu_ht *ht, int growth)
{
	int ret;

	ret = pthread_mutex_lock(&ht->resize_mutex);
	assert(!ret);
	ht->resize_ongoing = 1;
	synchronize_rcu();	/* wait for in-flight add/steal fast paths */
	/* All add/remove are waiting on the mutex. */
	if (growth > 0)
		ht_resize_grow(ht);
	else if (growth < 0)
		ht_resize_shrink(ht);
	ht->resize_ongoing = 0;
	ret = pthread_mutex_unlock(&ht->resize_mutex);
	assert(!ret);
}
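
/*
 * Illustrative calls (not part of the original file) following the "growth"
 * convention documented above ht_resize(): a positive argument doubles the
 * table, a negative one halves it.
 */
static void example_resize(struct rcu_ht *ht)
{
	ht_resize(ht, 1);	/* size * 2 */
	ht_resize(ht, -1);	/* size / 2 */
}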
/*
 * Expects keys <= pointer size to be encoded in the pointer itself.
 */
uint32_t ht_jhash(void *key, uint32_t length, uint32_t initval)
{
	uint32_t ret;
	void *vkey;

	if (length <= sizeof(void *))
		vkey = &key;	/* hash the pointer bits themselves */
	else
		vkey = key;	/* hash the buffer the pointer refers to */
	ret = jhash(vkey, length, initval);
	return ret;
}
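
/*
 * End-to-end usage sketch (not part of the original file; the table size,
 * seed and key values are arbitrary examples). It relies on the convention
 * documented above: keys no larger than a pointer are encoded directly in
 * the pointer value, so an integer key is simply cast to void *, and data
 * must be freeable by the destructor passed to ht_new() (here: free()).
 */
static void example_usage(void)
{
	struct rcu_ht *ht;

	/* 256 buckets, pointer-sized keys, arbitrary hash seed. */
	ht = ht_new(ht_jhash, free, 256, sizeof(void *), 0x42);

	ht_add(ht, (void *)42UL, calloc(1, 16));	/* the pointer is the key */
	ht_delete(ht, (void *)42UL);			/* frees the data after a GP */

	ht_destroy(ht);
}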