rcu lf hash table runtime fixes
[userspace-rcu.git] / rculfhash.c
CommitLineData
5e28c532 1/*
abc490a1
MD
2 * rculfhash.c
3 *
4 * Userspace RCU library - Lock-Free Expandable RCU Hash Table
5 *
6 * Copyright 2010-2011 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
5e28c532
MD
21 */
22
2ed95849
MD
23#define _LGPL_SOURCE
24#include <stdlib.h>
e0ba718a
MD
25#include <errno.h>
26#include <assert.h>
27#include <stdio.h>
abc490a1 28#include <stdint.h>
f000907d 29#include <string.h>
e0ba718a 30
2ed95849 31#include <urcu.h>
abc490a1 32#include <urcu-call-rcu.h>
a42cc659
MD
33#include <urcu/arch.h>
34#include <urcu/uatomic.h>
674f7a69 35#include <urcu/jhash.h>
a42cc659 36#include <urcu/compiler.h>
abc490a1 37#include <urcu/rculfhash.h>
5e28c532 38#include <stdio.h>
464a1ec9 39#include <pthread.h>
44395fb7 40
f000907d
MD
41#define BUCKET_SIZE_RESIZE_THRESHOLD 32
42#define MAX_NR_BUCKETS 1048576 /* 1M buckets */
2ed95849 43
abc490a1
MD
44#ifndef max
45#define max(a, b) ((a) > (b) ? (a) : (b))
46#endif
2ed95849 47
395270b6 48struct rcu_table {
abc490a1
MD
49 unsigned long size; /* always a power of 2 */
50 struct rcu_head head;
395270b6
MD
51 struct rcu_ht_node *tbl[0];
52};
53
2ed95849 54struct rcu_ht {
395270b6 55 struct rcu_table *t; /* shared */
2ed95849 56 ht_hash_fct hash_fct;
abc490a1 57 void *hashseed;
464a1ec9 58 pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */
abc490a1
MD
59 unsigned long target_size;
60 void (*ht_call_rcu)(struct rcu_head *head,
61 void (*func)(struct rcu_head *head));
2ed95849
MD
62};
63
abc490a1
MD
64struct rcu_resize_work {
65 struct rcu_head head;
2ed95849 66 struct rcu_ht *ht;
abc490a1 67};
2ed95849 68
abc490a1 69static
f000907d 70void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth);
abc490a1
MD
71
72static
f000907d
MD
73void check_resize(struct rcu_ht *ht, struct rcu_table *t,
74 unsigned long chain_len)
abc490a1 75{
f000907d 76 //printf("check resize chain len %lu\n", chain_len);
abc490a1 77 if (chain_len >= BUCKET_SIZE_RESIZE_THRESHOLD)
f000907d 78 ht_resize_lazy(ht, t, chain_len / BUCKET_SIZE_RESIZE_THRESHOLD);
2ed95849
MD
79}
80
abc490a1
MD
81/*
82 * Algorithm to reverse bits in a word by lookup table, extended to
83 * 64-bit words.
84 * ref.
85 * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
86 */
87
88static const uint8_t BitReverseTable256[256] =
2ed95849 89{
abc490a1
MD
90#define R2(n) (n), (n) + 2*64, (n) + 1*64, (n) + 3*64
91#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16)
92#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 )
93 R6(0), R6(2), R6(1), R6(3)
94};
95#undef R2
96#undef R4
97#undef R6
2ed95849 98
abc490a1
MD
99static
100uint8_t bit_reverse_u8(uint8_t v)
101{
102 return BitReverseTable256[v];
103}
ab7d5fc6 104
abc490a1
MD
105static __attribute__((unused))
106uint32_t bit_reverse_u32(uint32_t v)
107{
108 return ((uint32_t) bit_reverse_u8(v) << 24) |
109 ((uint32_t) bit_reverse_u8(v >> 8) << 16) |
110 ((uint32_t) bit_reverse_u8(v >> 16) << 8) |
111 ((uint32_t) bit_reverse_u8(v >> 24));
2ed95849
MD
112}
113
abc490a1
MD
114static __attribute__((unused))
115uint64_t bit_reverse_u64(uint64_t v)
2ed95849 116{
abc490a1
MD
117 return ((uint64_t) bit_reverse_u8(v) << 56) |
118 ((uint64_t) bit_reverse_u8(v >> 8) << 48) |
119 ((uint64_t) bit_reverse_u8(v >> 16) << 40) |
120 ((uint64_t) bit_reverse_u8(v >> 24) << 32) |
121 ((uint64_t) bit_reverse_u8(v >> 32) << 24) |
122 ((uint64_t) bit_reverse_u8(v >> 40) << 16) |
123 ((uint64_t) bit_reverse_u8(v >> 48) << 8) |
124 ((uint64_t) bit_reverse_u8(v >> 56));
125}
126
127static
128unsigned long bit_reverse_ulong(unsigned long v)
129{
130#if (CAA_BITS_PER_LONG == 32)
131 return bit_reverse_u32(v);
132#else
133 return bit_reverse_u64(v);
134#endif
135}
136
137static
138struct rcu_ht_node *clear_flag(struct rcu_ht_node *node)
139{
140 return (struct rcu_ht_node *) (((unsigned long) node) & ~0x1);
141}
142
143static
144int is_removed(struct rcu_ht_node *node)
145{
146 return ((unsigned long) node) & 0x1;
147}
148
149static
150struct rcu_ht_node *flag_removed(struct rcu_ht_node *node)
151{
152 return (struct rcu_ht_node *) (((unsigned long) node) | 0x1);
153}
154
155static
156void _uatomic_max(unsigned long *ptr, unsigned long v)
157{
158 unsigned long old1, old2;
159
160 old1 = uatomic_read(ptr);
161 do {
162 old2 = old1;
163 if (old2 >= v)
164 break;
165 } while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2);
166}
167
168static
f000907d 169void _ht_add(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
abc490a1
MD
170{
171 struct rcu_ht_node *iter_prev = NULL, *iter = NULL;
172
f000907d
MD
173 if (!t->size)
174 return;
abc490a1
MD
175 for (;;) {
176 unsigned long chain_len = 0;
177
178 iter_prev = rcu_dereference(t->tbl[node->hash & (t->size - 1)]);
f000907d
MD
179 //printf("iter prev %p hash %lu bucket %lu\n", iter_prev,
180 // node->hash, node->hash & (t->size - 1));
abc490a1
MD
181 assert(iter_prev);
182 assert(iter_prev->reverse_hash <= node->reverse_hash);
183 for (;;) {
184 iter = clear_flag(rcu_dereference(iter_prev->next));
185 if (unlikely(!iter))
186 break;
187 if (iter->reverse_hash < node->reverse_hash)
188 break;
189 iter_prev = iter;
f000907d 190 check_resize(ht, t, ++chain_len);
abc490a1
MD
191 }
192 /* add in iter_prev->next */
193 if (is_removed(iter))
194 continue;
f000907d 195 assert(node != iter);
abc490a1 196 node->next = iter;
f000907d 197 assert(iter_prev != node);
abc490a1
MD
198 if (uatomic_cmpxchg(&iter_prev->next, iter, node) != iter)
199 continue;
f000907d 200 break;
464a1ec9 201 }
abc490a1 202}
464a1ec9 203
abc490a1
MD
204static
205int _ht_remove(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
206{
207 struct rcu_ht_node *iter_prev, *iter, *next, *old;
208 unsigned long chain_len;
209 int found, ret = 0;
210 int flagged = 0;
5e28c532 211
abc490a1
MD
212retry:
213 chain_len = 0;
214 found = 0;
215 iter_prev = rcu_dereference(t->tbl[node->hash & (t->size - 1)]);
216 assert(iter_prev);
217 assert(iter_prev->reverse_hash <= node->reverse_hash);
2ed95849 218 for (;;) {
abc490a1
MD
219 iter = clear_flag(rcu_dereference(iter_prev->next));
220 if (unlikely(!iter))
221 break;
222 if (iter->reverse_hash < node->reverse_hash)
223 break;
224 if (iter == node) {
225 found = 1;
2ed95849
MD
226 break;
227 }
abc490a1
MD
228 iter_prev = iter;
229 }
230 if (!found) {
231 ret = -ENOENT;
232 goto end;
233 }
234 next = rcu_dereference(iter->next);
235 if (!flagged) {
236 if (is_removed(next)) {
237 ret = -ENOENT;
2ed95849
MD
238 goto end;
239 }
abc490a1 240 /* set deletion flag */
f000907d
MD
241 if ((old = uatomic_cmpxchg(&iter->next, next,
242 flag_removed(next))) != next) {
abc490a1
MD
243 if (old == flag_removed(next)) {
244 ret = -ENOENT;
245 goto end;
246 } else {
247 goto retry;
248 }
249 }
250 flagged = 1;
2ed95849 251 }
abc490a1
MD
252 /*
253 * Remove the element from the list. Retry if there has been a
254 * concurrent add (there cannot be a concurrent delete, because
255 * we won the deletion flag cmpxchg).
256 */
257 if (uatomic_cmpxchg(&iter_prev->next, iter, clear_flag(next)) != iter)
258 goto retry;
2ed95849 259end:
2ed95849 260 return ret;
abc490a1 261}
2ed95849 262
abc490a1
MD
263static
264void init_table(struct rcu_ht *ht, struct rcu_table *t,
265 unsigned long first, unsigned long len)
266{
267 unsigned long i, end;
268
269 end = first + len;
270 for (i = first; i < end; i++) {
271 /* Update table size when power of two */
272 if (i != 0 && !(i & (i - 1)))
273 t->size = i;
274 t->tbl[i] = calloc(1, sizeof(struct rcu_ht_node));
275 t->tbl[i]->dummy = 1;
276 t->tbl[i]->hash = i;
277 t->tbl[i]->reverse_hash = bit_reverse_ulong(i);
278 _ht_add(ht, t, t->tbl[i]);
279 }
280 t->size = end;
2ed95849
MD
281}
282
abc490a1
MD
283struct rcu_ht *ht_new(ht_hash_fct hash_fct,
284 void *hashseed,
285 unsigned long init_size,
286 void (*ht_call_rcu)(struct rcu_head *head,
287 void (*func)(struct rcu_head *head)))
288{
289 struct rcu_ht *ht;
290
291 ht = calloc(1, sizeof(struct rcu_ht));
292 ht->hash_fct = hash_fct;
293 ht->hashseed = hashseed;
f000907d 294 ht->ht_call_rcu = ht_call_rcu;
abc490a1
MD
295 /* this mutex should not nest in read-side C.S. */
296 pthread_mutex_init(&ht->resize_mutex, NULL);
297 ht->t = calloc(1, sizeof(struct rcu_table)
298 + (max(init_size, 1) * sizeof(struct rcu_ht_node *)));
299 ht->t->size = 0;
f000907d 300 pthread_mutex_lock(&ht->resize_mutex);
abc490a1 301 init_table(ht, ht->t, 0, max(init_size, 1));
f000907d 302 pthread_mutex_unlock(&ht->resize_mutex);
abc490a1 303 ht->target_size = ht->t->size;
abc490a1
MD
304 return ht;
305}
306
307struct rcu_ht_node *ht_lookup(struct rcu_ht *ht, void *key)
2ed95849 308{
395270b6 309 struct rcu_table *t;
abc490a1
MD
310 struct rcu_ht_node *node;
311 unsigned long hash, reverse_hash;
2ed95849 312
abc490a1
MD
313 hash = ht->hash_fct(ht->hashseed, key);
314 reverse_hash = bit_reverse_ulong(hash);
464a1ec9 315
395270b6 316 t = rcu_dereference(ht->t);
abc490a1 317 node = rcu_dereference(t->tbl[hash & (t->size - 1)]);
2ed95849 318 for (;;) {
abc490a1
MD
319 if (unlikely(!node))
320 break;
321 if (node->reverse_hash > reverse_hash) {
322 node = NULL;
323 break;
2ed95849
MD
324 }
325 if (node->key == key) {
abc490a1
MD
326 if (is_removed(rcu_dereference(node->next)))
327 node = NULL;
2ed95849
MD
328 break;
329 }
abc490a1 330 node = clear_flag(rcu_dereference(node->next));
2ed95849 331 }
abc490a1
MD
332 return node;
333}
e0ba718a 334
f000907d 335void ht_add(struct rcu_ht *ht, struct rcu_ht_node *node)
abc490a1
MD
336{
337 struct rcu_table *t;
ab7d5fc6 338
abc490a1
MD
339 node->hash = ht->hash_fct(ht->hashseed, node->key);
340 node->reverse_hash = bit_reverse_ulong((unsigned long) node->hash);
2ed95849 341
abc490a1 342 t = rcu_dereference(ht->t);
f000907d 343 _ht_add(ht, t, node);
2ed95849
MD
344}
345
abc490a1 346int ht_remove(struct rcu_ht *ht, struct rcu_ht_node *node)
2ed95849 347{
abc490a1
MD
348 struct rcu_table *t;
349
350 t = rcu_dereference(ht->t);
abc490a1 351 return _ht_remove(ht, t, node);
2ed95849 352}
ab7d5fc6 353
abc490a1
MD
354static
355int ht_delete_dummy(struct rcu_ht *ht)
674f7a69 356{
395270b6 357 struct rcu_table *t;
abc490a1
MD
358 struct rcu_ht_node *node;
359 unsigned long i;
674f7a69 360
abc490a1
MD
361 t = ht->t;
362 /* Check that the table is empty */
363 node = t->tbl[0];
364 do {
365 if (!node->dummy)
366 return -EPERM;
367 node = node->next;
368 } while (node);
369 /* Internal sanity check: all nodes left should be dummy */
395270b6 370 for (i = 0; i < t->size; i++) {
abc490a1
MD
371 assert(t->tbl[i]->dummy);
372 free(t->tbl[i]);
674f7a69 373 }
abc490a1 374 return 0;
674f7a69
MD
375}
376
377/*
378 * Should only be called when no more concurrent readers nor writers can
379 * possibly access the table.
380 */
5e28c532 381int ht_destroy(struct rcu_ht *ht)
674f7a69 382{
5e28c532
MD
383 int ret;
384
abc490a1
MD
385 ret = ht_delete_dummy(ht);
386 if (ret)
387 return ret;
395270b6 388 free(ht->t);
674f7a69 389 free(ht);
5e28c532 390 return ret;
674f7a69
MD
391}
392
abc490a1
MD
393static
394void ht_free_table_cb(struct rcu_head *head)
395{
396 struct rcu_table *t =
397 caa_container_of(head, struct rcu_table, head);
398 free(t);
399}
400
401/* called with resize mutex held */
402static
403void _do_ht_resize(struct rcu_ht *ht)
464a1ec9 404{
abc490a1 405 unsigned long new_size, old_size;
395270b6 406 struct rcu_table *new_t, *old_t;
464a1ec9 407
f000907d
MD
408 //return; //TEST
409
395270b6
MD
410 old_t = ht->t;
411 old_size = old_t->size;
464a1ec9 412
abc490a1
MD
413 new_size = CMM_LOAD_SHARED(ht->target_size);
414 if (old_size == new_size)
464a1ec9 415 return;
f000907d 416 new_t = malloc(sizeof(struct rcu_table)
abc490a1 417 + (new_size * sizeof(struct rcu_ht_node *)));
f000907d
MD
418 assert(new_size > old_size);
419 memcpy(&new_t->tbl, &old_t->tbl,
420 old_size * sizeof(struct rcu_ht_node *));
421 init_table(ht, new_t, old_size, new_size - old_size);
422 new_t->size = new_size;
423 /* Changing table and size atomically wrt lookups */
424 rcu_assign_pointer(ht->t, new_t);
425 ht->ht_call_rcu(&old_t->head, ht_free_table_cb);
464a1ec9
MD
426}
427
abc490a1 428static
f000907d
MD
429void resize_target_update(struct rcu_ht *ht, struct rcu_table *t,
430 int growth_order)
464a1ec9 431{
f000907d
MD
432 unsigned long new_size = t->size << growth_order;
433
434 if (new_size > MAX_NR_BUCKETS)
435 new_size = MAX_NR_BUCKETS;
436 //printf("resize update prevtarget %lu current %lu order %d\n",
437 // ht->target_size, t->size, growth_order);
438 _uatomic_max(&ht->target_size, new_size);
464a1ec9
MD
439}
440
464a1ec9
MD
441void ht_resize(struct rcu_ht *ht, int growth)
442{
f000907d 443 resize_target_update(ht, rcu_dereference(ht->t), growth);
abc490a1
MD
444 pthread_mutex_lock(&ht->resize_mutex);
445 _do_ht_resize(ht);
446 pthread_mutex_unlock(&ht->resize_mutex);
447}
464a1ec9 448
abc490a1
MD
449static
450void do_resize_cb(struct rcu_head *head)
451{
452 struct rcu_resize_work *work =
453 caa_container_of(head, struct rcu_resize_work, head);
454 struct rcu_ht *ht = work->ht;
455
456 pthread_mutex_lock(&ht->resize_mutex);
457 _do_ht_resize(ht);
458 pthread_mutex_unlock(&ht->resize_mutex);
459 free(work);
464a1ec9
MD
460}
461
abc490a1 462static
f000907d 463void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth)
ab7d5fc6 464{
abc490a1
MD
465 struct rcu_resize_work *work;
466
467 work = malloc(sizeof(*work));
468 work->ht = ht;
f000907d 469 resize_target_update(ht, t, growth);
abc490a1 470 ht->ht_call_rcu(&work->head, do_resize_cb);
ab7d5fc6 471}
This page took 0.053107 seconds and 4 git commands to generate.