automake: Rename INCLUDES to AM_CPPFLAGS (new name)
[userspace-rcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfcqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
56 #ifdef __linux__
/*
 * Linux variant: forward directly to mremap(2).
 * Returns whatever mremap() returns (MAP_FAILED on error).
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	return mremap(old_address, old_size, new_size, flags);
}
63 #else
64
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Stand-in for mremap() on systems other than Linux.
 *
 * It never resizes anything: every call reports failure, which makes the
 * caller fall back to mapping a separate chunk. Callers must not request
 * MREMAP_MAYMOVE. This is deliberately not a general-purpose replacement.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	(void) old_address;
	(void) old_size;
	(void) new_size;

	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
80 #endif
81
/* Sleep delay in us, passed to usleep() while waiting for readers. */
#define RCU_SLEEP_DELAY 1000
/* Number of reader slots provided by the very first arena chunk. */
#define INIT_NR_THREADS 8
/*
 * Size in bytes of the first arena chunk: chunk header followed by
 * INIT_NR_THREADS reader slots. Fully parenthesized so the macro can be
 * used safely inside larger expressions.
 */
#define ARENA_INIT_ALLOC			\
	(sizeof(struct registry_chunk)		\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS 100
93
/*
 * Reference count of library users: incremented by rcu_bp_init() and
 * decremented by rcu_bp_exit(), both under init_lock.
 */
static
int rcu_bp_refcount;

/* Run automatically at load/unload time, and also called explicitly. */
static
void __attribute__((constructor)) rcu_bp_init(void);
static
void __attribute__((destructor)) rcu_bp_exit(void);

/*
 * Held by synchronize_rcu(), by thread registration/unregistration and
 * across fork (see rcu_bp_before_fork()).
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;

/* Serializes rcu_bp_init() and rcu_bp_exit(). */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
/* Set to 1 by rcu_bp_init() once the pthread key has been created. */
static int initialized;

/*
 * Per-thread key holding the thread's struct rcu_reader; its destructor
 * (urcu_bp_thread_exit_notifier) unregisters the thread at exit.
 */
static pthread_key_t urcu_bp_key;

#ifdef DEBUG_YIELD
unsigned int rcu_yield_active;
DEFINE_URCU_TLS(unsigned int, rcu_rand_yield);
#endif

/* Global grace-period state; its phase bit is flipped by synchronize_rcu(). */
struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };

/*
 * Pointer to registry elements. Written to only by each individual reader. Read
 * by both the reader and the writers.
 */
DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);

/* List of all registered readers, linked through rcu_reader.node. */
static CDS_LIST_HEAD(registry);
123
/*
 * Header of one mmap'ed arena chunk. The reader slots (struct rcu_reader)
 * are stored back to back in data[].
 */
struct registry_chunk {
	size_t data_len;		/* data length: bytes available in data[] */
	size_t used;			/* amount of data used: bytes of data[] handed out */
	struct cds_list_head node;	/* chunk_list node */
	char data[];			/* storage for the reader slots */
};
130
/* Arena from which reader slots are allocated: a list of registry_chunk. */
struct registry_arena {
	struct cds_list_head chunk_list;	/* chunks, linked through registry_chunk.node */
};

/* The single arena used by this file; starts with an empty chunk list. */
static struct registry_arena registry_arena = {
	.chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
};
138
139 /* Saved fork signal mask, protected by rcu_gp_lock */
140 static sigset_t saved_fork_signal_mask;
141
/*
 * Acquire @mutex, dying through urcu_die() on any unexpected error.
 *
 * When DISTRUST_SIGNALS_EXTREME is defined the lock is polled with
 * pthread_mutex_trylock(), pausing 10 ms between attempts, instead of
 * blocking in pthread_mutex_lock().
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
#ifndef DISTRUST_SIGNALS_EXTREME
	int err = pthread_mutex_lock(mutex);

	if (err != 0)
		urcu_die(err);
#else /* #ifndef DISTRUST_SIGNALS_EXTREME */
	for (;;) {
		int err = pthread_mutex_trylock(mutex);

		if (err == 0)
			break;
		/* Only "busy" and "interrupted" are retried. */
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
}
158
/* Release @mutex, dying through urcu_die() if the unlock fails. */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
167
168 static void wait_for_readers(struct cds_list_head *input_readers,
169 struct cds_list_head *cur_snap_readers,
170 struct cds_list_head *qsreaders)
171 {
172 int wait_loops = 0;
173 struct rcu_reader *index, *tmp;
174
175 /*
176 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
177 * indicate quiescence (not nested), or observe the current
178 * rcu_gp.ctr value.
179 */
180 for (;;) {
181 wait_loops++;
182 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
183 switch (rcu_reader_state(&index->ctr)) {
184 case RCU_READER_ACTIVE_CURRENT:
185 if (cur_snap_readers) {
186 cds_list_move(&index->node,
187 cur_snap_readers);
188 break;
189 }
190 /* Fall-through */
191 case RCU_READER_INACTIVE:
192 cds_list_move(&index->node, qsreaders);
193 break;
194 case RCU_READER_ACTIVE_OLD:
195 /*
196 * Old snapshot. Leaving node in
197 * input_readers will make us busy-loop
198 * until the snapshot becomes current or
199 * the reader becomes inactive.
200 */
201 break;
202 }
203 }
204
205 if (cds_list_empty(input_readers)) {
206 break;
207 } else {
208 if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
209 usleep(RCU_SLEEP_DELAY);
210 else
211 caa_cpu_relax();
212 }
213 }
214 }
215
/*
 * Wait for a grace period.
 *
 * Runs with all signals blocked and rcu_gp_lock held. Returns immediately
 * (after restoring the signal mask) when no reader is registered. Otherwise
 * it waits for readers in two passes, flipping the grace-period phase bit
 * in rcu_gp.ctr between them, then puts every reader back on the registry.
 */
void synchronize_rcu(void)
{
	/* Readers seen active in the current phase during the first pass. */
	CDS_LIST_HEAD(cur_snap_readers);
	/* Readers already known to be quiescent for this grace period. */
	CDS_LIST_HEAD(qsreaders);
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	assert(!ret);
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	assert(!ret);

	mutex_lock(&rcu_gp_lock);

	/* Nothing to wait for when no reader is registered. */
	if (cds_list_empty(&registry))
		goto out;

	/* All threads should read qparity before accessing data structure
	 * where new ptr points to. */
	/* Write new ptr before changing the qparity */
	cmm_smp_mb();

	/*
	 * Wait for readers to observe original parity or be quiescent.
	 */
	wait_for_readers(&registry, &cur_snap_readers, &qsreaders);

	/*
	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	cmm_smp_mb();

	/* Switch parity: 0 -> 1, 1 -> 0 */
	CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);

	/*
	 * Must commit qparity update to memory before waiting for other parity
	 * quiescent state. Failure to do so could result in the writer waiting
	 * forever while new readers are always accessing data (no progress).
	 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
	 */

	/*
	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	cmm_smp_mb();

	/*
	 * Wait for readers to observe new parity or be quiescent.
	 */
	wait_for_readers(&cur_snap_readers, NULL, &qsreaders);

	/*
	 * Put quiescent reader list back into registry.
	 */
	cds_list_splice(&qsreaders, &registry);

	/*
	 * Finish waiting for reader threads before letting the old ptr be
	 * freed.
	 */
	cmm_smp_mb();
out:
	mutex_unlock(&rcu_gp_lock);
	/* Restore the caller's signal mask. */
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	assert(!ret);
}
287
288 /*
289 * library wrappers to be used by non-LGPL compatible source code.
290 */
291
/* Out-of-line wrapper around _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
296
/* Out-of-line wrapper around _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
301
/* Out-of-line wrapper: returns the result of _rcu_read_ongoing(). */
int rcu_read_ongoing(void)
{
	return _rcu_read_ongoing();
}
306
307 /*
308 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
309 * Else, try expanding the last chunk. If this fails, allocate a new
310 * chunk twice as big as the last chunk.
311 * Memory used by chunks _never_ moves. A chunk could theoretically be
312 * freed when all "used" slots are released, but we don't do it at this
313 * point.
314 */
315 static
316 void expand_arena(struct registry_arena *arena)
317 {
318 struct registry_chunk *new_chunk, *last_chunk;
319 size_t old_chunk_len, new_chunk_len;
320
321 /* No chunk. */
322 if (cds_list_empty(&arena->chunk_list)) {
323 assert(ARENA_INIT_ALLOC >=
324 sizeof(struct registry_chunk)
325 + sizeof(struct rcu_reader));
326 new_chunk_len = ARENA_INIT_ALLOC;
327 new_chunk = mmap(NULL, new_chunk_len,
328 PROT_READ | PROT_WRITE,
329 MAP_ANONYMOUS | MAP_PRIVATE,
330 -1, 0);
331 if (new_chunk == MAP_FAILED)
332 abort();
333 bzero(new_chunk, new_chunk_len);
334 new_chunk->data_len =
335 new_chunk_len - sizeof(struct registry_chunk);
336 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
337 return; /* We're done. */
338 }
339
340 /* Try expanding last chunk. */
341 last_chunk = cds_list_entry(arena->chunk_list.prev,
342 struct registry_chunk, node);
343 old_chunk_len =
344 last_chunk->data_len + sizeof(struct registry_chunk);
345 new_chunk_len = old_chunk_len << 1;
346
347 /* Don't allow memory mapping to move, just expand. */
348 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
349 new_chunk_len, 0);
350 if (new_chunk != MAP_FAILED) {
351 /* Should not have moved. */
352 assert(new_chunk == last_chunk);
353 bzero((char *) last_chunk + old_chunk_len,
354 new_chunk_len - old_chunk_len);
355 last_chunk->data_len =
356 new_chunk_len - sizeof(struct registry_chunk);
357 return; /* We're done. */
358 }
359
360 /* Remap did not succeed, we need to add a new chunk. */
361 new_chunk = mmap(NULL, new_chunk_len,
362 PROT_READ | PROT_WRITE,
363 MAP_ANONYMOUS | MAP_PRIVATE,
364 -1, 0);
365 if (new_chunk == MAP_FAILED)
366 abort();
367 bzero(new_chunk, new_chunk_len);
368 new_chunk->data_len =
369 new_chunk_len - sizeof(struct registry_chunk);
370 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
371 }
372
373 static
374 struct rcu_reader *arena_alloc(struct registry_arena *arena)
375 {
376 struct registry_chunk *chunk;
377 struct rcu_reader *rcu_reader_reg;
378 int expand_done = 0; /* Only allow to expand once per alloc */
379 size_t len = sizeof(struct rcu_reader);
380
381 retry:
382 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
383 if (chunk->data_len - chunk->used < len)
384 continue;
385 /* Find spot */
386 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
387 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
388 rcu_reader_reg++) {
389 if (!rcu_reader_reg->alloc) {
390 rcu_reader_reg->alloc = 1;
391 chunk->used += len;
392 return rcu_reader_reg;
393 }
394 }
395 }
396
397 if (!expand_done) {
398 expand_arena(arena);
399 expand_done = 1;
400 goto retry;
401 }
402
403 return NULL;
404 }
405
406 /* Called with signals off and mutex locked */
407 static
408 void add_thread(void)
409 {
410 struct rcu_reader *rcu_reader_reg;
411 int ret;
412
413 rcu_reader_reg = arena_alloc(&registry_arena);
414 if (!rcu_reader_reg)
415 abort();
416 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
417 if (ret)
418 abort();
419
420 /* Add to registry */
421 rcu_reader_reg->tid = pthread_self();
422 assert(rcu_reader_reg->ctr == 0);
423 cds_list_add(&rcu_reader_reg->node, &registry);
424 /*
425 * Reader threads are pointing to the reader registry. This is
426 * why its memory should never be relocated.
427 */
428 URCU_TLS(rcu_reader) = rcu_reader_reg;
429 }
430
/*
 * Release the reader slot @rcu_reader_reg, which lives in @chunk: reset
 * its counter, unlink it from the registry, mark the slot free and give
 * its bytes back to the chunk's accounting.
 *
 * Called with mutex locked.
 */
static
void cleanup_thread(struct registry_chunk *chunk,
		struct rcu_reader *rcu_reader_reg)
{
	rcu_reader_reg->ctr = 0;
	cds_list_del(&rcu_reader_reg->node);
	rcu_reader_reg->tid = 0;
	rcu_reader_reg->alloc = 0;	/* slot may be handed out again by arena_alloc() */
	chunk->used -= sizeof(struct rcu_reader);
}
442
443 static
444 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
445 {
446 struct registry_chunk *chunk;
447
448 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
449 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
450 continue;
451 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
452 continue;
453 return chunk;
454 }
455 return NULL;
456 }
457
458 /* Called with signals off and mutex locked */
459 static
460 void remove_thread(struct rcu_reader *rcu_reader_reg)
461 {
462 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
463 URCU_TLS(rcu_reader) = NULL;
464 }
465
466 /* Disable signals, take mutex, add to registry */
467 void rcu_bp_register(void)
468 {
469 sigset_t newmask, oldmask;
470 int ret;
471
472 ret = sigfillset(&newmask);
473 if (ret)
474 abort();
475 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
476 if (ret)
477 abort();
478
479 /*
480 * Check if a signal concurrently registered our thread since
481 * the check in rcu_read_lock().
482 */
483 if (URCU_TLS(rcu_reader))
484 goto end;
485
486 /*
487 * Take care of early registration before urcu_bp constructor.
488 */
489 rcu_bp_init();
490
491 mutex_lock(&rcu_gp_lock);
492 add_thread();
493 mutex_unlock(&rcu_gp_lock);
494 end:
495 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
496 if (ret)
497 abort();
498 }
499
500 /* Disable signals, take mutex, remove from registry */
501 static
502 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
503 {
504 sigset_t newmask, oldmask;
505 int ret;
506
507 ret = sigfillset(&newmask);
508 if (ret)
509 abort();
510 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
511 if (ret)
512 abort();
513
514 mutex_lock(&rcu_gp_lock);
515 remove_thread(rcu_reader_reg);
516 mutex_unlock(&rcu_gp_lock);
517 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
518 if (ret)
519 abort();
520 rcu_bp_exit();
521 }
522
/*
 * Destructor of urcu_bp_key (installed by rcu_bp_init()): removes the
 * exiting thread from the registry. @rcu_key is the value stored for the
 * key by add_thread(), i.e. the thread's struct rcu_reader.
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	rcu_bp_unregister(rcu_key);
}
532
533 static
534 void rcu_bp_init(void)
535 {
536 mutex_lock(&init_lock);
537 if (!rcu_bp_refcount++) {
538 int ret;
539
540 ret = pthread_key_create(&urcu_bp_key,
541 urcu_bp_thread_exit_notifier);
542 if (ret)
543 abort();
544 initialized = 1;
545 }
546 mutex_unlock(&init_lock);
547 }
548
549 static
550 void rcu_bp_exit(void)
551 {
552 mutex_lock(&init_lock);
553 if (!--rcu_bp_refcount) {
554 struct registry_chunk *chunk, *tmp;
555 int ret;
556
557 cds_list_for_each_entry_safe(chunk, tmp,
558 &registry_arena.chunk_list, node) {
559 munmap(chunk, chunk->data_len
560 + sizeof(struct registry_chunk));
561 }
562 ret = pthread_key_delete(urcu_bp_key);
563 if (ret)
564 abort();
565 }
566 mutex_unlock(&init_lock);
567 }
568
569 /*
570 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
571 * a concurrent thread executing with this same lock held. This ensures that the
572 * registry is in a coherent state in the child.
573 */
574 void rcu_bp_before_fork(void)
575 {
576 sigset_t newmask, oldmask;
577 int ret;
578
579 ret = sigfillset(&newmask);
580 assert(!ret);
581 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
582 assert(!ret);
583 mutex_lock(&rcu_gp_lock);
584 saved_fork_signal_mask = oldmask;
585 }
586
587 void rcu_bp_after_fork_parent(void)
588 {
589 sigset_t oldmask;
590 int ret;
591
592 oldmask = saved_fork_signal_mask;
593 mutex_unlock(&rcu_gp_lock);
594 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
595 assert(!ret);
596 }
597
598 /*
599 * Prune all entries from registry except our own thread. Fits the Linux
600 * fork behavior. Called with rcu_gp_lock held.
601 */
602 static
603 void urcu_bp_prune_registry(void)
604 {
605 struct registry_chunk *chunk;
606 struct rcu_reader *rcu_reader_reg;
607
608 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
609 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
610 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
611 rcu_reader_reg++) {
612 if (!rcu_reader_reg->alloc)
613 continue;
614 if (rcu_reader_reg->tid == pthread_self())
615 continue;
616 cleanup_thread(chunk, rcu_reader_reg);
617 }
618 }
619 }
620
621 void rcu_bp_after_fork_child(void)
622 {
623 sigset_t oldmask;
624 int ret;
625
626 urcu_bp_prune_registry();
627 oldmask = saved_fork_signal_mask;
628 mutex_unlock(&rcu_gp_lock);
629 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
630 assert(!ret);
631 }
632
/* Out-of-line symbol: returns the result of _rcu_dereference() on @p. */
void *rcu_dereference_sym_bp(void *p)
{
	return _rcu_dereference(p);
}
637
/*
 * Out-of-line symbol: issue a write barrier, then store @v into *@p with
 * uatomic_set(). Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();	/* order earlier writes before the pointer store */
	uatomic_set(p, v);
	return v;
}
644
/*
 * Out-of-line symbol: issue a write barrier, then exchange *@p with @v
 * using uatomic_xchg(). Returns the value uatomic_xchg() returns.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();	/* order earlier writes before the exchange */
	return uatomic_xchg(p, v);
}
650
/*
 * Out-of-line symbol: issue a write barrier, then compare-and-exchange
 * *@p from @old to @_new using uatomic_cmpxchg(). Returns the value
 * uatomic_cmpxchg() returns.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	cmm_wmb();	/* order earlier writes before the compare-and-exchange */
	return uatomic_cmpxchg(p, old, _new);
}
656
/*
 * Instantiate `rcu_flavor` through DEFINE_RCU_FLAVOR (macro defined outside
 * this file; presumably a descriptor of this flavor's entry points -- TODO
 * confirm against its definition).
 */
DEFINE_RCU_FLAVOR(rcu_flavor);
658
659 #include "urcu-call-rcu-impl.h"
660 #include "urcu-defer-impl.h"
This page took 0.071423 seconds and 4 git commands to generate.