Fix urcu-bp: don't move registry
[urcu.git] / urcu-bp.c
CommitLineData
fdee2e6d
MD
1/*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6982d6d7 6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
fdee2e6d
MD
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
0617bf4c 26#define _GNU_SOURCE
71c811bf 27#define _LGPL_SOURCE
fdee2e6d
MD
28#include <stdio.h>
29#include <pthread.h>
30#include <signal.h>
31#include <assert.h>
32#include <stdlib.h>
33#include <string.h>
34#include <errno.h>
35#include <poll.h>
36#include <unistd.h>
37#include <sys/mman.h>
38
d73fb81f 39#include "urcu/wfcqueue.h"
57760d44 40#include "urcu/map/urcu-bp.h"
af7c2dbe 41#include "urcu/static/urcu-bp.h"
618b2595 42#include "urcu-pointer.h"
bd252a04 43#include "urcu/tls-compat.h"
71c811bf 44
4a6d7378
MD
45#include "urcu-die.h"
46
fdee2e6d 47/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
71c811bf 48#undef _LGPL_SOURCE
fdee2e6d 49#include "urcu-bp.h"
71c811bf 50#define _LGPL_SOURCE
fdee2e6d 51
4c1ae2ea
MD
/* Fall back to MAP_ANON when the headers do not define MAP_ANONYMOUS. */
#if !defined(MAP_ANONYMOUS)
# define MAP_ANONYMOUS	MAP_ANON
#endif
55
c7eaf61c
MD
#ifdef __linux__
/*
 * Linux: forward the request unchanged to mremap() and hand back its
 * result (MAP_FAILED on error).
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
#else

#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * mremap wrapper for non-Linux systems not allowing MAYMOVE.
 * This is not generic: it never remaps anything and always reports
 * MAP_FAILED, which callers must treat as "could not resize in place".
 * Requesting MREMAP_MAYMOVE is a programming error (asserted).
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert(!(flags & MREMAP_MAYMOVE));

	return MAP_FAILED;
}
#endif
81
fdee2e6d
MD
/* Sleep delay in us */
#define RCU_SLEEP_DELAY		1000
/* Number of struct rcu_reader slots the first registry chunk can hold. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the first registry chunk: the chunk header followed
 * by INIT_NR_THREADS reader slots. The expansion is fully parenthesized
 * so that it behaves as a single operand wherever it is used.
 */
#define ARENA_INIT_ALLOC \
	(sizeof(struct registry_chunk) \
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))
fdee2e6d 88
b7b6a8f5
PB
/*
 * Number of active (non-sleeping) passes over the reader list made by
 * wait_for_readers() while checking for reader quiescent states, before
 * it resorts to sleeping.
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100

/* Library teardown hook, run as a destructor. */
void __attribute__((destructor)) rcu_bp_exit(void);

/*
 * Lock taken around grace periods, thread registration and fork
 * handling in this file.
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
fdee2e6d
MD
97
98#ifdef DEBUG_YIELD
1de4df4b
MD
99unsigned int rcu_yield_active;
100DEFINE_URCU_TLS(unsigned int, rcu_rand_yield);
fdee2e6d
MD
101#endif
102
c13c2e55 103struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
fdee2e6d
MD
104
105/*
106 * Pointer to registry elements. Written to only by each individual reader. Read
107 * by both the reader and the writers.
108 */
bd252a04 109DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
fdee2e6d 110
16aa9ee8 111static CDS_LIST_HEAD(registry);
fdee2e6d 112
a0d529ec
MD
113struct registry_chunk {
114 size_t data_len; /* data length */
115 size_t used; /* data used */
116 struct cds_list_head node; /* chunk_list node */
117 char data[];
118};
119
fdee2e6d 120struct registry_arena {
a0d529ec 121 struct cds_list_head chunk_list;
fdee2e6d
MD
122};
123
a0d529ec
MD
124static struct registry_arena registry_arena = {
125 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
126};
fdee2e6d 127
4cf1675f
MD
128/* Saved fork signal mask, protected by rcu_gp_lock */
129static sigset_t saved_fork_signal_mask;
130
fdee2e6d
MD
131static void rcu_gc_registry(void);
132
/*
 * Acquire @mutex; any unexpected error is handed to urcu_die().
 *
 * The default build simply blocks in pthread_mutex_lock(). When
 * DISTRUST_SIGNALS_EXTREME is defined, the mutex is polled with
 * pthread_mutex_trylock() instead, pausing in poll() with a 10 ms
 * timeout between attempts; only EBUSY and EINTR lead to a retry.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
#ifndef DISTRUST_SIGNALS_EXTREME
	int err = pthread_mutex_lock(mutex);

	if (err)
		urcu_die(err);
#else /* #ifndef DISTRUST_SIGNALS_EXTREME */
	for (;;) {
		int err = pthread_mutex_trylock(mutex);

		if (!err)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
}
149
/* Release @mutex; a non-zero pthread error code is handed to urcu_die(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err)
		urcu_die(err);
}
158
52c75091
MD
159static void wait_for_readers(struct cds_list_head *input_readers,
160 struct cds_list_head *cur_snap_readers,
161 struct cds_list_head *qsreaders)
fdee2e6d 162{
fdee2e6d 163 int wait_loops = 0;
02be5561 164 struct rcu_reader *index, *tmp;
fdee2e6d 165
fdee2e6d 166 /*
dd61d077
MD
167 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
168 * indicate quiescence (not nested), or observe the current
c13c2e55 169 * rcu_gp.ctr value.
fdee2e6d
MD
170 */
171 for (;;) {
172 wait_loops++;
52c75091
MD
173 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
174 switch (rcu_reader_state(&index->ctr)) {
175 case RCU_READER_ACTIVE_CURRENT:
176 if (cur_snap_readers) {
177 cds_list_move(&index->node,
178 cur_snap_readers);
179 break;
180 }
181 /* Fall-through */
182 case RCU_READER_INACTIVE:
183 cds_list_move(&index->node, qsreaders);
184 break;
185 case RCU_READER_ACTIVE_OLD:
186 /*
187 * Old snapshot. Leaving node in
188 * input_readers will make us busy-loop
189 * until the snapshot becomes current or
190 * the reader becomes inactive.
191 */
192 break;
193 }
fdee2e6d
MD
194 }
195
52c75091 196 if (cds_list_empty(input_readers)) {
fdee2e6d
MD
197 break;
198 } else {
199 if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
200 usleep(RCU_SLEEP_DELAY);
201 else
06f22bdb 202 caa_cpu_relax();
fdee2e6d
MD
203 }
204 }
fdee2e6d
MD
205}
206
207void synchronize_rcu(void)
208{
52c75091
MD
209 CDS_LIST_HEAD(cur_snap_readers);
210 CDS_LIST_HEAD(qsreaders);
fdee2e6d
MD
211 sigset_t newmask, oldmask;
212 int ret;
213
6ed4b2e6 214 ret = sigfillset(&newmask);
fdee2e6d 215 assert(!ret);
6ed4b2e6 216 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
fdee2e6d
MD
217 assert(!ret);
218
6abb4bd5 219 mutex_lock(&rcu_gp_lock);
fdee2e6d 220
16aa9ee8 221 if (cds_list_empty(&registry))
2dfb8b5e 222 goto out;
fdee2e6d
MD
223
224 /* All threads should read qparity before accessing data structure
2dfb8b5e 225 * where new ptr points to. */
fdee2e6d 226 /* Write new ptr before changing the qparity */
5481ddb3 227 cmm_smp_mb();
fdee2e6d 228
2dfb8b5e
MD
229 /* Remove old registry elements */
230 rcu_gc_registry();
fdee2e6d
MD
231
232 /*
dd61d077
MD
233 * Wait for readers to observe original parity or be quiescent.
234 */
52c75091 235 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
dd61d077
MD
236
237 /*
238 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
239 * model easier to understand. It does not have a big performance impact
240 * anyway, given this is the write-side.
241 */
242 cmm_smp_mb();
243
244 /* Switch parity: 0 -> 1, 1 -> 0 */
c13c2e55 245 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
dd61d077
MD
246
247 /*
248 * Must commit qparity update to memory before waiting for other parity
249 * quiescent state. Failure to do so could result in the writer waiting
250 * forever while new readers are always accessing data (no progress).
251 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
fdee2e6d 252 */
fdee2e6d
MD
253
254 /*
5481ddb3 255 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
fdee2e6d
MD
256 * model easier to understand. It does not have a big performance impact
257 * anyway, given this is the write-side.
258 */
5481ddb3 259 cmm_smp_mb();
fdee2e6d 260
fdee2e6d 261 /*
dd61d077 262 * Wait for readers to observe new parity or be quiescent.
fdee2e6d 263 */
52c75091
MD
264 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
265
266 /*
267 * Put quiescent reader list back into registry.
268 */
269 cds_list_splice(&qsreaders, &registry);
fdee2e6d
MD
270
271 /*
2dfb8b5e
MD
272 * Finish waiting for reader threads before letting the old ptr being
273 * freed.
fdee2e6d 274 */
5481ddb3 275 cmm_smp_mb();
2dfb8b5e 276out:
6abb4bd5 277 mutex_unlock(&rcu_gp_lock);
fdee2e6d
MD
278 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
279 assert(!ret);
280}
281
282/*
283 * library wrappers to be used by non-LGPL compatible source code.
284 */
285
/* Exported (non-inline) entry point forwarding to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
290
/* Exported (non-inline) entry point forwarding to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
295
882f3357
MD
/*
 * Exported (non-inline) entry point forwarding to _rcu_read_ongoing()
 * and returning its result unchanged.
 */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
300
fdee2e6d 301/*
a0d529ec
MD
302 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
303 * Else, try expanding the last chunk. If this fails, allocate a new
304 * chunk twice as big as the last chunk.
305 * Memory used by chunks _never_ moves. A chunk could theoretically be
306 * freed when all "used" slots are released, but we don't do it at this
307 * point.
fdee2e6d 308 */
a0d529ec
MD
309static
310void expand_arena(struct registry_arena *arena)
fdee2e6d 311{
a0d529ec
MD
312 struct registry_chunk *new_chunk, *last_chunk;
313 size_t old_chunk_len, new_chunk_len;
314
315 /* No chunk. */
316 if (cds_list_empty(&arena->chunk_list)) {
317 assert(ARENA_INIT_ALLOC >=
318 sizeof(struct registry_chunk)
319 + sizeof(struct rcu_reader));
320 new_chunk_len = ARENA_INIT_ALLOC;
321 new_chunk = mmap(NULL, new_chunk_len,
39be94ed
MD
322 PROT_READ | PROT_WRITE,
323 MAP_ANONYMOUS | MAP_PRIVATE,
324 -1, 0);
a0d529ec
MD
325 if (new_chunk == MAP_FAILED)
326 abort();
327 bzero(new_chunk, new_chunk_len);
328 new_chunk->data_len =
329 new_chunk_len - sizeof(struct registry_chunk);
330 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
331 return; /* We're done. */
332 }
39be94ed 333
a0d529ec
MD
334 /* Try expanding last chunk. */
335 last_chunk = cds_list_entry(arena->chunk_list.prev,
336 struct registry_chunk, node);
337 old_chunk_len =
338 last_chunk->data_len + sizeof(struct registry_chunk);
339 new_chunk_len = old_chunk_len << 1;
340
341 /* Don't allow memory mapping to move, just expand. */
342 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
343 new_chunk_len, 0);
344 if (new_chunk != MAP_FAILED) {
345 /* Should not have moved. */
346 assert(new_chunk == last_chunk);
347 bzero((char *) last_chunk + old_chunk_len,
348 new_chunk_len - old_chunk_len);
349 last_chunk->data_len =
350 new_chunk_len - sizeof(struct registry_chunk);
351 return; /* We're done. */
352 }
0617bf4c 353
a0d529ec
MD
354 /* Remap did not succeed, we need to add a new chunk. */
355 new_chunk = mmap(NULL, new_chunk_len,
356 PROT_READ | PROT_WRITE,
357 MAP_ANONYMOUS | MAP_PRIVATE,
358 -1, 0);
359 if (new_chunk == MAP_FAILED)
360 abort();
361 bzero(new_chunk, new_chunk_len);
362 new_chunk->data_len =
363 new_chunk_len - sizeof(struct registry_chunk);
364 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
365}
fdee2e6d 366
a0d529ec
MD
367static
368struct rcu_reader *arena_alloc(struct registry_arena *arena)
369{
370 struct registry_chunk *chunk;
371 struct rcu_reader *rcu_reader_reg;
372 int expand_done = 0; /* Only allow to expand once per alloc */
373 size_t len = sizeof(struct rcu_reader);
374
375retry:
376 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
377 if (chunk->data_len - chunk->used < len)
378 continue;
379 /* Find spot */
380 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
381 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
382 rcu_reader_reg++) {
383 if (!rcu_reader_reg->alloc) {
384 rcu_reader_reg->alloc = 1;
385 chunk->used += len;
386 return rcu_reader_reg;
387 }
388 }
389 }
390
391 if (!expand_done) {
392 expand_arena(arena);
393 expand_done = 1;
394 goto retry;
395 }
396
397 return NULL;
fdee2e6d
MD
398}
399
400/* Called with signals off and mutex locked */
a0d529ec
MD
401static
402void add_thread(void)
fdee2e6d 403{
02be5561 404 struct rcu_reader *rcu_reader_reg;
fdee2e6d 405
a0d529ec
MD
406 rcu_reader_reg = arena_alloc(&registry_arena);
407 if (!rcu_reader_reg)
408 abort();
fdee2e6d
MD
409
410 /* Add to registry */
02be5561
MD
411 rcu_reader_reg->tid = pthread_self();
412 assert(rcu_reader_reg->ctr == 0);
16aa9ee8 413 cds_list_add(&rcu_reader_reg->node, &registry);
a0d529ec
MD
414 /*
415 * Reader threads are pointing to the reader registry. This is
416 * why its memory should never be relocated.
417 */
bd252a04 418 URCU_TLS(rcu_reader) = rcu_reader_reg;
fdee2e6d
MD
419}
420
421/* Called with signals off and mutex locked */
422static void rcu_gc_registry(void)
423{
a0d529ec 424 struct registry_chunk *chunk;
02be5561 425 struct rcu_reader *rcu_reader_reg;
fdee2e6d 426
a0d529ec
MD
427 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
428 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
429 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
430 rcu_reader_reg++) {
431 pthread_t tid;
432 int ret;
433
434 if (!rcu_reader_reg->alloc)
435 continue;
436 tid = rcu_reader_reg->tid;
437 ret = pthread_kill(tid, 0);
438 assert(ret != EINVAL);
439 if (ret == ESRCH) {
440 cds_list_del(&rcu_reader_reg->node);
441 rcu_reader_reg->ctr = 0;
442 rcu_reader_reg->alloc = 0;
443 chunk->used -= sizeof(struct rcu_reader);
444 }
445
fdee2e6d
MD
446 }
447 }
448}
449
450/* Disable signals, take mutex, add to registry */
451void rcu_bp_register(void)
452{
453 sigset_t newmask, oldmask;
454 int ret;
455
6ed4b2e6 456 ret = sigfillset(&newmask);
fdee2e6d 457 assert(!ret);
6ed4b2e6 458 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
fdee2e6d
MD
459 assert(!ret);
460
461 /*
462 * Check if a signal concurrently registered our thread since
463 * the check in rcu_read_lock(). */
bd252a04 464 if (URCU_TLS(rcu_reader))
fdee2e6d
MD
465 goto end;
466
6abb4bd5 467 mutex_lock(&rcu_gp_lock);
fdee2e6d 468 add_thread();
6abb4bd5 469 mutex_unlock(&rcu_gp_lock);
fdee2e6d
MD
470end:
471 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
472 assert(!ret);
473}
474
9380711a 475void rcu_bp_exit(void)
fdee2e6d 476{
a0d529ec
MD
477 struct registry_chunk *chunk, *tmp;
478
479 cds_list_for_each_entry_safe(chunk, tmp,
480 &registry_arena.chunk_list, node) {
481 munmap(chunk, chunk->data_len + sizeof(struct registry_chunk));
482 }
fdee2e6d 483}
4cf1675f
MD
484
485/*
486 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
487 * a concurrent thread executing with this same lock held. This ensures that the
488 * registry is in a coherent state in the child.
489 */
490void rcu_bp_before_fork(void)
491{
492 sigset_t newmask, oldmask;
493 int ret;
494
6ed4b2e6 495 ret = sigfillset(&newmask);
4cf1675f 496 assert(!ret);
6ed4b2e6 497 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
4cf1675f
MD
498 assert(!ret);
499 mutex_lock(&rcu_gp_lock);
500 saved_fork_signal_mask = oldmask;
501}
502
503void rcu_bp_after_fork_parent(void)
504{
505 sigset_t oldmask;
506 int ret;
507
508 oldmask = saved_fork_signal_mask;
509 mutex_unlock(&rcu_gp_lock);
510 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
511 assert(!ret);
512}
513
514void rcu_bp_after_fork_child(void)
515{
516 sigset_t oldmask;
517 int ret;
518
519 rcu_gc_registry();
520 oldmask = saved_fork_signal_mask;
521 mutex_unlock(&rcu_gp_lock);
522 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
523 assert(!ret);
524}
5e77fc1f 525
9b7981bb
MD
/*
 * Exported symbol forwarding to _rcu_dereference(): returns the result
 * of applying it to @p.
 */
void *rcu_dereference_sym_bp(void *p)
{
	return _rcu_dereference(p);
}
530
5efd3cd2
MD
/*
 * Exported symbol that stores @v into *@p with uatomic_set(), after
 * issuing cmm_wmb(). Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Barrier first, then the pointer store. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
537
/*
 * Exported symbol that exchanges *@p with @v through uatomic_xchg(),
 * after issuing cmm_wmb(). Returns what uatomic_xchg() returns.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_xchg(p, v);
	return prev;
}
543
/*
 * Exported symbol that runs uatomic_cmpxchg() on *@p with expected
 * value @old and replacement @_new, after issuing cmm_wmb(). Returns
 * what uatomic_cmpxchg() returns.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *seen;

	cmm_wmb();
	seen = uatomic_cmpxchg(p, old, _new);
	return seen;
}
549
/*
 * Instantiate rcu_flavor through DEFINE_RCU_FLAVOR.
 * NOTE(review): the macro is defined outside this file; presumably it
 * builds the table of this flavour's entry points -- confirm there.
 */
DEFINE_RCU_FLAVOR(rcu_flavor);
541d828d 551
5e77fc1f 552#include "urcu-call-rcu-impl.h"
0376e7b2 553#include "urcu-defer-impl.h"
This page took 0.061158 seconds and 4 git commands to generate.