// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
//
// SPDX-License-Identifier: LGPL-2.1-or-later

/*
 * Userspace RCU library
 *
 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
 */

#define URCU_NO_COMPAT_IDENTIFIERS
#define _BSD_SOURCE
#define _GNU_SOURCE
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <poll.h>

#include <urcu/config.h>
#include <urcu/annotate.h>
#include <urcu/assert.h>
#include <urcu/arch.h>
#include <urcu/wfcqueue.h>
#include <urcu/map/urcu.h>
#include <urcu/static/urcu.h>
#include <urcu/pointer.h>
#include <urcu/tls-compat.h>

37 #include "urcu-wait.h"
38 #include "urcu-utils.h"
#define URCU_API_MAP
/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
#undef _LGPL_SOURCE
#include <urcu/urcu.h>
#define _LGPL_SOURCE

/*
 * If a reader is really non-cooperative and refuses to commit its
 * rcu_active_readers count to memory (there is no barrier in the reader
 * per-se), kick it after 10 loops waiting for it.
 */
#define KICK_READER_LOOPS 10

/*
 * Active attempts to check for reader Q.S. before calling futex().
 */
#define RCU_QS_ACTIVE_ATTEMPTS 100
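
/*
 * Illustration only, not compiled: the overall shape of the waiting
 * strategy these two constants tune. The helpers named here are
 * hypothetical; the real logic lives in wait_for_readers() and
 * wait_gp() below.
 */
#if 0
	unsigned int wait_loops = 0;

	while (!all_readers_quiescent()) {	/* hypothetical predicate */
		if (++wait_loops < RCU_QS_ACTIVE_ATTEMPTS) {
			caa_cpu_relax();	/* active checks first */
			continue;
		}
		block_on_gp_futex();	/* hypothetical: see wait_gp() below */
	}
#endif
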
/* If the headers do not support membarrier system call, fall back on RCU_MB */
#ifdef __NR_membarrier
# define membarrier(...)	syscall(__NR_membarrier, __VA_ARGS__)
#else
# define membarrier(...)	-ENOSYS
#endif

enum membarrier_cmd {
	MEMBARRIER_CMD_QUERY				= 0,
	MEMBARRIER_CMD_SHARED				= (1 << 0),
	/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
	/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
	MEMBARRIER_CMD_PRIVATE_EXPEDITED		= (1 << 3),
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	= (1 << 4),
};

#ifdef RCU_MEMBARRIER
static int init_done;
static int urcu_memb_has_sys_membarrier_private_expedited;

#ifndef CONFIG_RCU_FORCE_SYS_MEMBARRIER
/*
 * Explicitly initialize to zero because we can't alias a non-static
 * uninitialized variable.
 */
int urcu_memb_has_sys_membarrier = 0;
#endif

void __attribute__((constructor)) rcu_init(void);
#endif

#ifdef RCU_MB
void rcu_init(void)
{
}
#endif

#ifdef RCU_SIGNAL
static int init_done;

void __attribute__((constructor)) rcu_init(void);

static DEFINE_URCU_TLS(int, rcu_signal_was_blocked);
#endif

void __attribute__((destructor)) rcu_exit(void);
static void urcu_call_rcu_exit(void);

/*
 * rcu_gp_lock ensures mutual exclusion between threads calling
 * synchronize_rcu().
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * rcu_registry_lock ensures mutual exclusion between threads
 * registering and unregistering themselves to/from the registry, and
 * with threads reading that registry from synchronize_rcu(). However,
 * this lock is not held all the way through the completion of the
 * grace-period wait: it is sporadically released between iterations
 * on the registry.
 * rcu_registry_lock may nest inside rcu_gp_lock.
 */
static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
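
/*
 * Sketch, for documentation only (not compiled): the lock ordering the
 * write side uses, as described above. rcu_registry_lock always nests
 * inside rcu_gp_lock; acquiring them in the opposite order could deadlock
 * against synchronize_rcu(). mutex_lock()/mutex_unlock() are the local
 * helpers defined below.
 */
#if 0
	mutex_lock(&rcu_gp_lock);
	mutex_lock(&rcu_registry_lock);
	/* ... iterate on registry; may drop/retake rcu_registry_lock ... */
	mutex_unlock(&rcu_registry_lock);
	mutex_unlock(&rcu_gp_lock);
#endif
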
struct urcu_gp rcu_gp = { .ctr = URCU_GP_COUNT };

/*
 * Written to only by each individual reader. Read by both the reader and the
 * writers.
 */
DEFINE_URCU_TLS(struct urcu_reader, rcu_reader);

static CDS_LIST_HEAD(registry);

/*
 * Queue of threads awaiting a grace period. Contains
 * struct gp_waiters_thread objects.
 */
static DEFINE_URCU_WAIT_QUEUE(gp_waiters);

static void mutex_lock(pthread_mutex_t *mutex)
{
	int ret;

#ifndef DISTRUST_SIGNALS_EXTREME
	ret = pthread_mutex_lock(mutex);
	if (ret)
		urcu_die(ret);
#else /* #ifndef DISTRUST_SIGNALS_EXTREME */
	while ((ret = pthread_mutex_trylock(mutex)) != 0) {
		if (ret != EBUSY && ret != EINTR)
			urcu_die(ret);
		if (CMM_LOAD_SHARED(URCU_TLS(rcu_reader).need_mb)) {
			cmm_smp_mb();
			_CMM_STORE_SHARED(URCU_TLS(rcu_reader).need_mb, 0);
			cmm_smp_mb();
		}
		(void) poll(NULL, 0, 10);
	}
#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
}

static void mutex_unlock(pthread_mutex_t *mutex)
{
	int ret;

	ret = pthread_mutex_unlock(mutex);
	if (ret)
		urcu_die(ret);
}

#ifdef RCU_MEMBARRIER
static void smp_mb_master(void)
{
	if (caa_likely(urcu_memb_has_sys_membarrier)) {
		if (membarrier(urcu_memb_has_sys_membarrier_private_expedited ?
				MEMBARRIER_CMD_PRIVATE_EXPEDITED :
				MEMBARRIER_CMD_SHARED, 0))
			urcu_die(errno);
	} else {
		cmm_smp_mb();
	}
}
#endif

#ifdef RCU_MB
static void smp_mb_master(void)
{
	cmm_smp_mb();
}
#endif

#ifdef RCU_SIGNAL
static void force_mb_all_readers(void)
{
	struct urcu_reader *index;

	/*
	 * Ask each thread to execute a cmm_smp_mb(), so we can consider the
	 * compiler barriers around rcu read lock as real memory barriers.
	 */
	if (cds_list_empty(&registry))
		return;
	/*
	 * pthread_kill has a cmm_smp_mb(). But beware, we assume it performs
	 * a cache flush on architectures with non-coherent cache. Let's play
	 * it safe and assume nothing: we use cmm_smp_mc() to make sure the
	 * cache flush is enforced.
	 */
	cds_list_for_each_entry(index, &registry, node) {
		CMM_STORE_SHARED(index->need_mb, 1);
		pthread_kill(index->tid, SIGRCU);
	}
	/*
	 * Wait for sighandler (and thus mb()) to execute on every thread.
	 *
	 * Note that the pthread_kill() below will never be executed on
	 * systems that correctly deliver signals in a timely manner.
	 * However, it is not uncommon for kernels to have bugs that can
	 * result in lost or unduly delayed signals.
	 *
	 * If you are seeing the pthread_kill() below executing much at
	 * all, we suggest testing the underlying kernel and filing the
	 * relevant bug report. For Linux kernels, we recommend getting
	 * the Linux Test Project (LTP).
	 */
	cds_list_for_each_entry(index, &registry, node) {
		while (CMM_LOAD_SHARED(index->need_mb)) {
			pthread_kill(index->tid, SIGRCU);
			(void) poll(NULL, 0, 1);
		}
	}
	cmm_smp_mb();	/* read ->need_mb before ending the barrier */
}

static void smp_mb_master(void)
{
	force_mb_all_readers();
}
#endif /* #ifdef RCU_SIGNAL */

/*
 * synchronize_rcu() waiting. Single thread.
 * Always called with rcu_registry lock held. Releases this lock and
 * grabs it again. Holds the lock when it returns.
 */
static void wait_gp(void)
{
	/*
	 * Read reader_gp before read futex. smp_mb_master() needs to
	 * be called with the rcu registry lock held in RCU_SIGNAL
	 * flavor.
	 */
	smp_mb_master();
	/* Temporarily unlock the registry lock. */
	mutex_unlock(&rcu_registry_lock);
	while (uatomic_read(&rcu_gp.futex) == -1) {
		if (!futex_async(&rcu_gp.futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
			/*
			 * Prior wakeups queued by unrelated code using the
			 * same futex address can cause futex wait to return
			 * 0 even though the futex value is still -1
			 * (spurious wakeups). Check the value again in
			 * user-space to validate whether it really differs
			 * from -1.
			 */
			continue;
		}
		switch (errno) {
		case EAGAIN:
			/* Value already changed. */
			goto end;
		case EINTR:
			/* Retry if interrupted by signal. */
			break;	/* Get out of switch. Check again. */
		default:
			/* Unexpected error. */
			urcu_die(errno);
		}
	}
end:
	/*
	 * Re-lock the registry lock before the next loop.
	 */
	mutex_lock(&rcu_registry_lock);
}

/*
 * Always called with rcu_registry lock held. Releases this lock between
 * iterations and grabs it again. Holds the lock when it returns.
 */
static void wait_for_readers(struct cds_list_head *input_readers,
			struct cds_list_head *cur_snap_readers,
			struct cds_list_head *qsreaders,
			cmm_annotate_t *group)
{
	unsigned int wait_loops = 0;
	struct urcu_reader *index, *tmp;
#ifdef HAS_INCOHERENT_CACHES
	unsigned int wait_gp_loops = 0;
#endif /* HAS_INCOHERENT_CACHES */

	/*
	 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
	 * indicate quiescence (not nested), or observe the current
	 * rcu_gp.ctr value.
	 */
	for (;;) {
		if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
			wait_loops++;
		if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
			uatomic_dec(&rcu_gp.futex);
			/* Write futex before read reader_gp */
			smp_mb_master();
		}

		cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
			switch (urcu_common_reader_state(&rcu_gp, &index->ctr, group)) {
			case URCU_READER_ACTIVE_CURRENT:
				if (cur_snap_readers) {
					cds_list_move(&index->node,
						cur_snap_readers);
					break;
				}
				/* Fall-through */
			case URCU_READER_INACTIVE:
				cds_list_move(&index->node, qsreaders);
				break;
			case URCU_READER_ACTIVE_OLD:
				/*
				 * Old snapshot. Leaving node in
				 * input_readers will make us busy-loop
				 * until the snapshot becomes current or
				 * the reader becomes inactive.
				 */
				break;
			}
		}

#ifndef HAS_INCOHERENT_CACHES
		if (cds_list_empty(input_readers)) {
			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
				/* Read reader_gp before write futex */
				smp_mb_master();
				uatomic_set(&rcu_gp.futex, 0);
			}
			break;
		} else {
			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
				/* wait_gp unlocks/locks registry lock. */
				wait_gp();
			} else {
				/* Temporarily unlock the registry lock. */
				mutex_unlock(&rcu_registry_lock);
				caa_cpu_relax();
				/*
				 * Re-lock the registry lock before the
				 * next loop.
				 */
				mutex_lock(&rcu_registry_lock);
			}
		}
#else /* #ifndef HAS_INCOHERENT_CACHES */
		/*
		 * BUSY-LOOP. Force the reader thread to commit its
		 * URCU_TLS(rcu_reader).ctr update to memory if we wait
		 * for too long.
		 */
		if (cds_list_empty(input_readers)) {
			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
				/* Read reader_gp before write futex */
				smp_mb_master();
				uatomic_set(&rcu_gp.futex, 0);
			}
			break;
		} else {
			if (wait_gp_loops == KICK_READER_LOOPS) {
				smp_mb_master();
				wait_gp_loops = 0;
			}
			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
				/* wait_gp unlocks/locks registry lock. */
				wait_gp();
				wait_gp_loops++;
			} else {
				/* Temporarily unlock the registry lock. */
				mutex_unlock(&rcu_registry_lock);
				caa_cpu_relax();
				/*
				 * Re-lock the registry lock before the
				 * next loop.
				 */
				mutex_lock(&rcu_registry_lock);
			}
		}
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
	}
}

void synchronize_rcu(void)
{
	cmm_annotate_define(acquire_group);
	cmm_annotate_define(release_group);
	CDS_LIST_HEAD(cur_snap_readers);
	CDS_LIST_HEAD(qsreaders);
	DEFINE_URCU_WAIT_NODE(wait, URCU_WAIT_WAITING);
	struct urcu_waiters waiters;

	/*
	 * Add ourselves to the gp_waiters queue of threads awaiting a grace
	 * period. Proceed to perform the grace period only if we are the
	 * first thread added into the queue.
	 * The implicit memory barrier before urcu_wait_add()
	 * orders prior memory accesses of threads put into the wait
	 * queue before their insertion into the wait queue.
	 */
	if (urcu_wait_add(&gp_waiters, &wait) != 0) {
		/*
		 * Not first in queue: will be awakened by another thread.
		 * Implies a memory barrier after grace period.
		 */
		urcu_adaptative_busy_wait(&wait);
		return;
	}

	/* We won't need to wake ourself up */
	urcu_wait_set_state(&wait, URCU_WAIT_RUNNING);

	mutex_lock(&rcu_gp_lock);

	/*
	 * Move all waiters into our local queue.
	 */
	urcu_move_waiters(&waiters, &gp_waiters);

	mutex_lock(&rcu_registry_lock);

	if (cds_list_empty(&registry))
		goto out;

	/*
	 * All threads should read qparity before accessing data structure
	 * where new ptr points to. Must be done within rcu_registry_lock
	 * because it iterates on reader threads.
	 */
	/* Write new ptr before changing the qparity */
	smp_mb_master();
	cmm_annotate_group_mb_release(&release_group);

	/*
	 * Wait for readers to observe original parity or be quiescent.
	 * wait_for_readers() can release and grab again rcu_registry_lock
	 * internally.
	 */
	wait_for_readers(&registry, &cur_snap_readers, &qsreaders, &acquire_group);

	/*
	 * Must finish waiting for quiescent state for original parity before
	 * committing next rcu_gp.ctr update to memory. Failure to do so could
	 * result in the writer waiting forever while new readers are always
	 * accessing data (no progress). Enforce compiler-order of load
	 * URCU_TLS(rcu_reader).ctr before store to rcu_gp.ctr.
	 */
	cmm_barrier();

	/*
	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	cmm_smp_mb();

	/* Switch parity: 0 -> 1, 1 -> 0 */
	cmm_annotate_group_mem_release(&release_group, &rcu_gp.ctr);
	uatomic_store(&rcu_gp.ctr, rcu_gp.ctr ^ URCU_GP_CTR_PHASE, CMM_RELAXED);

	/*
	 * Must commit rcu_gp.ctr update to memory before waiting for quiescent
	 * state. Failure to do so could result in the writer waiting forever
	 * while new readers are always accessing data (no progress). Enforce
	 * compiler-order of store to rcu_gp.ctr before load rcu_reader ctr.
	 */
	cmm_barrier();

	/*
	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	cmm_smp_mb();

	/*
	 * Wait for readers to observe new parity or be quiescent.
	 * wait_for_readers() can release and grab again rcu_registry_lock
	 * internally.
	 */
	wait_for_readers(&cur_snap_readers, NULL, &qsreaders, &acquire_group);

	/*
	 * Put quiescent reader list back into registry.
	 */
	cds_list_splice(&qsreaders, &registry);

	/*
	 * Finish waiting for reader threads before letting the old ptr
	 * be freed. Must be done within rcu_registry_lock because it
	 * iterates on reader threads.
	 */
	smp_mb_master();
	cmm_annotate_group_mb_acquire(&acquire_group);
out:
	mutex_unlock(&rcu_registry_lock);
	mutex_unlock(&rcu_gp_lock);

	/*
	 * Wakeup waiters only after we have completed the grace period
	 * and have ensured the memory barriers at the end of the grace
	 * period have been issued.
	 */
	urcu_wake_all_waiters(&waiters);
}

/*
 * Library wrappers to be used by non-LGPL compatible source code.
 */

void rcu_read_lock(void)
{
	_rcu_read_lock();
}

void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}

int rcu_read_ongoing(void)
{
	return _rcu_read_ongoing();
}

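/*
 * Illustration only, not compiled: minimal reader-side usage of the
 * wrappers above from an application thread. `gp_ptr', struct mydata and
 * do_something_with() are hypothetical.
 */
#if 0
	struct mydata *p;

	rcu_register_thread();	/* once per reader thread */

	rcu_read_lock();
	p = rcu_dereference(gp_ptr);	/* p stays valid until unlock */
	if (p)
		do_something_with(p);
	rcu_read_unlock();

	rcu_unregister_thread();	/* before the thread exits */
#endif
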
#ifdef RCU_SIGNAL
/*
 * Make sure the signal used by the urcu-signal flavor is unblocked
 * while the thread is registered.
 */
static
void urcu_signal_unblock(void)
{
	sigset_t mask, oldmask;
	int ret;

	ret = sigemptyset(&mask);
	urcu_posix_assert(!ret);
	ret = sigaddset(&mask, SIGRCU);
	urcu_posix_assert(!ret);
	ret = pthread_sigmask(SIG_UNBLOCK, &mask, &oldmask);
	urcu_posix_assert(!ret);
	URCU_TLS(rcu_signal_was_blocked) = sigismember(&oldmask, SIGRCU);
}

static
void urcu_signal_restore(void)
{
	sigset_t mask;
	int ret;

	if (!URCU_TLS(rcu_signal_was_blocked))
		return;
	ret = sigemptyset(&mask);
	urcu_posix_assert(!ret);
	ret = sigaddset(&mask, SIGRCU);
	urcu_posix_assert(!ret);
	ret = pthread_sigmask(SIG_BLOCK, &mask, NULL);
	urcu_posix_assert(!ret);
}
#else
static
void urcu_signal_unblock(void) { }
static
void urcu_signal_restore(void) { }
#endif

void rcu_register_thread(void)
{
	urcu_signal_unblock();

	URCU_TLS(rcu_reader).tid = pthread_self();
	urcu_posix_assert(URCU_TLS(rcu_reader).need_mb == 0);
	urcu_posix_assert(!(URCU_TLS(rcu_reader).ctr & URCU_GP_CTR_NEST_MASK));

	mutex_lock(&rcu_registry_lock);
	urcu_posix_assert(!URCU_TLS(rcu_reader).registered);
	URCU_TLS(rcu_reader).registered = 1;
	rcu_init();	/* In case gcc does not support constructor attribute */
	cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
	mutex_unlock(&rcu_registry_lock);
}

void rcu_unregister_thread(void)
{
	mutex_lock(&rcu_registry_lock);
	urcu_posix_assert(URCU_TLS(rcu_reader).registered);
	URCU_TLS(rcu_reader).registered = 0;
	cds_list_del(&URCU_TLS(rcu_reader).node);
	mutex_unlock(&rcu_registry_lock);

	urcu_signal_restore();
}

#ifdef RCU_MEMBARRIER

#ifdef CONFIG_RCU_FORCE_SYS_MEMBARRIER
static
void rcu_sys_membarrier_status(bool available)
{
	if (!available)
		abort();
}
#else
static
void rcu_sys_membarrier_status(bool available)
{
	if (!available)
		return;
	urcu_memb_has_sys_membarrier = 1;
}
#endif

static
void rcu_sys_membarrier_init(void)
{
	bool available = false;
	int mask;

	mask = membarrier(MEMBARRIER_CMD_QUERY, 0);
	if (mask >= 0) {
		if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED) {
			if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
				urcu_die(errno);
			urcu_memb_has_sys_membarrier_private_expedited = 1;
			available = true;
		} else if (mask & MEMBARRIER_CMD_SHARED) {
			available = true;
		}
	}
	rcu_sys_membarrier_status(available);
}

void rcu_init(void)
{
	if (init_done)
		return;
	init_done = 1;
	rcu_sys_membarrier_init();
}
#endif

#ifdef RCU_SIGNAL
static void sigrcu_handler(int signo __attribute__((unused)),
		siginfo_t *siginfo __attribute__((unused)),
		void *context __attribute__((unused)))
{
	/*
	 * Executing this cmm_smp_mb() is the only purpose of this signal handler.
	 * It punctually promotes cmm_barrier() into cmm_smp_mb() on every thread
	 * it is executed on.
	 */
	cmm_smp_mb();
	_CMM_STORE_SHARED(URCU_TLS(rcu_reader).need_mb, 0);
	cmm_smp_mb();
}

/*
 * rcu_init constructor. Called when the library is linked, but also when
 * reader threads are calling rcu_register_thread().
 * Should only be called by a single thread at a given time. This is ensured by
 * holding the rcu_registry_lock from rcu_register_thread() or by running
 * at library load time, which should not be executed by multiple
 * threads nor concurrently with rcu_register_thread() anyway.
 */
void rcu_init(void)
{
	struct sigaction act;
	int ret;

	if (init_done)
		return;
	init_done = 1;

	act.sa_sigaction = sigrcu_handler;
	act.sa_flags = SA_SIGINFO | SA_RESTART;
	sigemptyset(&act.sa_mask);
	ret = sigaction(SIGRCU, &act, NULL);
	if (ret)
		urcu_die(errno);
}

/*
 * Don't unregister the SIGRCU signal handler anymore, because
 * call_rcu threads could still be using it shortly before the
 * application exits.
 * Assertion disabled because call_rcu threads are now rcu
 * readers, and left running at exit.
 * urcu_posix_assert(cds_list_empty(&registry));
 */

#endif /* #ifdef RCU_SIGNAL */

void rcu_exit(void)
{
	urcu_call_rcu_exit();
}

DEFINE_RCU_FLAVOR(rcu_flavor);

713 #include "urcu-call-rcu-impl.h"
714 #include "urcu-defer-impl.h"
715 #include "urcu-poll-impl.h"