urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #include <stdio.h>
  25 #include <pthread.h>
  26 #include <signal.h>
  27 #include <assert.h>
  28 #include <stdlib.h>
  29 #include <stdint.h>
  30 #include <string.h>
  31 #include <errno.h>
  32 #include <poll.h>
  33 #include <sys/time.h>
  34 #include <unistd.h>
  35 #include <sched.h>
  36
  37 #include "config.h"
  38 #include "urcu/wfqueue.h"
  39 #include "urcu-call-rcu.h"
  40 #include "urcu-pointer.h"
  41 #include "urcu/list.h"
  42 #include "urcu/futex.h"
  43 #include "urcu/tls-compat.h"
  44
  45 /* Data structure that identifies a call_rcu thread. */
  46
  47 struct call_rcu_data {
  48         struct cds_wfq_queue cbs;
  49         unsigned long flags;
  50         int32_t futex;
  51         unsigned long qlen; /* maintained for debugging. */
  52         pthread_t tid;
  53         int cpu_affinity;
  54         struct cds_list_head list;
  55 } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  56
  57 /*
  58  * List of all call_rcu_data structures to keep valgrind happy.
  59  * Protected by call_rcu_mutex.
  60  */
  61
  62 CDS_LIST_HEAD(call_rcu_data_list);
  63
  64 /* Link a thread using call_rcu() to its call_rcu thread. */
  65
  66 static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);
  67
  68 /* Guard call_rcu thread creation. */
  69
  70 static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
  71
  72 /* If a given thread does not have its own call_rcu thread, this is default. */
  73
  74 static struct call_rcu_data *default_call_rcu_data;
  75
  76 /*
  77  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
  78  * available, then we can have call_rcu threads assigned to individual
  79  * CPUs rather than only to specific threads.
  80  */
  81
  82 #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)
  83
  84 /*
  85  * Pointer to array of pointers to per-CPU call_rcu_data structures
  86  * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
  87  * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
  88  * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
  89  * without mutex. The call_rcu_mutex protects updates.
  90  */
  91
  92 static struct call_rcu_data **per_cpu_call_rcu_data;
  93 static long maxcpus;
  94
  95 static void maxcpus_reset(void)
  96 {
  97         maxcpus = 0;
  98 }
  99
 100 /* Allocate the array if it has not already been allocated. */
 101
 102 static void alloc_cpu_call_rcu_data(void)
 103 {
 104         struct call_rcu_data **p;
 105         static int warned = 0;
 106
 107         if (maxcpus != 0)
 108                 return;
 109         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 110         if (maxcpus <= 0) {
 111                 return;
 112         }
 113         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 114         if (p != NULL) {
 115                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 116                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 117         } else {
 118                 if (!warned) {
 119                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 120                 }
 121                 warned = 1;
 122         }
 123 }
 124
 125 #else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
 126
 127 /*
 128  * per_cpu_call_rcu_data should be constant, but some functions below, used both
 129  * for cases where cpu number is available and not available, assume it it not
 130  * constant.
 131  */
 132 static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
 133 static const long maxcpus = -1;
 134
 135 static void maxcpus_reset(void)
 136 {
 137 }
 138
 139 static void alloc_cpu_call_rcu_data(void)
 140 {
 141 }
 142
 143 static int sched_getcpu(void)
 144 {
 145         return -1;
 146 }
 147
 148 #endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
 149
 150 /* Acquire the specified pthread mutex. */
 151
 152 static void call_rcu_lock(pthread_mutex_t *pmp)
 153 {
 154         if (pthread_mutex_lock(pmp) != 0) {
 155                 perror("pthread_mutex_lock");
 156                 exit(-1);
 157         }
 158 }
 159
 160 /* Release the specified pthread mutex. */
 161
 162 static void call_rcu_unlock(pthread_mutex_t *pmp)
 163 {
 164         if (pthread_mutex_unlock(pmp) != 0) {
 165                 perror("pthread_mutex_unlock");
 166                 exit(-1);
 167         }
 168 }
 169
 170 #if HAVE_SCHED_SETAFFINITY
 171 static
 172 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 173 {
 174         cpu_set_t mask;
 175
 176         if (crdp->cpu_affinity < 0)
 177                 return 0;
 178
 179         CPU_ZERO(&mask);
 180         CPU_SET(crdp->cpu_affinity, &mask);
 181 #if SCHED_SETAFFINITY_ARGS == 2
 182         return sched_setaffinity(0, &mask);
 183 #else
 184         return sched_setaffinity(0, sizeof(mask), &mask);
 185 #endif
 186 }
 187 #else
 188 static
 189 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 190 {
 191         return 0;
 192 }
 193 #endif
 194
 195 static void call_rcu_wait(struct call_rcu_data *crdp)
 196 {
 197         /* Read call_rcu list before read futex */
 198         cmm_smp_mb();
 199         if (uatomic_read(&crdp->futex) == -1)
 200                 futex_async(&crdp->futex, FUTEX_WAIT, -1,
 201                       NULL, NULL, 0);
 202 }
 203
 204 static void call_rcu_wake_up(struct call_rcu_data *crdp)
 205 {
 206         /* Write to call_rcu list before reading/writing futex */
 207         cmm_smp_mb();
 208         if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
 209                 uatomic_set(&crdp->futex, 0);
 210                 futex_async(&crdp->futex, FUTEX_WAKE, 1,
 211                       NULL, NULL, 0);
 212         }
 213 }
 214
 215 /* This is the code run by each call_rcu thread. */
 216
 217 static void *call_rcu_thread(void *arg)
 218 {
 219         unsigned long cbcount;
 220         struct cds_wfq_node *cbs;
 221         struct cds_wfq_node **cbs_tail;
 222         struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
 223         struct rcu_head *rhp;
 224         int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
 225
 226         if (set_thread_cpu_affinity(crdp) != 0) {
 227                 perror("pthread_setaffinity_np");
 228                 exit(-1);
 229         }
 230
 231         /*
 232          * If callbacks take a read-side lock, we need to be registered.
 233          */
 234         rcu_register_thread();
 235
 236         URCU_TLS(thread_call_rcu_data) = crdp;
 237         if (!rt) {
 238                 uatomic_dec(&crdp->futex);
 239                 /* Decrement futex before reading call_rcu list */
 240                 cmm_smp_mb();
 241         }
 242         for (;;) {
 243                 if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 244                         while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 245                                 poll(NULL, 0, 1);
 246                         _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 247                         cbs_tail = (struct cds_wfq_node **)
 248                                 uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
 249                         synchronize_rcu();
 250                         cbcount = 0;
 251                         do {
 252                                 while (cbs->next == NULL &&
 253                                        &cbs->next != cbs_tail)
 254                                         poll(NULL, 0, 1);
 255                                 if (cbs == &crdp->cbs.dummy) {
 256                                         cbs = cbs->next;
 257                                         continue;
 258                                 }
 259                                 rhp = (struct rcu_head *)cbs;
 260                                 cbs = cbs->next;
 261                                 rhp->func(rhp);
 262                                 cbcount++;
 263                         } while (cbs != NULL);
 264                         uatomic_sub(&crdp->qlen, cbcount);
 265                 }
 266                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
 267                         break;
 268                 rcu_thread_offline();
 269                 if (!rt) {
 270                         if (&crdp->cbs.head
 271                             == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 272                                 call_rcu_wait(crdp);
 273                                 poll(NULL, 0, 10);
 274                                 uatomic_dec(&crdp->futex);
 275                                 /*
 276                                  * Decrement futex before reading
 277                                  * call_rcu list.
 278                                  */
 279                                 cmm_smp_mb();
 280                         } else {
 281                                 poll(NULL, 0, 10);
 282                         }
 283                 } else {
 284                         poll(NULL, 0, 10);
 285                 }
 286                 rcu_thread_online();
 287         }
 288         if (!rt) {
 289                 /*
 290                  * Read call_rcu list before write futex.
 291                  */
 292                 cmm_smp_mb();
 293                 uatomic_set(&crdp->futex, 0);
 294         }
 295         uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
 296         rcu_unregister_thread();
 297         return NULL;
 298 }
 299
 300 /*
 301  * Create both a call_rcu thread and the corresponding call_rcu_data
 302  * structure, linking the structure in as specified.  Caller must hold
 303  * call_rcu_mutex.
 304  */
 305
 306 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 307                                unsigned long flags,
 308                                int cpu_affinity)
 309 {
 310         struct call_rcu_data *crdp;
 311
 312         crdp = malloc(sizeof(*crdp));
 313         if (crdp == NULL) {
 314                 fprintf(stderr, "Out of memory.\n");
 315                 exit(-1);
 316         }
 317         memset(crdp, '\0', sizeof(*crdp));
 318         cds_wfq_init(&crdp->cbs);
 319         crdp->qlen = 0;
 320         crdp->futex = 0;
 321         crdp->flags = flags;
 322         cds_list_add(&crdp->list, &call_rcu_data_list);
 323         crdp->cpu_affinity = cpu_affinity;
 324         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 325         *crdpp = crdp;
 326         if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
 327                 perror("pthread_create");
 328                 exit(-1);
 329         }
 330 }
 331
 332 /*
 333  * Return a pointer to the call_rcu_data structure for the specified
 334  * CPU, returning NULL if there is none.  We cannot automatically
 335  * created it because the platform we are running on might not define
 336  * sched_getcpu().
 337  *
 338  * The call to this function and use of the returned call_rcu_data
 339  * should be protected by RCU read-side lock.
 340  */
 341
 342 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 343 {
 344         static int warned = 0;
 345         struct call_rcu_data **pcpu_crdp;
 346
 347         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 348         if (pcpu_crdp == NULL)
 349                 return NULL;
 350         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 351                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 352                 warned = 1;
 353         }
 354         if (cpu < 0 || maxcpus <= cpu)
 355                 return NULL;
 356         return rcu_dereference(pcpu_crdp[cpu]);
 357 }
 358
 359 /*
 360  * Return the tid corresponding to the call_rcu thread whose
 361  * call_rcu_data structure is specified.
 362  */
 363
 364 pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
 365 {
 366         return crdp->tid;
 367 }
 368
 369 /*
 370  * Create a call_rcu_data structure (with thread) and return a pointer.
 371  */
 372
 373 static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
 374                                                     int cpu_affinity)
 375 {
 376         struct call_rcu_data *crdp;
 377
 378         call_rcu_data_init(&crdp, flags, cpu_affinity);
 379         return crdp;
 380 }
 381
 382 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 383                                            int cpu_affinity)
 384 {
 385         struct call_rcu_data *crdp;
 386
 387         call_rcu_lock(&call_rcu_mutex);
 388         crdp = __create_call_rcu_data(flags, cpu_affinity);
 389         call_rcu_unlock(&call_rcu_mutex);
 390         return crdp;
 391 }
 392
 393 /*
 394  * Set the specified CPU to use the specified call_rcu_data structure.
 395  *
 396  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 397  * the caller's responsibility to dispose of the removed structure.
 398  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 399  * (prior to NULLing it out, of course).
 400  *
 401  * The caller must wait for a grace-period to pass between return from
 402  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 403  * previous call rcu data as argument.
 404  */
 405
 406 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 407 {
 408         static int warned = 0;
 409
 410         call_rcu_lock(&call_rcu_mutex);
 411         alloc_cpu_call_rcu_data();
 412         if (cpu < 0 || maxcpus <= cpu) {
 413                 if (!warned) {
 414                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 415                         warned = 1;
 416                 }
 417                 call_rcu_unlock(&call_rcu_mutex);
 418                 errno = EINVAL;
 419                 return -EINVAL;
 420         }
 421
 422         if (per_cpu_call_rcu_data == NULL) {
 423                 call_rcu_unlock(&call_rcu_mutex);
 424                 errno = ENOMEM;
 425                 return -ENOMEM;
 426         }
 427
 428         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 429                 call_rcu_unlock(&call_rcu_mutex);
 430                 errno = EEXIST;
 431                 return -EEXIST;
 432         }
 433
 434         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 435         call_rcu_unlock(&call_rcu_mutex);
 436         return 0;
 437 }
 438
 439 /*
 440  * Return a pointer to the default call_rcu_data structure, creating
 441  * one if need be.  Because we never free call_rcu_data structures,
 442  * we don't need to be in an RCU read-side critical section.
 443  */
 444
 445 struct call_rcu_data *get_default_call_rcu_data(void)
 446 {
 447         if (default_call_rcu_data != NULL)
 448                 return rcu_dereference(default_call_rcu_data);
 449         call_rcu_lock(&call_rcu_mutex);
 450         if (default_call_rcu_data != NULL) {
 451                 call_rcu_unlock(&call_rcu_mutex);
 452                 return default_call_rcu_data;
 453         }
 454         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 455         call_rcu_unlock(&call_rcu_mutex);
 456         return default_call_rcu_data;
 457 }
 458
 459 /*
 460  * Return the call_rcu_data structure that applies to the currently
 461  * running thread.  Any call_rcu_data structure assigned specifically
 462  * to this thread has first priority, followed by any call_rcu_data
 463  * structure assigned to the CPU on which the thread is running,
 464  * followed by the default call_rcu_data structure.  If there is not
 465  * yet a default call_rcu_data structure, one will be created.
 466  *
 467  * Calls to this function and use of the returned call_rcu_data should
 468  * be protected by RCU read-side lock.
 469  */
 470 struct call_rcu_data *get_call_rcu_data(void)
 471 {
 472         struct call_rcu_data *crd;
 473
 474         if (URCU_TLS(thread_call_rcu_data) != NULL)
 475                 return URCU_TLS(thread_call_rcu_data);
 476
 477         if (maxcpus > 0) {
 478                 crd = get_cpu_call_rcu_data(sched_getcpu());
 479                 if (crd)
 480                         return crd;
 481         }
 482
 483         return get_default_call_rcu_data();
 484 }
 485
 486 /*
 487  * Return a pointer to this task's call_rcu_data if there is one.
 488  */
 489
 490 struct call_rcu_data *get_thread_call_rcu_data(void)
 491 {
 492         return URCU_TLS(thread_call_rcu_data);
 493 }
 494
 495 /*
 496  * Set this task's call_rcu_data structure as specified, regardless
 497  * of whether or not this task already had one.  (This allows switching
 498  * to and from real-time call_rcu threads, for example.)
 499  *
 500  * Use NULL to remove a thread's call_rcu_data structure, but it is
 501  * the caller's responsibility to dispose of the removed structure.
 502  * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 503  * (prior to NULLing it out, of course).
 504  */
 505
 506 void set_thread_call_rcu_data(struct call_rcu_data *crdp)
 507 {
 508         URCU_TLS(thread_call_rcu_data) = crdp;
 509 }
 510
 511 /*
 512  * Create a separate call_rcu thread for each CPU.  This does not
 513  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 514  * function if you want that behavior. Should be paired with
 515  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 516  * threads.
 517  */
 518
 519 int create_all_cpu_call_rcu_data(unsigned long flags)
 520 {
 521         int i;
 522         struct call_rcu_data *crdp;
 523         int ret;
 524
 525         call_rcu_lock(&call_rcu_mutex);
 526         alloc_cpu_call_rcu_data();
 527         call_rcu_unlock(&call_rcu_mutex);
 528         if (maxcpus <= 0) {
 529                 errno = EINVAL;
 530                 return -EINVAL;
 531         }
 532         if (per_cpu_call_rcu_data == NULL) {
 533                 errno = ENOMEM;
 534                 return -ENOMEM;
 535         }
 536         for (i = 0; i < maxcpus; i++) {
 537                 call_rcu_lock(&call_rcu_mutex);
 538                 if (get_cpu_call_rcu_data(i)) {
 539                         call_rcu_unlock(&call_rcu_mutex);
 540                         continue;
 541                 }
 542                 crdp = __create_call_rcu_data(flags, i);
 543                 if (crdp == NULL) {
 544                         call_rcu_unlock(&call_rcu_mutex);
 545                         errno = ENOMEM;
 546                         return -ENOMEM;
 547                 }
 548                 call_rcu_unlock(&call_rcu_mutex);
 549                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 550                         call_rcu_data_free(crdp);
 551
 552                         /* it has been created by other thread */
 553                         if (ret == -EEXIST)
 554                                 continue;
 555
 556                         return ret;
 557                 }
 558         }
 559         return 0;
 560 }
 561
 562 /*
 563  * Wake up the call_rcu thread corresponding to the specified
 564  * call_rcu_data structure.
 565  */
 566 static void wake_call_rcu_thread(struct call_rcu_data *crdp)
 567 {
 568         if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
 569                 call_rcu_wake_up(crdp);
 570 }
 571
 572 /*
 573  * Schedule a function to be invoked after a following grace period.
 574  * This is the only function that must be called -- the others are
 575  * only present to allow applications to tune their use of RCU for
 576  * maximum performance.
 577  *
 578  * Note that unless a call_rcu thread has not already been created,
 579  * the first invocation of call_rcu() will create one.  So, if you
 580  * need the first invocation of call_rcu() to be fast, make sure
 581  * to create a call_rcu thread first.  One way to accomplish this is
 582  * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 583  *
 584  * call_rcu must be called by registered RCU read-side threads.
 585  */
 586
 587 void call_rcu(struct rcu_head *head,
 588               void (*func)(struct rcu_head *head))
 589 {
 590         struct call_rcu_data *crdp;
 591
 592         cds_wfq_node_init(&head->next);
 593         head->func = func;
 594         /* Holding rcu read-side lock across use of per-cpu crdp */
 595         rcu_read_lock();
 596         crdp = get_call_rcu_data();
 597         cds_wfq_enqueue(&crdp->cbs, &head->next);
 598         uatomic_inc(&crdp->qlen);
 599         wake_call_rcu_thread(crdp);
 600         rcu_read_unlock();
 601 }
 602
 603 /*
 604  * Free up the specified call_rcu_data structure, terminating the
 605  * associated call_rcu thread.  The caller must have previously
 606  * removed the call_rcu_data structure from per-thread or per-CPU
 607  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 608  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 609  * per-thread call_rcu_data structures.
 610  *
 611  * We silently refuse to free up the default call_rcu_data structure
 612  * because that is where we put any leftover callbacks.  Note that
 613  * the possibility of self-spawning callbacks makes it impossible
 614  * to execute all the callbacks in finite time without putting any
 615  * newly spawned callbacks somewhere else.  The "somewhere else" of
 616  * last resort is the default call_rcu_data structure.
 617  *
 618  * We also silently refuse to free NULL pointers.  This simplifies
 619  * the calling code.
 620  *
 621  * The caller must wait for a grace-period to pass between return from
 622  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 623  * previous call rcu data as argument.
 624  */
 625 void call_rcu_data_free(struct call_rcu_data *crdp)
 626 {
 627         struct cds_wfq_node *cbs;
 628         struct cds_wfq_node **cbs_tail;
 629         struct cds_wfq_node **cbs_endprev;
 630
 631         if (crdp == NULL || crdp == default_call_rcu_data) {
 632                 return;
 633         }
 634         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 635                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 636                 wake_call_rcu_thread(crdp);
 637                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 638                         poll(NULL, 0, 1);
 639         }
 640         if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 641                 while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 642                         poll(NULL, 0, 1);
 643                 _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 644                 cbs_tail = (struct cds_wfq_node **)
 645                         uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
 646                 /* Create default call rcu data if need be */
 647                 (void) get_default_call_rcu_data();
 648                 cbs_endprev = (struct cds_wfq_node **)
 649                         uatomic_xchg(&default_call_rcu_data, cbs_tail);
 650                 *cbs_endprev = cbs;
 651                 uatomic_add(&default_call_rcu_data->qlen,
 652                             uatomic_read(&crdp->qlen));
 653                 wake_call_rcu_thread(default_call_rcu_data);
 654         }
 655
 656         call_rcu_lock(&call_rcu_mutex);
 657         cds_list_del(&crdp->list);
 658         call_rcu_unlock(&call_rcu_mutex);
 659
 660         free(crdp);
 661 }
 662
 663 /*
 664  * Clean up all the per-CPU call_rcu threads.
 665  */
 666 void free_all_cpu_call_rcu_data(void)
 667 {
 668         int cpu;
 669         struct call_rcu_data **crdp;
 670         static int warned = 0;
 671
 672         if (maxcpus <= 0)
 673                 return;
 674
 675         crdp = malloc(sizeof(*crdp) * maxcpus);
 676         if (!crdp) {
 677                 if (!warned) {
 678                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 679                 }
 680                 warned = 1;
 681                 return;
 682         }
 683
 684         for (cpu = 0; cpu < maxcpus; cpu++) {
 685                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 686                 if (crdp[cpu] == NULL)
 687                         continue;
 688                 set_cpu_call_rcu_data(cpu, NULL);
 689         }
 690         /*
 691          * Wait for call_rcu sites acting as RCU readers of the
 692          * call_rcu_data to become quiescent.
 693          */
 694         synchronize_rcu();
 695         for (cpu = 0; cpu < maxcpus; cpu++) {
 696                 if (crdp[cpu] == NULL)
 697                         continue;
 698                 call_rcu_data_free(crdp[cpu]);
 699         }
 700         free(crdp);
 701 }
 702
 703 /*
 704  * Acquire the call_rcu_mutex in order to ensure that the child sees
 705  * all of the call_rcu() data structures in a consistent state.
 706  * Suitable for pthread_atfork() and friends.
 707  */
 708 void call_rcu_before_fork(void)
 709 {
 710         call_rcu_lock(&call_rcu_mutex);
 711 }
 712
 713 /*
 714  * Clean up call_rcu data structures in the parent of a successful fork()
 715  * that is not followed by exec() in the child.  Suitable for
 716  * pthread_atfork() and friends.
 717  */
 718 void call_rcu_after_fork_parent(void)
 719 {
 720         call_rcu_unlock(&call_rcu_mutex);
 721 }
 722
 723 /*
 724  * Clean up call_rcu data structures in the child of a successful fork()
 725  * that is not followed by exec().  Suitable for pthread_atfork() and
 726  * friends.
 727  */
 728 void call_rcu_after_fork_child(void)
 729 {
 730         struct call_rcu_data *crdp, *next;
 731
 732         /* Release the mutex. */
 733         call_rcu_unlock(&call_rcu_mutex);
 734
 735         /* Do nothing when call_rcu() has not been used */
 736         if (cds_list_empty(&call_rcu_data_list))
 737                 return;
 738
 739         /*
 740          * Allocate a new default call_rcu_data structure in order
 741          * to get a working call_rcu thread to go with it.
 742          */
 743         default_call_rcu_data = NULL;
 744         (void)get_default_call_rcu_data();
 745
 746         /* Cleanup call_rcu_data pointers before use */
 747         maxcpus_reset();
 748         free(per_cpu_call_rcu_data);
 749         rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
 750         URCU_TLS(thread_call_rcu_data) = NULL;
 751
 752         /* Dispose of all of the rest of the call_rcu_data structures. */
 753         cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 754                 if (crdp == default_call_rcu_data)
 755                         continue;
 756                 uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 757                 call_rcu_data_free(crdp);
 758         }
 759 }