/*
 * urcu-call-rcu.c
 *
 * Userspace RCU library - batch memory reclamation with kernel API
 *
 * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <sys/time.h>
#include <unistd.h>
#include <sched.h>

#include "config.h"
#include "urcu/wfqueue.h"
#include "urcu-call-rcu.h"
#include "urcu-pointer.h"
#include "urcu/list.h"
#include "urcu/futex.h"

/* Data structure that identifies a call_rcu thread. */

struct call_rcu_data {
	struct cds_wfq_queue cbs;
	unsigned long flags;
	int32_t futex;
	unsigned long qlen; /* maintained for debugging. */
	pthread_t tid;
	int cpu_affinity;
	struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Protected by call_rcu_mutex.
 */

CDS_LIST_HEAD(call_rcu_data_list);

/* Link a thread using call_rcu() to its call_rcu thread. */

static __thread struct call_rcu_data *thread_call_rcu_data;

/* Guard call_rcu thread creation. */

static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/* If a given thread does not have its own call_rcu thread, this is the default. */

static struct call_rcu_data *default_call_rcu_data;

/*
 * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 * available, then we can have call_rcu threads assigned to individual
 * CPUs rather than only to specific threads.
 */

#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)

/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;

/* Allocate the array if it has not already been allocated. */

static void alloc_cpu_call_rcu_data(void)
{
	struct call_rcu_data **p;
	static int warned = 0;

	if (maxcpus != 0)
		return;
	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	if (maxcpus <= 0) {
		return;
	}
	p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
	if (p != NULL) {
		memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
		per_cpu_call_rcu_data = p;
	} else {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		}
		warned = 1;
	}
}

#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

static const struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;

static void alloc_cpu_call_rcu_data(void)
{
}

static int sched_getcpu(void)
{
	return -1;
}

#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/* Acquire the specified pthread mutex. */

static void call_rcu_lock(pthread_mutex_t *pmp)
{
	if (pthread_mutex_lock(pmp) != 0) {
		perror("pthread_mutex_lock");
		exit(-1);
	}
}

/* Release the specified pthread mutex. */

static void call_rcu_unlock(pthread_mutex_t *pmp)
{
	if (pthread_mutex_unlock(pmp) != 0) {
		perror("pthread_mutex_unlock");
		exit(-1);
	}
}

#if HAVE_SCHED_SETAFFINITY
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	cpu_set_t mask;

	if (crdp->cpu_affinity < 0)
		return 0;

	CPU_ZERO(&mask);
	CPU_SET(crdp->cpu_affinity, &mask);
#if SCHED_SETAFFINITY_ARGS == 2
	return sched_setaffinity(0, &mask);
#else
	return sched_setaffinity(0, sizeof(mask), &mask);
#endif
}
#else
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	return 0;
}
#endif

static void call_rcu_wait(struct call_rcu_data *crdp)
{
	/* Read call_rcu list before read futex */
	cmm_smp_mb();
	if (uatomic_read(&crdp->futex) == -1)
		futex_async(&crdp->futex, FUTEX_WAIT, -1,
			    NULL, NULL, 0);
}

static void call_rcu_wake_up(struct call_rcu_data *crdp)
{
	/* Write to call_rcu list before reading/writing futex */
	cmm_smp_mb();
	if (unlikely(uatomic_read(&crdp->futex) == -1)) {
		uatomic_set(&crdp->futex, 0);
		futex_async(&crdp->futex, FUTEX_WAKE, 1,
			    NULL, NULL, 0);
	}
}

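/*
 * Illustrative sketch, not part of this file: the two helpers above
 * implement the usual futex handshake. The call_rcu thread announces
 * that it is about to block by setting its futex to -1 and only then
 * re-checks its callback queue; enqueuers only issue the relatively
 * expensive FUTEX_WAKE when they observe -1. In outline:
 *
 *	waiter (call_rcu thread):
 *		uatomic_dec(&crdp->futex);	(futex becomes -1)
 *		cmm_smp_mb();			(pairs with waker's barrier)
 *		if (queue still empty)
 *			call_rcu_wait(crdp);	(FUTEX_WAIT while futex == -1)
 *
 *	waker (thread invoking call_rcu()):
 *		enqueue callback;
 *		cmm_smp_mb();			(in call_rcu_wake_up())
 *		if (uatomic_read(&crdp->futex) == -1) {
 *			uatomic_set(&crdp->futex, 0);
 *			futex_async(&crdp->futex, FUTEX_WAKE, 1, NULL, NULL, 0);
 *		}
 *
 * The paired barriers guarantee that either the waker sees -1 and issues
 * the wakeup, or the waiter sees the newly enqueued callback and does
 * not block.
 */
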
/* This is the code run by each call_rcu thread. */

static void *call_rcu_thread(void *arg)
{
	unsigned long cbcount;
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
	struct rcu_head *rhp;
	int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);

	if (set_thread_cpu_affinity(crdp) != 0) {
		perror("sched_setaffinity");
		exit(-1);
	}

	thread_call_rcu_data = crdp;
	if (!rt) {
		uatomic_dec(&crdp->futex);
		/* Decrement futex before reading call_rcu list */
		cmm_smp_mb();
	}
	for (;;) {
		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
			while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
				poll(NULL, 0, 1);
			_CMM_STORE_SHARED(crdp->cbs.head, NULL);
			cbs_tail = (struct cds_wfq_node **)
				uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
			synchronize_rcu();
			cbcount = 0;
			do {
				while (cbs->next == NULL &&
				       &cbs->next != cbs_tail)
					poll(NULL, 0, 1);
				if (cbs == &crdp->cbs.dummy) {
					cbs = cbs->next;
					continue;
				}
				rhp = (struct rcu_head *)cbs;
				cbs = cbs->next;
				rhp->func(rhp);
				cbcount++;
			} while (cbs != NULL);
			uatomic_sub(&crdp->qlen, cbcount);
		}
		if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
			break;
		if (!rt) {
			if (&crdp->cbs.head
			    == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
				call_rcu_wait(crdp);
				poll(NULL, 0, 10);
				uatomic_dec(&crdp->futex);
				/*
				 * Decrement futex before reading
				 * call_rcu list.
				 */
				cmm_smp_mb();
			} else {
				poll(NULL, 0, 10);
			}
		} else {
			poll(NULL, 0, 10);
		}
	}
	if (!rt) {
		/*
		 * Read call_rcu list before write futex.
		 */
		cmm_smp_mb();
		uatomic_set(&crdp->futex, 0);
	}
	uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
	return NULL;
}

/*
 * Create both a call_rcu thread and the corresponding call_rcu_data
 * structure, linking the structure in as specified. Caller must hold
 * call_rcu_mutex.
 */

static void call_rcu_data_init(struct call_rcu_data **crdpp,
			       unsigned long flags,
			       int cpu_affinity)
{
	struct call_rcu_data *crdp;

	crdp = malloc(sizeof(*crdp));
	if (crdp == NULL) {
		fprintf(stderr, "Out of memory.\n");
		exit(-1);
	}
	memset(crdp, '\0', sizeof(*crdp));
	cds_wfq_init(&crdp->cbs);
	crdp->qlen = 0;
	crdp->futex = 0;
	crdp->flags = flags;
	cds_list_add(&crdp->list, &call_rcu_data_list);
	crdp->cpu_affinity = cpu_affinity;
	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
	*crdpp = crdp;
	if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
		perror("pthread_create");
		exit(-1);
	}
}

/*
 * Return a pointer to the call_rcu_data structure for the specified
 * CPU, returning NULL if there is none. We cannot automatically
 * create it because the platform we are running on might not define
 * sched_getcpu().
 */

struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
{
	static int warned = 0;

	if (per_cpu_call_rcu_data == NULL)
		return NULL;
	if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
		fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
		warned = 1;
	}
	if (cpu < 0 || maxcpus <= cpu)
		return NULL;
	return per_cpu_call_rcu_data[cpu];
}

/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
	return crdp->tid;
}

/*
 * Create a call_rcu_data structure (with thread) and return a pointer.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
						    int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_data_init(&crdp, flags, cpu_affinity);
	return crdp;
}

struct call_rcu_data *create_call_rcu_data(unsigned long flags,
					   int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_lock(&call_rcu_mutex);
	crdp = __create_call_rcu_data(flags, cpu_affinity);
	call_rcu_unlock(&call_rcu_mutex);
	return crdp;
}

/*
 * Set the specified CPU to use the specified call_rcu_data structure.
 *
 * Use NULL to remove a CPU's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
{
	int warned = 0;

	call_rcu_lock(&call_rcu_mutex);
	if (cpu < 0 || maxcpus <= cpu) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
			warned = 1;
		}
		call_rcu_unlock(&call_rcu_mutex);
		errno = EINVAL;
		return -EINVAL;
	}
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	per_cpu_call_rcu_data[cpu] = crdp;
	return 0;
}

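/*
 * Illustrative sketch, not part of this file: replacing a CPU's
 * call_rcu_data structure and disposing of the old one, following the
 * protocol described above (error handling omitted; oldcrdp may be
 * NULL, which call_rcu_data_free() tolerates):
 *
 *	struct call_rcu_data *oldcrdp, *newcrdp;
 *
 *	oldcrdp = get_cpu_call_rcu_data(cpu);
 *	newcrdp = create_call_rcu_data(0, cpu);
 *	(void)set_cpu_call_rcu_data(cpu, newcrdp);
 *	call_rcu_data_free(oldcrdp);
 */
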
/*
 * Return a pointer to the default call_rcu_data structure, creating
 * one if need be. Because we never free call_rcu_data structures,
 * we don't need to be in an RCU read-side critical section.
 */

struct call_rcu_data *get_default_call_rcu_data(void)
{
	if (default_call_rcu_data != NULL)
		return rcu_dereference(default_call_rcu_data);
	call_rcu_lock(&call_rcu_mutex);
	if (default_call_rcu_data != NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		return default_call_rcu_data;
	}
	call_rcu_data_init(&default_call_rcu_data, 0, -1);
	call_rcu_unlock(&call_rcu_mutex);
	return default_call_rcu_data;
}

/*
 * Return the call_rcu_data structure that applies to the currently
 * running thread. Any call_rcu_data structure assigned specifically
 * to this thread has first priority, followed by any call_rcu_data
 * structure assigned to the CPU on which the thread is running,
 * followed by the default call_rcu_data structure. If there is not
 * yet a default call_rcu_data structure, one will be created.
 */
struct call_rcu_data *get_call_rcu_data(void)
{
	int curcpu;
	static int warned = 0;

	if (thread_call_rcu_data != NULL)
		return thread_call_rcu_data;
	if (maxcpus <= 0)
		return get_default_call_rcu_data();
	curcpu = sched_getcpu();
	if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
		fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
		warned = 1;
	}
	if (curcpu >= 0 && maxcpus > curcpu &&
	    per_cpu_call_rcu_data != NULL &&
	    per_cpu_call_rcu_data[curcpu] != NULL)
		return per_cpu_call_rcu_data[curcpu];
	return get_default_call_rcu_data();
}

/*
 * Return a pointer to this task's call_rcu_data if there is one.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
	return thread_call_rcu_data;
}

/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one. (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
	thread_call_rcu_data = crdp;
}

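/*
 * Illustrative sketch, not part of this file: giving the current
 * thread its own real-time call_rcu thread and later reverting to the
 * shared default, as the comment above suggests. Assumes the
 * URCU_CALL_RCU_RT flag declared in urcu-call-rcu.h:
 *
 *	struct call_rcu_data *crdp;
 *
 *	crdp = create_call_rcu_data(URCU_CALL_RCU_RT, -1);
 *	set_thread_call_rcu_data(crdp);
 *	(... use call_rcu() as usual; callbacks go to the RT thread ...)
 *	set_thread_call_rcu_data(NULL);
 *	call_rcu_data_free(crdp);
 */
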
/*
 * Create a separate call_rcu thread for each CPU. This does not
 * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 * function if you want that behavior.
 */

int create_all_cpu_call_rcu_data(unsigned long flags)
{
	int i;
	struct call_rcu_data *crdp;
	int ret;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (maxcpus <= 0) {
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	for (i = 0; i < maxcpus; i++) {
		call_rcu_lock(&call_rcu_mutex);
		if (get_cpu_call_rcu_data(i)) {
			call_rcu_unlock(&call_rcu_mutex);
			continue;
		}
		crdp = __create_call_rcu_data(flags, i);
		if (crdp == NULL) {
			call_rcu_unlock(&call_rcu_mutex);
			errno = ENOMEM;
			return -ENOMEM;
		}
		call_rcu_unlock(&call_rcu_mutex);
		if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
			/* FIXME: Leaks crdp for now. */
			return ret; /* Can happen on race. */
		}
	}
	return 0;
}

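/*
 * Illustrative sketch, not part of this file: a program wanting
 * per-CPU callback processing would typically invoke this once during
 * initialization, before its first call_rcu():
 *
 *	if (create_all_cpu_call_rcu_data(0) != 0)
 *		perror("create_all_cpu_call_rcu_data");
 *
 * Passing flags of 0 requests ordinary (non-real-time) call_rcu
 * threads; each thread is affined to its CPU where sched_setaffinity()
 * is available. perror() works here because the function sets errno
 * on failure.
 */
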
/*
 * Wake up the call_rcu thread corresponding to the specified
 * call_rcu_data structure.
 */
static void wake_call_rcu_thread(struct call_rcu_data *crdp)
{
	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
		call_rcu_wake_up(crdp);
}

/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * Note that unless a call_rcu thread has already been created, the
 * first invocation of call_rcu() will create one. So, if you need
 * the first invocation of call_rcu() to be fast, make sure to create
 * a call_rcu thread first. One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 */

void call_rcu(struct rcu_head *head,
	      void (*func)(struct rcu_head *head))
{
	struct call_rcu_data *crdp;

	cds_wfq_node_init(&head->next);
	head->func = func;
	crdp = get_call_rcu_data();
	cds_wfq_enqueue(&crdp->cbs, &head->next);
	uatomic_inc(&crdp->qlen);
	wake_call_rcu_thread(crdp);
}

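/*
 * Illustrative sketch, not part of this file: the canonical call_rcu()
 * usage pattern, embedding the rcu_head in the protected structure.
 * "struct foo" and free_foo() are hypothetical names;
 * caa_container_of() comes from urcu/compiler.h:
 *
 *	struct foo {
 *		int a;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void free_foo(struct rcu_head *head)
 *	{
 *		struct foo *p = caa_container_of(head, struct foo, rcu);
 *
 *		free(p);
 *	}
 *
 *	(... once p has been removed from all RCU-visible structures ...)
 *	call_rcu(&p->rcu, free_foo);
 */
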
/*
 * Free up the specified call_rcu_data structure, terminating the
 * associated call_rcu thread. The caller must have previously
 * removed the call_rcu_data structure from per-thread or per-CPU
 * usage. For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 * per-thread call_rcu_data structures.
 *
 * We silently refuse to free up the default call_rcu_data structure
 * because that is where we put any leftover callbacks. Note that
 * the possibility of self-spawning callbacks makes it impossible
 * to execute all the callbacks in finite time without putting any
 * newly spawned callbacks somewhere else. The "somewhere else" of
 * last resort is the default call_rcu_data structure.
 *
 * We also silently refuse to free NULL pointers. This simplifies
 * the calling code.
 */
void call_rcu_data_free(struct call_rcu_data *crdp)
{
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct cds_wfq_node **cbs_endprev;

	if (crdp == NULL || crdp == default_call_rcu_data) {
		return;
	}
	if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
		uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
		wake_call_rcu_thread(crdp);
		while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
			poll(NULL, 0, 1);
	}
	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
		while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
			poll(NULL, 0, 1);
		_CMM_STORE_SHARED(crdp->cbs.head, NULL);
		cbs_tail = (struct cds_wfq_node **)
			uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
		/* Splice any leftover callbacks onto the default queue. */
		cbs_endprev = (struct cds_wfq_node **)
			uatomic_xchg(&default_call_rcu_data->cbs.tail, cbs_tail);
		*cbs_endprev = cbs;
		uatomic_add(&default_call_rcu_data->qlen,
			    uatomic_read(&crdp->qlen));
		/* Ensure the default thread processes the spliced callbacks. */
		wake_call_rcu_thread(default_call_rcu_data);
	}
	cds_list_del(&crdp->list);
	free(crdp);
}

/*
 * Clean up all the per-CPU call_rcu threads.
 */
void free_all_cpu_call_rcu_data(void)
{
	int cpu;
	struct call_rcu_data *crdp;

	if (maxcpus <= 0)
		return;
	for (cpu = 0; cpu < maxcpus; cpu++) {
		crdp = get_cpu_call_rcu_data(cpu);
		if (crdp == NULL)
			continue;
		set_cpu_call_rcu_data(cpu, NULL);
		call_rcu_data_free(crdp);
	}
}

/*
 * Acquire the call_rcu_mutex in order to ensure that the child sees
 * all of the call_rcu() data structures in a consistent state.
 * Suitable for pthread_atfork() and friends.
 */
void call_rcu_before_fork(void)
{
	call_rcu_lock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the parent of a successful fork()
 * that is not followed by exec() in the child. Suitable for
 * pthread_atfork() and friends.
 */
void call_rcu_after_fork_parent(void)
{
	call_rcu_unlock(&call_rcu_mutex);
}

/*
 * Clean up call_rcu data structures in the child of a successful fork()
 * that is not followed by exec(). Suitable for pthread_atfork() and
 * friends.
 */
void call_rcu_after_fork_child(void)
{
	struct call_rcu_data *crdp;

	/* Release the mutex. */
	call_rcu_unlock(&call_rcu_mutex);

	/*
	 * Allocate a new default call_rcu_data structure in order
	 * to get a working call_rcu thread to go with it.
	 */
	default_call_rcu_data = NULL;
	(void)get_default_call_rcu_data();

	/* Dispose of all of the rest of the call_rcu_data structures. */
	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
		crdp = cds_list_entry(call_rcu_data_list.prev,
				      struct call_rcu_data, list);
		if (crdp == default_call_rcu_data)
			crdp = cds_list_entry(crdp->list.prev,
					      struct call_rcu_data, list);
		uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
		call_rcu_data_free(crdp);
	}
}
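
/*
 * Illustrative usage sketch, not part of this file: the three fork
 * handlers above are meant to be registered together, typically once
 * at program startup:
 *
 *	pthread_atfork(call_rcu_before_fork,
 *		       call_rcu_after_fork_parent,
 *		       call_rcu_after_fork_child);
 */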