From: Mathieu Desnoyers Date: Mon, 29 Jun 2015 22:45:07 +0000 (-0400) Subject: Fix: call_rcu_thread() affinity failure X-Git-Tag: v0.7.15~5 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=5a631bd917d9173deda3fa06cf2ec22ab9db3194;p=userspace-rcu.git Fix: call_rcu_thread() affinity failure Make call_rcu_thread() affine itself more persistently Currently, URCU simply fails if a call_rcu_thread() fails to affine itself. This is problematic when execution is constrained by cgroup and hotunplugged CPUs. This commit therefore makes call_rcu_thread() retry setting its affinity every 256 grace periods, but only if it detects that it migrated to a different CPU. Since sched_getcpu() is cheap on many architectures, this check is less costly than going through a system call. Reported-by: Michael Jeanson Suggested-by: Paul E. McKenney Acked-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers --- diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h index 51ee91f..e70789a 100644 --- a/urcu-call-rcu-impl.h +++ b/urcu-call-rcu-impl.h @@ -43,6 +43,9 @@ #include "urcu/tls-compat.h" #include "urcu-die.h" +#define SET_AFFINITY_CHECK_PERIOD (1U << 8) /* 256 */ +#define SET_AFFINITY_CHECK_PERIOD_MASK (SET_AFFINITY_CHECK_PERIOD - 1) + /* Data structure that identifies a call_rcu thread. */ struct call_rcu_data { @@ -52,6 +55,7 @@ struct call_rcu_data { unsigned long qlen; /* maintained for debugging. */ pthread_t tid; int cpu_affinity; + unsigned long gp_count; struct cds_list_head list; } __attribute__((aligned(CAA_CACHE_LINE_SIZE))); @@ -182,22 +186,42 @@ static void call_rcu_unlock(pthread_mutex_t *pmp) urcu_die(ret); } +/* + * Periodically retry setting CPU affinity if we migrate. + * Losing affinity can be caused by CPU hotunplug/hotplug, or by + * cpuset(7). + */ #if HAVE_SCHED_SETAFFINITY static int set_thread_cpu_affinity(struct call_rcu_data *crdp) { cpu_set_t mask; + int ret; if (crdp->cpu_affinity < 0) return 0; + if (++crdp->gp_count & SET_AFFINITY_CHECK_PERIOD_MASK) + return 0; + if (urcu_sched_getcpu() == crdp->cpu_affinity) + return 0; CPU_ZERO(&mask); CPU_SET(crdp->cpu_affinity, &mask); #if SCHED_SETAFFINITY_ARGS == 2 - return sched_setaffinity(0, &mask); + ret = sched_setaffinity(0, &mask); #else - return sched_setaffinity(0, sizeof(mask), &mask); + ret = sched_setaffinity(0, sizeof(mask), &mask); #endif + /* + * EINVAL is fine: can be caused by hotunplugged CPUs, or by + * cpuset(7). This is why we should always retry if we detect + * migration. + */ + if (ret && errno == EINVAL) { + ret = 0; + errno = 0; + } + return ret; } #else static @@ -237,10 +261,8 @@ static void *call_rcu_thread(void *arg) struct call_rcu_data *crdp = (struct call_rcu_data *)arg; struct rcu_head *rhp; int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT); - int ret; - ret = set_thread_cpu_affinity(crdp); - if (ret) + if (set_thread_cpu_affinity(crdp)) urcu_die(errno); /* @@ -255,6 +277,9 @@ static void *call_rcu_thread(void *arg) cmm_smp_mb(); } for (;;) { + if (set_thread_cpu_affinity(crdp)) + urcu_die(errno); + if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) { /* * Pause requested. Become quiescent: remove @@ -352,6 +377,7 @@ static void call_rcu_data_init(struct call_rcu_data **crdpp, crdp->flags = flags; cds_list_add(&crdp->list, &call_rcu_data_list); crdp->cpu_affinity = cpu_affinity; + crdp->gp_count = 0; cmm_smp_mb(); /* Structure initialized before pointer is planted. */ *crdpp = crdp; ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);