From 017308529e8daf68c7a9da1cdf692f5d149d0ccd Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Wed, 27 Jul 2022 10:44:00 -0400 Subject: [PATCH] fix: sysconf(_SC_NPROCESSORS_CONF) can be less than max cpu id We rely on sysconf(_SC_NPROCESSORS_CONF) to get the maximum possible number of CPUs that can be attached to the system for the lifetime of an application. As such we expect that the highest possible CPU id would be one less than the number returned by sysconf(_SC_NPROCESSORS_CONF) which is unfortunatly not always the case and can vary across libc implementations and versions. Glibc up to 2.35 will count the number of "cpuX" directories in "/sys/devices/system/cpu" which doesn't include CPUS that were hot-unplugged. This information is however provided by the Linux kernel in "/sys/devices/system/cpu/possible" in the form of a mask listing all the CPUs that could possibly be hot-plugged in the system. This patch replaces sysconf(_SC_NPROCESSORS_CONF) with an internal function that first tries parsing the possible CPU mask to extract the highest possible value and if this fails fallback to the previous behavior. Change-Id: I68dfed42ebbab02728a02eeefd4a395a22bb1bea Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers --- src/Makefile.am | 2 +- src/compat-smp.h | 275 +++++++++++++++++++++++++++++++++++++++ src/rculfhash.c | 10 +- src/urcu-call-rcu-impl.h | 43 +++--- 4 files changed, 300 insertions(+), 30 deletions(-) create mode 100644 src/compat-smp.h diff --git a/src/Makefile.am b/src/Makefile.am index 2ec35de..d989537 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -8,7 +8,7 @@ AM_LDFLAGS+=-no-undefined endif dist_noinst_HEADERS = urcu-die.h urcu-wait.h compat-getcpu.h \ - compat-rand.h urcu-utils.h + compat-rand.h urcu-utils.h compat-smp.h if COMPAT_ARCH COMPAT=compat_arch_@ARCHTYPE@.c diff --git a/src/compat-smp.h b/src/compat-smp.h new file mode 100644 index 0000000..7240d2f --- /dev/null +++ b/src/compat-smp.h @@ -0,0 +1,275 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * Copyright (C) 2019 Michael Jeanson + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define URCU_CPUMASK_SIZE 4096 + +#if defined(HAVE_SYSCONF) +static inline int get_num_possible_cpus_sysconf(void) +{ + return sysconf(_SC_NPROCESSORS_CONF); +} +#else +/* + * On platforms without sysconf(), always return -1. + */ +static inline int get_num_possible_cpus_sysconf(void) +{ + return -1; +} +#endif + +/* + * Get the highest CPU id from sysfs. + * + * Iterate on all the folders in "/sys/devices/system/cpu" that start with + * "cpu" followed by an integer, keep the highest CPU id encountered during + * this iteration and add 1 to get a number of CPUs. + * + * Returns the highest CPU id, or -1 on error. + */ +static inline int _get_max_cpuid_from_sysfs(const char *path) +{ + long max_cpuid = -1; + + DIR *cpudir; + struct dirent *entry; + + assert(path); + + cpudir = opendir(path); + if (cpudir == NULL) + goto end; + + /* + * Iterate on all directories named "cpu" followed by an integer. + */ + while ((entry = readdir(cpudir))) { + if (entry->d_type == DT_DIR && + strncmp(entry->d_name, "cpu", 3) == 0) { + + char *endptr; + long cpu_id; + + cpu_id = strtol(entry->d_name + 3, &endptr, 10); + if ((cpu_id < LONG_MAX) && (endptr != entry->d_name + 3) + && (*endptr == '\0')) { + if (cpu_id > max_cpuid) + max_cpuid = cpu_id; + } + } + } + + if (closedir(cpudir)) + perror("closedir"); + + /* + * If the max CPU id is out of bound, set it to -1 so it results in a + * CPU num of 0. + */ + if (max_cpuid < 0 || max_cpuid > INT_MAX) + max_cpuid = -1; + +end: + return max_cpuid; +} + +static inline int get_max_cpuid_from_sysfs(void) +{ + return _get_max_cpuid_from_sysfs("/sys/devices/system/cpu"); +} + + +/* + * As a fallback to parsing the CPU mask in "/sys/devices/system/cpu/possible", + * iterate on all the folders in "/sys/devices/system/cpu" that start with + * "cpu" followed by an integer, keep the highest CPU id encountered during + * this iteration and add 1 to get a number of CPUs. + * + * Then get the value from sysconf(_SC_NPROCESSORS_CONF) as a fallback and + * return the highest one. + * + * On Linux, using the value from sysconf can be unreliable since the way it + * counts CPUs varies between C libraries and even between versions of the same + * library. If we used it directly, getcpu() could return a value greater than + * this sysconf, in which case the arrays indexed by processor would overflow. + * + * As another example, the MUSL libc implementation of the _SC_NPROCESSORS_CONF + * sysconf does not return the number of configured CPUs in the system but + * relies on the cpu affinity mask of the current task. + * + * Returns 0 or less on error. + */ +static inline int get_num_possible_cpus_fallback(void) +{ + /* + * Get the sysconf value as a last resort. Keep the highest number. + */ + return caa_max(get_num_possible_cpus_sysconf(), get_max_cpuid_from_sysfs() + 1); +} + +/* + * Get a CPU mask string from sysfs. + * + * buf: the buffer where the mask will be read. + * max_bytes: the maximum number of bytes to write in the buffer. + * path: file path to read the mask from. + * + * Returns the number of bytes read or -1 on error. + */ +static inline int get_cpu_mask_from_sysfs(char *buf, size_t max_bytes, const char *path) +{ + ssize_t bytes_read = 0; + size_t total_bytes_read = 0; + int fd = -1, ret = -1; + + assert(path); + + if (buf == NULL) + goto end; + + fd = open(path, O_RDONLY); + if (fd < 0) + goto end; + + do { + bytes_read = read(fd, buf + total_bytes_read, + max_bytes - total_bytes_read); + + if (bytes_read < 0) { + if (errno == EINTR) { + continue; /* retry operation */ + } else { + goto end; + } + } + + total_bytes_read += bytes_read; + assert(total_bytes_read <= max_bytes); + } while (max_bytes > total_bytes_read && bytes_read > 0); + + /* + * Make sure the mask read is a null terminated string. + */ + if (total_bytes_read < max_bytes) + buf[total_bytes_read] = '\0'; + else + buf[max_bytes - 1] = '\0'; + + if (total_bytes_read > INT_MAX) + goto end; + + ret = (int) total_bytes_read; + +end: + if (fd >= 0 && close(fd) < 0) + perror("close"); + + return ret; +} + +/* + * Get the CPU possible mask string from sysfs. + * + * buf: the buffer where the mask will be read. + * max_bytes: the maximum number of bytes to write in the buffer. + * + * Returns the number of bytes read or -1 on error. + */ +static inline int get_possible_cpu_mask_from_sysfs(char *buf, size_t max_bytes) +{ + return get_cpu_mask_from_sysfs(buf, max_bytes, + "/sys/devices/system/cpu/possible"); +} + +/* + * Get the highest CPU id from the possible CPU mask. + * + * pmask: the mask to parse. + * len: the len of the mask excluding '\0'. + * + * Returns the highest CPU id from the mask or -1 on error. + */ +static inline int get_max_cpuid_from_mask(const char *pmask, size_t len) +{ + ssize_t i; + unsigned long cpu_index; + char *endptr; + + /* We need at least one char to read */ + if (len < 1) + goto error; + + /* Start from the end to read the last CPU index. */ + for (i = len - 1; i > 0; i--) { + /* Break when we hit the first separator. */ + if ((pmask[i] == ',') || (pmask[i] == '-')) { + i++; + break; + } + } + + cpu_index = strtoul(&pmask[i], &endptr, 10); + + if ((&pmask[i] != endptr) && (cpu_index < INT_MAX)) + return (int) cpu_index; + +error: + return -1; +} + +#ifdef __linux__ +/* + * On Linux try sysfs first and fallback to sysconf. + */ +static inline int get_possible_cpus_array_len(void) +{ + int ret; + char buf[URCU_CPUMASK_SIZE]; + + /* Get the possible cpu mask from sysfs, fallback to sysconf. */ + ret = get_possible_cpu_mask_from_sysfs((char *) &buf, URCU_CPUMASK_SIZE); + if (ret <= 0) + goto fallback; + + /* Parse the possible cpu mask, on failure fallback to sysconf. */ + ret = get_max_cpuid_from_mask((char *) &buf, ret); + if (ret >= 0) { + /* Add 1 to convert from max cpuid to an array len. */ + ret++; + goto end; + } + +fallback: + /* Fallback to sysconf. */ + ret = get_num_possible_cpus_fallback(); + +end: + return ret; +} +#else +/* + * On other platforms, only use sysconf. + */ +static inline int get_possible_cpus_array_len(void) +{ + return get_num_possible_cpus_sysconf(); +} +#endif diff --git a/src/rculfhash.c b/src/rculfhash.c index c286f43..95b13a2 100644 --- a/src/rculfhash.c +++ b/src/rculfhash.c @@ -281,6 +281,7 @@ #include "workqueue.h" #include "urcu-die.h" #include "urcu-utils.h" +#include "compat-smp.h" /* * Split-counters lazily update the global counter each 1024 @@ -645,12 +646,11 @@ static long nr_cpus_mask = -1; static long split_count_mask = -1; static int split_count_order = -1; -#if defined(HAVE_SYSCONF) static void ht_init_nr_cpus_mask(void) { long maxcpus; - maxcpus = sysconf(_SC_NPROCESSORS_CONF); + maxcpus = get_possible_cpus_array_len(); if (maxcpus <= 0) { nr_cpus_mask = -2; return; @@ -662,12 +662,6 @@ static void ht_init_nr_cpus_mask(void) maxcpus = 1UL << cds_lfht_get_count_order_ulong(maxcpus); nr_cpus_mask = maxcpus - 1; } -#else /* #if defined(HAVE_SYSCONF) */ -static void ht_init_nr_cpus_mask(void) -{ - nr_cpus_mask = -2; -} -#endif /* #else #if defined(HAVE_SYSCONF) */ static void alloc_split_items_count(struct cds_lfht *ht) diff --git a/src/urcu-call-rcu-impl.h b/src/urcu-call-rcu-impl.h index 3627e6e..5d3ca4b 100644 --- a/src/urcu-call-rcu-impl.h +++ b/src/urcu-call-rcu-impl.h @@ -44,6 +44,7 @@ #include #include "urcu-die.h" #include "urcu-utils.h" +#include "compat-smp.h" #define SET_AFFINITY_CHECK_PERIOD (1U << 8) /* 256 */ #define SET_AFFINITY_CHECK_PERIOD_MASK (SET_AFFINITY_CHECK_PERIOD - 1) @@ -120,11 +121,11 @@ static unsigned long registered_rculfhash_atfork_refcount; */ static struct call_rcu_data **per_cpu_call_rcu_data; -static long maxcpus; +static long cpus_array_len; -static void maxcpus_reset(void) +static void cpus_array_len_reset(void) { - maxcpus = 0; + cpus_array_len = 0; } /* Allocate the array if it has not already been allocated. */ @@ -134,15 +135,15 @@ static void alloc_cpu_call_rcu_data(void) struct call_rcu_data **p; static int warned = 0; - if (maxcpus != 0) + if (cpus_array_len != 0) return; - maxcpus = sysconf(_SC_NPROCESSORS_CONF); - if (maxcpus <= 0) { + cpus_array_len = get_possible_cpus_array_len(); + if (cpus_array_len <= 0) { return; } - p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data)); + p = malloc(cpus_array_len * sizeof(*per_cpu_call_rcu_data)); if (p != NULL) { - memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data)); + memset(p, '\0', cpus_array_len * sizeof(*per_cpu_call_rcu_data)); rcu_set_pointer(&per_cpu_call_rcu_data, p); } else { if (!warned) { @@ -160,9 +161,9 @@ static void alloc_cpu_call_rcu_data(void) * constant. */ static struct call_rcu_data **per_cpu_call_rcu_data = NULL; -static const long maxcpus = -1; +static const long cpus_array_len = -1; -static void maxcpus_reset(void) +static void cpus_array_len_reset(void) { } @@ -473,11 +474,11 @@ struct call_rcu_data *get_cpu_call_rcu_data(int cpu) pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data); if (pcpu_crdp == NULL) return NULL; - if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) { + if (!warned && cpus_array_len > 0 && (cpu < 0 || cpus_array_len <= cpu)) { fprintf(stderr, "[error] liburcu: get CPU # out of range\n"); warned = 1; } - if (cpu < 0 || maxcpus <= cpu) + if (cpu < 0 || cpus_array_len <= cpu) return NULL; return rcu_dereference(pcpu_crdp[cpu]); } @@ -541,7 +542,7 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp) call_rcu_lock(&call_rcu_mutex); alloc_cpu_call_rcu_data(); - if (cpu < 0 || maxcpus <= cpu) { + if (cpu < 0 || cpus_array_len <= cpu) { if (!warned) { fprintf(stderr, "[error] liburcu: set CPU # out of range\n"); warned = 1; @@ -610,7 +611,7 @@ struct call_rcu_data *get_call_rcu_data(void) if (URCU_TLS(thread_call_rcu_data) != NULL) return URCU_TLS(thread_call_rcu_data); - if (maxcpus > 0) { + if (cpus_array_len > 0) { crd = get_cpu_call_rcu_data(urcu_sched_getcpu()); if (crd) return crd; @@ -667,7 +668,7 @@ int create_all_cpu_call_rcu_data(unsigned long flags) call_rcu_lock(&call_rcu_mutex); alloc_cpu_call_rcu_data(); call_rcu_unlock(&call_rcu_mutex); - if (maxcpus <= 0) { + if (cpus_array_len <= 0) { errno = EINVAL; return -EINVAL; } @@ -675,7 +676,7 @@ int create_all_cpu_call_rcu_data(unsigned long flags) errno = ENOMEM; return -ENOMEM; } - for (i = 0; i < maxcpus; i++) { + for (i = 0; i < cpus_array_len; i++) { call_rcu_lock(&call_rcu_mutex); if (get_cpu_call_rcu_data(i)) { call_rcu_unlock(&call_rcu_mutex); @@ -820,10 +821,10 @@ void free_all_cpu_call_rcu_data(void) struct call_rcu_data **crdp; static int warned = 0; - if (maxcpus <= 0) + if (cpus_array_len <= 0) return; - crdp = malloc(sizeof(*crdp) * maxcpus); + crdp = malloc(sizeof(*crdp) * cpus_array_len); if (!crdp) { if (!warned) { fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n"); @@ -832,7 +833,7 @@ void free_all_cpu_call_rcu_data(void) return; } - for (cpu = 0; cpu < maxcpus; cpu++) { + for (cpu = 0; cpu < cpus_array_len; cpu++) { crdp[cpu] = get_cpu_call_rcu_data(cpu); if (crdp[cpu] == NULL) continue; @@ -843,7 +844,7 @@ void free_all_cpu_call_rcu_data(void) * call_rcu_data to become quiescent. */ synchronize_rcu(); - for (cpu = 0; cpu < maxcpus; cpu++) { + for (cpu = 0; cpu < cpus_array_len; cpu++) { if (crdp[cpu] == NULL) continue; call_rcu_data_free(crdp[cpu]); @@ -1037,7 +1038,7 @@ void call_rcu_after_fork_child(void) (void)get_default_call_rcu_data(); /* Cleanup call_rcu_data pointers before use */ - maxcpus_reset(); + cpus_array_len_reset(); free(per_cpu_call_rcu_data); rcu_set_pointer(&per_cpu_call_rcu_data, NULL); URCU_TLS(thread_call_rcu_data) = NULL; -- 2.34.1