From: Michael Jeanson Date: Mon, 25 Jul 2022 18:21:12 +0000 (-0400) Subject: fix: num_possible_cpus() with hot-unplugged CPUs X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=4783663eea19352a7f29b34cd810ed42a8b5b51e;p=lttng-ust.git fix: num_possible_cpus() with hot-unplugged CPUs We rely on sysconf(_SC_NPROCESSORS_CONF) to get the maximum possible number of CPUs that can be attached to the system for the lifetime of an application. We use this value to allocate an array of per-CPU buffers that is indexed by the numerical id of the CPUs. As such, we expect that the highest possible CPU id would be one less than the number returned by sysconf(_SC_NPROCESSORS_CONF), which is unfortunately not always the case and can vary across libc implementations and versions. Glibc up to 2.35 will count the number of "cpuX" directories in "/sys/devices/system/cpu", which doesn't include CPUs that were hot-unplugged. This information is, however, provided by the kernel in "/sys/devices/system/cpu/possible" in the form of a mask listing all the CPUs that could possibly be hot-plugged in the system. This patch changes the implementation of num_possible_cpus() to first try parsing the possible CPU mask to extract the highest possible value and, if this fails, fall back to the previous behavior. 
Change-Id: I1a3cb1a446154ec443a391d6689cb7d4165726fd Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers --- diff --git a/libringbuffer/smp.c b/libringbuffer/smp.c index 656a75da..8b66b9e8 100644 --- a/libringbuffer/smp.c +++ b/libringbuffer/smp.c @@ -21,17 +21,23 @@ #define _GNU_SOURCE #define _LGPL_SOURCE -#include +#include +#include +#include +#include +#include #include +#include +#include + #include "smp.h" +#include "usterr-signal-safe.h" int __num_possible_cpus; #if (defined(__GLIBC__) || defined( __UCLIBC__)) -void _get_num_possible_cpus(void) +int get_num_possible_cpus_fallback(void) { - int result; - /* On Linux, when some processors are offline * _SC_NPROCESSORS_CONF counts the offline * processors, whereas _SC_NPROCESSORS_ONLN @@ -40,10 +46,7 @@ void _get_num_possible_cpus(void) * this sysconf, in which case the arrays * indexed by processor would overflow. */ - result = sysconf(_SC_NPROCESSORS_CONF); - if (result == -1) - return; - __num_possible_cpus = result; + return sysconf(_SC_NPROCESSORS_CONF); } #else @@ -66,9 +69,9 @@ void _get_num_possible_cpus(void) #define __max(a,b) ((a)>(b)?(a):(b)) -void _get_num_possible_cpus(void) +int get_num_possible_cpus_fallback(void) { - int result, count = 0; + int count = 0; DIR *cpudir; struct dirent *entry; @@ -99,13 +102,131 @@ end: /* * Get the sysconf value as a fallback. Keep the highest number. */ - result = __max(sysconf(_SC_NPROCESSORS_CONF), count); + return __max(sysconf(_SC_NPROCESSORS_CONF), count); +} +#endif + +/* + * Get the CPU possible mask string from sysfs. + * + * buf: the buffer where the mask will be read. + * max_bytes: the maximum number of bytes to write in the buffer. + * + * Returns the number of bytes read or -1 on error. 
+ */ +int get_possible_cpu_mask_from_sysfs(char *buf, size_t max_bytes) +{ + ssize_t bytes_read = 0; + size_t total_bytes_read = 0; + int fd = -1, ret = -1; + + if (buf == NULL) + goto end; + + fd = open("/sys/devices/system/cpu/possible", O_RDONLY); + if (fd < 0) + goto end; + + do { + bytes_read = read(fd, buf + total_bytes_read, + max_bytes - total_bytes_read); + + if (bytes_read < 0) { + if (errno == EINTR) { + continue; /* retry operation */ + } else { + goto end; + } + } + + total_bytes_read += bytes_read; + assert(total_bytes_read <= max_bytes); + } while (max_bytes > total_bytes_read && bytes_read > 0); + + /* + * Make sure the mask read is a null terminated string. + */ + if (total_bytes_read < max_bytes) + buf[total_bytes_read] = '\0'; + else + buf[max_bytes - 1] = '\0'; + + if (total_bytes_read > INT_MAX) + goto end; + + ret = (int) total_bytes_read; + +end: + if (fd >= 0 && close(fd) < 0) + PERROR("close"); + + return ret; +} + +/* + * Get the number of CPUs from the possible cpu mask. + * + * pmask: the mask to parse. + * len: the len of the mask excluding '\0'. + * + * Returns the number of possible CPUs from the mask or 0 on error. + */ +int get_num_possible_cpus_from_mask(const char *pmask, size_t len) +{ + ssize_t i; + unsigned long cpu_index; + char *endptr; + + /* We need at least one char to read */ + if (len < 1) + goto error; + + /* Start from the end to read the last CPU index. */ + for (i = len - 1; i > 0; i--) { + /* Break when we hit the first separator. */ + if ((pmask[i] == ',') || (pmask[i] == '-')) { + i++; + break; + } + } + + cpu_index = strtoul(&pmask[i], &endptr, 10); /* - * If both methods failed, don't store the value. + * If we read a CPU index, increment it by one to return a number of + * CPUs. 
*/ - if (result < 1) + if ((&pmask[i] != endptr) && (cpu_index < INT_MAX)) + return (int) cpu_index + 1; + +error: + return 0; +} + +void _get_num_possible_cpus(void) +{ + int ret; + int buf_len = sysconf(_SC_PAGE_SIZE); + char buf[buf_len]; + + /* Get the possible cpu mask from sysfs, fallback to sysconf. */ + ret = get_possible_cpu_mask_from_sysfs((char *) &buf, buf_len); + if (ret <= 0) + goto fallback; + + /* Parse the possible cpu mask, on failure fallback to sysconf. */ + ret = get_num_possible_cpus_from_mask((char *) &buf, ret); + if (ret > 0) + goto end; + +fallback: + /* Fallback to sysconf. */ + ret = get_num_possible_cpus_fallback(); + +end: + /* If all methods failed, don't store the value. */ + if (ret < 1) return; - __num_possible_cpus = result; + + __num_possible_cpus = ret; } -#endif diff --git a/libringbuffer/smp.h b/libringbuffer/smp.h index 479a9dc6..1a880994 100644 --- a/libringbuffer/smp.h +++ b/libringbuffer/smp.h @@ -30,13 +30,53 @@ #define PER_CPU_MEM_SIZE 4096 extern int __num_possible_cpus; + +/* + * Get the CPU possible mask string from sysfs. + * + * buf: the buffer where the mask will be read. + * max_bytes: the maximum number of bytes to write in the buffer. + * + * Returns the number of bytes read or -1 on error. + */ +int get_possible_cpu_mask_from_sysfs(char *buf, size_t max_bytes) + __attribute__((visibility("hidden"))); + +/* + * Get the number of possible CPUs in the system from either + * sysconf(_SC_NPROCESSORS_CONF) or some other mechanism depending on the libc. + * + * Returns the number of possible CPUs in the system or 0 on error. + */ +int get_num_possible_cpus_fallback(void) + __attribute__((visibility("hidden"))); + +/* + * Get the number of CPUs from the possible cpu mask. + * + * pmask: the mask to parse. + * len: the len of the mask excluding '\0'. + * + * Returns the number of possible CPUs from the mask or 0 on error. 
+ */ +int get_num_possible_cpus_from_mask(const char *pmask, size_t len) + __attribute__((visibility("hidden"))); + extern void _get_num_possible_cpus(void); +/* + * Returns the total number of CPUs in the system. If the cache is not yet + * initialized, get the value from "/sys/devices/system/cpu/possible" or + * fallback to sysconf and cache it. + * + * If all methods fail, don't populate the cache and return 0. + */ static inline int num_possible_cpus(void) { - if (!__num_possible_cpus) + if (caa_unlikely(!__num_possible_cpus)) _get_num_possible_cpus(); + return __num_possible_cpus; }