From: Mathieu Desnoyers Date: Sun, 15 Sep 2013 22:03:24 +0000 (-0500) Subject: Move health into its own common/ static library X-Git-Tag: v2.4.0-rc1~119 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=55d097957f5bb8138959ad2202a40d85d49f029e;p=lttng-tools.git Move health into its own common/ static library - Introduce lttng/health-internal.h (not installed) Signed-off-by: Mathieu Desnoyers --- diff --git a/configure.ac b/configure.ac index eee05aacd..2b4a340f5 100644 --- a/configure.ac +++ b/configure.ac @@ -354,6 +354,7 @@ AC_CONFIG_FILES([ src/common/relayd/Makefile src/common/testpoint/Makefile src/common/index/Makefile + src/common/health/Makefile src/lib/Makefile src/lib/lttng-ctl/Makefile src/lib/lttng-ctl/filter/Makefile diff --git a/include/Makefile.am b/include/Makefile.am index f3413e6df..15479d4a7 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,3 +1,8 @@ -lttnginclude_HEADERS = lttng/lttng.h lttng/lttng-error.h lttng/snapshot.h +lttnginclude_HEADERS = \ + lttng/lttng.h \ + lttng/lttng-error.h \ + lttng/snapshot.h -noinst_HEADERS = lttng/snapshot-internal.h +noinst_HEADERS = \ + lttng/snapshot-internal.h \ + lttng/health-internal.h diff --git a/include/lttng/health-internal.h b/include/lttng/health-internal.h new file mode 100644 index 000000000..764e998e3 --- /dev/null +++ b/include/lttng/health-internal.h @@ -0,0 +1,116 @@ +#ifndef HEALTH_INTERNAL_H +#define HEALTH_INTERNAL_H + +/* + * Copyright (C) 2012 - David Goulet + * Copyright (C) 2013 - Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2 only, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include + +/* + * These are the value added to the current state depending of the position in + * the thread where is either waiting on a poll() or running in the code. + */ +#define HEALTH_POLL_VALUE (1UL << 0) +#define HEALTH_CODE_VALUE (1UL << 1) + +#define HEALTH_IS_IN_POLL(x) ((x) & HEALTH_POLL_VALUE) + +struct health_app; + +enum health_flags { + HEALTH_ERROR = (1U << 0), +}; + +struct health_state { + /* + * last counter and last_time are only read and updated by the health_check + * thread (single updater). + */ + unsigned long last; + struct timespec last_time; + + /* + * current and flags are updated by multiple threads concurrently. + */ + unsigned long current; /* progress counter, updated atomically */ + enum health_flags flags; /* other flags, updated atomically */ + int type; /* Indicates the nature of the thread. */ + /* Node of the global TLS state list. */ + struct cds_list_head node; +}; + +/* Declare TLS health state. */ +extern DECLARE_URCU_TLS(struct health_state, health_state); + +/* + * Update current counter by 1 to indicate that the thread entered or left a + * blocking state caused by a poll(). If the counter's value is not an even + * number (meaning a code execution flow), an assert() is raised. + */ +static inline void health_poll_entry(void) +{ + /* Code MUST be in code execution state which is an even number. */ + assert(!(uatomic_read(&URCU_TLS(health_state).current) + & HEALTH_POLL_VALUE)); + + uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE); +} + +/* + * Update current counter by 1 indicating the exit of a poll or blocking call. + * If the counter's value is not an odd number (a poll execution), an assert() + * is raised. 
+ */ +static inline void health_poll_exit(void) +{ + /* Code MUST be in poll execution state which is an odd number. */ + assert(uatomic_read(&URCU_TLS(health_state).current) + & HEALTH_POLL_VALUE); + + uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE); +} + +/* + * Update current counter by 2 indicates progress in execution of a + * thread. + */ +static inline void health_code_update(void) +{ + uatomic_add(&URCU_TLS(health_state).current, HEALTH_CODE_VALUE); +} + +/* + * Set health "error" flag. + */ +static inline void health_error(void) +{ + uatomic_or(&URCU_TLS(health_state).flags, HEALTH_ERROR); +} + +struct health_app *health_app_create(int nr_types); +void health_app_destroy(struct health_app *ha); +int health_check_state(struct health_app *ha, int type); +void health_register(struct health_app *ha, int type); +void health_unregister(struct health_app *ha); + +#endif /* HEALTH_INTERNAL_H */ diff --git a/src/bin/lttng-sessiond/Makefile.am b/src/bin/lttng-sessiond/Makefile.am index 0fab90f33..3d96907de 100644 --- a/src/bin/lttng-sessiond/Makefile.am +++ b/src/bin/lttng-sessiond/Makefile.am @@ -22,7 +22,7 @@ lttng_sessiond_SOURCES = utils.c utils.h \ fd-limit.c fd-limit.h \ kernel-consumer.c kernel-consumer.h \ consumer.h \ - health.c health.h health-sessiond.h \ + health-sessiond.h \ cmd.c cmd.h \ buffer-registry.c buffer-registry.h \ testpoint.h ht-cleanup.c \ @@ -46,7 +46,8 @@ lttng_sessiond_LDADD = -lrt -lurcu-common -lurcu \ $(top_builddir)/src/common/libcommon.la \ $(top_builddir)/src/common/compat/libcompat.la \ $(top_builddir)/src/common/relayd/librelayd.la \ - $(top_builddir)/src/common/testpoint/libtestpoint.la + $(top_builddir)/src/common/testpoint/libtestpoint.la \ + $(top_builddir)/src/common/health/libhealth.la if HAVE_LIBLTTNG_UST_CTL lttng_sessiond_LDADD += -llttng-ust-ctl diff --git a/src/bin/lttng-sessiond/health-sessiond.h b/src/bin/lttng-sessiond/health-sessiond.h index 18f922728..49f9e0b9c 100644 --- 
a/src/bin/lttng-sessiond/health-sessiond.h +++ b/src/bin/lttng-sessiond/health-sessiond.h @@ -18,7 +18,7 @@ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include "health.h" +#include enum health_type { HEALTH_TYPE_CMD = 0, diff --git a/src/bin/lttng-sessiond/health.c b/src/bin/lttng-sessiond/health.c deleted file mode 100644 index 7e1d4731a..000000000 --- a/src/bin/lttng-sessiond/health.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright (C) 2012 - David Goulet - * Copyright (C) 2013 - Mathieu Desnoyers - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License, version 2 only, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "health.h" - -/* - * An application-specific error state for unregistered thread keeps - * track of thread errors. A thread reporting a health error, normally - * unregisters and quits. This makes the TLS health state not available - * to the health_check_state() call so on unregister we update this - * global error array so we can keep track of which thread was on error - * if the TLS health state has been removed. 
- */ -struct health_app { - /* List of health state, for each application thread */ - struct cds_list_head list; - /* - * This lock ensures that TLS memory used for the node and its - * container structure don't get reclaimed after the TLS owner - * thread exits until we have finished using it. - */ - pthread_mutex_t lock; - int nr_types; - struct timespec time_delta; - /* Health flags containing thread type error state */ - enum health_flags *flags; -}; - -/* Define TLS health state. */ -DEFINE_URCU_TLS(struct health_state, health_state); - -struct health_app *health_app_create(int nr_types) -{ - struct health_app *ha; - - ha = zmalloc(sizeof(*ha)); - if (!ha) { - return NULL; - } - ha->flags = zmalloc(sizeof(*ha->flags)); - if (!ha->flags) { - goto error_flags; - } - CDS_INIT_LIST_HEAD(&ha->list); - pthread_mutex_init(&ha->lock, NULL); - ha->nr_types = nr_types; - ha->time_delta.tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S; - ha->time_delta.tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS; - return ha; - -error_flags: - free(ha); - return NULL; -} - -void health_app_destroy(struct health_app *ha) -{ - free(ha->flags); - free(ha); -} - -/* - * Lock health state global list mutex. - */ -static void state_lock(struct health_app *ha) -{ - pthread_mutex_lock(&ha->lock); -} - -/* - * Unlock health state global list mutex. - */ -static void state_unlock(struct health_app *ha) -{ - pthread_mutex_unlock(&ha->lock); -} - -/* - * Set time difference in res from time_a and time_b. - */ -static void time_diff(const struct timespec *time_a, - const struct timespec *time_b, struct timespec *res) -{ - if (time_a->tv_nsec - time_b->tv_nsec < 0) { - res->tv_sec = time_a->tv_sec - time_b->tv_sec - 1; - res->tv_nsec = 1000000000L + time_a->tv_sec - time_b->tv_sec; - } else { - res->tv_sec = time_a->tv_sec - time_b->tv_sec; - res->tv_nsec = time_a->tv_nsec - time_b->tv_nsec; - } -} - -/* - * Return true if time_a - time_b > diff, else false. 
- */ -static int time_diff_gt(const struct timespec *time_a, - const struct timespec *time_b, const struct timespec *diff) -{ - struct timespec res; - - time_diff(time_a, time_b, &res); - time_diff(&res, diff, &res); - - if (res.tv_sec > 0) { - return 1; - } else if (res.tv_sec == 0 && res.tv_nsec > 0) { - return 1; - } - - return 0; -} - -/* - * Validate health state. Checks for the error flag or health conditions. - * - * Return 0 if health is bad or else 1. - */ -static int validate_state(struct health_app *ha, struct health_state *state) -{ - int retval = 1, ret; - unsigned long current, last; - struct timespec current_time; - - assert(state); - - last = state->last; - current = uatomic_read(&state->current); - - ret = clock_gettime(CLOCK_MONOTONIC, &current_time); - if (ret < 0) { - PERROR("Error reading time\n"); - /* error */ - retval = 0; - goto end; - } - - /* - * Thread is in bad health if flag HEALTH_ERROR is set. It is also in bad - * health if, after the delta delay has passed, its the progress counter - * has not moved and it has NOT been waiting for a poll() call. - */ - if (uatomic_read(&state->flags) & HEALTH_ERROR) { - retval = 0; - goto end; - } - - /* - * Initial condition need to update the last counter and sample time, but - * should not check health in this initial case, because we don't know how - * much time has passed. - */ - if (state->last_time.tv_sec == 0 && state->last_time.tv_nsec == 0) { - /* update last counter and last sample time */ - state->last = current; - memcpy(&state->last_time, &current_time, sizeof(current_time)); - } else { - if (time_diff_gt(&current_time, &state->last_time, - &ha->time_delta)) { - if (current == last && !HEALTH_IS_IN_POLL(current)) { - /* error */ - retval = 0; - } - /* update last counter and last sample time */ - state->last = current; - memcpy(&state->last_time, &current_time, sizeof(current_time)); - - /* On error, stop right now and notify caller. 
*/ - if (retval == 0) { - goto end; - } - } - } - -end: - DBG("Health state current %lu, last %lu, ret %d", - current, last, ret); - return retval; -} - -/* - * Check health of a specific health type. Note that if a thread has not yet - * initialize its health subsystem or has quit, it's considered in a good - * state. - * - * Return 0 if health is bad or else 1. - */ -int health_check_state(struct health_app *ha, int type) -{ - int retval = 1; - struct health_state *state; - - assert(type < ha->nr_types); - - state_lock(ha); - - cds_list_for_each_entry(state, &ha->list, node) { - int ret; - - if (state->type != type) { - continue; - } - - ret = validate_state(ha, state); - if (!ret) { - retval = 0; - goto end; - } - } - - /* Check the global state since some state might not be visible anymore. */ - if (ha->flags[type] & HEALTH_ERROR) { - retval = 0; - } - -end: - state_unlock(ha); - - DBG("Health check for type %d is %s", (int) type, - (retval == 0) ? "BAD" : "GOOD"); - return retval; -} - -/* - * Init health state. - */ -void health_register(struct health_app *ha, int type) -{ - assert(type < ha->nr_types); - - /* Init TLS state. */ - uatomic_set(&URCU_TLS(health_state).last, 0); - uatomic_set(&URCU_TLS(health_state).last_time.tv_sec, 0); - uatomic_set(&URCU_TLS(health_state).last_time.tv_nsec, 0); - uatomic_set(&URCU_TLS(health_state).current, 0); - uatomic_set(&URCU_TLS(health_state).flags, 0); - uatomic_set(&URCU_TLS(health_state).type, type); - - /* Add it to the global TLS state list. */ - state_lock(ha); - cds_list_add(&URCU_TLS(health_state).node, &ha->list); - state_unlock(ha); -} - -/* - * Remove node from global list. - */ -void health_unregister(struct health_app *ha) -{ - state_lock(ha); - /* - * On error, set the global_error_state since we are about to remove - * the node from the global list. 
- */ - if (uatomic_read(&URCU_TLS(health_state).flags) & HEALTH_ERROR) { - uatomic_set(&ha->flags[URCU_TLS(health_state).type], - HEALTH_ERROR); - } - cds_list_del(&URCU_TLS(health_state).node); - state_unlock(ha); -} - -/* - * Initiliazie health check subsytem. This should be called before any health - * register occurs. - */ -void health_init(struct health_app *ha) -{ - /* - * Get the maximum value between the default delta value and the TCP - * timeout with a safety net of the default health check delta. - */ - ha->time_delta.tv_sec = max_t(unsigned long, - lttcomm_inet_tcp_timeout + DEFAULT_HEALTH_CHECK_DELTA_S, - ha->time_delta.tv_sec); - DBG("Health check time delta in seconds set to %lu", - ha->time_delta.tv_sec); -} diff --git a/src/bin/lttng-sessiond/health.h b/src/bin/lttng-sessiond/health.h deleted file mode 100644 index 82cfc87df..000000000 --- a/src/bin/lttng-sessiond/health.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2012 - David Goulet - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License, version 2 only, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef _HEALTH_H -#define _HEALTH_H - -#include -#include -#include -#include -#include -#include - -/* - * These are the value added to the current state depending of the position in - * the thread where is either waiting on a poll() or running in the code. 
- */ -#define HEALTH_POLL_VALUE (1UL << 0) -#define HEALTH_CODE_VALUE (1UL << 1) - -#define HEALTH_IS_IN_POLL(x) ((x) & HEALTH_POLL_VALUE) - -struct health_app; - -enum health_flags { - HEALTH_ERROR = (1U << 0), -}; - -struct health_state { - /* - * last counter and last_time are only read and updated by the health_check - * thread (single updater). - */ - unsigned long last; - struct timespec last_time; - - /* - * current and flags are updated by multiple threads concurrently. - */ - unsigned long current; /* progress counter, updated atomically */ - enum health_flags flags; /* other flags, updated atomically */ - int type; /* Indicates the nature of the thread. */ - /* Node of the global TLS state list. */ - struct cds_list_head node; -}; - -/* Declare TLS health state. */ -extern DECLARE_URCU_TLS(struct health_state, health_state); - -/* - * Update current counter by 1 to indicate that the thread entered or left a - * blocking state caused by a poll(). If the counter's value is not an even - * number (meaning a code execution flow), an assert() is raised. - */ -static inline void health_poll_entry(void) -{ - /* Code MUST be in code execution state which is an even number. */ - assert(!(uatomic_read(&URCU_TLS(health_state).current) - & HEALTH_POLL_VALUE)); - - uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE); -} - -/* - * Update current counter by 1 indicating the exit of a poll or blocking call. - * If the counter's value is not an odd number (a poll execution), an assert() - * is raised. - */ -static inline void health_poll_exit(void) -{ - /* Code MUST be in poll execution state which is an odd number. */ - assert(uatomic_read(&URCU_TLS(health_state).current) - & HEALTH_POLL_VALUE); - - uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE); -} - -/* - * Update current counter by 2 indicates progress in execution of a - * thread. 
- */ -static inline void health_code_update(void) -{ - uatomic_add(&URCU_TLS(health_state).current, HEALTH_CODE_VALUE); -} - -/* - * Set health "error" flag. - */ -static inline void health_error(void) -{ - uatomic_or(&URCU_TLS(health_state).flags, HEALTH_ERROR); -} - -struct health_app *health_app_create(int nr_types); -void health_app_destroy(struct health_app *ha); -int health_check_state(struct health_app *ha, int type); -void health_register(struct health_app *ha, int type); -void health_unregister(struct health_app *ha); -void health_init(struct health_app *ha); - -#endif /* _HEALTH_H */ diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index a54c0bf8d..550aa1b83 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -4727,7 +4727,6 @@ int main(int argc, char **argv) PERROR("health_app_create error"); goto exit_health_sessiond_cleanup; } - health_init(health_sessiond); /* Create thread to manage the client socket */ ret = pthread_create(&ht_cleanup_thread, NULL, diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 8454b5a51..0c02d533c 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -1,6 +1,6 @@ AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -SUBDIRS = compat hashtable kernel-ctl sessiond-comm relayd \ +SUBDIRS = compat health hashtable kernel-ctl sessiond-comm relayd \ kernel-consumer ust-consumer testpoint index AM_CFLAGS = -fno-strict-aliasing diff --git a/src/common/health/Makefile.am b/src/common/health/Makefile.am new file mode 100644 index 000000000..d82f9de9e --- /dev/null +++ b/src/common/health/Makefile.am @@ -0,0 +1,5 @@ +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src + +noinst_LTLIBRARIES = libhealth.la + +libhealth_la_SOURCES = health.c diff --git a/src/common/health/health.c b/src/common/health/health.c new file mode 100644 index 000000000..d2414ae57 --- /dev/null +++ b/src/common/health/health.c @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2012 
- David Goulet + * Copyright (C) 2013 - Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2 only, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +/* + * An application-specific error state for unregistered thread keeps + * track of thread errors. A thread reporting a health error, normally + * unregisters and quits. This makes the TLS health state not available + * to the health_check_state() call so on unregister we update this + * global error array so we can keep track of which thread was on error + * if the TLS health state has been removed. + */ +struct health_app { + /* List of health state, for each application thread */ + struct cds_list_head list; + /* + * This lock ensures that TLS memory used for the node and its + * container structure don't get reclaimed after the TLS owner + * thread exits until we have finished using it. + */ + pthread_mutex_t lock; + int nr_types; + struct timespec time_delta; + /* Health flags containing thread type error state */ + enum health_flags *flags; +}; + +/* Define TLS health state. */ +DEFINE_URCU_TLS(struct health_state, health_state); + +/* + * Initialize health check subsytem. 
+ */
+static
+void health_init(struct health_app *ha)
+{
+	/*
+	 * Get the maximum value between the default delta value and the TCP
+	 * timeout with a safety net of the default health check delta.
+	 */
+	ha->time_delta.tv_sec = max_t(unsigned long,
+			lttcomm_inet_tcp_timeout + DEFAULT_HEALTH_CHECK_DELTA_S,
+			ha->time_delta.tv_sec);
+	DBG("Health check time delta in seconds set to %lu",
+			ha->time_delta.tv_sec);
+}
+
+struct health_app *health_app_create(int nr_types)
+{
+	struct health_app *ha;
+
+	ha = zmalloc(sizeof(*ha));
+	if (!ha) {
+		return NULL;	/* nothing allocated yet, nothing to undo */
+	}
+	ha->flags = zmalloc(sizeof(*ha->flags) * nr_types);	/* one slot per type: indexed as flags[type] */
+	if (!ha->flags) {
+		goto error_flags;
+	}
+	CDS_INIT_LIST_HEAD(&ha->list);
+	pthread_mutex_init(&ha->lock, NULL);
+	ha->nr_types = nr_types;
+	ha->time_delta.tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S;
+	ha->time_delta.tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS;
+	health_init(ha);	/* must run after the defaults above: it only raises tv_sec */
+	return ha;
+
+error_flags:
+	free(ha);
+	return NULL;
+}
+
+void health_app_destroy(struct health_app *ha)
+{
+	free(ha->flags);
+	free(ha);
+}
+
+/*
+ * Lock health state global list mutex.
+ */
+static void state_lock(struct health_app *ha)
+{
+	pthread_mutex_lock(&ha->lock);
+}
+
+/*
+ * Unlock health state global list mutex.
+ */
+static void state_unlock(struct health_app *ha)
+{
+	pthread_mutex_unlock(&ha->lock);
+}
+
+/*
+ * Store time_a - time_b in res, borrowing one second when the tv_nsec difference is negative.
+ */
+static void time_diff(const struct timespec *time_a,
+		const struct timespec *time_b, struct timespec *res)
+{
+	if (time_a->tv_nsec - time_b->tv_nsec < 0) {
+		res->tv_sec = time_a->tv_sec - time_b->tv_sec - 1;
+		res->tv_nsec = 1000000000L + time_a->tv_nsec - time_b->tv_nsec;
+	} else {
+		res->tv_sec = time_a->tv_sec - time_b->tv_sec;
+		res->tv_nsec = time_a->tv_nsec - time_b->tv_nsec;
+	}
+}
+
+/*
+ * Return true if time_a - time_b > diff, else false. 
+ */
+static int time_diff_gt(const struct timespec *time_a,
+		const struct timespec *time_b, const struct timespec *diff)
+{
+	struct timespec res;
+
+	time_diff(time_a, time_b, &res);
+	time_diff(&res, diff, &res);	/* res is both input and output here */
+
+	if (res.tv_sec > 0) {
+		return 1;
+	} else if (res.tv_sec == 0 && res.tv_nsec > 0) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Validate health state. Checks for the error flag or health conditions.
+ *
+ * Return 0 if health is bad or else 1.
+ */
+static int validate_state(struct health_app *ha, struct health_state *state)
+{
+	int retval = 1, ret;
+	unsigned long current, last;
+	struct timespec current_time;
+
+	assert(state);
+
+	last = state->last;
+	current = uatomic_read(&state->current);
+
+	ret = clock_gettime(CLOCK_MONOTONIC, &current_time);
+	if (ret < 0) {
+		PERROR("Error reading time\n");
+		/* error */
+		retval = 0;
+		goto end;
+	}
+
+	/*
+	 * Thread is in bad health if flag HEALTH_ERROR is set. It is also in bad
+	 * health if, after the delta delay has passed, its progress counter
+	 * has not moved and it has NOT been waiting for a poll() call.
+	 */
+	if (uatomic_read(&state->flags) & HEALTH_ERROR) {
+		retval = 0;
+		goto end;
+	}
+
+	/*
+	 * The initial condition needs to update the last counter and sample time,
+	 * but should not check health in this initial case, because we don't know
+	 * how much time has passed.
+	 */
+	if (state->last_time.tv_sec == 0 && state->last_time.tv_nsec == 0) {
+		/* update last counter and last sample time */
+		state->last = current;
+		memcpy(&state->last_time, &current_time, sizeof(current_time));
+	} else {
+		if (time_diff_gt(&current_time, &state->last_time,
+				&ha->time_delta)) {
+			if (current == last && !HEALTH_IS_IN_POLL(current)) {
+				/* error */
+				retval = 0;
+			}
+			/* update last counter and last sample time */
+			state->last = current;
+			memcpy(&state->last_time, &current_time, sizeof(current_time));
+
+			/* On error, stop right now and notify caller. */
+			if (retval == 0) {
+				goto end;
+			}
+		}
+	}
+
+end:
+	DBG("Health state current %lu, last %lu, ret %d",
+			current, last, retval);
+	return retval;
+}
+
+/*
+ * Check health of a specific health type. Note that if a thread has not yet
+ * initialized its health subsystem or has quit, it's considered in a good
+ * state.
+ *
+ * Return 0 if health is bad or else 1.
+ */
+int health_check_state(struct health_app *ha, int type)
+{
+	int retval = 1;
+	struct health_state *state;
+
+	assert(type < ha->nr_types);
+
+	state_lock(ha);
+
+	cds_list_for_each_entry(state, &ha->list, node) {
+		int ret;
+
+		if (state->type != type) {
+			continue;
+		}
+
+		ret = validate_state(ha, state);
+		if (!ret) {
+			retval = 0;
+			goto end;
+		}
+	}
+
+	/* Check the global state since some state might not be visible anymore. */
+	if (ha->flags[type] & HEALTH_ERROR) {
+		retval = 0;
+	}
+
+end:
+	state_unlock(ha);
+
+	DBG("Health check for type %d is %s", (int) type,
+			(retval == 0) ? "BAD" : "GOOD");
+	return retval;
+}
+
+/*
+ * Init the calling thread's TLS health state and add it to ha's list.
+ */
+void health_register(struct health_app *ha, int type)
+{
+	assert(type < ha->nr_types);
+
+	/* Init TLS state. */
+	uatomic_set(&URCU_TLS(health_state).last, 0);
+	uatomic_set(&URCU_TLS(health_state).last_time.tv_sec, 0);
+	uatomic_set(&URCU_TLS(health_state).last_time.tv_nsec, 0);
+	uatomic_set(&URCU_TLS(health_state).current, 0);
+	uatomic_set(&URCU_TLS(health_state).flags, 0);
+	uatomic_set(&URCU_TLS(health_state).type, type);
+
+	/* Add it to the global TLS state list. */
+	state_lock(ha);
+	cds_list_add(&URCU_TLS(health_state).node, &ha->list);
+	state_unlock(ha);
+}
+
+/*
+ * Remove the calling thread's node from the global list.
+ */
+void health_unregister(struct health_app *ha)
+{
+	state_lock(ha);
+	/*
+	 * On error, record it in ha->flags[type] since we are about to remove
+	 * the node from the global list.
+	 */
+	if (uatomic_read(&URCU_TLS(health_state).flags) & HEALTH_ERROR) {
+		uatomic_set(&ha->flags[URCU_TLS(health_state).type],
+				HEALTH_ERROR);
+	}
+	cds_list_del(&URCU_TLS(health_state).node);
+	state_unlock(ha);
+}