All app-specific state is now in an object: struct health_app.
This moves the per-application enumeration into health-sessiond.h (for
sessiond).
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
fd-limit.c fd-limit.h \
kernel-consumer.c kernel-consumer.h \
consumer.h \
- health.c health.h \
+ health.c health.h health-sessiond.h \
cmd.c cmd.h \
buffer-registry.c buffer-registry.h \
testpoint.h ht-cleanup.c \
#include "channel.h"
#include "consumer.h"
#include "event.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "kernel.h"
#include "kernel-consumer.h"
#include "lttng-sessiond.h"
#include <common/relayd/relayd.h>
#include "consumer.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "ust-app.h"
#include "utils.h"
--- /dev/null
+#ifndef HEALTH_SESSIOND_H
+#define HEALTH_SESSIOND_H
+
+/*
+ * Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "health.h"
+
+enum health_type { /* Thread types monitored by the session daemon; values index the per-type error flags */
+ HEALTH_TYPE_CMD = 0, /* Client command thread (thread_manage_clients testpoint) */
+ HEALTH_TYPE_APP_MANAGE = 1, /* Application management thread (thread_manage_apps testpoint) */
+ HEALTH_TYPE_APP_REG = 2, /* Application registration thread */
+ HEALTH_TYPE_KERNEL = 3, /* Kernel management thread */
+ HEALTH_TYPE_CONSUMER = 4, /* Consumer management thread */
+ HEALTH_TYPE_HT_CLEANUP = 5, /* thread_ht_cleanup() */
+ HEALTH_TYPE_APP_MANAGE_NOTIFY = 6, /* Application notify communication thread */
+ HEALTH_TYPE_APP_REG_DISPATCH = 7, /* NOTE(review): presumably the registration dispatch thread -- confirm */
+
+ HEALTH_NUM_TYPE, /* Number of types above; passed to health_app_create() */
+};
+
+/* Application health monitoring */
+extern struct health_app *health_sessiond;
+
+#endif /* HEALTH_SESSIOND_H */
/*
* Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
+ * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License, version 2 only, as
#include "health.h"
-static struct timespec time_delta = {
- .tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S,
- .tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS,
+/*
+ * Per-application health monitoring context. An application (e.g. the
+ * session daemon) creates one with health_app_create() and passes it
+ * to the health_*() calls that take a struct health_app.
+ *
+ * The "flags" array keeps track of the error state of unregistered
+ * threads. A thread reporting a health error normally unregisters and
+ * quits, which makes its TLS health state unavailable to
+ * health_check_state(). On unregister, the error is therefore recorded
+ * in "flags" so we can still tell which thread type was in error once
+ * the TLS health state has been removed.
+ */
+struct health_app {
+ /* List of health state, one node per registered application thread */
+ struct cds_list_head list;
+ /*
+ * This lock ensures that TLS memory used for the node and its
+ * container structure don't get reclaimed after the TLS owner
+ * thread exits until we have finished using it.
+ */
+ pthread_mutex_t lock;
+ int nr_types; /* Number of thread types; valid types are < nr_types */
+ struct timespec time_delta; /* Delay without progress after which a thread is reported bad */
+ /* Per-type error flags, indexed by thread type */
+ enum health_flags *flags;
};
/* Define TLS health state. */
DEFINE_URCU_TLS(struct health_state, health_state);
-/*
- * It ensures that TLS memory used for the node and its container structure
- * don't get reclaimed after the TLS owner thread exits until we have finished
- * using it.
- */
-static pthread_mutex_t health_mutex = PTHREAD_MUTEX_INITIALIZER;
+/*
+ * Allocate and initialize a health_app object able to track nr_types
+ * thread types.
+ *
+ * The thread state list starts empty, the per-type error flags are
+ * zeroed and the time delta is set to the default health check delta.
+ *
+ * Return the new object, or NULL on allocation failure. The object
+ * must be released with health_app_destroy().
+ */
+struct health_app *health_app_create(int nr_types)
+{
+ struct health_app *ha;
-static struct health_tls_state_list health_state_list = {
- .head = CDS_LIST_HEAD_INIT(health_state_list.head),
-};
+ ha = zmalloc(sizeof(*ha));
+ if (!ha) {
+ return NULL;
+ }
+ /*
+ * flags[] is indexed by thread type, so it needs one slot per
+ * type, not a single element.
+ */
+ ha->flags = zmalloc(sizeof(*ha->flags) * nr_types);
+ if (!ha->flags) {
+ goto error_flags;
+ }
+ CDS_INIT_LIST_HEAD(&ha->list);
+ pthread_mutex_init(&ha->lock, NULL);
+ ha->nr_types = nr_types;
+ ha->time_delta.tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S;
+ ha->time_delta.tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS;
+ return ha;
+
+error_flags:
+ free(ha);
+ return NULL;
+}
-/*
- * This keeps track of the error state for unregistered thread. A thread
- * reporting a health error, normally unregisters and quits. This makes the TLS
- * health state not available to the health_check_state() call so on unregister
- * we update this global error array so we can keep track of which thread was
- * on error if the TLS health state has been removed.
- */
-static enum health_flags global_error_state[HEALTH_NUM_TYPE];
+/*
+ * Release a health_app object created by health_app_create().
+ *
+ * Passing NULL is a no-op. The object is freed, so the caller must
+ * make sure no thread is still registered with it or holding its lock.
+ */
+void health_app_destroy(struct health_app *ha)
+{
+ if (!ha) {
+ return;
+ }
+ /* Pairs with the pthread_mutex_init() done at creation. */
+ pthread_mutex_destroy(&ha->lock);
+ free(ha->flags);
+ free(ha);
+}
/*
* Lock health state global list mutex.
*/
-static void state_lock(void)
+static void state_lock(struct health_app *ha)
{
- pthread_mutex_lock(&health_mutex);
+ pthread_mutex_lock(&ha->lock);
}
/*
* Unlock health state global list mutex.
*/
-static void state_unlock(void)
+static void state_unlock(struct health_app *ha)
{
- pthread_mutex_unlock(&health_mutex);
+ pthread_mutex_unlock(&ha->lock);
}
/*
*
* Return 0 if health is bad or else 1.
*/
-static int validate_state(struct health_state *state)
+static int validate_state(struct health_app *ha, struct health_state *state)
{
int retval = 1, ret;
unsigned long current, last;
state->last = current;
memcpy(&state->last_time, ¤t_time, sizeof(current_time));
} else {
- if (time_diff_gt(¤t_time, &state->last_time, &time_delta)) {
+ if (time_diff_gt(¤t_time, &state->last_time,
+ &ha->time_delta)) {
if (current == last && !HEALTH_IS_IN_POLL(current)) {
/* error */
retval = 0;
*
* Return 0 if health is bad or else 1.
*/
-int health_check_state(enum health_type type)
+int health_check_state(struct health_app *ha, int type)
{
int retval = 1;
struct health_state *state;
- assert(type < HEALTH_NUM_TYPE);
+ assert(type < ha->nr_types);
- state_lock();
+ state_lock(ha);
- cds_list_for_each_entry(state, &health_state_list.head, node) {
+ cds_list_for_each_entry(state, &ha->list, node) {
int ret;
if (state->type != type) {
continue;
}
- ret = validate_state(state);
+ ret = validate_state(ha, state);
if (!ret) {
retval = 0;
goto end;
}
/* Check the global state since some state might not be visible anymore. */
- if (global_error_state[type] & HEALTH_ERROR) {
+ if (ha->flags[type] & HEALTH_ERROR) {
retval = 0;
}
end:
- state_unlock();
+ state_unlock(ha);
DBG("Health check for type %d is %s", (int) type,
(retval == 0) ? "BAD" : "GOOD");
/*
* Init health state.
*/
-void health_register(enum health_type type)
+void health_register(struct health_app *ha, int type)
{
- assert(type < HEALTH_NUM_TYPE);
+ assert(type < ha->nr_types);
/* Init TLS state. */
uatomic_set(&URCU_TLS(health_state).last, 0);
uatomic_set(&URCU_TLS(health_state).type, type);
/* Add it to the global TLS state list. */
- state_lock();
- cds_list_add(&URCU_TLS(health_state).node, &health_state_list.head);
- state_unlock();
+ state_lock(ha);
+ cds_list_add(&URCU_TLS(health_state).node, &ha->list);
+ state_unlock(ha);
}
/*
* Remove node from global list.
*/
-void health_unregister(void)
+void health_unregister(struct health_app *ha)
{
- state_lock();
+ state_lock(ha);
/*
* On error, set the global_error_state since we are about to remove
* the node from the global list.
*/
if (uatomic_read(&URCU_TLS(health_state).flags) & HEALTH_ERROR) {
- uatomic_set(&global_error_state[URCU_TLS(health_state).type],
+ uatomic_set(&ha->flags[URCU_TLS(health_state).type],
HEALTH_ERROR);
}
cds_list_del(&URCU_TLS(health_state).node);
- state_unlock();
+ state_unlock(ha);
}
/*
* Initiliazie health check subsytem. This should be called before any health
* register occurs.
*/
-void health_init(void)
+void health_init(struct health_app *ha)
{
/*
* Get the maximum value between the default delta value and the TCP
* timeout with a safety net of the default health check delta.
*/
- time_delta.tv_sec = max_t(unsigned long,
+ ha->time_delta.tv_sec = max_t(unsigned long,
lttcomm_inet_tcp_timeout + DEFAULT_HEALTH_CHECK_DELTA_S,
- time_delta.tv_sec);
- DBG("Health check time delta in seconds set to %lu", time_delta.tv_sec);
+ ha->time_delta.tv_sec);
+ DBG("Health check time delta in seconds set to %lu",
+ ha->time_delta.tv_sec);
}
#define HEALTH_IS_IN_POLL(x) ((x) & HEALTH_POLL_VALUE)
+struct health_app;
+
enum health_flags {
HEALTH_ERROR = (1U << 0),
};
-enum health_type {
- HEALTH_TYPE_CMD = 0,
- HEALTH_TYPE_APP_MANAGE = 1,
- HEALTH_TYPE_APP_REG = 2,
- HEALTH_TYPE_KERNEL = 3,
- HEALTH_TYPE_CONSUMER = 4,
- HEALTH_TYPE_HT_CLEANUP = 5,
- HEALTH_TYPE_APP_MANAGE_NOTIFY = 6,
- HEALTH_TYPE_APP_REG_DISPATCH = 7,
-
- HEALTH_NUM_TYPE,
-};
-
-struct health_tls_state_list {
- struct cds_list_head head;
-};
-
struct health_state {
/*
* last counter and last_time are only read and updated by the health_check
*/
unsigned long current; /* progress counter, updated atomically */
enum health_flags flags; /* other flags, updated atomically */
- enum health_type type; /* Indicates the nature of the thread. */
+ int type; /* Indicates the nature of the thread. */
/* Node of the global TLS state list. */
struct cds_list_head node;
};
uatomic_or(&URCU_TLS(health_state).flags, HEALTH_ERROR);
}
-int health_check_state(enum health_type type);
-void health_register(enum health_type type);
-void health_unregister(void);
-void health_init(void);
+struct health_app *health_app_create(int nr_types);
+void health_app_destroy(struct health_app *ha);
+int health_check_state(struct health_app *ha, int type);
+void health_register(struct health_app *ha, int type);
+void health_unregister(struct health_app *ha);
+void health_init(struct health_app *ha);
#endif /* _HEALTH_H */
#include <common/utils.h>
#include "lttng-sessiond.h"
-#include "health.h"
+#include "health-sessiond.h"
void *thread_ht_cleanup(void *data)
{
rcu_register_thread();
rcu_thread_online();
- health_register(HEALTH_TYPE_HT_CLEANUP);
+ health_register(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
health_code_update();
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
rcu_thread_offline();
rcu_unregister_thread();
return NULL;
#include <common/defaults.h>
#include "consumer.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "kernel-consumer.h"
static char *create_channel_path(struct consumer_output *consumer,
#include "ust-consumer.h"
#include "utils.h"
#include "fd-limit.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "testpoint.h"
#include "ust-thread.h"
/* Set in main() with the current page size. */
long page_size;
+/* Application health monitoring */
+struct health_app *health_sessiond;
+
static
void setup_consumerd_path(void)
{
DBG("[thread] Thread manage kernel started");
- health_register(HEALTH_TYPE_KERNEL);
+ health_register(health_sessiond, HEALTH_TYPE_KERNEL);
/*
* This first step of the while is to clean this structure which could free
WARN("Kernel thread died unexpectedly. "
"Kernel tracing can continue but CPU hotplug is disabled.");
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("Kernel thread dying");
return NULL;
}
DBG("[thread] Manage consumer started");
- health_register(HEALTH_TYPE_CONSUMER);
+ health_register(health_sessiond, HEALTH_TYPE_CONSUMER);
health_code_update();
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("consumer thread cleanup completed");
return NULL;
rcu_register_thread();
rcu_thread_online();
- health_register(HEALTH_TYPE_APP_MANAGE);
+ health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE);
if (testpoint(thread_manage_apps)) {
goto error_testpoint;
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("Application communication apps thread cleanup complete");
rcu_thread_offline();
rcu_unregister_thread();
.count = 0,
};
- health_register(HEALTH_TYPE_APP_REG_DISPATCH);
+ health_register(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
health_code_update();
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
return NULL;
}
DBG("[thread] Manage application registration started");
- health_register(HEALTH_TYPE_APP_REG);
+ health_register(health_sessiond, HEALTH_TYPE_APP_REG);
if (testpoint(thread_registration_apps)) {
goto error_testpoint;
error_create_poll:
error_testpoint:
DBG("UST Registration thread cleanup complete");
- health_unregister();
+ health_unregister(health_sessiond);
return NULL;
}
{
int ret;
- ret = health_check_state(HEALTH_TYPE_CONSUMER);
+ ret = health_check_state(health_sessiond, HEALTH_TYPE_CONSUMER);
DBG3("Health consumer check %d", ret);
switch (msg.component) {
case LTTNG_HEALTH_CMD:
- reply.ret_code = health_check_state(HEALTH_TYPE_CMD);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_CMD);
break;
case LTTNG_HEALTH_APP_MANAGE:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE);
break;
case LTTNG_HEALTH_APP_REG:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG);
break;
case LTTNG_HEALTH_KERNEL:
- reply.ret_code = health_check_state(HEALTH_TYPE_KERNEL);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_KERNEL);
break;
case LTTNG_HEALTH_CONSUMER:
reply.ret_code = check_consumer_health();
break;
case LTTNG_HEALTH_HT_CLEANUP:
- reply.ret_code = health_check_state(HEALTH_TYPE_HT_CLEANUP);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
break;
case LTTNG_HEALTH_APP_MANAGE_NOTIFY:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE_NOTIFY);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
break;
case LTTNG_HEALTH_APP_REG_DISPATCH:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG_DISPATCH);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
break;
case LTTNG_HEALTH_ALL:
reply.ret_code =
- health_check_state(HEALTH_TYPE_APP_MANAGE) &&
- health_check_state(HEALTH_TYPE_APP_REG) &&
- health_check_state(HEALTH_TYPE_CMD) &&
- health_check_state(HEALTH_TYPE_KERNEL) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_REG) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_CMD) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_KERNEL) &&
check_consumer_health() &&
- health_check_state(HEALTH_TYPE_HT_CLEANUP) &&
- health_check_state(HEALTH_TYPE_APP_MANAGE_NOTIFY) &&
- health_check_state(HEALTH_TYPE_APP_REG_DISPATCH);
+ health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
break;
default:
reply.ret_code = LTTNG_ERR_UND;
rcu_register_thread();
- health_register(HEALTH_TYPE_CMD);
+ health_register(health_sessiond, HEALTH_TYPE_CMD);
if (testpoint(thread_manage_clients)) {
goto error_testpoint;
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("Client thread dying");
* Initialize the health check subsystem. This call should set the
* appropriate time values.
*/
- health_init();
+ health_sessiond = health_app_create(HEALTH_NUM_TYPE);
+ if (!health_sessiond) {
+ PERROR("health_app_create error");
+ goto exit_health_sessiond_cleanup;
+ }
+ health_init(health_sessiond);
/* Create thread to manage the client socket */
ret = pthread_create(&ht_cleanup_thread, NULL,
goto error; /* join error, exit without cleanup */
}
exit_ht_cleanup:
+ health_app_destroy(health_sessiond);
+exit_health_sessiond_cleanup:
exit:
/*
* cleanup() is called when no other thread is running.
#include "buffer-registry.h"
#include "fd-limit.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "ust-app.h"
#include "ust-consumer.h"
#include "ust-ctl.h"
#include <common/defaults.h>
#include "consumer.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "ust-consumer.h"
#include "buffer-registry.h"
#include "session.h"
#include "fd-limit.h"
#include "lttng-sessiond.h"
#include "ust-thread.h"
-#include "health.h"
+#include "health-sessiond.h"
/*
* This thread manage application notify communication.
rcu_register_thread();
rcu_thread_online();
- health_register(HEALTH_TYPE_APP_MANAGE_NOTIFY);
+ health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
health_code_update();
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
rcu_thread_offline();
rcu_unregister_thread();
return NULL;
SESSIONS=$(top_builddir)/src/bin/lttng-sessiond/session.o \
$(top_builddir)/src/bin/lttng-sessiond/consumer.o \
$(top_builddir)/src/bin/lttng-sessiond/utils.o \
- $(top_builddir)/src/bin/lttng-sessiond/health.o \
$(top_builddir)/src/bin/lttng-sessiond/snapshot.o \
$(top_builddir)/src/common/.libs/uri.o \
$(top_builddir)/src/common/.libs/utils.o \
- $(top_builddir)/src/common/.libs/error.o
+ $(top_builddir)/src/common/.libs/error.o \
+ $(top_builddir)/src/common/health/libhealth.la \
+ $(top_builddir)/src/common/sessiond-comm/libsessiond-comm.la
+
test_session_SOURCES = test_session.c
test_session_LDADD = $(LIBTAP) $(LIBCOMMON) $(LIBRELAYD) $(LIBSESSIOND_COMM) \
$(top_builddir)/src/bin/lttng-sessiond/ust-app.o \
$(top_builddir)/src/bin/lttng-sessiond/ust-consumer.o \
$(top_builddir)/src/bin/lttng-sessiond/fd-limit.o \
- $(top_builddir)/src/bin/lttng-sessiond/health.o \
$(top_builddir)/src/bin/lttng-sessiond/session.o \
$(top_builddir)/src/bin/lttng-sessiond/snapshot.o \
$(top_builddir)/src/common/.libs/uri.o \
- $(top_builddir)/src/common/.libs/utils.o
+ $(top_builddir)/src/common/.libs/utils.o \
+ $(top_builddir)/src/common/health/libhealth.la \
+ $(top_builddir)/src/common/sessiond-comm/libsessiond-comm.la
test_ust_data_SOURCES = test_ust_data.c
test_ust_data_LDADD = $(LIBTAP) $(LIBCOMMON) $(LIBRELAYD) $(LIBSESSIOND_COMM)\
# Kernel data structures unit test
KERN_DATA_TRACE=$(top_builddir)/src/bin/lttng-sessiond/trace-kernel.o \
$(top_builddir)/src/bin/lttng-sessiond/consumer.o \
- $(top_builddir)/src/bin/lttng-sessiond/health.o \
$(top_builddir)/src/bin/lttng-sessiond/utils.o \
$(top_builddir)/src/common/.libs/uri.o \
- $(top_builddir)/src/common/.libs/utils.o
+ $(top_builddir)/src/common/.libs/utils.o \
+ $(top_builddir)/src/common/health/libhealth.la \
+ $(top_builddir)/src/common/sessiond-comm/libsessiond-comm.la
test_kernel_data_SOURCES = test_kernel_data.c
test_kernel_data_LDADD = $(LIBTAP) $(LIBCOMMON) $(LIBRELAYD) $(LIBSESSIOND_COMM) \