#include <urcu/uatomic.h>
#include <urcu/list.h>
#include <lttng/health.h>
+#include <common/macros.h>
/*
* These are the value added to the current state depending of the position in
};
struct health_comm_msg {
- uint32_t component;
uint32_t cmd; /* enum health_cmd */
} LTTNG_PACKED;
struct health_comm_reply {
- uint32_t ret_code;
+ uint64_t ret_code; /* bitmask of threads in bad health */
} LTTNG_PACKED;
-/*
- * Status returned to lttng clients.
- */
-struct lttng_health_status {
- uint64_t error_threads_bitmask;
-};
-
/* Declare TLS health state. */
extern DECLARE_URCU_TLS(struct health_state, health_state);
LTTNG_HEALTH_CONSUMERD_UST_32,
LTTNG_HEALTH_CONSUMERD_UST_64,
LTTNG_HEALTH_CONSUMERD_KERNEL,
+
+ NR_LTTNG_HEALTH_CONSUMERD,
};
/**
*/
const struct lttng_health_thread *
lttng_health_get_thread(const struct lttng_health *health,
- int nth_thread);
+ unsigned int nth_thread);
/**
* lttng_health_thread_state - Get thread health state
#include <config.h>
#include <urcu/compiler.h>
#include <ulimit.h>
+#include <inttypes.h>
#include <common/defaults.h>
#include <common/common.h>
struct lttng_poll_event events;
struct health_comm_msg msg;
struct health_comm_reply reply;
+ int is_root;
DBG("[thread] Manage health check started");
goto error;
}
+ is_root = !getuid();
+ if (is_root) {
+ /* lttng health client socket path permissions */
+ ret = chown(health_unix_sock_path, 0,
+ utils_get_group_id(tracing_group_name));
+ if (ret < 0) {
+ ERR("Unable to set group on %s", health_unix_sock_path);
+ PERROR("chown");
+ ret = -1;
+ goto error;
+ }
+
+ ret = chmod(health_unix_sock_path,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
+ if (ret < 0) {
+ ERR("Unable to set permissions on %s", health_unix_sock_path);
+ PERROR("chmod");
+ ret = -1;
+ goto error;
+ }
+ }
+
/*
* Set the CLOEXEC flag. Return code is useless because either way, the
* show must go on.
assert(msg.cmd == HEALTH_CMD_CHECK);
- switch (msg.component) {
- case LTTNG_HEALTH_CONSUMERD_CHANNEL:
- reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_CHANNEL);
- break;
- case LTTNG_HEALTH_CONSUMERD_METADATA:
- reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA);
- break;
- case LTTNG_HEALTH_CONSUMERD_DATA:
- reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA);
- break;
- case LTTNG_HEALTH_CONSUMERD_SESSIOND:
- reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_SESSIOND);
- break;
- case LTTNG_HEALTH_CONSUMERD_METADATA_TIMER:
- reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA_TIMER);
- break;
-
- case LTTNG_HEALTH_CONSUMERD_ALL:
- reply.ret_code =
- health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_CHANNEL) &&
- health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA) &&
- health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA) &&
- health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_SESSIOND) &&
- health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA_TIMER);
- break;
- default:
- reply.ret_code = LTTNG_ERR_UND;
- break;
- }
-
- /*
- * Flip ret value since 0 is a success and 1 indicates a bad health for
- * the client where in the sessiond it is the opposite. Again, this is
- * just to make things easier for us poor developer which enjoy a lot
- * lazyness.
- */
- if (reply.ret_code == 0 || reply.ret_code == 1) {
- reply.ret_code = !reply.ret_code;
+ reply.ret_code = 0;
+ for (i = 0; i < NR_HEALTH_CONSUMERD_TYPES; i++) {
+ /*
+ * health_check_state returns 0 if the thread is
+ * in error.
+ */
+ if (!health_check_state(health_consumerd, i)) {
+ reply.ret_code |= 1ULL << i;
+ }
}
- DBG2("Health check return value %d", reply.ret_code);
+ DBG2("Health check return value %" PRIx64, reply.ret_code);
ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
if (ret < 0) {
#include <lttng/health-internal.h>
-enum health_type {
+enum health_type_consumerd {
HEALTH_CONSUMERD_TYPE_CHANNEL = 0,
HEALTH_CONSUMERD_TYPE_METADATA = 1,
HEALTH_CONSUMERD_TYPE_DATA = 2,
/* Consumerd health monitoring */
struct health_app *health_consumerd;
+const char *tracing_group_name = DEFAULT_TRACING_GROUP;
+
enum lttng_consumer_type lttng_consumer_get_type(void)
{
if (!ctx) {
fprintf(fp, "Usage: %s OPTIONS\n\nOptions:\n", progname);
fprintf(fp, " -h, --help "
"Display this usage.\n");
- fprintf(fp, " -c, --consumerd-cmd-sock PATH "
+ fprintf(fp, " -c, --consumerd-cmd-sock PATH "
"Specify path for the command socket\n");
- fprintf(fp, " -e, --consumerd-err-sock PATH "
+ fprintf(fp, " -e, --consumerd-err-sock PATH "
"Specify path for the error socket\n");
fprintf(fp, " -d, --daemonize "
"Start as a daemon.\n");
"Verbose mode. Activate DBG() macro.\n");
fprintf(fp, " -V, --version "
"Show version number.\n");
+ fprintf(fp, " -g, --group NAME "
+ "Specify the tracing group name. (default: tracing)\n");
fprintf(fp, " -k, --kernel "
"Consumer kernel buffers (default).\n");
fprintf(fp, " -u, --ust "
{ "consumerd-cmd-sock", 1, 0, 'c' },
{ "consumerd-err-sock", 1, 0, 'e' },
{ "daemonize", 0, 0, 'd' },
+ { "group", 1, 0, 'g' },
{ "help", 0, 0, 'h' },
{ "quiet", 0, 0, 'q' },
{ "verbose", 0, 0, 'v' },
while (1) {
int option_index = 0;
- c = getopt_long(argc, argv, "dhqvVku" "c:e:", long_options, &option_index);
+ c = getopt_long(argc, argv, "dhqvVku" "c:e:g:", long_options, &option_index);
if (c == -1) {
break;
}
case 'd':
opt_daemon = 1;
break;
+ case 'g':
+ tracing_group_name = optarg;
+ break;
case 'h':
usage(stdout);
exit(EXIT_SUCCESS);
#ifndef _LTTNG_CONSUMERD_H
#define _LTTNG_CONSUMERD_H
+/*
+ * Name of the tracing group. This header only declares the variable; the
+ * single definition (with its default value) belongs in one source file.
+ * Without "extern" every includer would carry its own tentative definition.
+ */
+extern const char *tracing_group_name;
+
enum lttng_consumer_type lttng_consumer_get_type(void);
#endif /* _LTTNG_CONSUMERD_H */
#include <lttng/health-internal.h>
-enum health_type {
+enum health_type_relayd {
HEALTH_RELAYD_TYPE_DISPATCHER = 0,
HEALTH_RELAYD_TYPE_WORKER = 1,
HEALTH_RELAYD_TYPE_LISTENER = 2,
#include <lttng/health-internal.h>
-enum health_type {
- HEALTH_TYPE_CMD = 0,
- HEALTH_TYPE_APP_MANAGE = 1,
- HEALTH_TYPE_APP_REG = 2,
- HEALTH_TYPE_KERNEL = 3,
- HEALTH_TYPE_CONSUMER = 4,
- HEALTH_TYPE_HT_CLEANUP = 5,
- HEALTH_TYPE_APP_MANAGE_NOTIFY = 6,
- HEALTH_TYPE_APP_REG_DISPATCH = 7,
+enum health_type_sessiond {
+ HEALTH_SESSIOND_TYPE_CMD = 0,
+ HEALTH_SESSIOND_TYPE_APP_MANAGE = 1,
+ HEALTH_SESSIOND_TYPE_APP_REG = 2,
+ HEALTH_SESSIOND_TYPE_KERNEL = 3,
+ HEALTH_SESSIOND_TYPE_CONSUMER = 4,
+ HEALTH_SESSIOND_TYPE_HT_CLEANUP = 5,
+ HEALTH_SESSIOND_TYPE_APP_MANAGE_NOTIFY = 6,
+ HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH = 7,
- HEALTH_NUM_TYPE,
+ NR_HEALTH_SESSIOND_TYPES,
};
/* Application health monitoring */
rcu_register_thread();
rcu_thread_online();
- health_register(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_HT_CLEANUP);
health_code_update();
#define CONSUMERD_FILE "lttng-consumerd"
-/* Const values */
-const char default_tracing_group[] = DEFAULT_TRACING_GROUP;
-
const char *progname;
-const char *opt_tracing_group;
+static const char *tracing_group_name = DEFAULT_TRACING_GROUP;
static const char *opt_pidfile;
static int opt_sig_parent;
static int opt_verbose_consumer;
return 0;
}
-/*
- * Return group ID of the tracing group or -1 if not found.
- */
-static gid_t allowed_group(void)
-{
- struct group *grp;
-
- if (opt_tracing_group) {
- grp = getgrnam(opt_tracing_group);
- } else {
- grp = getgrnam(default_tracing_group);
- }
- if (!grp) {
- return -1;
- } else {
- return grp->gr_gid;
- }
-}
-
/*
* Init thread quit pipe.
*
DBG("[thread] Thread manage kernel started");
- health_register(health_sessiond, HEALTH_TYPE_KERNEL);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
/*
* This first step of the while is to clean this structure which could free
DBG("[thread] Manage consumer started");
- health_register(health_sessiond, HEALTH_TYPE_CONSUMER);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
health_code_update();
rcu_register_thread();
rcu_thread_online();
- health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
if (testpoint(thread_manage_apps)) {
goto error_testpoint;
.count = 0,
};
- health_register(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
health_code_update();
DBG("[thread] Manage application registration started");
- health_register(health_sessiond, HEALTH_TYPE_APP_REG);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
if (testpoint(thread_registration_apps)) {
goto error_testpoint;
"lttng-consumerd", verbosity, "-k",
"--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
"--consumerd-err-sock", consumer_data->err_unix_sock_path,
+ "--group", tracing_group_name,
NULL);
break;
case LTTNG_CONSUMER64_UST:
ret = execl(consumerd64_bin, "lttng-consumerd", verbosity, "-u",
"--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
"--consumerd-err-sock", consumer_data->err_unix_sock_path,
+ "--group", tracing_group_name,
NULL);
if (consumerd64_libdir[0] != '\0') {
free(tmpnew);
ret = execl(consumerd32_bin, "lttng-consumerd", verbosity, "-u",
"--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
"--consumerd-err-sock", consumer_data->err_unix_sock_path,
+ "--group", tracing_group_name,
NULL);
if (consumerd32_libdir[0] != '\0') {
free(tmpnew);
return ret;
}
-/*
- * Compute health status of each consumer. If one of them is zero (bad
- * state), we return 0.
- */
-static int check_consumer_health(void)
-{
- int ret;
-
- ret = health_check_state(health_sessiond, HEALTH_TYPE_CONSUMER);
-
- DBG3("Health consumer check %d", ret);
-
- return ret;
-}
-
/*
* Setup necessary data for kernel tracer action.
*/
goto error;
}
+ if (is_root) {
+ /* lttng health client socket path permissions */
+ ret = chown(health_unix_sock_path, 0,
+ utils_get_group_id(tracing_group_name));
+ if (ret < 0) {
+ ERR("Unable to set group on %s", health_unix_sock_path);
+ PERROR("chown");
+ ret = -1;
+ goto error;
+ }
+
+ ret = chmod(health_unix_sock_path,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
+ if (ret < 0) {
+ ERR("Unable to set permissions on %s", health_unix_sock_path);
+ PERROR("chmod");
+ ret = -1;
+ goto error;
+ }
+ }
+
/*
* Set the CLOEXEC flag. Return code is useless because either way, the
* show must go on.
rcu_thread_online();
- switch (msg.component) {
- case LTTNG_HEALTH_CMD:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_CMD);
- break;
- case LTTNG_HEALTH_APP_MANAGE:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE);
- break;
- case LTTNG_HEALTH_APP_REG:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG);
- break;
- case LTTNG_HEALTH_KERNEL:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_KERNEL);
- break;
- case LTTNG_HEALTH_CONSUMER:
- reply.ret_code = check_consumer_health();
- break;
- case LTTNG_HEALTH_HT_CLEANUP:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
- break;
- case LTTNG_HEALTH_APP_MANAGE_NOTIFY:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
- break;
- case LTTNG_HEALTH_APP_REG_DISPATCH:
- reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
- break;
- case LTTNG_HEALTH_ALL:
- reply.ret_code =
- health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE) &&
- health_check_state(health_sessiond, HEALTH_TYPE_APP_REG) &&
- health_check_state(health_sessiond, HEALTH_TYPE_CMD) &&
- health_check_state(health_sessiond, HEALTH_TYPE_KERNEL) &&
- check_consumer_health() &&
- health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP) &&
- health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY) &&
- health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
- break;
- default:
- reply.ret_code = LTTNG_ERR_UND;
- break;
- }
-
- /*
- * Flip ret value since 0 is a success and 1 indicates a bad health for
- * the client where in the sessiond it is the opposite. Again, this is
- * just to make things easier for us poor developer which enjoy a lot
- * lazyness.
- */
- if (reply.ret_code == 0 || reply.ret_code == 1) {
- reply.ret_code = !reply.ret_code;
+ reply.ret_code = 0;
+ for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
+ /*
+ * health_check_state returns 0 if health is
+ * bad.
+ */
+ if (!health_check_state(health_sessiond, i)) {
+ reply.ret_code |= 1ULL << i;
+ }
}
- DBG2("Health check return value %d", reply.ret_code);
+ DBG2("Health check return value %" PRIx64, reply.ret_code);
ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
if (ret < 0) {
rcu_register_thread();
- health_register(health_sessiond, HEALTH_TYPE_CMD);
+ health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CMD);
if (testpoint(thread_manage_clients)) {
goto error_testpoint;
opt_daemon = 1;
break;
case 'g':
- opt_tracing_group = optarg;
+ tracing_group_name = optarg;
break;
case 'h':
usage();
int ret;
gid_t gid;
- ret = allowed_group();
- if (ret < 0) {
- WARN("No tracing group detected");
- /* Setting gid to 0 if no tracing group is found */
- gid = 0;
- } else {
- gid = ret;
- }
+ gid = utils_get_group_id(tracing_group_name);
/* Set lttng run dir */
ret = chown(rundir, 0, gid);
PERROR("chown");
}
- /* Ensure all applications and tracing group can search the run dir */
- ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH);
+ /*
+ * Ensure all applications and tracing group can search the run
+ * dir. Allow everyone to read the directory, since it does not
+ * buy us anything to hide its content.
+ */
+ ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
if (ret < 0) {
ERR("Unable to set permissions on %s", rundir);
PERROR("chmod");
}
/* kconsumer error socket path */
- ret = chown(kconsumer_data.err_unix_sock_path, 0, gid);
+ ret = chown(kconsumer_data.err_unix_sock_path, 0, 0);
if (ret < 0) {
ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
PERROR("chown");
}
/* 64-bit ustconsumer error socket path */
- ret = chown(ustconsumer64_data.err_unix_sock_path, 0, gid);
+ ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0);
if (ret < 0) {
ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
PERROR("chown");
}
/* 32-bit ustconsumer compat32 error socket path */
- ret = chown(ustconsumer32_data.err_unix_sock_path, 0, gid);
+ ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0);
if (ret < 0) {
ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
PERROR("chown");
int ret;
char path[PATH_MAX];
- switch (consumer_data->type) {
+ switch (consumer_data->type) {
case LTTNG_CONSUMER_KERNEL:
snprintf(path, PATH_MAX, DEFAULT_KCONSUMERD_PATH, rundir);
break;
DBG2("Creating consumer directory: %s", path);
- ret = mkdir(path, S_IRWXU);
+ ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP);
if (ret < 0) {
if (errno != EEXIST) {
PERROR("mkdir");
}
ret = -1;
}
+ if (is_root) {
+ ret = chown(path, 0, utils_get_group_id(tracing_group_name));
+ if (ret < 0) {
+ ERR("Unable to set group on %s", path);
+ PERROR("chown");
+ goto error;
+ }
+ }
/* Create the kconsumerd error unix socket */
consumer_data->err_sock =
* Initialize the health check subsystem. This call should set the
* appropriate time values.
*/
- health_sessiond = health_app_create(HEALTH_NUM_TYPE);
+ health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES);
if (!health_sessiond) {
PERROR("health_app_create error");
goto exit_health_sessiond_cleanup;
rcu_register_thread();
rcu_thread_online();
- health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
+ health_register(health_sessiond,
+ HEALTH_SESSIOND_TYPE_APP_MANAGE_NOTIFY);
health_code_update();
/* Default unix socket path */
#define DEFAULT_GLOBAL_CLIENT_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/client-lttng-sessiond"
#define DEFAULT_HOME_CLIENT_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/client-lttng-sessiond"
-#define DEFAULT_GLOBAL_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/health.sock"
-#define DEFAULT_HOME_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/health.sock"
+#define DEFAULT_GLOBAL_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/sessiond-health"
+#define DEFAULT_HOME_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/sessiond-health"
/* Default consumer health unix socket path */
-#define DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/health.ustconsumer32.sock"
-#define DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/health.ustconsumer32.sock"
-#define DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/health.ustconsumer64.sock"
-#define DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/health.ustconsumer64.sock"
-#define DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/health.kconsumer.sock"
-#define DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/health.kconsumer.sock"
+#define DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/ustconsumerd32/health"
+#define DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/ustconsumerd32/health"
+#define DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/ustconsumerd64/health"
+#define DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/ustconsumerd64/health"
+#define DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK DEFAULT_LTTNG_RUNDIR "/kconsumerd/health"
+#define DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK DEFAULT_LTTNG_HOME_RUNDIR "/kconsumerd/health"
#define DEFAULT_GLOBAL_APPS_UNIX_SOCK \
DEFAULT_LTTNG_RUNDIR "/" LTTNG_UST_SOCK_FILENAME
if (!ha) {
return NULL;
}
- ha->flags = zmalloc(sizeof(*ha->flags));
+ ha->flags = zmalloc(sizeof(*ha->flags) * nr_types);
if (!ha->flags) {
goto error_flags;
}
#include <unistd.h>
#include <inttypes.h>
#include <regex.h>
+#include <grp.h>
#include <common/common.h>
#include <common/runas.h>
return ret;
}
+
+/*
+ * Look up the group called "name" and return its group ID. When the lookup
+ * fails, GID 0 is returned instead and a warning is logged the first time
+ * this happens.
+ */
+LTTNG_HIDDEN
+gid_t utils_get_group_id(const char *name)
+{
+	/* Set after the first failed lookup so the warning is not repeated. */
+	static volatile int warn_once;
+	struct group *entry = getgrnam(name);
+
+	if (entry) {
+		return entry->gr_gid;
+	}
+
+	if (!warn_once) {
+		WARN("No tracing group detected");
+		warn_once = 1;
+	}
+	return 0;
+}
int utils_get_count_order_u32(uint32_t x);
char *utils_get_home_dir(void);
size_t utils_get_current_time_str(const char *format, char *dst, size_t len);
+gid_t utils_get_group_id(const char *name);
#endif /* _COMMON_UTILS_H */
lib_LTLIBRARIES = liblttng-ctl.la
-liblttng_ctl_la_SOURCES = lttng-ctl.c snapshot.c lttng-ctl-helper.h
+liblttng_ctl_la_SOURCES = lttng-ctl.c snapshot.c lttng-ctl-helper.h \
+ lttng-ctl-health.c
liblttng_ctl_la_LIBADD = \
$(top_builddir)/src/common/sessiond-comm/libsessiond-comm.la \
--- /dev/null
+/*
+ * lttng-ctl-health.c
+ *
+ * Linux Trace Toolkit Health Control Library
+ *
+ * Copyright (C) 2011 David Goulet <david.goulet@polymtl.ca>
+ * Copyright (C) 2013 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License, version 2.1 only,
+ * as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <limits.h>
+#include <errno.h>
+#include <lttng/health-internal.h>
+
+#include <bin/lttng-sessiond/health-sessiond.h>
+#include <bin/lttng-consumerd/health-consumerd.h>
+#include <bin/lttng-relayd/health-relayd.h>
+#include <common/defaults.h>
+#include <common/utils.h>
+
+#include "lttng-ctl-helper.h"
+
+/* Daemon a health object refers to; also the first index into thread_name[]. */
+enum health_component {
+ HEALTH_COMPONENT_SESSIOND,
+ HEALTH_COMPONENT_CONSUMERD,
+ HEALTH_COMPONENT_RELAYD,
+
+ NR_HEALTH_COMPONENT, /* Number of components; sizes thread_name[]. */
+};
+
+/* One per-thread entry inside a struct lttng_health. */
+struct lttng_health_thread {
+ struct lttng_health *p; /* Owning health object, set when it is created. */
+ int state; /* Set by lttng_health_query(): 0 if ok, -1 if in error. */
+};
+
+/* Health handle for one daemon, followed by its per-thread entries. */
+struct lttng_health {
+ enum health_component component;
+ uint64_t state; /* Bitmask of threads in error; bit i maps to thread[i]. */
+ unsigned int nr_threads; /* Number of entries in thread[]. */
+ char health_sock_path[PATH_MAX]; /* Unix socket path the query connects to. */
+ /* For consumer health only */
+ enum lttng_health_consumerd consumerd_type;
+ struct lttng_health_thread thread[];
+};
+
+/* Session daemon thread names, indexed by HEALTH_SESSIOND_TYPE_* value. */
+static
+const char *sessiond_thread_name[NR_HEALTH_SESSIOND_TYPES] = {
+ [ HEALTH_SESSIOND_TYPE_CMD ] = "Session daemon command",
+ [ HEALTH_SESSIOND_TYPE_APP_MANAGE ] = "Session daemon application manager",
+ [ HEALTH_SESSIOND_TYPE_APP_REG ] = "Session daemon application registration",
+ [ HEALTH_SESSIOND_TYPE_KERNEL ] = "Session daemon kernel",
+ [ HEALTH_SESSIOND_TYPE_CONSUMER ] = "Session daemon consumer manager",
+ [ HEALTH_SESSIOND_TYPE_HT_CLEANUP ] = "Session daemon hash table cleanup",
+ [ HEALTH_SESSIOND_TYPE_APP_MANAGE_NOTIFY ] = "Session daemon application notification manager",
+ [ HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH ] = "Session daemon application registration dispatcher",
+};
+
+/* Consumer daemon thread names, indexed by HEALTH_CONSUMERD_TYPE_* value. */
+static
+const char *consumerd_thread_name[NR_HEALTH_CONSUMERD_TYPES] = {
+ [ HEALTH_CONSUMERD_TYPE_CHANNEL ] = "Consumer daemon channel",
+ [ HEALTH_CONSUMERD_TYPE_METADATA ] = "Consumer daemon metadata",
+ [ HEALTH_CONSUMERD_TYPE_DATA ] = "Consumer daemon data",
+ [ HEALTH_CONSUMERD_TYPE_SESSIOND ] = "Consumer daemon session daemon command manager",
+ [ HEALTH_CONSUMERD_TYPE_METADATA_TIMER ] = "Consumer daemon metadata timer",
+};
+
+/* Relay daemon thread names, indexed by HEALTH_RELAYD_TYPE_* value. */
+static
+const char *relayd_thread_name[NR_HEALTH_RELAYD_TYPES] = {
+ [ HEALTH_RELAYD_TYPE_DISPATCHER ] = "Relay daemon dispatcher",
+ [ HEALTH_RELAYD_TYPE_WORKER ] = "Relay daemon worker",
+ [ HEALTH_RELAYD_TYPE_LISTENER ] = "Relay daemon listener",
+};
+
+/*
+ * Per-component name tables: thread_name[component][thread index] is the
+ * string returned by lttng_health_thread_name().
+ */
+static
+const char **thread_name[NR_HEALTH_COMPONENT] = {
+ [ HEALTH_COMPONENT_SESSIOND ] = sessiond_thread_name,
+ [ HEALTH_COMPONENT_CONSUMERD] = consumerd_thread_name,
+ [ HEALTH_COMPONENT_RELAYD ] = relayd_thread_name,
+};
+
+/*
+ * Fill in lh->health_sock_path for the component being queried.
+ *
+ * A relayd object keeps the path it already holds; that path only has to be
+ * non-empty. For sessiond and consumerd, root and callers passing a non-zero
+ * tracing_group get the global socket path; everybody else gets the path
+ * built from their home directory (or from /tmp when no home is known).
+ *
+ * Returns 0 on success, -EINVAL for an unknown component or consumerd type
+ * or an empty relayd path, and -ENOMEM when the per-user path does not fit.
+ */
+static
+int set_health_socket_path(struct lttng_health *lh,
+		int tracing_group)
+{
+	/* Global path and per-user format string for the selected daemon */
+	const char *global_path, *home_fmt;
+	const char *home_dir;
+	int len;
+
+	if (lh->component == HEALTH_COMPONENT_RELAYD) {
+		/* Nothing to compute: only validate the stored path. */
+		return lh->health_sock_path[0] != '\0' ? 0 : -EINVAL;
+	}
+
+	if (lh->component == HEALTH_COMPONENT_SESSIOND) {
+		global_path = DEFAULT_GLOBAL_HEALTH_UNIX_SOCK;
+		home_fmt = DEFAULT_HOME_HEALTH_UNIX_SOCK;
+	} else if (lh->component == HEALTH_COMPONENT_CONSUMERD) {
+		switch (lh->consumerd_type) {
+		case LTTNG_HEALTH_CONSUMERD_UST_32:
+			global_path = DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK;
+			home_fmt = DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK;
+			break;
+		case LTTNG_HEALTH_CONSUMERD_UST_64:
+			global_path = DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK;
+			home_fmt = DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK;
+			break;
+		case LTTNG_HEALTH_CONSUMERD_KERNEL:
+			global_path = DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK;
+			home_fmt = DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	if (getuid() == 0 || tracing_group) {
+		lttng_ctl_copy_string(lh->health_sock_path,
+				global_path,
+				sizeof(lh->health_sock_path));
+		return 0;
+	}
+
+	home_dir = utils_get_home_dir();
+	if (!home_dir) {
+		/* Fallback in /tmp */
+		home_dir = "/tmp";
+	}
+
+	/*
+	 * With GNU C < 2.1, snprintf returns -1 if the target buffer
+	 * is too small; With GNU C >= 2.1, snprintf returns the
+	 * required size (excluding closing null).
+	 */
+	len = snprintf(lh->health_sock_path, sizeof(lh->health_sock_path),
+			home_fmt, home_dir);
+	if (len < 0 || (size_t) len >= sizeof(lh->health_sock_path)) {
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a health object for component "hc" with room for "nr_threads"
+ * per-thread entries. Every entry gets a back-pointer to the new object and
+ * the overall state starts with all bits set (everything in error) until a
+ * query has been performed.
+ *
+ * Returns the new object, or NULL if the allocation fails. The object is
+ * released with lttng_health_destroy().
+ */
+static
+struct lttng_health *lttng_health_create(enum health_component hc,
+		unsigned int nr_threads)
+{
+	struct lttng_health *lh;
+	/* Same type as nr_threads to avoid a signed/unsigned comparison. */
+	unsigned int i;
+
+	lh = zmalloc(sizeof(*lh) + sizeof(lh->thread[0]) * nr_threads);
+	if (!lh) {
+		return NULL;
+	}
+
+	lh->component = hc;
+	lh->state = UINT64_MAX;		/* All bits in error initially */
+	lh->nr_threads = nr_threads;
+	for (i = 0; i < nr_threads; i++) {
+		lh->thread[i].p = lh;
+	}
+	return lh;
+}
+
+/*
+ * Create a health object for the session daemon.
+ * Returns NULL when the object cannot be allocated.
+ */
+struct lttng_health *lttng_health_create_sessiond(void)
+{
+	return lttng_health_create(HEALTH_COMPONENT_SESSIOND,
+			NR_HEALTH_SESSIOND_TYPES);
+}
+
+/*
+ * Create a health object for the consumer daemon of the given type.
+ * The type is only recorded here; it is used later to pick the socket path.
+ * Returns NULL when the object cannot be allocated.
+ */
+struct lttng_health *
+	lttng_health_create_consumerd(enum lttng_health_consumerd consumerd)
+{
+	struct lttng_health *handle;
+
+	handle = lttng_health_create(HEALTH_COMPONENT_CONSUMERD,
+			NR_HEALTH_CONSUMERD_TYPES);
+	if (handle) {
+		handle->consumerd_type = consumerd;
+	}
+	return handle;
+}
+
+/*
+ * Create a health object for a relay daemon reachable through the health
+ * socket at "path". The path is copied into the object.
+ * Returns NULL when "path" is NULL or the object cannot be allocated.
+ */
+struct lttng_health *lttng_health_create_relayd(const char *path)
+{
+	struct lttng_health *handle = NULL;
+
+	if (path) {
+		handle = lttng_health_create(HEALTH_COMPONENT_RELAYD,
+				NR_HEALTH_RELAYD_TYPES);
+	}
+	if (handle) {
+		lttng_ctl_copy_string(handle->health_sock_path, path,
+				sizeof(handle->health_sock_path));
+	}
+	return handle;
+}
+
+/* Free a health object; its per-thread entries live in the same allocation. */
+void lttng_health_destroy(struct lttng_health *lh)
+{
+ free(lh);
+}
+
+/*
+ * Query the daemon behind "health" and record the answer in the object.
+ *
+ * Connects to the component's health socket, sends a HEALTH_CMD_CHECK
+ * request, stores the returned bitmask of threads in error and refreshes
+ * each per-thread state (0 when ok, -1 when in error). When the global
+ * socket cannot be reached by a tracing group member, the per-user socket
+ * is tried next.
+ *
+ * Returns 0 on success and a negative value on failure: -EINVAL for a NULL
+ * handle, the error from set_health_socket_path(), or -1 when the socket
+ * cannot be connected to, written to or read from.
+ */
+int lttng_health_query(struct lttng_health *health)
+{
+	int sock, ret, tracing_group;
+	unsigned int i;
+	struct health_comm_msg msg;
+	struct health_comm_reply reply;
+
+	if (!health) {
+		return -EINVAL;
+	}
+
+	/*
+	 * lttng_check_tracing_group() is documented to return 1 for members
+	 * and -1 otherwise. Both are non-zero, so reduce the result to a
+	 * strict boolean before using it as one below.
+	 */
+	tracing_group = lttng_check_tracing_group() > 0;
+retry:
+	ret = set_health_socket_path(health, tracing_group);
+	if (ret) {
+		goto error;
+	}
+	/* Connect to the health socket of the component */
+	sock = lttcomm_connect_unix_sock(health->health_sock_path);
+	if (sock < 0) {
+		if (tracing_group) {
+			/* For tracing group, fallback to per-user */
+			tracing_group = 0;
+			goto retry;
+		}
+		ret = -1;
+		goto error;
+	}
+
+	msg.cmd = HEALTH_CMD_CHECK;
+
+	ret = lttcomm_send_unix_sock(sock, (void *)&msg, sizeof(msg));
+	if (ret < 0) {
+		ret = -1;
+		goto close_error;
+	}
+
+	/*
+	 * NOTE(review): only negative returns are treated as failures; confirm
+	 * what lttcomm_recv_unix_sock() returns when the peer closes the
+	 * socket before replying.
+	 */
+	ret = lttcomm_recv_unix_sock(sock, (void *)&reply, sizeof(reply));
+	if (ret < 0) {
+		ret = -1;
+		goto close_error;
+	}
+
+	/* Bit i of the reply is set when thread i is in error. */
+	health->state = reply.ret_code;
+	for (i = 0; i < health->nr_threads; i++) {
+		if (health->state & (1ULL << i)) {
+			health->thread[i].state = -1;
+		} else {
+			health->thread[i].state = 0;
+		}
+	}
+
+close_error:
+	{
+		int closeret;
+
+		closeret = close(sock);
+		assert(!closeret);
+	}
+
+error:
+	/* Successful socket calls leave a positive count in ret. */
+	if (ret >= 0) {
+		ret = 0;
+	}
+	return ret;
+}
+
+/*
+ * Overall state recorded in "health": 0 when no thread is flagged in error,
+ * -1 when at least one is. Returns -EINVAL for a NULL handle.
+ */
+int lttng_health_state(const struct lttng_health *health)
+{
+	if (!health) {
+		return -EINVAL;
+	}
+	return health->state ? -1 : 0;
+}
+
+/*
+ * Number of per-thread entries held by "health", or -EINVAL for a NULL
+ * handle.
+ */
+int lttng_health_get_nr_threads(const struct lttng_health *health)
+{
+	return health ? health->nr_threads : -EINVAL;
+}
+
+/*
+ * Entry number "nth_thread" of "health", or NULL when the handle is NULL or
+ * the index is not below the number of threads.
+ */
+const struct lttng_health_thread *
+	lttng_health_get_thread(const struct lttng_health *health,
+		unsigned int nth_thread)
+{
+	if (!health) {
+		return NULL;
+	}
+	if (nth_thread >= health->nr_threads) {
+		return NULL;
+	}
+	return health->thread + nth_thread;
+}
+
+/*
+ * State stored in a thread entry, or -EINVAL when "thread" is NULL.
+ */
+int lttng_health_thread_state(const struct lttng_health_thread *thread)
+{
+	return thread ? thread->state : -EINVAL;
+}
+
+/*
+ * Name of a thread entry, or NULL when "thread" is NULL. The entry's index
+ * is recovered from its position in the owner's thread array and looked up
+ * in the name table of the owner's component.
+ */
+const char *lttng_health_thread_name(const struct lttng_health_thread *thread)
+{
+	const struct lttng_health *owner;
+	unsigned int idx;
+
+	if (!thread) {
+		return NULL;
+	}
+	owner = thread->p;
+	idx = thread - owner->thread;
+	return thread_name[owner->component][idx];
+}
return lttng_ctl_ask_sessiond_varlen(lsm, NULL, 0, buf);
}
+int lttng_check_tracing_group(void);
+
#endif /* LTTNG_CTL_HELPER_H */
/* Socket to session daemon for communication */
static int sessiond_socket;
static char sessiond_sock_path[PATH_MAX];
-static char health_sock_path[PATH_MAX];
/* Variables */
static char *tracing_group;
*
* If yes return 1, else return -1.
*/
-static int check_tracing_group(const char *grp_name)
+LTTNG_HIDDEN
+int lttng_check_tracing_group(void)
{
struct group *grp_tracing; /* no free(). See getgrnam(3) */
gid_t *grp_list;
int grp_list_size, grp_id, i;
int ret = -1;
+ const char *grp_name = tracing_group;
/* Get GID of group 'tracing' */
grp_tracing = getgrnam(grp_name);
if (uid != 0) {
/* Are we in the tracing group ? */
- in_tgroup = check_tracing_group(tracing_group);
+ in_tgroup = lttng_check_tracing_group();
}
if ((uid == 0) || in_tgroup) {
return -ENOSYS;
}
-/*
- * Set health socket path by putting it in the global health_sock_path
- * variable.
- *
- * Returns 0 on success or -ENOMEM.
- */
-static int set_health_socket_path(void)
-{
- uid_t uid;
- const char *home;
- int ret;
-
- uid = getuid();
-
- if (uid == 0 || check_tracing_group(tracing_group)) {
- lttng_ctl_copy_string(health_sock_path,
- DEFAULT_GLOBAL_HEALTH_UNIX_SOCK, sizeof(health_sock_path));
- return 0;
- }
-
- /*
- * With GNU C < 2.1, snprintf returns -1 if the target buffer
- * is too small; With GNU C >= 2.1, snprintf returns the
- * required size (excluding closing null).
- */
- home = utils_get_home_dir();
- if (home == NULL) {
- /* Fallback in /tmp */
- home = "/tmp";
- }
-
- ret = snprintf(health_sock_path, sizeof(health_sock_path),
- DEFAULT_HOME_HEALTH_UNIX_SOCK, home);
- if ((ret < 0) || (ret >= sizeof(health_sock_path))) {
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/*
- * Check session daemon health for a specific health component.
- *
- * Return 0 if health is OK or else 1 if BAD.
- *
- * Any other negative value is a lttng error code which can be translated with
- * lttng_strerror().
- */
-int lttng_health_check(enum lttng_health_component c)
-{
- int sock, ret;
- struct health_comm_msg msg;
- struct health_comm_reply reply;
-
- /* Connect to the sesssion daemon */
- sock = lttcomm_connect_unix_sock(health_sock_path);
- if (sock < 0) {
- ret = -LTTNG_ERR_NO_SESSIOND;
- goto error;
- }
-
- msg.cmd = HEALTH_CMD_CHECK;
- msg.component = c;
-
- ret = lttcomm_send_unix_sock(sock, (void *)&msg, sizeof(msg));
- if (ret < 0) {
- ret = -LTTNG_ERR_FATAL;
- goto close_error;
- }
-
- ret = lttcomm_recv_unix_sock(sock, (void *)&reply, sizeof(reply));
- if (ret < 0) {
- ret = -LTTNG_ERR_FATAL;
- goto close_error;
- }
-
- ret = reply.ret_code;
-
-close_error:
- {
- int closeret;
-
- closeret = close(sock);
- assert(!closeret);
- }
-
-error:
- return ret;
-}
-
/*
* This is an extension of create session that is ONLY and SHOULD only be used
* by the lttng command line program. It exists to avoid using URI parsing in
{
/* Set default session group */
lttng_set_tracing_group(DEFAULT_TRACING_GROUP);
- /* Set socket for health check */
- if (set_health_socket_path()) {
- abort();
- }
}
/*
*/
#include <stdio.h>
+#include <stdlib.h>
-#include "lttng/lttng.h"
+#include <lttng/health.h>
-#define HEALTH_CMD_FAIL (1 << 0)
-#define HEALTH_APP_MNG_FAIL (1 << 1)
-#define HEALTH_APP_REG_FAIL (1 << 2)
-#define HEALTH_KERNEL_FAIL (1 << 3)
-#define HEALTH_CSMR_FAIL (1 << 4)
-
-int main(int argc, char *argv[])
+static
+int check_component(struct lttng_health *lh, const char *component_name)
{
- int health = -1;
- int status = 0;
+ const struct lttng_health_thread *thread;
+ int nr_threads, i, status;
- /* Command thread */
- health = lttng_health_check(LTTNG_HEALTH_CMD);
- printf("Health check cmd: %d\n", health);
+ if (lttng_health_query(lh)) {
+ fprintf(stderr, "Error querying %s health\n",
+ component_name);
+ return -1;
+ }
+ status = lttng_health_state(lh);
+ if (!status) {
+ return status;
+ }
- if (health) {
- status |= HEALTH_CMD_FAIL;
+ nr_threads = lttng_health_get_nr_threads(lh);
+ if (nr_threads < 0) {
+ fprintf(stderr, "Error getting number of threads\n");
+ return -1;
}
- /* App manage thread */
- health = lttng_health_check(LTTNG_HEALTH_APP_MANAGE);
- printf("Health check app. manage: %d\n", health);
+ printf("Component \"%s\" is in error.\n", component_name);
+ for (i = 0; i < nr_threads; i++) {
+ int thread_state;
- if (health) {
- status |= HEALTH_APP_MNG_FAIL;
- }
- /* App registration thread */
- health = lttng_health_check(LTTNG_HEALTH_APP_REG);
- printf("Health check app. registration: %d\n", health);
+ thread = lttng_health_get_thread(lh, i);
+ if (!thread) {
+ fprintf(stderr, "Error getting thread %d\n", i);
+ return -1;
+ }
+ thread_state = lttng_health_thread_state(thread);
+ if (!thread_state) {
+ continue;
+ }
+ printf("Thread \"%s\" is not responding in component \"%s\".\n",
+ lttng_health_thread_name(thread),
+ component_name);
- if (health) {
- status |= HEALTH_APP_REG_FAIL;
}
+ return status;
+}
- /* Kernel thread */
- health = lttng_health_check(LTTNG_HEALTH_KERNEL);
- printf("Health check kernel: %d\n", health);
+static
+int check_sessiond(void)
+{
+ struct lttng_health *lh;
+ int status;
- if (health) {
- status |= HEALTH_KERNEL_FAIL;
+ lh = lttng_health_create_sessiond();
+ if (!lh) {
+ perror("lttng_health_create_sessiond");
+ return -1;
}
- /* Consumer thread */
- health = lttng_health_check(LTTNG_HEALTH_CONSUMER);
- printf("Health check consumer: %d\n", health);
+ status = check_component(lh, "sessiond");
+
+ lttng_health_destroy(lh);
+
+ return status;
+}
+
+static
+int check_consumerd(enum lttng_health_consumerd hc)
+{
+ struct lttng_health *lh;
+ int status;
+ static const char *cnames[NR_LTTNG_HEALTH_CONSUMERD] = {
+ "ust-consumerd-32",
+ "ust-consumerd-64",
+ "kernel-consumerd",
+ };
- if (health) {
- status |= HEALTH_CSMR_FAIL;
+ lh = lttng_health_create_consumerd(hc);
+ if (!lh) {
+ perror("lttng_health_create_consumerd");
+ return -1;
}
+ status = check_component(lh, cnames[hc]);
+
+ lttng_health_destroy(lh);
+
return status;
}
+
+
+/*
+ * Check the session daemon and then every consumer daemon type. Exit with
+ * EXIT_SUCCESS only when every check returned 0.
+ */
+int main(int argc, char *argv[])
+{
+	int failed, type;
+
+	failed = check_sessiond();
+	for (type = 0; type < NR_LTTNG_HEALTH_CONSUMERD; type++) {
+		failed |= check_consumerd(type);
+	}
+	exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
+}