Health check: implement health check query in sessiond and consumerd
author: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sat, 5 Oct 2013 00:15:19 +0000 (20:15 -0400)
committer: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Wed, 9 Oct 2013 13:16:59 +0000 (09:16 -0400)
At this stage, tests/regression/tools/health/health_check queries
sessiond and consumerd with the new API. However, the scripts using this
program have not yet been changed and are therefore expected to fail.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
20 files changed:
include/lttng/health-internal.h
include/lttng/health.h
src/bin/lttng-consumerd/health-consumerd.c
src/bin/lttng-consumerd/health-consumerd.h
src/bin/lttng-consumerd/lttng-consumerd.c
src/bin/lttng-consumerd/lttng-consumerd.h
src/bin/lttng-relayd/health-relayd.h
src/bin/lttng-sessiond/health-sessiond.h
src/bin/lttng-sessiond/ht-cleanup.c
src/bin/lttng-sessiond/main.c
src/bin/lttng-sessiond/ust-thread.c
src/common/defaults.h
src/common/health/health.c
src/common/utils.c
src/common/utils.h
src/lib/lttng-ctl/Makefile.am
src/lib/lttng-ctl/lttng-ctl-health.c [new file with mode: 0644]
src/lib/lttng-ctl/lttng-ctl-helper.h
src/lib/lttng-ctl/lttng-ctl.c
tests/regression/tools/health/health_check.c

index 3ad25d8261858c767ec0891fb61d165714e9b245..2225e33f1f0d77190eac25119f011d2300e038cd 100644 (file)
@@ -26,6 +26,7 @@
 #include <urcu/uatomic.h>
 #include <urcu/list.h>
 #include <lttng/health.h>
+#include <common/macros.h>
 
 /*
  * These are the value added to the current state depending of the position in
@@ -65,21 +66,13 @@ enum health_cmd {
 };
 
 struct health_comm_msg {
-       uint32_t component;
        uint32_t cmd;           /* enum health_cmd */
 } LTTNG_PACKED;
 
 struct health_comm_reply {
-       uint32_t ret_code;
+       uint64_t ret_code;      /* bitmask of threads in bad health */
 } LTTNG_PACKED;
 
-/*
- * Status returned to lttng clients.
- */
-struct lttng_health_status {
-       uint64_t error_threads_bitmask;
-};
-
 /* Declare TLS health state. */
 extern DECLARE_URCU_TLS(struct health_state, health_state);
 
index 477cb3d28dfe72e367da5f81f7fcaa0345872385..996092869465f0a8ea17bfd0396d785d0274b1f5 100644 (file)
@@ -26,6 +26,8 @@ enum lttng_health_consumerd {
        LTTNG_HEALTH_CONSUMERD_UST_32,
        LTTNG_HEALTH_CONSUMERD_UST_64,
        LTTNG_HEALTH_CONSUMERD_KERNEL,
+
+       NR_LTTNG_HEALTH_CONSUMERD,
 };
 
 /**
@@ -104,7 +106,7 @@ int lttng_health_get_nr_threads(const struct lttng_health *health);
  */
 const struct lttng_health_thread *
        lttng_health_get_thread(const struct lttng_health *health,
-               int nth_thread);
+               unsigned int nth_thread);
 
 /**
  * lttng_health_thread_state - Get thread health state
index 062e46b9343adda4da3d1edf9a5453eee465535f..60808f37094d304480468875c5cde6b944599c9c 100644 (file)
@@ -39,6 +39,7 @@
 #include <config.h>
 #include <urcu/compiler.h>
 #include <ulimit.h>
+#include <inttypes.h>
 
 #include <common/defaults.h>
 #include <common/common.h>
@@ -175,6 +176,7 @@ void *thread_manage_health(void *data)
        struct lttng_poll_event events;
        struct health_comm_msg msg;
        struct health_comm_reply reply;
+       int is_root;
 
        DBG("[thread] Manage health check started");
 
@@ -193,6 +195,28 @@ void *thread_manage_health(void *data)
                goto error;
        }
 
+       is_root = !getuid();
+       if (is_root) {
+               /* lttng health client socket path permissions */
+               ret = chown(health_unix_sock_path, 0,
+                               utils_get_group_id(tracing_group_name));
+               if (ret < 0) {
+                       ERR("Unable to set group on %s", health_unix_sock_path);
+                       PERROR("chown");
+                       ret = -1;
+                       goto error;
+               }
+
+               ret = chmod(health_unix_sock_path,
+                               S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
+               if (ret < 0) {
+                       ERR("Unable to set permissions on %s", health_unix_sock_path);
+                       PERROR("chmod");
+                       ret = -1;
+                       goto error;
+               }
+       }
+
        /*
         * Set the CLOEXEC flag. Return code is useless because either way, the
         * show must go on.
@@ -288,47 +312,18 @@ restart:
 
                assert(msg.cmd == HEALTH_CMD_CHECK);
 
-               switch (msg.component) {
-               case LTTNG_HEALTH_CONSUMERD_CHANNEL:
-                       reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_CHANNEL);
-                       break;
-               case LTTNG_HEALTH_CONSUMERD_METADATA:
-                       reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA);
-                       break;
-               case LTTNG_HEALTH_CONSUMERD_DATA:
-                       reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA);
-                       break;
-               case LTTNG_HEALTH_CONSUMERD_SESSIOND:
-                       reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_SESSIOND);
-                       break;
-               case LTTNG_HEALTH_CONSUMERD_METADATA_TIMER:
-                       reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA_TIMER);
-                       break;
-
-               case LTTNG_HEALTH_CONSUMERD_ALL:
-                       reply.ret_code =
-                               health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_CHANNEL) &&
-                               health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA) &&
-                               health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA) &&
-                               health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_SESSIOND) &&
-                               health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA_TIMER);
-                       break;
-               default:
-                       reply.ret_code = LTTNG_ERR_UND;
-                       break;
-               }
-
-               /*
-                * Flip ret value since 0 is a success and 1 indicates a bad health for
-                * the client where in the sessiond it is the opposite. Again, this is
-                * just to make things easier for us poor developer which enjoy a lot
-                * lazyness.
-                */
-               if (reply.ret_code == 0 || reply.ret_code == 1) {
-                       reply.ret_code = !reply.ret_code;
+               reply.ret_code = 0;
+               for (i = 0; i < NR_HEALTH_CONSUMERD_TYPES; i++) {
+                       /*
+                        * health_check_state returns 0 if the thread
+                        * is in error.
+                        */
+                       if (!health_check_state(health_consumerd, i)) {
+                               reply.ret_code |= 1ULL << i;
+                       }
                }
 
-               DBG2("Health check return value %d", reply.ret_code);
+               DBG2("Health check return value %" PRIx64, reply.ret_code);
 
                ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
                if (ret < 0) {
index b1cf4a2c912456c92276d21d899fa141a5d7fac7..812a7378abd5b077b12b59612e5b9ac102e5ff83 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <lttng/health-internal.h>
 
-enum health_type {
+enum health_type_consumerd {
        HEALTH_CONSUMERD_TYPE_CHANNEL           = 0,
        HEALTH_CONSUMERD_TYPE_METADATA          = 1,
        HEALTH_CONSUMERD_TYPE_DATA              = 2,
index cf9cb205c35ba73b600bb08125ced84dbb43aada..04adb973c76a198acd06fa7dc000ead230cca61c 100644 (file)
@@ -77,6 +77,8 @@ static struct lttng_consumer_local_data *ctx;
 /* Consumerd health monitoring */
 struct health_app *health_consumerd;
 
+const char *tracing_group_name = DEFAULT_TRACING_GROUP;
+
 enum lttng_consumer_type lttng_consumer_get_type(void)
 {
        if (!ctx) {
@@ -150,9 +152,9 @@ static void usage(FILE *fp)
        fprintf(fp, "Usage: %s OPTIONS\n\nOptions:\n", progname);
        fprintf(fp, "  -h, --help                         "
                        "Display this usage.\n");
-       fprintf(fp, "  -c, --consumerd-cmd-sock PATH     "
+       fprintf(fp, "  -c, --consumerd-cmd-sock PATH      "
                        "Specify path for the command socket\n");
-       fprintf(fp, "  -e, --consumerd-err-sock PATH     "
+       fprintf(fp, "  -e, --consumerd-err-sock PATH      "
                        "Specify path for the error socket\n");
        fprintf(fp, "  -d, --daemonize                    "
                        "Start as a daemon.\n");
@@ -162,6 +164,8 @@ static void usage(FILE *fp)
                        "Verbose mode. Activate DBG() macro.\n");
        fprintf(fp, "  -V, --version                      "
                        "Show version number.\n");
+       fprintf(fp, "  -g, --group NAME                   "
+                       "Specify the tracing group name. (default: tracing)\n");
        fprintf(fp, "  -k, --kernel                       "
                        "Consumer kernel buffers (default).\n");
        fprintf(fp, "  -u, --ust                          "
@@ -185,6 +189,7 @@ static void parse_args(int argc, char **argv)
                { "consumerd-cmd-sock", 1, 0, 'c' },
                { "consumerd-err-sock", 1, 0, 'e' },
                { "daemonize", 0, 0, 'd' },
+               { "group", 1, 0, 'g' },
                { "help", 0, 0, 'h' },
                { "quiet", 0, 0, 'q' },
                { "verbose", 0, 0, 'v' },
@@ -198,7 +203,7 @@ static void parse_args(int argc, char **argv)
 
        while (1) {
                int option_index = 0;
-               c = getopt_long(argc, argv, "dhqvVku" "c:e:", long_options, &option_index);
+               c = getopt_long(argc, argv, "dhqvVku" "c:e:g:", long_options, &option_index);
                if (c == -1) {
                        break;
                }
@@ -219,6 +224,9 @@ static void parse_args(int argc, char **argv)
                case 'd':
                        opt_daemon = 1;
                        break;
+               case 'g':
+                       tracing_group_name = optarg;
+                       break;
                case 'h':
                        usage(stdout);
                        exit(EXIT_SUCCESS);
index 6deb789c6f98be6e1ef125053ec4839096918601..5662429ad4c82e958ff47219d88cc8642f173e00 100644 (file)
@@ -19,6 +19,8 @@
 #ifndef _LTTNG_CONSUMERD_H
 #define _LTTNG_CONSUMERD_H
 
+const char *tracing_group_name;
+
 enum lttng_consumer_type lttng_consumer_get_type(void);
 
 #endif /* _LTTNG_CONSUMERD_H */
index 6b55cc3b11dbdc1476b5c550e252f4789018239f..aaf67ad45df8655984853fe64f8c61d2cd387f13 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <lttng/health-internal.h>
 
-enum health_type {
+enum health_type_relayd {
        HEALTH_RELAYD_TYPE_DISPATCHER           = 0,
        HEALTH_RELAYD_TYPE_WORKER               = 1,
        HEALTH_RELAYD_TYPE_LISTENER             = 2,
index 49f9e0b9ca2f0133124403fbfea81ed2186967be..22ea1bb3e6de87635b5623c8fff8c5dc6163531a 100644 (file)
 
 #include <lttng/health-internal.h>
 
-enum health_type {
-       HEALTH_TYPE_CMD                 = 0,
-       HEALTH_TYPE_APP_MANAGE          = 1,
-       HEALTH_TYPE_APP_REG             = 2,
-       HEALTH_TYPE_KERNEL              = 3,
-       HEALTH_TYPE_CONSUMER            = 4,
-       HEALTH_TYPE_HT_CLEANUP          = 5,
-       HEALTH_TYPE_APP_MANAGE_NOTIFY   = 6,
-       HEALTH_TYPE_APP_REG_DISPATCH    = 7,
+enum health_type_sessiond {
+       HEALTH_SESSIOND_TYPE_CMD                = 0,
+       HEALTH_SESSIOND_TYPE_APP_MANAGE         = 1,
+       HEALTH_SESSIOND_TYPE_APP_REG            = 2,
+       HEALTH_SESSIOND_TYPE_KERNEL             = 3,
+       HEALTH_SESSIOND_TYPE_CONSUMER           = 4,
+       HEALTH_SESSIOND_TYPE_HT_CLEANUP         = 5,
+       HEALTH_SESSIOND_TYPE_APP_MANAGE_NOTIFY  = 6,
+       HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH   = 7,
 
-       HEALTH_NUM_TYPE,
+       NR_HEALTH_SESSIOND_TYPES,
 };
 
 /* Application health monitoring */
index 4eb0c003437574e8790fa95ce02d7cac3ef09ec2..890c9a8215e26967c5ff6b42b5cf171295cdc306 100644 (file)
@@ -36,7 +36,7 @@ void *thread_ht_cleanup(void *data)
        rcu_register_thread();
        rcu_thread_online();
 
-       health_register(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_HT_CLEANUP);
 
        health_code_update();
 
index e119c8bf0305f6d19084dcfe04f9f12abbe12c9e..5ec39649969f9fef523e10b7e94896149d5812c1 100644 (file)
 
 #define CONSUMERD_FILE "lttng-consumerd"
 
-/* Const values */
-const char default_tracing_group[] = DEFAULT_TRACING_GROUP;
-
 const char *progname;
-const char *opt_tracing_group;
+static const char *tracing_group_name = DEFAULT_TRACING_GROUP;
 static const char *opt_pidfile;
 static int opt_sig_parent;
 static int opt_verbose_consumer;
@@ -325,25 +322,6 @@ int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
        return 0;
 }
 
-/*
- * Return group ID of the tracing group or -1 if not found.
- */
-static gid_t allowed_group(void)
-{
-       struct group *grp;
-
-       if (opt_tracing_group) {
-               grp = getgrnam(opt_tracing_group);
-       } else {
-               grp = getgrnam(default_tracing_group);
-       }
-       if (!grp) {
-               return -1;
-       } else {
-               return grp->gr_gid;
-       }
-}
-
 /*
  * Init thread quit pipe.
  *
@@ -758,7 +736,7 @@ static void *thread_manage_kernel(void *data)
 
        DBG("[thread] Thread manage kernel started");
 
-       health_register(health_sessiond, HEALTH_TYPE_KERNEL);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
 
        /*
         * This first step of the while is to clean this structure which could free
@@ -924,7 +902,7 @@ static void *thread_manage_consumer(void *data)
 
        DBG("[thread] Manage consumer started");
 
-       health_register(health_sessiond, HEALTH_TYPE_CONSUMER);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
 
        health_code_update();
 
@@ -1223,7 +1201,7 @@ static void *thread_manage_apps(void *data)
        rcu_register_thread();
        rcu_thread_online();
 
-       health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
 
        if (testpoint(thread_manage_apps)) {
                goto error_testpoint;
@@ -1515,7 +1493,7 @@ static void *thread_dispatch_ust_registration(void *data)
                .count = 0,
        };
 
-       health_register(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
 
        health_code_update();
 
@@ -1746,7 +1724,7 @@ static void *thread_registration_apps(void *data)
 
        DBG("[thread] Manage application registration started");
 
-       health_register(health_sessiond, HEALTH_TYPE_APP_REG);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
 
        if (testpoint(thread_registration_apps)) {
                goto error_testpoint;
@@ -2127,6 +2105,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                                "lttng-consumerd", verbosity, "-k",
                                "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
                                "--consumerd-err-sock", consumer_data->err_unix_sock_path,
+                               "--group", tracing_group_name,
                                NULL);
                        break;
                case LTTNG_CONSUMER64_UST:
@@ -2165,6 +2144,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        ret = execl(consumerd64_bin, "lttng-consumerd", verbosity, "-u",
                                        "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
                                        "--consumerd-err-sock", consumer_data->err_unix_sock_path,
+                                       "--group", tracing_group_name,
                                        NULL);
                        if (consumerd64_libdir[0] != '\0') {
                                free(tmpnew);
@@ -2210,6 +2190,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        ret = execl(consumerd32_bin, "lttng-consumerd", verbosity, "-u",
                                        "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
                                        "--consumerd-err-sock", consumer_data->err_unix_sock_path,
+                                       "--group", tracing_group_name,
                                        NULL);
                        if (consumerd32_libdir[0] != '\0') {
                                free(tmpnew);
@@ -2295,21 +2276,6 @@ error:
        return ret;
 }
 
-/*
- * Compute health status of each consumer. If one of them is zero (bad
- * state), we return 0.
- */
-static int check_consumer_health(void)
-{
-       int ret;
-
-       ret = health_check_state(health_sessiond, HEALTH_TYPE_CONSUMER);
-
-       DBG3("Health consumer check %d", ret);
-
-       return ret;
-}
-
 /*
  * Setup necessary data for kernel tracer action.
  */
@@ -3475,6 +3441,27 @@ static void *thread_manage_health(void *data)
                goto error;
        }
 
+       if (is_root) {
+               /* lttng health client socket path permissions */
+               ret = chown(health_unix_sock_path, 0,
+                               utils_get_group_id(tracing_group_name));
+               if (ret < 0) {
+                       ERR("Unable to set group on %s", health_unix_sock_path);
+                       PERROR("chown");
+                       ret = -1;
+                       goto error;
+               }
+
+               ret = chmod(health_unix_sock_path,
+                               S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
+               if (ret < 0) {
+                       ERR("Unable to set permissions on %s", health_unix_sock_path);
+                       PERROR("chmod");
+                       ret = -1;
+                       goto error;
+               }
+       }
+
        /*
         * Set the CLOEXEC flag. Return code is useless because either way, the
         * show must go on.
@@ -3565,58 +3552,18 @@ restart:
 
                rcu_thread_online();
 
-               switch (msg.component) {
-               case LTTNG_HEALTH_CMD:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_CMD);
-                       break;
-               case LTTNG_HEALTH_APP_MANAGE:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE);
-                       break;
-               case LTTNG_HEALTH_APP_REG:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG);
-                       break;
-               case LTTNG_HEALTH_KERNEL:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_KERNEL);
-                       break;
-               case LTTNG_HEALTH_CONSUMER:
-                       reply.ret_code = check_consumer_health();
-                       break;
-               case LTTNG_HEALTH_HT_CLEANUP:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
-                       break;
-               case LTTNG_HEALTH_APP_MANAGE_NOTIFY:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
-                       break;
-               case LTTNG_HEALTH_APP_REG_DISPATCH:
-                       reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
-                       break;
-               case LTTNG_HEALTH_ALL:
-                       reply.ret_code =
-                               health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE) &&
-                               health_check_state(health_sessiond, HEALTH_TYPE_APP_REG) &&
-                               health_check_state(health_sessiond, HEALTH_TYPE_CMD) &&
-                               health_check_state(health_sessiond, HEALTH_TYPE_KERNEL) &&
-                               check_consumer_health() &&
-                               health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP) &&
-                               health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY) &&
-                               health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
-                       break;
-               default:
-                       reply.ret_code = LTTNG_ERR_UND;
-                       break;
-               }
-
-               /*
-                * Flip ret value since 0 is a success and 1 indicates a bad health for
-                * the client where in the sessiond it is the opposite. Again, this is
-                * just to make things easier for us poor developer which enjoy a lot
-                * lazyness.
-                */
-               if (reply.ret_code == 0 || reply.ret_code == 1) {
-                       reply.ret_code = !reply.ret_code;
+               reply.ret_code = 0;
+               for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
+                       /*
+                        * health_check_state returns 0 if health is
+                        * bad.
+                        */
+                       if (!health_check_state(health_sessiond, i)) {
+                               reply.ret_code |= 1ULL << i;
+                       }
                }
 
-               DBG2("Health check return value %d", reply.ret_code);
+               DBG2("Health check return value %" PRIx64, reply.ret_code);
 
                ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
                if (ret < 0) {
@@ -3667,7 +3614,7 @@ static void *thread_manage_clients(void *data)
 
        rcu_register_thread();
 
-       health_register(health_sessiond, HEALTH_TYPE_CMD);
+       health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CMD);
 
        if (testpoint(thread_manage_clients)) {
                goto error_testpoint;
@@ -3988,7 +3935,7 @@ static int parse_args(int argc, char **argv)
                        opt_daemon = 1;
                        break;
                case 'g':
-                       opt_tracing_group = optarg;
+                       tracing_group_name = optarg;
                        break;
                case 'h':
                        usage();
@@ -4149,14 +4096,7 @@ static int set_permissions(char *rundir)
        int ret;
        gid_t gid;
 
-       ret = allowed_group();
-       if (ret < 0) {
-               WARN("No tracing group detected");
-               /* Setting gid to 0 if no tracing group is found */
-               gid = 0;
-       } else {
-               gid = ret;
-       }
+       gid = utils_get_group_id(tracing_group_name);
 
        /* Set lttng run dir */
        ret = chown(rundir, 0, gid);
@@ -4165,8 +4105,12 @@ static int set_permissions(char *rundir)
                PERROR("chown");
        }
 
-       /* Ensure all applications and tracing group can search the run dir */
-       ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH);
+       /*
+        * Ensure all applications and tracing group can search the run
+        * dir. Allow everyone to read the directory, since it does not
+        * buy us anything to hide its content.
+        */
+       ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
        if (ret < 0) {
                ERR("Unable to set permissions on %s", rundir);
                PERROR("chmod");
@@ -4180,21 +4124,21 @@ static int set_permissions(char *rundir)
        }
 
        /* kconsumer error socket path */
-       ret = chown(kconsumer_data.err_unix_sock_path, 0, gid);
+       ret = chown(kconsumer_data.err_unix_sock_path, 0, 0);
        if (ret < 0) {
                ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
                PERROR("chown");
        }
 
        /* 64-bit ustconsumer error socket path */
-       ret = chown(ustconsumer64_data.err_unix_sock_path, 0, gid);
+       ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0);
        if (ret < 0) {
                ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
                PERROR("chown");
        }
 
        /* 32-bit ustconsumer compat32 error socket path */
-       ret = chown(ustconsumer32_data.err_unix_sock_path, 0, gid);
+       ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0);
        if (ret < 0) {
                ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
                PERROR("chown");
@@ -4238,7 +4182,7 @@ static int set_consumer_sockets(struct consumer_data *consumer_data,
        int ret;
        char path[PATH_MAX];
 
-    switch (consumer_data->type) {
+       switch (consumer_data->type) {
        case LTTNG_CONSUMER_KERNEL:
                snprintf(path, PATH_MAX, DEFAULT_KCONSUMERD_PATH, rundir);
                break;
@@ -4256,7 +4200,7 @@ static int set_consumer_sockets(struct consumer_data *consumer_data,
 
        DBG2("Creating consumer directory: %s", path);
 
-       ret = mkdir(path, S_IRWXU);
+       ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP);
        if (ret < 0) {
                if (errno != EEXIST) {
                        PERROR("mkdir");
@@ -4265,6 +4209,14 @@ static int set_consumer_sockets(struct consumer_data *consumer_data,
                }
                ret = -1;
        }
+       if (is_root) {
+               ret = chown(path, 0, utils_get_group_id(tracing_group_name));
+               if (ret < 0) {
+                       ERR("Unable to set group on %s", path);
+                       PERROR("chown");
+                       goto error;
+               }
+       }
 
        /* Create the kconsumerd error unix socket */
        consumer_data->err_sock =
@@ -4722,7 +4674,7 @@ int main(int argc, char **argv)
         * Initialize the health check subsystem. This call should set the
         * appropriate time values.
         */
-       health_sessiond = health_app_create(HEALTH_NUM_TYPE);
+       health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES);
        if (!health_sessiond) {
                PERROR("health_app_create error");
                goto exit_health_sessiond_cleanup;
index 683718326881f8697058d6ce7a896d7b3ad9e292..85803e472217901a50eeec10e6ace02c3c426ca1 100644 (file)
@@ -39,7 +39,8 @@ void *ust_thread_manage_notify(void *data)
        rcu_register_thread();
        rcu_thread_online();
 
-       health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
+       health_register(health_sessiond,
+               HEALTH_SESSIOND_TYPE_APP_MANAGE_NOTIFY);
 
        health_code_update();
 
index ff7425815255595b9b3d05e1ede28065d8b5b2d4..40b814a16bd1dc370623f79ae041c5e4412cd049 100644 (file)
 /* Default unix socket path */
 #define DEFAULT_GLOBAL_CLIENT_UNIX_SOCK         DEFAULT_LTTNG_RUNDIR "/client-lttng-sessiond"
 #define DEFAULT_HOME_CLIENT_UNIX_SOCK           DEFAULT_LTTNG_HOME_RUNDIR "/client-lttng-sessiond"
-#define DEFAULT_GLOBAL_HEALTH_UNIX_SOCK         DEFAULT_LTTNG_RUNDIR "/health.sock"
-#define DEFAULT_HOME_HEALTH_UNIX_SOCK           DEFAULT_LTTNG_HOME_RUNDIR "/health.sock"
+#define DEFAULT_GLOBAL_HEALTH_UNIX_SOCK         DEFAULT_LTTNG_RUNDIR "/sessiond-health"
+#define DEFAULT_HOME_HEALTH_UNIX_SOCK          DEFAULT_LTTNG_HOME_RUNDIR "/sessiond-health"
 
 /* Default consumer health unix socket path */
-#define DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK  DEFAULT_LTTNG_RUNDIR "/health.ustconsumer32.sock"
-#define DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK    DEFAULT_LTTNG_HOME_RUNDIR "/health.ustconsumer32.sock"
-#define DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK  DEFAULT_LTTNG_RUNDIR "/health.ustconsumer64.sock"
-#define DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK    DEFAULT_LTTNG_HOME_RUNDIR "/health.ustconsumer64.sock"
-#define DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK      DEFAULT_LTTNG_RUNDIR "/health.kconsumer.sock"
-#define DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK                DEFAULT_LTTNG_HOME_RUNDIR "/health.kconsumer.sock"
+#define DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK  DEFAULT_LTTNG_RUNDIR "/ustconsumerd32/health"
+#define DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK    DEFAULT_LTTNG_HOME_RUNDIR "/ustconsumerd32/health"
+#define DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK  DEFAULT_LTTNG_RUNDIR "/ustconsumerd64/health"
+#define DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK    DEFAULT_LTTNG_HOME_RUNDIR "/ustconsumerd64/health"
+#define DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK      DEFAULT_LTTNG_RUNDIR "/kconsumerd/health"
+#define DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK                DEFAULT_LTTNG_HOME_RUNDIR "/kconsumerd/health"
 
 #define DEFAULT_GLOBAL_APPS_UNIX_SOCK \
        DEFAULT_LTTNG_RUNDIR "/" LTTNG_UST_SOCK_FILENAME
index d2414ae5776cbd563e0df3cd1b0d1709979dd869..49f6dc0ab91d7c7b3a3a1c8adfc6a360916ddddc 100644 (file)
@@ -81,7 +81,7 @@ struct health_app *health_app_create(int nr_types)
        if (!ha) {
                return NULL;
        }
-       ha->flags = zmalloc(sizeof(*ha->flags));
+       ha->flags = zmalloc(sizeof(*ha->flags) * nr_types);
        if (!ha->flags) {
                goto error_flags;
        }
index dd9967312aca1be20253df8fd881c485858d0e6f..da4c036b9630a3ecddebd9aa74d419609705379d 100644 (file)
@@ -27,6 +27,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <regex.h>
+#include <grp.h>
 
 #include <common/common.h>
 #include <common/runas.h>
@@ -694,3 +695,24 @@ size_t utils_get_current_time_str(const char *format, char *dst, size_t len)
 
        return ret;
 }
+
+/*
+ * Return the group ID matching name, else 0 if it cannot be found.
+ */
+LTTNG_HIDDEN
+gid_t utils_get_group_id(const char *name)
+{
+       struct group *grp;
+
+       grp = getgrnam(name);
+       if (!grp) {
+               static volatile int warn_once;
+
+               if (!warn_once) {
+                       WARN("No tracing group detected");
+                       warn_once = 1;
+               }
+               return 0;
+       }
+       return grp->gr_gid;
+}
index f7241996db3d55f7dac1291a306913fc2abcdf41..52f2798d116764cf01a783f64eb14d64751b5e8d 100644 (file)
@@ -44,5 +44,6 @@ int utils_parse_size_suffix(char *str, uint64_t *size);
 int utils_get_count_order_u32(uint32_t x);
 char *utils_get_home_dir(void);
 size_t utils_get_current_time_str(const char *format, char *dst, size_t len);
+gid_t utils_get_group_id(const char *name);
 
 #endif /* _COMMON_UTILS_H */
index d87eb85486c978995fbf925a20e58604149511c3..c588037d9ed50b6c39d548580b68057192e43b3f 100644 (file)
@@ -4,7 +4,8 @@ SUBDIRS = filter
 
 lib_LTLIBRARIES = liblttng-ctl.la
 
-liblttng_ctl_la_SOURCES = lttng-ctl.c snapshot.c lttng-ctl-helper.h
+liblttng_ctl_la_SOURCES = lttng-ctl.c snapshot.c lttng-ctl-helper.h \
+               lttng-ctl-health.c
 
 liblttng_ctl_la_LIBADD = \
                $(top_builddir)/src/common/sessiond-comm/libsessiond-comm.la \
diff --git a/src/lib/lttng-ctl/lttng-ctl-health.c b/src/lib/lttng-ctl/lttng-ctl-health.c
new file mode 100644 (file)
index 0000000..a386361
--- /dev/null
@@ -0,0 +1,357 @@
+/*
+ * lttng-ctl-health.c
+ *
+ * Linux Trace Toolkit Health Control Library
+ *
+ * Copyright (C) 2011 David Goulet <david.goulet@polymtl.ca>
+ * Copyright (C) 2013 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License, version 2.1 only,
+ * as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <limits.h>
+#include <errno.h>
+#include <lttng/health-internal.h>
+
+#include <bin/lttng-sessiond/health-sessiond.h>
+#include <bin/lttng-consumerd/health-consumerd.h>
+#include <bin/lttng-relayd/health-relayd.h>
+#include <common/defaults.h>
+#include <common/utils.h>
+
+#include "lttng-ctl-helper.h"
+
+enum health_component {                /* Daemons whose health can be queried */
+       HEALTH_COMPONENT_SESSIOND,      /* session daemon */
+       HEALTH_COMPONENT_CONSUMERD,     /* consumer daemon */
+       HEALTH_COMPONENT_RELAYD,        /* relay daemon */
+
+       NR_HEALTH_COMPONENT,            /* number of components, sizes thread_name[] */
+};
+
+struct lttng_health_thread {           /* Health of one thread of a component */
+       struct lttng_health *p;         /* handle this entry belongs to */
+       int state;                      /* lttng_health_query() stores 0 (healthy) or -1 (in error) */
+};
+
+struct lttng_health {                  /* Health query handle for one component */
+       enum health_component component;
+       uint64_t state;                 /* bitmask of threads in error, 0 when all are healthy */
+       unsigned int nr_threads;        /* number of entries in thread[] */
+       char health_sock_path[PATH_MAX];        /* unix socket used to query the component */
+       /* For consumer health only */
+       enum lttng_health_consumerd consumerd_type;
+       struct lttng_health_thread thread[];    /* nr_threads entries, allocated together with the handle */
+};
+
+/*
+ * Human-readable thread names of the session daemon, indexed by its health
+ * thread type. Read-only: both the pointers and the strings are const.
+ */
+static
+const char * const sessiond_thread_name[NR_HEALTH_SESSIOND_TYPES] = {
+       [ HEALTH_SESSIOND_TYPE_CMD ] = "Session daemon command",
+       [ HEALTH_SESSIOND_TYPE_APP_MANAGE ] = "Session daemon application manager",
+       [ HEALTH_SESSIOND_TYPE_APP_REG ] = "Session daemon application registration",
+       [ HEALTH_SESSIOND_TYPE_KERNEL ] = "Session daemon kernel",
+       [ HEALTH_SESSIOND_TYPE_CONSUMER ] = "Session daemon consumer manager",
+       [ HEALTH_SESSIOND_TYPE_HT_CLEANUP ] = "Session daemon hash table cleanup",
+       [ HEALTH_SESSIOND_TYPE_APP_MANAGE_NOTIFY ] = "Session daemon application notification manager",
+       [ HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH ] = "Session daemon application registration dispatcher",
+};
+
+/* Thread names of the consumer daemon, indexed by its health thread type. */
+static
+const char * const consumerd_thread_name[NR_HEALTH_CONSUMERD_TYPES] = {
+       [ HEALTH_CONSUMERD_TYPE_CHANNEL ] = "Consumer daemon channel",
+       [ HEALTH_CONSUMERD_TYPE_METADATA ] = "Consumer daemon metadata",
+       [ HEALTH_CONSUMERD_TYPE_DATA ] = "Consumer daemon data",
+       [ HEALTH_CONSUMERD_TYPE_SESSIOND ] = "Consumer daemon session daemon command manager",
+       [ HEALTH_CONSUMERD_TYPE_METADATA_TIMER ] = "Consumer daemon metadata timer",
+};
+
+/* Thread names of the relay daemon, indexed by its health thread type. */
+static
+const char * const relayd_thread_name[NR_HEALTH_RELAYD_TYPES] = {
+       [ HEALTH_RELAYD_TYPE_DISPATCHER ] = "Relay daemon dispatcher",
+       [ HEALTH_RELAYD_TYPE_WORKER ] = "Relay daemon worker",
+       [ HEALTH_RELAYD_TYPE_LISTENER ] = "Relay daemon listener",
+};
+
+/*
+ * Per-component name tables, indexed by enum health_component and then by
+ * thread index within that component.
+ */
+static
+const char * const * const thread_name[NR_HEALTH_COMPONENT] = {
+       [ HEALTH_COMPONENT_SESSIOND ] = sessiond_thread_name,
+       [ HEALTH_COMPONENT_CONSUMERD ] = consumerd_thread_name,
+       [ HEALTH_COMPONENT_RELAYD ] = relayd_thread_name,
+};
+
+/*
+ * Set the health socket path of a health handle.
+ *
+ * For the session daemon and the consumer daemons, the path is chosen from
+ * the component (and consumer type): the global path when running as root
+ * or when tracing_group is non-zero, otherwise the per-user path built from
+ * the home directory ("/tmp" is used when utils_get_home_dir() returns
+ * NULL). For the relay daemon, the path already stored in the handle is
+ * kept as is.
+ *
+ * Returns 0 on success, -EINVAL if the component or consumer type is
+ * unknown or if a relay daemon handle has an empty path, and -ENOMEM if the
+ * per-user path cannot be formatted into lh->health_sock_path.
+ */
+static
+int set_health_socket_path(struct lttng_health *lh,
+               int tracing_group)
+{
+       uid_t uid;
+       const char *home;
+       int ret;
+       /* Global and home format strings */
+       const char *global_str, *home_str;
+
+       switch (lh->component) {
+       case HEALTH_COMPONENT_SESSIOND:
+               global_str = DEFAULT_GLOBAL_HEALTH_UNIX_SOCK;
+               home_str = DEFAULT_HOME_HEALTH_UNIX_SOCK;
+               break;
+       case HEALTH_COMPONENT_CONSUMERD:
+               switch (lh->consumerd_type) {
+               case LTTNG_HEALTH_CONSUMERD_UST_32:
+                       global_str = DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK;
+                       home_str = DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK;
+                       break;
+               case LTTNG_HEALTH_CONSUMERD_UST_64:
+                       global_str = DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK;
+                       home_str = DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK;
+                       break;
+               case LTTNG_HEALTH_CONSUMERD_KERNEL:
+                       global_str = DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK;
+                       home_str = DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       case HEALTH_COMPONENT_RELAYD:
+               /* Nothing to compute: the path must already be in the handle */
+               return (lh->health_sock_path[0] == '\0') ? -EINVAL : 0;
+       default:
+               return -EINVAL;
+       }
+
+       uid = getuid();
+
+       if (uid == 0 || tracing_group) {
+               lttng_ctl_copy_string(lh->health_sock_path,
+                               global_str,
+                               sizeof(lh->health_sock_path));
+               return 0;
+       }
+
+       home = utils_get_home_dir();
+       if (home == NULL) {
+               /* Fallback in /tmp */
+               home = "/tmp";
+       }
+
+       /*
+        * With GNU C <  2.1, snprintf returns -1 if the target buffer
+        * is too small; With GNU C >= 2.1, snprintf returns the
+        * required size (excluding closing null).
+        */
+       ret = snprintf(lh->health_sock_path, sizeof(lh->health_sock_path),
+                       home_str, home);
+       if ((ret < 0) || ((size_t) ret >= sizeof(lh->health_sock_path))) {
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate a health handle for component hc with room for nr_threads
+ * per-thread entries.
+ *
+ * Until a query succeeds, the handle reports an error: the aggregate state
+ * has all bits set and every thread entry is marked in error, so both views
+ * agree. Each thread entry points back to its handle.
+ *
+ * Returns the new handle, or NULL if the allocation fails. The handle is
+ * released with lttng_health_destroy().
+ */
+static
+struct lttng_health *lttng_health_create(enum health_component hc,
+               unsigned int nr_threads)
+{
+       struct lttng_health *lh;
+       unsigned int i;         /* same type as nr_threads */
+
+       lh = zmalloc(sizeof(*lh) + sizeof(lh->thread[0]) * nr_threads);
+       if (!lh) {
+               return NULL;
+       }
+
+       lh->component = hc;
+       lh->state = UINT64_MAX;         /* All bits in error initially */
+       lh->nr_threads = nr_threads;
+       for (i = 0; i < nr_threads; i++) {
+               lh->thread[i].p = lh;
+               /* Match the aggregate state: in error until queried */
+               lh->thread[i].state = -1;
+       }
+       return lh;
+}
+
+/*
+ * Create a health handle for the session daemon, with one thread entry per
+ * session daemon health type.
+ *
+ * Returns NULL if lttng_health_create() fails.
+ */
+struct lttng_health *lttng_health_create_sessiond(void)
+{
+       return lttng_health_create(HEALTH_COMPONENT_SESSIOND,
+                       NR_HEALTH_SESSIOND_TYPES);
+}
+
+/*
+ * Create a health handle for the consumer daemon of the given type, with
+ * one thread entry per consumer daemon health type.
+ *
+ * The type is only recorded here; it is used later to select the health
+ * socket path. Returns NULL if lttng_health_create() fails.
+ */
+struct lttng_health *
+       lttng_health_create_consumerd(enum lttng_health_consumerd consumerd)
+{
+       struct lttng_health *handle;
+
+       handle = lttng_health_create(HEALTH_COMPONENT_CONSUMERD,
+                       NR_HEALTH_CONSUMERD_TYPES);
+       if (handle) {
+               handle->consumerd_type = consumerd;
+       }
+       return handle;
+}
+
+/*
+ * Create a health handle for a relay daemon reachable through the health
+ * socket at path, with one thread entry per relay daemon health type.
+ *
+ * The path is copied into the handle. Returns NULL if path is NULL or if
+ * lttng_health_create() fails.
+ */
+struct lttng_health *lttng_health_create_relayd(const char *path)
+{
+       struct lttng_health *handle = NULL;
+
+       if (path) {
+               handle = lttng_health_create(HEALTH_COMPONENT_RELAYD,
+                               NR_HEALTH_RELAYD_TYPES);
+       }
+       if (handle) {
+               lttng_ctl_copy_string(handle->health_sock_path, path,
+                       sizeof(handle->health_sock_path));
+       }
+       return handle;
+}
+
+void lttng_health_destroy(struct lttng_health *lh)
+{
+       free(lh);       /* releases the handle; its thread entries are part of the same allocation */
+}
+
+/*
+ * Query the health of the component described by the handle.
+ *
+ * Connects to the component's health socket, sends a HEALTH_CMD_CHECK
+ * command and stores the returned bitmask of threads in bad health in the
+ * handle, together with the per-thread state derived from it (-1 when the
+ * thread's bit is set, 0 otherwise).
+ *
+ * If the connection fails while the tracing group check was non-zero, the
+ * socket path is recomputed with the check cleared and the connection is
+ * attempted once more.
+ *
+ * Returns 0 on success and a negative value on error: -EINVAL if health is
+ * NULL, the error returned by set_health_socket_path(), or -1 if the
+ * connection or the exchange with the component fails. On error, the state
+ * stored in the handle is left untouched.
+ */
+int lttng_health_query(struct lttng_health *health)
+{
+       int sock, ret, tracing_group;
+       unsigned int i;
+       struct health_comm_msg msg;
+       struct health_comm_reply reply;
+
+       if (!health) {
+               return -EINVAL;
+       }
+
+       /*
+        * NOTE(review): lttng_check_tracing_group() is documented as
+        * returning 1 or -1. Both are non-zero, so the global socket is
+        * tried first even when not in the group -- confirm intended.
+        */
+       tracing_group = lttng_check_tracing_group();
+retry:
+       ret = set_health_socket_path(health, tracing_group);
+       if (ret) {
+               goto error;
+       }
+       /* Connect to the component's health socket */
+       sock = lttcomm_connect_unix_sock(health->health_sock_path);
+       if (sock < 0) {
+               if (tracing_group) {
+                       /* For tracing group, fallback to per-user */
+                       tracing_group = 0;
+                       goto retry;
+               }
+               ret = -1;
+               goto error;
+       }
+
+       msg.cmd = HEALTH_CMD_CHECK;
+
+       ret = lttcomm_send_unix_sock(sock, (void *)&msg, sizeof(msg));
+       if (ret < 0) {
+               ret = -1;
+               goto close_error;
+       }
+
+       ret = lttcomm_recv_unix_sock(sock, (void *)&reply, sizeof(reply));
+       if (ret < 0 || (size_t) ret < sizeof(reply)) {
+               /*
+                * Receive error, connection closed by the peer, or short
+                * reply: reply is not fully written, do not use it.
+                */
+               ret = -1;
+               goto close_error;
+       }
+
+       health->state = reply.ret_code;
+       for (i = 0; i < health->nr_threads; i++) {
+               if (health->state & (1ULL << i)) {
+                       health->thread[i].state = -1;
+               } else {
+                       health->thread[i].state = 0;
+               }
+       }
+
+close_error:
+       {
+               int closeret;
+
+               closeret = close(sock);
+               assert(!closeret);
+       }
+
+error:
+       /* A positive value here is the received byte count: report success */
+       if (ret >= 0)
+               ret = 0;
+       return ret;
+}
+
+/*
+ * Report the aggregate state stored in the handle.
+ *
+ * Returns 0 when no thread bit is set, -1 when at least one is set, and
+ * -EINVAL if health is NULL.
+ */
+int lttng_health_state(const struct lttng_health *health)
+{
+       if (!health) {
+               return -EINVAL;
+       }
+
+       return health->state ? -1 : 0;
+}
+
+/*
+ * Return the number of thread entries of the handle, or -EINVAL if health
+ * is NULL.
+ */
+int lttng_health_get_nr_threads(const struct lttng_health *health)
+{
+       return health ? (int) health->nr_threads : -EINVAL;
+}
+
+/*
+ * Return the nth_thread-th thread entry of the handle, or NULL if health is
+ * NULL or the index is not below the handle's thread count.
+ */
+const struct lttng_health_thread *
+       lttng_health_get_thread(const struct lttng_health *health,
+               unsigned int nth_thread)
+{
+       if (health && nth_thread < health->nr_threads) {
+               return &health->thread[nth_thread];
+       }
+       return NULL;
+}
+
+/*
+ * Return the state stored in a thread entry, or -EINVAL if thread is NULL.
+ */
+int lttng_health_thread_state(const struct lttng_health_thread *thread)
+{
+       return thread ? thread->state : -EINVAL;
+}
+
+/*
+ * Return the name of a thread entry, looked up in the name table of the
+ * component of the handle the entry belongs to. Returns NULL if thread is
+ * NULL.
+ */
+const char *lttng_health_thread_name(const struct lttng_health_thread *thread)
+{
+       const struct lttng_health *owner;
+       unsigned int idx;
+
+       if (!thread) {
+               return NULL;
+       }
+       owner = thread->p;
+       /* Position of this entry within its handle's thread array */
+       idx = thread - &owner->thread[0];
+       return thread_name[owner->component][idx];
+}
index ed9a24fbfcb5f02508b9d3f2ab7a8456e00b1a4d..58b3dba57fecb13c93a946fd8b759ace6b531ba1 100644 (file)
@@ -54,4 +54,6 @@ int lttng_ctl_ask_sessiond(struct lttcomm_session_msg *lsm, void **buf)
        return lttng_ctl_ask_sessiond_varlen(lsm, NULL, 0, buf);
 }
 
+int lttng_check_tracing_group(void);
+
 #endif /* LTTNG_CTL_HELPER_H */
index 0efc1591510fbff0b1eebf8f4e0bd313166204a6..540e24590cab7e45a897d4944c41889d6c6198da 100644 (file)
@@ -60,7 +60,6 @@ do {                                                          \
 /* Socket to session daemon for communication */
 static int sessiond_socket;
 static char sessiond_sock_path[PATH_MAX];
-static char health_sock_path[PATH_MAX];
 
 /* Variables */
 static char *tracing_group;
@@ -198,12 +197,14 @@ end:
  *
  *  If yes return 1, else return -1.
  */
-static int check_tracing_group(const char *grp_name)
+LTTNG_HIDDEN
+int lttng_check_tracing_group(void)
 {
        struct group *grp_tracing;      /* no free(). See getgrnam(3) */
        gid_t *grp_list;
        int grp_list_size, grp_id, i;
        int ret = -1;
+       const char *grp_name = tracing_group;
 
        /* Get GID of group 'tracing' */
        grp_tracing = getgrnam(grp_name);
@@ -294,7 +295,7 @@ static int set_session_daemon_path(void)
 
        if (uid != 0) {
                /* Are we in the tracing group ? */
-               in_tgroup = check_tracing_group(tracing_group);
+               in_tgroup = lttng_check_tracing_group();
        }
 
        if ((uid == 0) || in_tgroup) {
@@ -1362,96 +1363,6 @@ int lttng_disable_consumer(struct lttng_handle *handle)
        return -ENOSYS;
 }
 
-/*
- * Set health socket path by putting it in the global health_sock_path
- * variable.
- *
- * Returns 0 on success or -ENOMEM.
- */
-static int set_health_socket_path(void)
-{
-       uid_t uid;
-       const char *home;
-       int ret;
-
-       uid = getuid();
-
-       if (uid == 0 || check_tracing_group(tracing_group)) {
-               lttng_ctl_copy_string(health_sock_path,
-                               DEFAULT_GLOBAL_HEALTH_UNIX_SOCK, sizeof(health_sock_path));
-               return 0;
-       }
-
-       /*
-        * With GNU C <  2.1, snprintf returns -1 if the target buffer
-        * is too small; With GNU C >= 2.1, snprintf returns the
-        * required size (excluding closing null).
-        */
-       home = utils_get_home_dir();
-       if (home == NULL) {
-               /* Fallback in /tmp */
-               home = "/tmp";
-       }
-
-       ret = snprintf(health_sock_path, sizeof(health_sock_path),
-                       DEFAULT_HOME_HEALTH_UNIX_SOCK, home);
-       if ((ret < 0) || (ret >= sizeof(health_sock_path))) {
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-/*
- * Check session daemon health for a specific health component.
- *
- * Return 0 if health is OK or else 1 if BAD.
- *
- * Any other negative value is a lttng error code which can be translated with
- * lttng_strerror().
- */
-int lttng_health_check(enum lttng_health_component c)
-{
-       int sock, ret;
-       struct health_comm_msg msg;
-       struct health_comm_reply reply;
-
-       /* Connect to the sesssion daemon */
-       sock = lttcomm_connect_unix_sock(health_sock_path);
-       if (sock < 0) {
-               ret = -LTTNG_ERR_NO_SESSIOND;
-               goto error;
-       }
-
-       msg.cmd = HEALTH_CMD_CHECK;
-       msg.component = c;
-
-       ret = lttcomm_send_unix_sock(sock, (void *)&msg, sizeof(msg));
-       if (ret < 0) {
-               ret = -LTTNG_ERR_FATAL;
-               goto close_error;
-       }
-
-       ret = lttcomm_recv_unix_sock(sock, (void *)&reply, sizeof(reply));
-       if (ret < 0) {
-               ret = -LTTNG_ERR_FATAL;
-               goto close_error;
-       }
-
-       ret = reply.ret_code;
-
-close_error:
-       {
-               int closeret;
-
-               closeret = close(sock);
-               assert(!closeret);
-       }
-
-error:
-       return ret;
-}
-
 /*
  * This is an extension of create session that is ONLY and SHOULD only be used
  * by the lttng command line program. It exists to avoid using URI parsing in
@@ -1641,10 +1552,6 @@ static void __attribute__((constructor)) init()
 {
        /* Set default session group */
        lttng_set_tracing_group(DEFAULT_TRACING_GROUP);
-       /* Set socket for health check */
-       if (set_health_socket_path()) {
-               abort();
-       }
 }
 
 /*
index 3eef1104008d0deab00324740d9c8447a1dd07f1..0569a418775c98b9c9a5f24aac086be43f1b7786 100644 (file)
  */
 
 #include <stdio.h>
+#include <stdlib.h>
 
-#include "lttng/lttng.h"
+#include <lttng/health.h>
 
-#define HEALTH_CMD_FAIL     (1 << 0)
-#define HEALTH_APP_MNG_FAIL (1 << 1)
-#define HEALTH_APP_REG_FAIL (1 << 2)
-#define HEALTH_KERNEL_FAIL  (1 << 3)
-#define HEALTH_CSMR_FAIL    (1 << 4)
-
-int main(int argc, char *argv[])
+static
+int check_component(struct lttng_health *lh, const char *component_name)
 {
-       int health = -1;
-       int status = 0;
+       const struct lttng_health_thread *thread;
+       int nr_threads, i, status;
 
-       /* Command thread */
-       health = lttng_health_check(LTTNG_HEALTH_CMD);
-       printf("Health check cmd: %d\n", health);
+       if (lttng_health_query(lh)) {
+               fprintf(stderr, "Error querying %s health\n",
+                       component_name);
+               return -1;
+       }
+       status = lttng_health_state(lh);
+       if (!status) {
+               return status;
+       }
 
-       if (health) {
-               status |= HEALTH_CMD_FAIL;
+       nr_threads = lttng_health_get_nr_threads(lh);
+       if (nr_threads < 0) {
+               fprintf(stderr, "Error getting number of threads\n");
+               return -1;
        }
 
-       /* App manage thread */
-       health = lttng_health_check(LTTNG_HEALTH_APP_MANAGE);
-       printf("Health check app. manage: %d\n", health);
+       printf("Component \"%s\" is in error.\n", component_name);
+       for (i = 0; i < nr_threads; i++) {
+               int thread_state;
 
-       if (health) {
-               status |= HEALTH_APP_MNG_FAIL;
-       }
-       /* App registration thread */
-       health = lttng_health_check(LTTNG_HEALTH_APP_REG);
-       printf("Health check app. registration: %d\n", health);
+               thread = lttng_health_get_thread(lh, i);
+               if (!thread) {
+                       fprintf(stderr, "Error getting thread %d\n", i);
+                       return -1;
+               }
+               thread_state = lttng_health_thread_state(thread);
+               if (!thread_state) {
+                       continue;
+               }
+               printf("Thread \"%s\" is not responding in component \"%s\".\n",
+                       lttng_health_thread_name(thread),
+                       component_name);
 
-       if (health) {
-               status |= HEALTH_APP_REG_FAIL;
        }
+       return status;
+}
 
-       /* Kernel thread */
-       health = lttng_health_check(LTTNG_HEALTH_KERNEL);
-       printf("Health check kernel: %d\n", health);
+static
+int check_sessiond(void)
+{
+       struct lttng_health *lh;
+       int status;
 
-       if (health) {
-               status |= HEALTH_KERNEL_FAIL;
+       lh = lttng_health_create_sessiond();
+       if (!lh) {
+               perror("lttng_health_create_sessiond");
+               return -1;
        }
 
-       /* Consumer thread */
-       health = lttng_health_check(LTTNG_HEALTH_CONSUMER);
-       printf("Health check consumer: %d\n", health);
+       status = check_component(lh, "sessiond");
+
+       lttng_health_destroy(lh);
+
+       return status;
+}
+
+static
+int check_consumerd(enum lttng_health_consumerd hc)
+{
+       struct lttng_health *lh;
+       int status;
+       static const char *cnames[NR_LTTNG_HEALTH_CONSUMERD] = {
+               "ust-consumerd-32",
+               "ust-consumerd-64",
+               "kernel-consumerd",
+       };
 
-       if (health) {
-               status |= HEALTH_CSMR_FAIL;
+       lh = lttng_health_create_consumerd(hc);
+       if (!lh) {
+               perror("lttng_health_create_consumerd");
+               return -1;
        }
 
+       status = check_component(lh, cnames[hc]);
+
+       lttng_health_destroy(lh);
+
        return status;
 }
+
+
+/*
+ * Check the session daemon, then every consumer daemon type. Exits with
+ * EXIT_SUCCESS only if every check returned 0.
+ */
+int main(int argc, char *argv[])
+{
+       int status, i;
+
+       status = check_sessiond();
+       for (i = 0; i < NR_LTTNG_HEALTH_CONSUMERD; i++) {
+               status |= check_consumerd(i);
+       }
+       exit(status ? EXIT_FAILURE : EXIT_SUCCESS);
+}
This page took 0.046072 seconds and 4 git commands to generate.