* These are the value added to the current state depending of the position in
* the thread where is either waiting on a poll() or running in the code.
*/
-#define HEALTH_POLL_VALUE 1
-#define HEALTH_CODE_VALUE 2
+#define HEALTH_POLL_VALUE (1UL << 0)
+#define HEALTH_CODE_VALUE (1UL << 1)
-#define HEALTH_IS_IN_POLL(x) (x % HEALTH_CODE_VALUE)
-#define HEALTH_IS_IN_CODE(x) (x % HEALTH_POLL_VALUE)
+#define HEALTH_IS_IN_POLL(x) ((x) & HEALTH_POLL_VALUE)
+
+enum health_flags {	/* bit flags OR-ed atomically into health_state.flags */
+ HEALTH_EXIT = (1U << 0),	/* set by health_exit(), called by the threads on their way out */
+ HEALTH_ERROR = (1U << 1),	/* set by health_error(), called when a thread leaves with err != 0 */
+};
struct health_state {
- uint64_t last;
- uint64_t current;
+ /*
+ * last counter is only read and updated by the health_check
+ * thread (single updater).
+ */
+ unsigned long last;
+ /*
+ * current and flags are updated by multiple threads concurrently.
+ */
+ unsigned long current; /* progress counter, updated atomically */
+ enum health_flags flags; /* other flags, updated atomically */
};
/* Health state counters for the client command thread */
extern struct health_state health_thread_cmd;
+/* Health state counters for the application management thread */
+extern struct health_state health_thread_app_manage;
+
/* Health state counters for the application registration thread */
extern struct health_state health_thread_app_reg;
extern struct health_state health_thread_kernel;
/*
- * Update current counter by 1 to indicate that the thread is in a blocking
- * state cause by a poll().
+ * Update current counter by 1 to indicate that the thread entered or
+ * left a blocking state caused by a poll().
*/
static inline void health_poll_update(struct health_state *state)
{
assert(state);
-
uatomic_add(&state->current, HEALTH_POLL_VALUE);
}
/*
- * Update current counter by 2 which indicates that we are currently running in
- * a thread and NOT blocked at a poll().
+ * Update current counter by 2 to indicate progress in the execution of
+ * a thread.
*/
static inline void health_code_update(struct health_state *state)
{
assert(state);
-
uatomic_add(&state->current, HEALTH_CODE_VALUE);
}
/*
- * Reset health state. A value of zero indicate a bad health state.
+ * Set health "exit" flag.
*/
-static inline void health_reset(struct health_state *state)
+static inline void health_exit(struct health_state *state)
{
assert(state);
+ uatomic_or(&state->flags, HEALTH_EXIT);
+}
- uatomic_set(&state->current, 0);
- uatomic_set(&state->last, 0);
+/*
+ * Set health "error" flag.
+ */
+static inline void health_error(struct health_state *state)
+{
+ assert(state);
+ uatomic_or(&state->flags, HEALTH_ERROR);
}
/*
static inline void health_init(struct health_state *state)
{
assert(state);
-
uatomic_set(&state->last, 0);
- uatomic_set(&state->current, HEALTH_CODE_VALUE);
+ uatomic_set(&state->current, 0);
+ uatomic_set(&state->flags, 0);
}
int health_check_state(struct health_state *state);
/* Used for the health monitoring of the session daemon. See health.h */
struct health_state health_thread_cmd;
+struct health_state health_thread_app_manage;
struct health_state health_thread_app_reg;
struct health_state health_thread_kernel;
*/
static void *thread_manage_kernel(void *data)
{
- int ret, i, pollfd, update_poll_flag = 1;
+ int ret, i, pollfd, update_poll_flag = 1, err = -1;
uint32_t revents, nb_fd;
char tmp;
struct lttng_poll_event events;
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Check for data on kernel pipe */
}
}
+exit:
error:
lttng_poll_clean(&events);
error_poll_create:
- health_reset(&health_thread_kernel);
+ if (err) {
+ health_error(&health_thread_kernel);
+ ERR("Health error occurred in %s", __func__);
+ }
+ health_exit(&health_thread_kernel);
DBG("Kernel thread dying");
return NULL;
}
*/
static void *thread_manage_consumer(void *data)
{
- int sock = -1, i, ret, pollfd;
+ int sock = -1, i, ret, pollfd, err = -1;
uint32_t revents, nb_fd;
enum lttcomm_return_code code;
struct lttng_poll_event events;
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Event on the registration socket */
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Event on the kconsumerd socket */
ERR("consumer return code : %s", lttcomm_get_readable_code(-code));
+exit:
error:
/* Immediately set the consumerd state to stopped */
if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
lttng_poll_clean(&events);
error_poll:
error_listen:
- health_reset(&consumer_data->health);
+ if (err) {
+ health_error(&consumer_data->health);
+ ERR("Health error occurred in %s", __func__);
+ }
+ health_exit(&consumer_data->health);
DBG("consumer thread cleanup completed");
return NULL;
*/
static void *thread_manage_apps(void *data)
{
- int i, ret, pollfd;
+ int i, ret, pollfd, err = -1;
uint32_t revents, nb_fd;
struct ust_command ust_cmd;
struct lttng_poll_event events;
rcu_register_thread();
rcu_thread_online();
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
ret = create_thread_poll_set(&events, 2);
if (ret < 0) {
goto error;
}
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
while (1) {
/* Zeroed the events structure */
/* Inifinite blocking call, waiting for transmission */
restart:
- health_poll_update(&health_thread_app_reg);
+ health_poll_update(&health_thread_app_manage);
ret = lttng_poll_wait(&events, -1);
- health_poll_update(&health_thread_app_reg);
+ health_poll_update(&health_thread_app_manage);
if (ret < 0) {
/*
* Restart interrupted system call.
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Inspect the apps cmd pipe */
goto error;
}
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
/* Register applicaton to the session daemon */
ret = ust_app_register(&ust_cmd.reg_msg,
break;
}
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
/*
* Validate UST version compatibility.
update_ust_app(ust_cmd.sock);
}
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
ret = ust_app_register_done(ust_cmd.sock);
if (ret < 0) {
ust_cmd.sock);
}
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
break;
}
}
}
- health_code_update(&health_thread_app_reg);
+ health_code_update(&health_thread_app_manage);
}
}
+exit:
error:
lttng_poll_clean(&events);
error_poll_create:
- health_reset(&health_thread_app_reg);
+ if (err) {
+ health_error(&health_thread_app_manage);
+ ERR("Health error occurred in %s", __func__);
+ }
+ health_exit(&health_thread_app_manage);
DBG("Application communication apps thread cleanup complete");
rcu_thread_offline();
rcu_unregister_thread();
*/
static void *thread_registration_apps(void *data)
{
- int sock = -1, i, ret, pollfd;
+ int sock = -1, i, ret, pollfd, err = -1;
uint32_t revents, nb_fd;
struct lttng_poll_event events;
/*
/* Inifinite blocking call, waiting for transmission */
restart:
+ health_poll_update(&health_thread_app_reg);
ret = lttng_poll_wait(&events, -1);
+ health_poll_update(&health_thread_app_reg);
if (ret < 0) {
/*
* Restart interrupted system call.
}
for (i = 0; i < nb_fd; i++) {
+ health_code_update(&health_thread_app_reg);
+
/* Fetch once the poll data */
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Event on the registration socket */
sock = -1;
continue;
}
+ health_code_update(&health_thread_app_reg);
ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg,
sizeof(struct ust_register_msg));
if (ret < 0 || ret < sizeof(struct ust_register_msg)) {
sock = -1;
continue;
}
+ health_code_update(&health_thread_app_reg);
ust_cmd->sock = sock;
sock = -1;
}
}
+exit:
error:
+ if (err) {
+ health_error(&health_thread_app_reg);
+ ERR("Health error occurred in %s", __func__);
+ }
+ health_exit(&health_thread_app_reg);
+
/* Notify that the registration thread is gone */
notify_ust_apps(0);
}
/*
- * Compute health status of each consumer.
+ * Compute health status of each consumer. If any of them reports zero
+ * (bad state), we return 0.
*/
static int check_consumer_health(void)
{
int ret;
- ret =
- health_check_state(&kconsumer_data.health) &
- health_check_state(&ustconsumer32_data.health) &
+ ret = health_check_state(&kconsumer_data.health) &&
+ health_check_state(&ustconsumer32_data.health) &&
health_check_state(&ustconsumer64_data.health);
DBG3("Health consumer check %d", ret);
*/
static void *thread_manage_health(void *data)
{
- int sock = -1, new_sock, ret, i, pollfd;
+ int sock = -1, new_sock, ret, i, pollfd, err = -1;
uint32_t revents, nb_fd;
struct lttng_poll_event events;
struct lttcomm_health_msg msg;
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Event on the registration socket */
case LTTNG_HEALTH_CMD:
reply.ret_code = health_check_state(&health_thread_cmd);
break;
+ case LTTNG_HEALTH_APP_MANAGE:
+ reply.ret_code = health_check_state(&health_thread_app_manage);
+ break;
case LTTNG_HEALTH_APP_REG:
reply.ret_code = health_check_state(&health_thread_app_reg);
break;
reply.ret_code = check_consumer_health();
break;
case LTTNG_HEALTH_ALL:
- ret = check_consumer_health();
-
reply.ret_code =
- health_check_state(&health_thread_app_reg) &
- health_check_state(&health_thread_cmd) &
- health_check_state(&health_thread_kernel) &
- ret;
+ health_check_state(&health_thread_app_manage) &&
+ health_check_state(&health_thread_app_reg) &&
+ health_check_state(&health_thread_cmd) &&
+ health_check_state(&health_thread_kernel) &&
+ check_consumer_health();
break;
default:
reply.ret_code = LTTCOMM_UND;
new_sock = -1;
}
+exit:
error:
+ if (err) {
+ ERR("Health error occurred in %s", __func__);
+ }
DBG("Health check thread dying");
unlink(health_unix_sock_path);
if (sock >= 0) {
*/
static void *thread_manage_clients(void *data)
{
- int sock = -1, ret, i, pollfd;
+ int sock = -1, ret, i, pollfd, err = -1;
int sock_error;
uint32_t revents, nb_fd;
struct command_ctx *cmd_ctx = NULL;
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
- goto error;
+ err = 0;
+ goto exit;
}
/* Event on the registration socket */
health_code_update(&health_thread_cmd);
}
+exit:
error:
- health_reset(&health_thread_cmd);
+ if (err) {
+ health_error(&health_thread_cmd);
+ ERR("Health error occurred in %s", __func__);
+ }
+ health_exit(&health_thread_cmd);
DBG("Client thread dying");
unlink(client_unix_sock_path);
/* Init all health thread counters. */
health_init(&health_thread_cmd);
health_init(&health_thread_kernel);
+ health_init(&health_thread_app_manage);
health_init(&health_thread_app_reg);
/*