Fix: remove break in epoll loop of apps. thread
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index e05d72bacb0b9f0ec3278c9c427b6295d7af8347..bdd48c24cb2ca6edbcc8ab7f2c6a898881f7fbb5 100644 (file)
@@ -90,7 +90,6 @@ static struct consumer_data kconsumer_data = {
        .cmd_unix_sock_path = DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
        .err_sock = -1,
        .cmd_sock = -1,
-       .metadata_sock.fd = -1,
        .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .cond = PTHREAD_COND_INITIALIZER,
@@ -102,7 +101,6 @@ static struct consumer_data ustconsumer64_data = {
        .cmd_unix_sock_path = DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
        .err_sock = -1,
        .cmd_sock = -1,
-       .metadata_sock.fd = -1,
        .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .cond = PTHREAD_COND_INITIALIZER,
@@ -114,7 +112,6 @@ static struct consumer_data ustconsumer32_data = {
        .cmd_unix_sock_path = DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
        .err_sock = -1,
        .cmd_sock = -1,
-       .metadata_sock.fd = -1,
        .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .cond = PTHREAD_COND_INITIALIZER,
@@ -693,9 +690,6 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                                        rcu_read_lock();
                                        cds_lfht_for_each_entry(ksess->consumer->socks->ht,
                                                        &iter.iter, socket, node.node) {
-                                               /* Code flow error */
-                                               assert(socket->fd >= 0);
-
                                                pthread_mutex_lock(socket->lock);
                                                ret = kernel_consumer_send_channel_stream(socket,
                                                                channel, ksess,
@@ -730,6 +724,12 @@ static void update_ust_app(int app_sock)
 {
        struct ltt_session *sess, *stmp;
 
+       /* Consumer is in an ERROR state. Stop any application update. */
+       if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
+               /* Stop the update process since the consumer is dead. */
+               return;
+       }
+
        /* For all tracing session(s) */
        cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
                session_lock(sess);
@@ -1019,15 +1019,16 @@ restart:
                /* Connect both socket, command and metadata. */
                consumer_data->cmd_sock =
                        lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
-               consumer_data->metadata_sock.fd =
+               consumer_data->metadata_fd =
                        lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
-               if (consumer_data->cmd_sock < 0 ||
-                               consumer_data->metadata_sock.fd < 0) {
+               if (consumer_data->cmd_sock < 0
+                               || consumer_data->metadata_fd < 0) {
                        PERROR("consumer connect cmd socket");
                        /* On error, signal condition and quit. */
                        signal_consumer_condition(consumer_data, -1);
                        goto error;
                }
+               consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
                /* Create metadata socket lock. */
                consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
                if (consumer_data->metadata_sock.lock == NULL) {
@@ -1040,7 +1041,7 @@ restart:
                signal_consumer_condition(consumer_data, 1);
                DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock);
                DBG("Consumer metadata socket ready (fd: %d)",
-                               consumer_data->metadata_sock.fd);
+                               consumer_data->metadata_fd);
        } else {
                ERR("consumer error when waiting for SOCK_READY : %s",
                                lttcomm_get_readable_code(-code));
@@ -1060,7 +1061,7 @@ restart:
        }
 
        /* Add metadata socket that is successfully connected. */
-       ret = lttng_poll_add(&events, consumer_data->metadata_sock.fd,
+       ret = lttng_poll_add(&events, consumer_data->metadata_fd,
                        LPOLLIN | LPOLLRDHUP);
        if (ret < 0) {
                goto error;
@@ -1119,7 +1120,7 @@ restart_poll:
                                                lttcomm_get_readable_code(-code));
 
                                goto exit;
-                       } else if (pollfd == consumer_data->metadata_sock.fd) {
+                       } else if (pollfd == consumer_data->metadata_fd) {
                                /* UST metadata requests */
                                ret = ust_consumer_metadata_request(
                                                &consumer_data->metadata_sock);
@@ -1138,6 +1139,13 @@ restart_poll:
 
 exit:
 error:
+       /*
+        * We lock here because we are about to close the sockets and some other
+        * thread might be using them so get exclusive access which will abort all
+        * other consumer command by other threads.
+        */
+       pthread_mutex_lock(&consumer_data->lock);
+
        /* Immediately set the consumerd state to stopped */
        if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
                uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
@@ -1163,15 +1171,12 @@ error:
                }
                consumer_data->cmd_sock = -1;
        }
-       if (consumer_data->metadata_sock.fd >= 0) {
-               ret = close(consumer_data->metadata_sock.fd);
+       if (*consumer_data->metadata_sock.fd_ptr >= 0) {
+               ret = close(*consumer_data->metadata_sock.fd_ptr);
                if (ret) {
                        PERROR("close");
                }
        }
-       /* Cleanup metadata socket mutex. */
-       pthread_mutex_destroy(consumer_data->metadata_sock.lock);
-       free(consumer_data->metadata_sock.lock);
 
        if (sock >= 0) {
                ret = close(sock);
@@ -1183,6 +1188,11 @@ error:
        unlink(consumer_data->err_unix_sock_path);
        unlink(consumer_data->cmd_unix_sock_path);
        consumer_data->pid = 0;
+       pthread_mutex_unlock(&consumer_data->lock);
+
+       /* Cleanup metadata socket mutex. */
+       pthread_mutex_destroy(consumer_data->metadata_sock.lock);
+       free(consumer_data->metadata_sock.lock);
 
        lttng_poll_clean(&events);
 error_poll:
@@ -1298,17 +1308,7 @@ static void *thread_manage_apps(void *data)
                                                goto error;
                                        }
 
-                                       /* Set socket timeout for both receiving and ending */
-                                       (void) lttcomm_setsockopt_rcv_timeout(sock,
-                                                       app_socket_timeout);
-                                       (void) lttcomm_setsockopt_snd_timeout(sock,
-                                                       app_socket_timeout);
-
                                        DBG("Apps with sock %d added to poll set", sock);
-
-                                       health_code_update();
-
-                                       break;
                                }
                        } else {
                                /*
@@ -1324,7 +1324,6 @@ static void *thread_manage_apps(void *data)
 
                                        /* Socket closed on remote end. */
                                        ust_app_unregister(pollfd);
-                                       break;
                                }
                        }
 
@@ -1361,6 +1360,9 @@ error_testpoint:
  * Send a socket to a thread This is called from the dispatch UST registration
  * thread once all sockets are set for the application.
  *
+ * The sock value can be invalid, we don't really care, the thread will handle
+ * it and make the necessary cleanup if so.
+ *
  * On success, return 0 else a negative value being the errno message of the
  * write().
  */
@@ -1368,9 +1370,14 @@ static int send_socket_to_thread(int fd, int sock)
 {
        int ret;
 
-       /* Sockets MUST be set or else this should not have been called. */
-       assert(fd >= 0);
-       assert(sock >= 0);
+       /*
+        * It's possible that the FD is set as invalid with -1 concurrently just
+        * before calling this function being a shutdown state of the thread.
+        */
+       if (fd < 0) {
+               ret = -EBADF;
+               goto error;
+       }
 
        do {
                ret = write(fd, &sock, sizeof(sock));
@@ -1539,7 +1546,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                        if (ret < 0) {
                                                PERROR("close ust sock dispatch %d", ust_cmd->sock);
                                        }
-                                       lttng_fd_put(1, LTTNG_FD_APPS);
+                                       lttng_fd_put(LTTNG_FD_APPS, 1);
                                        free(ust_cmd);
                                        goto error;
                                }
@@ -1553,7 +1560,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                        if (ret < 0) {
                                                PERROR("close ust sock dispatch %d", ust_cmd->sock);
                                        }
-                                       lttng_fd_put(1, LTTNG_FD_APPS);
+                                       lttng_fd_put(LTTNG_FD_APPS, 1);
                                        free(wait_node);
                                        free(ust_cmd);
                                        continue;
@@ -1601,7 +1608,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                        if (ret < 0) {
                                                PERROR("close ust sock dispatch %d", ust_cmd->sock);
                                        }
-                                       lttng_fd_put(1, LTTNG_FD_APPS);
+                                       lttng_fd_put(LTTNG_FD_APPS, 1);
                                }
                                free(ust_cmd);
                        }
@@ -1633,7 +1640,12 @@ static void *thread_dispatch_ust_registration(void *data)
                                if (ret < 0) {
                                        rcu_read_unlock();
                                        session_unlock_list();
-                                       /* No notify thread, stop the UST tracing. */
+                                       /*
+                                        * No notify thread, stop the UST tracing. However, this is
+                                        * not an internal error of the this thread thus setting
+                                        * the health error code to a normal exit.
+                                        */
+                                       err = 0;
                                        goto error;
                                }
 
@@ -1658,7 +1670,12 @@ static void *thread_dispatch_ust_registration(void *data)
                                if (ret < 0) {
                                        rcu_read_unlock();
                                        session_unlock_list();
-                                       /* No apps. thread, stop the UST tracing. */
+                                       /*
+                                        * No apps. thread, stop the UST tracing. However, this is
+                                        * not an internal error of the this thread thus setting
+                                        * the health error code to a normal exit.
+                                        */
+                                       err = 0;
                                        goto error;
                                }
 
@@ -1788,6 +1805,18 @@ static void *thread_registration_apps(void *data)
                                                goto error;
                                        }
 
+                                       /*
+                                        * Set socket timeout for both receiving and ending.
+                                        * app_socket_timeout is in seconds, whereas
+                                        * lttcomm_setsockopt_rcv_timeout and
+                                        * lttcomm_setsockopt_snd_timeout expect msec as
+                                        * parameter.
+                                        */
+                                       (void) lttcomm_setsockopt_rcv_timeout(sock,
+                                                       app_socket_timeout * 1000);
+                                       (void) lttcomm_setsockopt_snd_timeout(sock,
+                                                       app_socket_timeout * 1000);
+
                                        /*
                                         * Set the CLOEXEC flag. Return code is useless because
                                         * either way, the show must go on.
@@ -2704,6 +2733,10 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
                break;
        case LTTNG_DOMAIN_UST:
        {
+               if (!ust_app_supported()) {
+                       ret = LTTNG_ERR_NO_UST;
+                       goto error;
+               }
                /* Consumer is in an ERROR state. Report back to client */
                if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
                        ret = LTTNG_ERR_NO_USTCONSUMERD;
@@ -3839,7 +3872,7 @@ static void usage(void)
        fprintf(stderr, "  -d, --daemonize                    Start as a daemon.\n");
        fprintf(stderr, "  -g, --group NAME                   Specify the tracing group name. (default: tracing)\n");
        fprintf(stderr, "  -V, --version                      Show version number.\n");
-       fprintf(stderr, "  -S, --sig-parent                   Send SIGCHLD to parent pid to notify readiness.\n");
+       fprintf(stderr, "  -S, --sig-parent                   Send SIGUSR1 to parent pid to notify readiness.\n");
        fprintf(stderr, "  -q, --quiet                        No output at all.\n");
        fprintf(stderr, "  -v, --verbose                      Verbose mode. Activate DBG() macro.\n");
        fprintf(stderr, "  -p, --pidfile FILE                 Write a pid to FILE name overriding the default value.\n");
@@ -4069,12 +4102,12 @@ static int set_permissions(char *rundir)
        ret = allowed_group();
        if (ret < 0) {
                WARN("No tracing group detected");
-               ret = 0;
-               goto end;
+               /* Setting gid to 0 if no tracing group is found */
+               gid = 0;
+       } else {
+               gid = ret;
        }
 
-       gid = ret;
-
        /* Set lttng run dir */
        ret = chown(rundir, 0, gid);
        if (ret < 0) {
@@ -4082,7 +4115,7 @@ static int set_permissions(char *rundir)
                PERROR("chown");
        }
 
-       /* Ensure tracing group can search the run dir */
+       /* Ensure all applications and tracing group can search the run dir */
        ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH);
        if (ret < 0) {
                ERR("Unable to set permissions on %s", rundir);
@@ -4119,7 +4152,6 @@ static int set_permissions(char *rundir)
 
        DBG("All permissions are set");
 
-end:
        return ret;
 }
 
@@ -4633,6 +4665,14 @@ int main(int argc, char **argv)
 
        /* Initialize communication library */
        lttcomm_init();
+       /* This is to get the TCP timeout value. */
+       lttcomm_inet_init();
+
+       /*
+        * Initialize the health check subsystem. This call should set the
+        * appropriate time values.
+        */
+       health_init();
 
        /* Create thread to manage the client socket */
        ret = pthread_create(&ht_cleanup_thread, NULL,
This page took 0.029087 seconds and 4 git commands to generate.