Fix: report to client when snapshot will be empty
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index b688a4c09e45f25c3d62bc82b86cc73bcb23fe16..bdd48c24cb2ca6edbcc8ab7f2c6a898881f7fbb5 100644 (file)
@@ -690,9 +690,6 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                                        rcu_read_lock();
                                        cds_lfht_for_each_entry(ksess->consumer->socks->ht,
                                                        &iter.iter, socket, node.node) {
-                                               /* Code flow error */
-                                               assert(socket->fd);
-
                                                pthread_mutex_lock(socket->lock);
                                                ret = kernel_consumer_send_channel_stream(socket,
                                                                channel, ksess,
@@ -727,6 +724,12 @@ static void update_ust_app(int app_sock)
 {
        struct ltt_session *sess, *stmp;
 
+       /* Consumer is in an ERROR state. Stop any application update. */
+       if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
+               /* Stop the update process since the consumer is dead. */
+               return;
+       }
+
        /* For all tracing session(s) */
        cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
                session_lock(sess);
@@ -1018,13 +1021,14 @@ restart:
                        lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
                consumer_data->metadata_fd =
                        lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
-               if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
+               if (consumer_data->cmd_sock < 0
+                               || consumer_data->metadata_fd < 0) {
                        PERROR("consumer connect cmd socket");
                        /* On error, signal condition and quit. */
                        signal_consumer_condition(consumer_data, -1);
                        goto error;
                }
-               consumer_data->metadata_sock.fd = &consumer_data->metadata_fd;
+               consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
                /* Create metadata socket lock. */
                consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
                if (consumer_data->metadata_sock.lock == NULL) {
@@ -1135,6 +1139,13 @@ restart_poll:
 
 exit:
 error:
+       /*
+        * We lock here because we are about to close the sockets and some other
+        * thread might be using them so get exclusive access which will abort all
+        * other consumer command by other threads.
+        */
+       pthread_mutex_lock(&consumer_data->lock);
+
        /* Immediately set the consumerd state to stopped */
        if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
                uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
@@ -1160,15 +1171,12 @@ error:
                }
                consumer_data->cmd_sock = -1;
        }
-       if (*consumer_data->metadata_sock.fd >= 0) {
-               ret = close(*consumer_data->metadata_sock.fd);
+       if (*consumer_data->metadata_sock.fd_ptr >= 0) {
+               ret = close(*consumer_data->metadata_sock.fd_ptr);
                if (ret) {
                        PERROR("close");
                }
        }
-       /* Cleanup metadata socket mutex. */
-       pthread_mutex_destroy(consumer_data->metadata_sock.lock);
-       free(consumer_data->metadata_sock.lock);
 
        if (sock >= 0) {
                ret = close(sock);
@@ -1180,6 +1188,11 @@ error:
        unlink(consumer_data->err_unix_sock_path);
        unlink(consumer_data->cmd_unix_sock_path);
        consumer_data->pid = 0;
+       pthread_mutex_unlock(&consumer_data->lock);
+
+       /* Cleanup metadata socket mutex. */
+       pthread_mutex_destroy(consumer_data->metadata_sock.lock);
+       free(consumer_data->metadata_sock.lock);
 
        lttng_poll_clean(&events);
 error_poll:
@@ -1295,23 +1308,7 @@ static void *thread_manage_apps(void *data)
                                                goto error;
                                        }
 
-                                       /*
-                                        * Set socket timeout for both receiving and ending.
-                                        * app_socket_timeout is in seconds, whereas
-                                        * lttcomm_setsockopt_rcv_timeout and
-                                        * lttcomm_setsockopt_snd_timeout expect msec as
-                                        * parameter.
-                                        */
-                                       (void) lttcomm_setsockopt_rcv_timeout(sock,
-                                                       app_socket_timeout * 1000);
-                                       (void) lttcomm_setsockopt_snd_timeout(sock,
-                                                       app_socket_timeout * 1000);
-
                                        DBG("Apps with sock %d added to poll set", sock);
-
-                                       health_code_update();
-
-                                       break;
                                }
                        } else {
                                /*
@@ -1327,7 +1324,6 @@ static void *thread_manage_apps(void *data)
 
                                        /* Socket closed on remote end. */
                                        ust_app_unregister(pollfd);
-                                       break;
                                }
                        }
 
@@ -1364,6 +1360,9 @@ error_testpoint:
  * Send a socket to a thread This is called from the dispatch UST registration
  * thread once all sockets are set for the application.
  *
+ * The sock value can be invalid, we don't really care, the thread will handle
+ * it and make the necessary cleanup if so.
+ *
  * On success, return 0 else a negative value being the errno message of the
  * write().
  */
@@ -1371,9 +1370,14 @@ static int send_socket_to_thread(int fd, int sock)
 {
        int ret;
 
-       /* Sockets MUST be set or else this should not have been called. */
-       assert(fd >= 0);
-       assert(sock >= 0);
+       /*
+        * It's possible that the FD is set as invalid with -1 concurrently just
+        * before calling this function being a shutdown state of the thread.
+        */
+       if (fd < 0) {
+               ret = -EBADF;
+               goto error;
+       }
 
        do {
                ret = write(fd, &sock, sizeof(sock));
@@ -1542,7 +1546,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                        if (ret < 0) {
                                                PERROR("close ust sock dispatch %d", ust_cmd->sock);
                                        }
-                                       lttng_fd_put(1, LTTNG_FD_APPS);
+                                       lttng_fd_put(LTTNG_FD_APPS, 1);
                                        free(ust_cmd);
                                        goto error;
                                }
@@ -1556,7 +1560,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                        if (ret < 0) {
                                                PERROR("close ust sock dispatch %d", ust_cmd->sock);
                                        }
-                                       lttng_fd_put(1, LTTNG_FD_APPS);
+                                       lttng_fd_put(LTTNG_FD_APPS, 1);
                                        free(wait_node);
                                        free(ust_cmd);
                                        continue;
@@ -1604,7 +1608,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                        if (ret < 0) {
                                                PERROR("close ust sock dispatch %d", ust_cmd->sock);
                                        }
-                                       lttng_fd_put(1, LTTNG_FD_APPS);
+                                       lttng_fd_put(LTTNG_FD_APPS, 1);
                                }
                                free(ust_cmd);
                        }
@@ -1636,7 +1640,12 @@ static void *thread_dispatch_ust_registration(void *data)
                                if (ret < 0) {
                                        rcu_read_unlock();
                                        session_unlock_list();
-                                       /* No notify thread, stop the UST tracing. */
+                                       /*
+                                        * No notify thread, stop the UST tracing. However, this is
+                                        * not an internal error of the this thread thus setting
+                                        * the health error code to a normal exit.
+                                        */
+                                       err = 0;
                                        goto error;
                                }
 
@@ -1661,7 +1670,12 @@ static void *thread_dispatch_ust_registration(void *data)
                                if (ret < 0) {
                                        rcu_read_unlock();
                                        session_unlock_list();
-                                       /* No apps. thread, stop the UST tracing. */
+                                       /*
+                                        * No apps. thread, stop the UST tracing. However, this is
+                                        * not an internal error of the this thread thus setting
+                                        * the health error code to a normal exit.
+                                        */
+                                       err = 0;
                                        goto error;
                                }
 
@@ -1791,6 +1805,18 @@ static void *thread_registration_apps(void *data)
                                                goto error;
                                        }
 
+                                       /*
+                                        * Set socket timeout for both receiving and ending.
+                                        * app_socket_timeout is in seconds, whereas
+                                        * lttcomm_setsockopt_rcv_timeout and
+                                        * lttcomm_setsockopt_snd_timeout expect msec as
+                                        * parameter.
+                                        */
+                                       (void) lttcomm_setsockopt_rcv_timeout(sock,
+                                                       app_socket_timeout * 1000);
+                                       (void) lttcomm_setsockopt_snd_timeout(sock,
+                                                       app_socket_timeout * 1000);
+
                                        /*
                                         * Set the CLOEXEC flag. Return code is useless because
                                         * either way, the show must go on.
@@ -3846,7 +3872,7 @@ static void usage(void)
        fprintf(stderr, "  -d, --daemonize                    Start as a daemon.\n");
        fprintf(stderr, "  -g, --group NAME                   Specify the tracing group name. (default: tracing)\n");
        fprintf(stderr, "  -V, --version                      Show version number.\n");
-       fprintf(stderr, "  -S, --sig-parent                   Send SIGCHLD to parent pid to notify readiness.\n");
+       fprintf(stderr, "  -S, --sig-parent                   Send SIGUSR1 to parent pid to notify readiness.\n");
        fprintf(stderr, "  -q, --quiet                        No output at all.\n");
        fprintf(stderr, "  -v, --verbose                      Verbose mode. Activate DBG() macro.\n");
        fprintf(stderr, "  -p, --pidfile FILE                 Write a pid to FILE name overriding the default value.\n");
@@ -4639,6 +4665,14 @@ int main(int argc, char **argv)
 
        /* Initialize communication library */
        lttcomm_init();
+       /* This is to get the TCP timeout value. */
+       lttcomm_inet_init();
+
+       /*
+        * Initialize the health check subsystem. This call should set the
+        * appropriate time values.
+        */
+       health_init();
 
        /* Create thread to manage the client socket */
        ret = pthread_create(&ht_cleanup_thread, NULL,
This page took 0.028071 seconds and 4 git commands to generate.