2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * 2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
32 #include <sys/mount.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
36 #include <sys/types.h>
38 #include <urcu/uatomic.h>
42 #include <common/common.h>
43 #include <common/compat/socket.h>
44 #include <common/compat/getenv.h>
45 #include <common/defaults.h>
46 #include <common/kernel-consumer/kernel-consumer.h>
47 #include <common/futex.h>
48 #include <common/relayd/relayd.h>
49 #include <common/utils.h>
50 #include <common/daemonize.h>
51 #include <common/config/session-config.h>
52 #include <common/dynamic-buffer.h>
53 #include <lttng/event-internal.h>
55 #include "lttng-sessiond.h"
56 #include "buffer-registry.h"
63 #include "kernel-consumer.h"
67 #include "ust-consumer.h"
70 #include "health-sessiond.h"
71 #include "testpoint.h"
72 #include "ust-thread.h"
73 #include "agent-thread.h"
75 #include "load-session-thread.h"
76 #include "notification-thread.h"
77 #include "notification-thread-commands.h"
78 #include "rotation-thread.h"
79 #include "lttng-syscall.h"
81 #include "ht-cleanup.h"
82 #include "sessiond-config.h"
87 static const char *help_msg
=
88 #ifdef LTTNG_EMBED_HELP
89 #include <lttng-sessiond.8.h>
96 static int lockfile_fd
= -1;
98 /* Set to 1 when a SIGUSR1 signal is received. */
99 static int recv_child_signal
;
101 /* Command line options */
102 static const struct option long_options
[] = {
103 { "client-sock", required_argument
, 0, 'c' },
104 { "apps-sock", required_argument
, 0, 'a' },
105 { "kconsumerd-cmd-sock", required_argument
, 0, '\0' },
106 { "kconsumerd-err-sock", required_argument
, 0, '\0' },
107 { "ustconsumerd32-cmd-sock", required_argument
, 0, '\0' },
108 { "ustconsumerd32-err-sock", required_argument
, 0, '\0' },
109 { "ustconsumerd64-cmd-sock", required_argument
, 0, '\0' },
110 { "ustconsumerd64-err-sock", required_argument
, 0, '\0' },
111 { "consumerd32-path", required_argument
, 0, '\0' },
112 { "consumerd32-libdir", required_argument
, 0, '\0' },
113 { "consumerd64-path", required_argument
, 0, '\0' },
114 { "consumerd64-libdir", required_argument
, 0, '\0' },
115 { "daemonize", no_argument
, 0, 'd' },
116 { "background", no_argument
, 0, 'b' },
117 { "sig-parent", no_argument
, 0, 'S' },
118 { "help", no_argument
, 0, 'h' },
119 { "group", required_argument
, 0, 'g' },
120 { "version", no_argument
, 0, 'V' },
121 { "quiet", no_argument
, 0, 'q' },
122 { "verbose", no_argument
, 0, 'v' },
123 { "verbose-consumer", no_argument
, 0, '\0' },
124 { "no-kernel", no_argument
, 0, '\0' },
125 { "pidfile", required_argument
, 0, 'p' },
126 { "agent-tcp-port", required_argument
, 0, '\0' },
127 { "config", required_argument
, 0, 'f' },
128 { "load", required_argument
, 0, 'l' },
129 { "kmod-probes", required_argument
, 0, '\0' },
130 { "extra-kmod-probes", required_argument
, 0, '\0' },
134 /* Command line options to ignore from configuration file */
135 static const char *config_ignore_options
[] = { "help", "version", "config" };
137 /* Shared between threads */
138 static int dispatch_thread_exit
;
140 static int apps_sock
= -1;
143 * This pipe is used to inform the thread managing application communication
144 * that a command is queued and ready to be processed.
146 static int apps_cmd_pipe
[2] = { -1, -1 };
148 /* Pthread, Mutexes and Semaphores */
149 static pthread_t apps_thread
;
150 static pthread_t apps_notify_thread
;
151 static pthread_t reg_apps_thread
;
152 static pthread_t kernel_thread
;
153 static pthread_t dispatch_thread
;
154 static pthread_t agent_reg_thread
;
155 static pthread_t load_session_thread
;
158 * UST registration command queue. This queue is tied to a futex and uses an
159 * N wakers / 1 waiter scheme implemented and detailed in futex.c/.h
161 * The thread_registration_apps and thread_dispatch_ust_registration use this
162 * queue along with the wait/wake scheme. The thread_manage_apps later receives
163 * each new application socket and monitors it for any I/O error or clean
164 * close that triggers an unregistration of the application.
166 static struct ust_cmd_queue ust_cmd_queue
;
168 static const char *module_proc_lttng
= "/proc/lttng";
170 /* Load session thread information to operate. */
171 static struct load_session_thread_data
*load_info
;
174 * Section name to look for in the daemon configuration file.
176 static const char * const config_section_name
= "sessiond";
178 /* Am I root or not. Set to 1 if the daemon is running as root */
182 * Stop all threads by closing the thread quit pipe.
184 static void stop_threads(void)
188 /* Stopping all threads */
189 DBG("Terminating all threads");
190 ret
= sessiond_notify_quit_pipe();
192 ERR("write error on thread quit pipe");
195 /* Dispatch thread */
196 CMM_STORE_SHARED(dispatch_thread_exit
, 1);
197 futex_nto1_wake(&ust_cmd_queue
.futex
);
201 * Close every consumer sockets.
203 static void close_consumer_sockets(void)
207 if (kconsumer_data
.err_sock
>= 0) {
208 ret
= close(kconsumer_data
.err_sock
);
210 PERROR("kernel consumer err_sock close");
213 if (ustconsumer32_data
.err_sock
>= 0) {
214 ret
= close(ustconsumer32_data
.err_sock
);
216 PERROR("UST consumerd32 err_sock close");
219 if (ustconsumer64_data
.err_sock
>= 0) {
220 ret
= close(ustconsumer64_data
.err_sock
);
222 PERROR("UST consumerd64 err_sock close");
225 if (kconsumer_data
.cmd_sock
>= 0) {
226 ret
= close(kconsumer_data
.cmd_sock
);
228 PERROR("kernel consumer cmd_sock close");
231 if (ustconsumer32_data
.cmd_sock
>= 0) {
232 ret
= close(ustconsumer32_data
.cmd_sock
);
234 PERROR("UST consumerd32 cmd_sock close");
237 if (ustconsumer64_data
.cmd_sock
>= 0) {
238 ret
= close(ustconsumer64_data
.cmd_sock
);
240 PERROR("UST consumerd64 cmd_sock close");
243 if (kconsumer_data
.channel_monitor_pipe
>= 0) {
244 ret
= close(kconsumer_data
.channel_monitor_pipe
);
246 PERROR("kernel consumer channel monitor pipe close");
249 if (ustconsumer32_data
.channel_monitor_pipe
>= 0) {
250 ret
= close(ustconsumer32_data
.channel_monitor_pipe
);
252 PERROR("UST consumerd32 channel monitor pipe close");
255 if (ustconsumer64_data
.channel_monitor_pipe
>= 0) {
256 ret
= close(ustconsumer64_data
.channel_monitor_pipe
);
258 PERROR("UST consumerd64 channel monitor pipe close");
264 * Wait on consumer process termination.
266 * Need to be called with the consumer data lock held or from a context
267 * ensuring no concurrent access to data (e.g: cleanup).
269 static void wait_consumer(struct consumer_data
*consumer_data
)
274 if (consumer_data
->pid
<= 0) {
278 DBG("Waiting for complete teardown of consumerd (PID: %d)",
280 ret
= waitpid(consumer_data
->pid
, &status
, 0);
282 PERROR("consumerd waitpid pid: %d", consumer_data
->pid
)
283 } else if (!WIFEXITED(status
)) {
284 ERR("consumerd termination with error: %d",
287 consumer_data
->pid
= 0;
291 * Cleanup the session daemon's data structures.
293 static void sessiond_cleanup(void)
296 struct ltt_session_list
*session_list
= session_get_list();
298 DBG("Cleanup sessiond");
301 * Close the thread quit pipe. It has already done its job,
302 * since we are now called.
304 sessiond_close_quit_pipe();
306 ret
= remove(config
.pid_file_path
.value
);
308 PERROR("remove pidfile %s", config
.pid_file_path
.value
);
311 DBG("Removing sessiond and consumerd content of directory %s",
312 config
.rundir
.value
);
315 DBG("Removing %s", config
.pid_file_path
.value
);
316 (void) unlink(config
.pid_file_path
.value
);
318 DBG("Removing %s", config
.agent_port_file_path
.value
);
319 (void) unlink(config
.agent_port_file_path
.value
);
322 DBG("Removing %s", kconsumer_data
.err_unix_sock_path
);
323 (void) unlink(kconsumer_data
.err_unix_sock_path
);
325 DBG("Removing directory %s", config
.kconsumerd_path
.value
);
326 (void) rmdir(config
.kconsumerd_path
.value
);
328 /* ust consumerd 32 */
329 DBG("Removing %s", config
.consumerd32_err_unix_sock_path
.value
);
330 (void) unlink(config
.consumerd32_err_unix_sock_path
.value
);
332 DBG("Removing directory %s", config
.consumerd32_path
.value
);
333 (void) rmdir(config
.consumerd32_path
.value
);
335 /* ust consumerd 64 */
336 DBG("Removing %s", config
.consumerd64_err_unix_sock_path
.value
);
337 (void) unlink(config
.consumerd64_err_unix_sock_path
.value
);
339 DBG("Removing directory %s", config
.consumerd64_path
.value
);
340 (void) rmdir(config
.consumerd64_path
.value
);
342 pthread_mutex_destroy(&session_list
->lock
);
344 wait_consumer(&kconsumer_data
);
345 wait_consumer(&ustconsumer64_data
);
346 wait_consumer(&ustconsumer32_data
);
348 DBG("Cleaning up all agent apps");
349 agent_app_ht_clean();
351 DBG("Closing all UST sockets");
352 ust_app_clean_list();
353 buffer_reg_destroy_registries();
355 if (is_root
&& !config
.no_kernel
) {
356 DBG2("Closing kernel fd");
357 if (kernel_tracer_fd
>= 0) {
358 ret
= close(kernel_tracer_fd
);
363 DBG("Unloading kernel modules");
364 modprobe_remove_lttng_all();
368 close_consumer_sockets();
371 load_session_destroy_data(load_info
);
376 * We do NOT rmdir rundir because there are other processes
377 * using it, for instance lttng-relayd, which can start in
378 * parallel with this teardown.
383 * Cleanup the daemon's option data structures.
385 static void sessiond_cleanup_options(void)
387 DBG("Cleaning up options");
389 sessiond_config_fini(&config
);
391 run_as_destroy_worker();
395 * Notify UST applications using the shm mmap futex.
397 static int notify_ust_apps(int active
)
401 DBG("Notifying applications of session daemon state: %d", active
);
403 /* See shm.c for this call implying mmap, shm and futex calls */
404 wait_shm_mmap
= shm_ust_get_mmap(config
.wait_shm_path
.value
, is_root
);
405 if (wait_shm_mmap
== NULL
) {
409 /* Wake waiting process */
410 futex_wait_update((int32_t *) wait_shm_mmap
, active
);
412 /* Apps notified successfully */
420 * Update the kernel poll set of all channel fds available over all tracing
421 * sessions. Add the wakeup pipe at the end of the set.
423 static int update_kernel_poll(struct lttng_poll_event
*events
)
426 struct ltt_kernel_channel
*channel
;
427 struct ltt_session
*session
;
428 const struct ltt_session_list
*session_list
= session_get_list();
430 DBG("Updating kernel poll set");
433 cds_list_for_each_entry(session
, &session_list
->head
, list
) {
434 if (!session_get(session
)) {
437 session_lock(session
);
438 if (session
->kernel_session
== NULL
) {
439 session_unlock(session
);
440 session_put(session
);
444 cds_list_for_each_entry(channel
,
445 &session
->kernel_session
->channel_list
.head
, list
) {
446 /* Add channel fd to the kernel poll set */
447 ret
= lttng_poll_add(events
, channel
->fd
, LPOLLIN
| LPOLLRDNORM
);
449 session_unlock(session
);
450 session_put(session
);
453 DBG("Channel fd %d added to kernel set", channel
->fd
);
455 session_unlock(session
);
457 session_unlock_list();
462 session_unlock_list();
467 * Find the channel fd from 'fd' over all tracing sessions. When found, check
468 * for new channel streams and send those stream fds to the kernel consumer.
470 * Useful for CPU hotplug feature.
472 static int update_kernel_stream(int fd
)
475 struct ltt_session
*session
;
476 struct ltt_kernel_session
*ksess
;
477 struct ltt_kernel_channel
*channel
;
478 const struct ltt_session_list
*session_list
= session_get_list();
480 DBG("Updating kernel streams for channel fd %d", fd
);
483 cds_list_for_each_entry(session
, &session_list
->head
, list
) {
484 if (!session_get(session
)) {
487 session_lock(session
);
488 if (session
->kernel_session
== NULL
) {
489 session_unlock(session
);
490 session_put(session
);
493 ksess
= session
->kernel_session
;
495 cds_list_for_each_entry(channel
,
496 &ksess
->channel_list
.head
, list
) {
497 struct lttng_ht_iter iter
;
498 struct consumer_socket
*socket
;
500 if (channel
->fd
!= fd
) {
503 DBG("Channel found, updating kernel streams");
504 ret
= kernel_open_channel_stream(channel
);
508 /* Update the stream global counter */
509 ksess
->stream_count_global
+= ret
;
512 * Have we already sent fds to the consumer? If yes, it
513 * means that tracing is started so it is safe to send
514 * our updated stream fds.
516 if (ksess
->consumer_fds_sent
!= 1
517 || ksess
->consumer
== NULL
) {
523 cds_lfht_for_each_entry(ksess
->consumer
->socks
->ht
,
524 &iter
.iter
, socket
, node
.node
) {
525 pthread_mutex_lock(socket
->lock
);
526 ret
= kernel_consumer_send_channel_streams(socket
,
528 session
->output_traces
? 1 : 0);
529 pthread_mutex_unlock(socket
->lock
);
537 session_unlock(session
);
538 session_put(session
);
540 session_unlock_list();
544 session_unlock(session
);
545 session_put(session
);
546 session_unlock_list();
551 * For each tracing session, update newly registered apps. The session list
552 * lock MUST be acquired before calling this.
554 static void update_ust_app(int app_sock
)
556 struct ltt_session
*sess
, *stmp
;
557 const struct ltt_session_list
*session_list
= session_get_list();
559 /* Consumer is in an ERROR state. Stop any application update. */
560 if (uatomic_read(&ust_consumerd_state
) == CONSUMER_ERROR
) {
561 /* Stop the update process since the consumer is dead. */
565 /* For all tracing session(s) */
566 cds_list_for_each_entry_safe(sess
, stmp
, &session_list
->head
, list
) {
569 if (!session_get(sess
)) {
573 if (!sess
->ust_session
) {
578 assert(app_sock
>= 0);
579 app
= ust_app_find_by_sock(app_sock
);
582 * Application can be unregistered before so
583 * this is possible hence simply stopping the
586 DBG3("UST app update failed to find app sock %d",
590 ust_app_global_update(sess
->ust_session
, app
);
594 session_unlock(sess
);
600 * This thread manages events coming from the kernel.
602 * Features supported in this thread:
605 static void *thread_manage_kernel(void *data
)
607 int ret
, i
, pollfd
, update_poll_flag
= 1, err
= -1;
608 uint32_t revents
, nb_fd
;
610 struct lttng_poll_event events
;
612 DBG("[thread] Thread manage kernel started");
614 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_KERNEL
);
617 * This first step of the while is to clean this structure which could free
618 * non NULL pointers so initialize it before the loop.
620 lttng_poll_init(&events
);
622 if (testpoint(sessiond_thread_manage_kernel
)) {
623 goto error_testpoint
;
626 health_code_update();
628 if (testpoint(sessiond_thread_manage_kernel_before_loop
)) {
629 goto error_testpoint
;
633 health_code_update();
635 if (update_poll_flag
== 1) {
636 /* Clean events object. We are about to populate it again. */
637 lttng_poll_clean(&events
);
639 ret
= sessiond_set_thread_pollset(&events
, 2);
641 goto error_poll_create
;
644 ret
= lttng_poll_add(&events
, kernel_poll_pipe
[0], LPOLLIN
);
649 /* This will add the available kernel channel if any. */
650 ret
= update_kernel_poll(&events
);
654 update_poll_flag
= 0;
657 DBG("Thread kernel polling");
659 /* Poll infinite value of time */
662 ret
= lttng_poll_wait(&events
, -1);
663 DBG("Thread kernel return from poll on %d fds",
664 LTTNG_POLL_GETNB(&events
));
668 * Restart interrupted system call.
670 if (errno
== EINTR
) {
674 } else if (ret
== 0) {
675 /* Should not happen since timeout is infinite */
676 ERR("Return value of poll is 0 with an infinite timeout.\n"
677 "This should not have happened! Continuing...");
683 for (i
= 0; i
< nb_fd
; i
++) {
684 /* Fetch once the poll data */
685 revents
= LTTNG_POLL_GETEV(&events
, i
);
686 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
688 health_code_update();
691 /* No activity for this FD (poll implementation). */
695 /* Thread quit pipe has been closed. Killing thread. */
696 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
702 /* Check for data on kernel pipe */
703 if (revents
& LPOLLIN
) {
704 if (pollfd
== kernel_poll_pipe
[0]) {
705 (void) lttng_read(kernel_poll_pipe
[0],
708 * Ret value is useless here, if this pipe gets any actions an
709 * update is required anyway.
711 update_poll_flag
= 1;
715 * New CPU detected by the kernel. Adding kernel stream to
716 * kernel session and updating the kernel consumer
718 ret
= update_kernel_stream(pollfd
);
724 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
725 update_poll_flag
= 1;
728 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
736 lttng_poll_clean(&events
);
739 utils_close_pipe(kernel_poll_pipe
);
740 kernel_poll_pipe
[0] = kernel_poll_pipe
[1] = -1;
743 ERR("Health error occurred in %s", __func__
);
744 WARN("Kernel thread died unexpectedly. "
745 "Kernel tracing can continue but CPU hotplug is disabled.");
747 health_unregister(health_sessiond
);
748 DBG("Kernel thread dying");
753 * Signal pthread condition of the consumer data that the thread.
755 static void signal_consumer_condition(struct consumer_data
*data
, int state
)
757 pthread_mutex_lock(&data
->cond_mutex
);
760 * The state is set before signaling. It can be any value, it's the waiter
761 * job to correctly interpret this condition variable associated to the
762 * consumer pthread_cond.
764 * A value of 0 means that the corresponding thread of the consumer data
765 * was not started. 1 indicates that the thread has started and is ready
766 * for action. A negative value means that there was an error during the
769 data
->consumer_thread_is_ready
= state
;
770 (void) pthread_cond_signal(&data
->cond
);
772 pthread_mutex_unlock(&data
->cond_mutex
);
776 * This thread manages the consumer errors sent back to the session daemon.
778 void *thread_manage_consumer(void *data
)
780 int sock
= -1, i
, ret
, pollfd
, err
= -1, should_quit
= 0;
781 uint32_t revents
, nb_fd
;
782 enum lttcomm_return_code code
;
783 struct lttng_poll_event events
;
784 struct consumer_data
*consumer_data
= data
;
785 struct consumer_socket
*cmd_socket_wrapper
= NULL
;
787 DBG("[thread] Manage consumer started");
789 rcu_register_thread();
792 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_CONSUMER
);
794 health_code_update();
797 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
798 * metadata_sock. Nothing more will be added to this poll set.
800 ret
= sessiond_set_thread_pollset(&events
, 3);
806 * The error socket here is already in a listening state which was done
807 * just before spawning this thread to avoid a race between the consumer
808 * daemon exec trying to connect and the listen() call.
810 ret
= lttng_poll_add(&events
, consumer_data
->err_sock
, LPOLLIN
| LPOLLRDHUP
);
815 health_code_update();
817 /* Infinite blocking call, waiting for transmission */
821 if (testpoint(sessiond_thread_manage_consumer
)) {
825 ret
= lttng_poll_wait(&events
, -1);
829 * Restart interrupted system call.
831 if (errno
== EINTR
) {
839 for (i
= 0; i
< nb_fd
; i
++) {
840 /* Fetch once the poll data */
841 revents
= LTTNG_POLL_GETEV(&events
, i
);
842 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
844 health_code_update();
847 /* No activity for this FD (poll implementation). */
851 /* Thread quit pipe has been closed. Killing thread. */
852 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
858 /* Event on the registration socket */
859 if (pollfd
== consumer_data
->err_sock
) {
860 if (revents
& LPOLLIN
) {
862 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
863 ERR("consumer err socket poll error");
866 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
872 sock
= lttcomm_accept_unix_sock(consumer_data
->err_sock
);
878 * Set the CLOEXEC flag. Return code is useless because either way, the
881 (void) utils_set_fd_cloexec(sock
);
883 health_code_update();
885 DBG2("Receiving code from consumer err_sock");
887 /* Getting status code from kconsumerd */
888 ret
= lttcomm_recv_unix_sock(sock
, &code
,
889 sizeof(enum lttcomm_return_code
));
894 health_code_update();
895 if (code
!= LTTCOMM_CONSUMERD_COMMAND_SOCK_READY
) {
896 ERR("consumer error when waiting for SOCK_READY : %s",
897 lttcomm_get_readable_code(-code
));
901 /* Connect both command and metadata sockets. */
902 consumer_data
->cmd_sock
=
903 lttcomm_connect_unix_sock(
904 consumer_data
->cmd_unix_sock_path
);
905 consumer_data
->metadata_fd
=
906 lttcomm_connect_unix_sock(
907 consumer_data
->cmd_unix_sock_path
);
908 if (consumer_data
->cmd_sock
< 0 || consumer_data
->metadata_fd
< 0) {
909 PERROR("consumer connect cmd socket");
910 /* On error, signal condition and quit. */
911 signal_consumer_condition(consumer_data
, -1);
915 consumer_data
->metadata_sock
.fd_ptr
= &consumer_data
->metadata_fd
;
917 /* Create metadata socket lock. */
918 consumer_data
->metadata_sock
.lock
= zmalloc(sizeof(pthread_mutex_t
));
919 if (consumer_data
->metadata_sock
.lock
== NULL
) {
920 PERROR("zmalloc pthread mutex");
923 pthread_mutex_init(consumer_data
->metadata_sock
.lock
, NULL
);
925 DBG("Consumer command socket ready (fd: %d", consumer_data
->cmd_sock
);
926 DBG("Consumer metadata socket ready (fd: %d)",
927 consumer_data
->metadata_fd
);
930 * Remove the consumerd error sock since we've established a connection.
932 ret
= lttng_poll_del(&events
, consumer_data
->err_sock
);
937 /* Add new accepted error socket. */
938 ret
= lttng_poll_add(&events
, sock
, LPOLLIN
| LPOLLRDHUP
);
943 /* Add metadata socket that is successfully connected. */
944 ret
= lttng_poll_add(&events
, consumer_data
->metadata_fd
,
945 LPOLLIN
| LPOLLRDHUP
);
950 health_code_update();
953 * Transfer the write-end of the channel monitoring and rotate pipe
954 * to the consumer by issuing a SET_CHANNEL_MONITOR_PIPE command.
956 cmd_socket_wrapper
= consumer_allocate_socket(&consumer_data
->cmd_sock
);
957 if (!cmd_socket_wrapper
) {
960 cmd_socket_wrapper
->lock
= &consumer_data
->lock
;
962 ret
= consumer_send_channel_monitor_pipe(cmd_socket_wrapper
,
963 consumer_data
->channel_monitor_pipe
);
968 /* Discard the socket wrapper as it is no longer needed. */
969 consumer_destroy_socket(cmd_socket_wrapper
);
970 cmd_socket_wrapper
= NULL
;
972 /* The thread is completely initialized, signal that it is ready. */
973 signal_consumer_condition(consumer_data
, 1);
975 /* Infinite blocking call, waiting for transmission */
978 health_code_update();
980 /* Exit the thread because the thread quit pipe has been triggered. */
982 /* Not a health error. */
988 ret
= lttng_poll_wait(&events
, -1);
992 * Restart interrupted system call.
994 if (errno
== EINTR
) {
1002 for (i
= 0; i
< nb_fd
; i
++) {
1003 /* Fetch once the poll data */
1004 revents
= LTTNG_POLL_GETEV(&events
, i
);
1005 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
1007 health_code_update();
1010 /* No activity for this FD (poll implementation). */
1015 * Thread quit pipe has been triggered, flag that we should stop
1016 * but continue the current loop to handle potential data from
1019 should_quit
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
1021 if (pollfd
== sock
) {
1022 /* Event on the consumerd socket */
1023 if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)
1024 && !(revents
& LPOLLIN
)) {
1025 ERR("consumer err socket second poll error");
1028 health_code_update();
1029 /* Wait for any kconsumerd error */
1030 ret
= lttcomm_recv_unix_sock(sock
, &code
,
1031 sizeof(enum lttcomm_return_code
));
1033 ERR("consumer closed the command socket");
1037 ERR("consumer return code : %s",
1038 lttcomm_get_readable_code(-code
));
1041 } else if (pollfd
== consumer_data
->metadata_fd
) {
1042 if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)
1043 && !(revents
& LPOLLIN
)) {
1044 ERR("consumer err metadata socket second poll error");
1047 /* UST metadata requests */
1048 ret
= ust_consumer_metadata_request(
1049 &consumer_data
->metadata_sock
);
1051 ERR("Handling metadata request");
1055 /* No need for an else branch all FDs are tested prior. */
1057 health_code_update();
1063 * We lock here because we are about to close the sockets and some other
1064 * thread might be using them so get exclusive access which will abort all
1065 * other consumer command by other threads.
1067 pthread_mutex_lock(&consumer_data
->lock
);
1069 /* Immediately set the consumerd state to stopped */
1070 if (consumer_data
->type
== LTTNG_CONSUMER_KERNEL
) {
1071 uatomic_set(&kernel_consumerd_state
, CONSUMER_ERROR
);
1072 } else if (consumer_data
->type
== LTTNG_CONSUMER64_UST
||
1073 consumer_data
->type
== LTTNG_CONSUMER32_UST
) {
1074 uatomic_set(&ust_consumerd_state
, CONSUMER_ERROR
);
1076 /* Code flow error... */
1080 if (consumer_data
->err_sock
>= 0) {
1081 ret
= close(consumer_data
->err_sock
);
1085 consumer_data
->err_sock
= -1;
1087 if (consumer_data
->cmd_sock
>= 0) {
1088 ret
= close(consumer_data
->cmd_sock
);
1092 consumer_data
->cmd_sock
= -1;
1094 if (consumer_data
->metadata_sock
.fd_ptr
&&
1095 *consumer_data
->metadata_sock
.fd_ptr
>= 0) {
1096 ret
= close(*consumer_data
->metadata_sock
.fd_ptr
);
1108 unlink(consumer_data
->err_unix_sock_path
);
1109 unlink(consumer_data
->cmd_unix_sock_path
);
1110 pthread_mutex_unlock(&consumer_data
->lock
);
1112 /* Cleanup metadata socket mutex. */
1113 if (consumer_data
->metadata_sock
.lock
) {
1114 pthread_mutex_destroy(consumer_data
->metadata_sock
.lock
);
1115 free(consumer_data
->metadata_sock
.lock
);
1117 lttng_poll_clean(&events
);
1119 if (cmd_socket_wrapper
) {
1120 consumer_destroy_socket(cmd_socket_wrapper
);
1125 ERR("Health error occurred in %s", __func__
);
1127 health_unregister(health_sessiond
);
1128 DBG("consumer thread cleanup completed");
1130 rcu_thread_offline();
1131 rcu_unregister_thread();
1137 * This thread receives application command sockets (FDs) on the
1138 * apps_cmd_pipe and waits (polls) on them until they are closed
1139 * or an error occurs.
1141 * At that point, it flushes the data (tracing and metadata) associated
1142 * with this application and tears down ust app sessions and other
1143 * associated data structures through ust_app_unregister().
1145 * Note that this thread never sends commands to the applications
1146 * through the command sockets; it merely listens for hang-ups
1147 * and errors on those sockets and cleans-up as they occur.
1149 static void *thread_manage_apps(void *data
)
1151 int i
, ret
, pollfd
, err
= -1;
1153 uint32_t revents
, nb_fd
;
1154 struct lttng_poll_event events
;
1156 DBG("[thread] Manage application started");
1158 rcu_register_thread();
1159 rcu_thread_online();
1161 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_APP_MANAGE
);
1163 if (testpoint(sessiond_thread_manage_apps
)) {
1164 goto error_testpoint
;
1167 health_code_update();
1169 ret
= sessiond_set_thread_pollset(&events
, 2);
1171 goto error_poll_create
;
1174 ret
= lttng_poll_add(&events
, apps_cmd_pipe
[0], LPOLLIN
| LPOLLRDHUP
);
1179 if (testpoint(sessiond_thread_manage_apps_before_loop
)) {
1183 health_code_update();
1186 DBG("Apps thread polling");
1188 /* Infinite blocking call, waiting for transmission */
1190 health_poll_entry();
1191 ret
= lttng_poll_wait(&events
, -1);
1192 DBG("Apps thread return from poll on %d fds",
1193 LTTNG_POLL_GETNB(&events
));
1197 * Restart interrupted system call.
1199 if (errno
== EINTR
) {
1207 for (i
= 0; i
< nb_fd
; i
++) {
1208 /* Fetch once the poll data */
1209 revents
= LTTNG_POLL_GETEV(&events
, i
);
1210 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
1212 health_code_update();
1215 /* No activity for this FD (poll implementation). */
1219 /* Thread quit pipe has been closed. Killing thread. */
1220 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
1226 /* Inspect the apps cmd pipe */
1227 if (pollfd
== apps_cmd_pipe
[0]) {
1228 if (revents
& LPOLLIN
) {
1232 size_ret
= lttng_read(apps_cmd_pipe
[0], &sock
, sizeof(sock
));
1233 if (size_ret
< sizeof(sock
)) {
1234 PERROR("read apps cmd pipe");
1238 health_code_update();
1241 * Since this is a command socket (write then read),
1242 * we only monitor the error events of the socket.
1244 ret
= lttng_poll_add(&events
, sock
,
1245 LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
);
1250 DBG("Apps with sock %d added to poll set", sock
);
1251 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
1252 ERR("Apps command pipe error");
1255 ERR("Unknown poll events %u for sock %d", revents
, pollfd
);
1260 * At this point, we know that a registered application made
1261 * the event at poll_wait.
1263 if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
1264 /* Removing from the poll set */
1265 ret
= lttng_poll_del(&events
, pollfd
);
1270 /* Socket closed on remote end. */
1271 ust_app_unregister(pollfd
);
1273 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
1278 health_code_update();
1284 lttng_poll_clean(&events
);
1287 utils_close_pipe(apps_cmd_pipe
);
1288 apps_cmd_pipe
[0] = apps_cmd_pipe
[1] = -1;
1291 * We don't clean the UST app hash table here since already registered
1292 * applications can still be controlled so let them be until the session
1293 * daemon dies or the applications stop.
1298 ERR("Health error occurred in %s", __func__
);
1300 health_unregister(health_sessiond
);
1301 DBG("Application communication apps thread cleanup complete");
1302 rcu_thread_offline();
1303 rcu_unregister_thread();
/*
 * Send a socket to a thread by writing its value on the pipe 'fd'. This is
 * called from the dispatch UST registration thread once all sockets are set
 * for the application.
 *
 * The sock value can be invalid, we don't really care, the thread will handle
 * it and make the necessary cleanup if so.
 *
 * On success, return 0 else a negative errno value: -EBADF when 'fd' is
 * already invalid, the negated errno of the failed write, or -EIO when the
 * write was short or left no error code.
 */
static int send_socket_to_thread(int fd, int sock)
{
	ssize_t written;
	int write_errno;

	/*
	 * It's possible that the FD is set as invalid with -1 concurrently just
	 * before calling this function being a shutdown state of the thread.
	 */
	if (fd < 0) {
		return -EBADF;
	}

	written = lttng_write(fd, &sock, sizeof(sock));

	/*
	 * Test the sign before comparing against sizeof(): a signed negative
	 * value compared directly with a size_t is converted to a huge
	 * unsigned number, which would make a failed write look complete.
	 */
	if (written < 0 || (size_t) written < sizeof(sock)) {
		/* Capture errno before logging can overwrite it. */
		write_errno = errno;
		PERROR("write apps pipe %d", fd);
		if (written < 0 && write_errno != 0) {
			return -write_errno;
		}
		return -EIO;
	}

	/* All good. Don't send back the write positive ret value. */
	return 0;
}
1346 * Sanitize the wait queue of the dispatch registration thread, i.e. remove
1347 * invalid nodes from it. This avoids memory leaks in the case where the UST
1348 * notify socket is never received.
1350 static void sanitize_wait_queue(struct ust_reg_wait_queue
*wait_queue
)
1352 int ret
, nb_fd
= 0, i
;
1353 unsigned int fd_added
= 0;
1354 struct lttng_poll_event events
;
1355 struct ust_reg_wait_node
*wait_node
= NULL
, *tmp_wait_node
;
1359 lttng_poll_init(&events
);
1361 /* Just skip everything for an empty queue. */
1362 if (!wait_queue
->count
) {
1366 ret
= lttng_poll_create(&events
, wait_queue
->count
, LTTNG_CLOEXEC
);
1371 cds_list_for_each_entry_safe(wait_node
, tmp_wait_node
,
1372 &wait_queue
->head
, head
) {
1373 assert(wait_node
->app
);
1374 ret
= lttng_poll_add(&events
, wait_node
->app
->sock
,
1375 LPOLLHUP
| LPOLLERR
);
1388 * Poll but don't block so we can quickly identify the faulty events and
1389 * clean them afterwards from the wait queue.
1391 ret
= lttng_poll_wait(&events
, 0);
1397 for (i
= 0; i
< nb_fd
; i
++) {
1398 /* Get faulty FD. */
1399 uint32_t revents
= LTTNG_POLL_GETEV(&events
, i
);
1400 int pollfd
= LTTNG_POLL_GETFD(&events
, i
);
1403 /* No activity for this FD (poll implementation). */
1407 cds_list_for_each_entry_safe(wait_node
, tmp_wait_node
,
1408 &wait_queue
->head
, head
) {
1409 if (pollfd
== wait_node
->app
->sock
&&
1410 (revents
& (LPOLLHUP
| LPOLLERR
))) {
1411 cds_list_del(&wait_node
->head
);
1412 wait_queue
->count
--;
1413 ust_app_destroy(wait_node
->app
);
1416 * Silence warning of use-after-free in
1417 * cds_list_for_each_entry_safe which uses
1418 * __typeof__(*wait_node).
1423 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
1430 DBG("Wait queue sanitized, %d node were cleaned up", nb_fd
);
1434 lttng_poll_clean(&events
);
1438 lttng_poll_clean(&events
);
1440 ERR("Unable to sanitize wait queue");
1445 * Dispatch request from the registration threads to the application
1446 * communication thread.
1448 static void *thread_dispatch_ust_registration(void *data
)
1451 struct cds_wfcq_node
*node
;
1452 struct ust_command
*ust_cmd
= NULL
;
1453 struct ust_reg_wait_node
*wait_node
= NULL
, *tmp_wait_node
;
1454 struct ust_reg_wait_queue wait_queue
= {
1458 rcu_register_thread();
1460 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH
);
1462 if (testpoint(sessiond_thread_app_reg_dispatch
)) {
1463 goto error_testpoint
;
1466 health_code_update();
1468 CDS_INIT_LIST_HEAD(&wait_queue
.head
);
1470 DBG("[thread] Dispatch UST command started");
1473 health_code_update();
1475 /* Atomically prepare the queue futex */
1476 futex_nto1_prepare(&ust_cmd_queue
.futex
);
1478 if (CMM_LOAD_SHARED(dispatch_thread_exit
)) {
1483 struct ust_app
*app
= NULL
;
1487 * Make sure we don't have node(s) that have hung up before receiving
1488 * the notify socket. This is to clean the list in order to avoid
1489 * memory leaks from notify socket that are never seen.
1491 sanitize_wait_queue(&wait_queue
);
1493 health_code_update();
1494 /* Dequeue command for registration */
1495 node
= cds_wfcq_dequeue_blocking(&ust_cmd_queue
.head
, &ust_cmd_queue
.tail
);
1497 DBG("Woken up but nothing in the UST command queue");
1498 /* Continue thread execution */
1502 ust_cmd
= caa_container_of(node
, struct ust_command
, node
);
1504 DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
1505 " gid:%d sock:%d name:%s (version %d.%d)",
1506 ust_cmd
->reg_msg
.pid
, ust_cmd
->reg_msg
.ppid
,
1507 ust_cmd
->reg_msg
.uid
, ust_cmd
->reg_msg
.gid
,
1508 ust_cmd
->sock
, ust_cmd
->reg_msg
.name
,
1509 ust_cmd
->reg_msg
.major
, ust_cmd
->reg_msg
.minor
);
1511 if (ust_cmd
->reg_msg
.type
== USTCTL_SOCKET_CMD
) {
1512 wait_node
= zmalloc(sizeof(*wait_node
));
1514 PERROR("zmalloc wait_node dispatch");
1515 ret
= close(ust_cmd
->sock
);
1517 PERROR("close ust sock dispatch %d", ust_cmd
->sock
);
1519 lttng_fd_put(LTTNG_FD_APPS
, 1);
1523 CDS_INIT_LIST_HEAD(&wait_node
->head
);
1525 /* Create application object if socket is CMD. */
1526 wait_node
->app
= ust_app_create(&ust_cmd
->reg_msg
,
1528 if (!wait_node
->app
) {
1529 ret
= close(ust_cmd
->sock
);
1531 PERROR("close ust sock dispatch %d", ust_cmd
->sock
);
1533 lttng_fd_put(LTTNG_FD_APPS
, 1);
1539 * Add application to the wait queue so we can set the notify
1540 * socket before putting this object in the global ht.
1542 cds_list_add(&wait_node
->head
, &wait_queue
.head
);
1547 * We have to continue here since we don't have the notify
1548 * socket and the application MUST be added to the hash table
1549 * only at that moment.
1554 * Look for the application in the local wait queue and set the
1555 * notify socket if found.
1557 cds_list_for_each_entry_safe(wait_node
, tmp_wait_node
,
1558 &wait_queue
.head
, head
) {
1559 health_code_update();
1560 if (wait_node
->app
->pid
== ust_cmd
->reg_msg
.pid
) {
1561 wait_node
->app
->notify_sock
= ust_cmd
->sock
;
1562 cds_list_del(&wait_node
->head
);
1564 app
= wait_node
->app
;
1566 DBG3("UST app notify socket %d is set", ust_cmd
->sock
);
1572 * With no application at this stage the received socket is
1573 * basically useless so close it before we free the cmd data
1574 * structure for good.
1577 ret
= close(ust_cmd
->sock
);
1579 PERROR("close ust sock dispatch %d", ust_cmd
->sock
);
1581 lttng_fd_put(LTTNG_FD_APPS
, 1);
1588 * @session_lock_list
1590 * Lock the global session list so from the register up to the
1591 * registration done message, no thread can see the application
1592 * and change its state.
1594 session_lock_list();
1598 * Add application to the global hash table. This needs to be
1599 * done before the update to the UST registry can locate the
1604 /* Set app version. This call will print an error if needed. */
1605 (void) ust_app_version(app
);
1607 /* Send notify socket through the notify pipe. */
1608 ret
= send_socket_to_thread(apps_cmd_notify_pipe
[1],
1612 session_unlock_list();
1614 * No notify thread, stop the UST tracing. However, this is
1615 * not an internal error of the this thread thus setting
1616 * the health error code to a normal exit.
1623 * Update newly registered application with the tracing
1624 * registry info already enabled information.
1626 update_ust_app(app
->sock
);
1629 * Don't care about return value. Let the manage apps threads
1630 * handle app unregistration upon socket close.
1632 (void) ust_app_register_done(app
);
1635 * Even if the application socket has been closed, send the app
1636 * to the thread and unregistration will take place at that
1639 ret
= send_socket_to_thread(apps_cmd_pipe
[1], app
->sock
);
1642 session_unlock_list();
1644 * No apps. thread, stop the UST tracing. However, this is
1645 * not an internal error of the this thread thus setting
1646 * the health error code to a normal exit.
1653 session_unlock_list();
1655 } while (node
!= NULL
);
1657 health_poll_entry();
1658 /* Futex wait on queue. Blocking call on futex() */
1659 futex_nto1_wait(&ust_cmd_queue
.futex
);
1662 /* Normal exit, no error */
1666 /* Clean up wait queue. */
1667 cds_list_for_each_entry_safe(wait_node
, tmp_wait_node
,
1668 &wait_queue
.head
, head
) {
1669 cds_list_del(&wait_node
->head
);
1674 /* Empty command queue. */
1676 /* Dequeue command for registration */
1677 node
= cds_wfcq_dequeue_blocking(&ust_cmd_queue
.head
, &ust_cmd_queue
.tail
);
1681 ust_cmd
= caa_container_of(node
, struct ust_command
, node
);
1682 ret
= close(ust_cmd
->sock
);
1684 PERROR("close ust sock exit dispatch %d", ust_cmd
->sock
);
1686 lttng_fd_put(LTTNG_FD_APPS
, 1);
1691 DBG("Dispatch thread dying");
1694 ERR("Health error occurred in %s", __func__
);
1696 health_unregister(health_sessiond
);
1697 rcu_unregister_thread();
1702 * This thread manages application registration.
1704 static void *thread_registration_apps(void *data
)
1706 int sock
= -1, i
, ret
, pollfd
, err
= -1;
1707 uint32_t revents
, nb_fd
;
1708 struct lttng_poll_event events
;
1710 * Get allocated in this thread, enqueued to a global queue, dequeued and
1711 * freed in the manage apps thread.
1713 struct ust_command
*ust_cmd
= NULL
;
1715 DBG("[thread] Manage application registration started");
1717 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_APP_REG
);
1719 if (testpoint(sessiond_thread_registration_apps
)) {
1720 goto error_testpoint
;
1723 ret
= lttcomm_listen_unix_sock(apps_sock
);
1729 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
1730 * more will be added to this poll set.
1732 ret
= sessiond_set_thread_pollset(&events
, 2);
1734 goto error_create_poll
;
1737 /* Add the application registration socket */
1738 ret
= lttng_poll_add(&events
, apps_sock
, LPOLLIN
| LPOLLRDHUP
);
1740 goto error_poll_add
;
1743 /* Notify all applications to register */
1744 ret
= notify_ust_apps(1);
1746 ERR("Failed to notify applications or create the wait shared memory.\n"
1747 "Execution continues but there might be problem for already\n"
1748 "running applications that wishes to register.");
1752 DBG("Accepting application registration");
1754 /* Inifinite blocking call, waiting for transmission */
1756 health_poll_entry();
1757 ret
= lttng_poll_wait(&events
, -1);
1761 * Restart interrupted system call.
1763 if (errno
== EINTR
) {
1771 for (i
= 0; i
< nb_fd
; i
++) {
1772 health_code_update();
1774 /* Fetch once the poll data */
1775 revents
= LTTNG_POLL_GETEV(&events
, i
);
1776 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
1779 /* No activity for this FD (poll implementation). */
1783 /* Thread quit pipe has been closed. Killing thread. */
1784 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
1790 /* Event on the registration socket */
1791 if (pollfd
== apps_sock
) {
1792 if (revents
& LPOLLIN
) {
1793 sock
= lttcomm_accept_unix_sock(apps_sock
);
1799 * Set socket timeout for both receiving and ending.
1800 * app_socket_timeout is in seconds, whereas
1801 * lttcomm_setsockopt_rcv_timeout and
1802 * lttcomm_setsockopt_snd_timeout expect msec as
1805 if (config
.app_socket_timeout
>= 0) {
1806 (void) lttcomm_setsockopt_rcv_timeout(sock
,
1807 config
.app_socket_timeout
* 1000);
1808 (void) lttcomm_setsockopt_snd_timeout(sock
,
1809 config
.app_socket_timeout
* 1000);
1813 * Set the CLOEXEC flag. Return code is useless because
1814 * either way, the show must go on.
1816 (void) utils_set_fd_cloexec(sock
);
1818 /* Create UST registration command for enqueuing */
1819 ust_cmd
= zmalloc(sizeof(struct ust_command
));
1820 if (ust_cmd
== NULL
) {
1821 PERROR("ust command zmalloc");
1830 * Using message-based transmissions to ensure we don't
1831 * have to deal with partially received messages.
1833 ret
= lttng_fd_get(LTTNG_FD_APPS
, 1);
1835 ERR("Exhausted file descriptors allowed for applications.");
1845 health_code_update();
1846 ret
= ust_app_recv_registration(sock
, &ust_cmd
->reg_msg
);
1849 /* Close socket of the application. */
1854 lttng_fd_put(LTTNG_FD_APPS
, 1);
1858 health_code_update();
1860 ust_cmd
->sock
= sock
;
1863 DBG("UST registration received with pid:%d ppid:%d uid:%d"
1864 " gid:%d sock:%d name:%s (version %d.%d)",
1865 ust_cmd
->reg_msg
.pid
, ust_cmd
->reg_msg
.ppid
,
1866 ust_cmd
->reg_msg
.uid
, ust_cmd
->reg_msg
.gid
,
1867 ust_cmd
->sock
, ust_cmd
->reg_msg
.name
,
1868 ust_cmd
->reg_msg
.major
, ust_cmd
->reg_msg
.minor
);
1871 * Lock free enqueue the registration request. The red pill
1872 * has been taken! This apps will be part of the *system*.
1874 cds_wfcq_enqueue(&ust_cmd_queue
.head
, &ust_cmd_queue
.tail
, &ust_cmd
->node
);
1877 * Wake the registration queue futex. Implicit memory
1878 * barrier with the exchange in cds_wfcq_enqueue.
1880 futex_nto1_wake(&ust_cmd_queue
.futex
);
1881 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
1882 ERR("Register apps socket poll error");
1885 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
1894 /* Notify that the registration thread is gone */
1897 if (apps_sock
>= 0) {
1898 ret
= close(apps_sock
);
1908 lttng_fd_put(LTTNG_FD_APPS
, 1);
1910 unlink(config
.apps_unix_sock_path
.value
);
1913 lttng_poll_clean(&events
);
1917 DBG("UST Registration thread cleanup complete");
1920 ERR("Health error occurred in %s", __func__
);
1922 health_unregister(health_sessiond
);
1928 * Setup necessary data for kernel tracer action.
1930 static int init_kernel_tracer(void)
1934 /* Modprobe lttng kernel modules */
1935 ret
= modprobe_lttng_control();
1940 /* Open debugfs lttng */
1941 kernel_tracer_fd
= open(module_proc_lttng
, O_RDWR
);
1942 if (kernel_tracer_fd
< 0) {
1943 DBG("Failed to open %s", module_proc_lttng
);
1947 /* Validate kernel version */
1948 ret
= kernel_validate_version(kernel_tracer_fd
, &kernel_tracer_version
,
1949 &kernel_tracer_abi_version
);
1954 ret
= modprobe_lttng_data();
1959 ret
= kernel_supports_ring_buffer_snapshot_sample_positions(
1966 WARN("Kernel tracer does not support buffer monitoring. "
1967 "The monitoring timer of channels in the kernel domain "
1968 "will be set to 0 (disabled).");
1971 DBG("Kernel tracer fd %d", kernel_tracer_fd
);
1975 modprobe_remove_lttng_control();
1976 ret
= close(kernel_tracer_fd
);
1980 kernel_tracer_fd
= -1;
1981 return LTTNG_ERR_KERN_VERSION
;
1984 ret
= close(kernel_tracer_fd
);
1990 modprobe_remove_lttng_control();
1993 WARN("No kernel tracer available");
1994 kernel_tracer_fd
= -1;
1996 return LTTNG_ERR_NEED_ROOT_SESSIOND
;
1998 return LTTNG_ERR_KERN_NA
;
/*
 * Compare two possibly-NULL C strings for exact equality.
 *
 * Return 1 when both pointers are non-NULL and the strings are identical,
 * 0 otherwise (including when either pointer, or both, is NULL).
 */
static int string_match(const char *str1, const char *str2)
{
	/* strcmp() must never see a NULL pointer, so guard both operands first. */
	return str1 && str2 && strcmp(str1, str2) == 0;
}
2008 * Take an option from the getopt output and set it in the right variable to be
2011 * Return 0 on success else a negative value.
2013 static int set_option(int opt
, const char *arg
, const char *optname
)
2017 if (string_match(optname
, "client-sock") || opt
== 'c') {
2018 if (!arg
|| *arg
== '\0') {
2022 if (lttng_is_setuid_setgid()) {
2023 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2024 "-c, --client-sock");
2026 config_string_set(&config
.client_unix_sock_path
,
2028 if (!config
.client_unix_sock_path
.value
) {
2033 } else if (string_match(optname
, "apps-sock") || opt
== 'a') {
2034 if (!arg
|| *arg
== '\0') {
2038 if (lttng_is_setuid_setgid()) {
2039 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2042 config_string_set(&config
.apps_unix_sock_path
,
2044 if (!config
.apps_unix_sock_path
.value
) {
2049 } else if (string_match(optname
, "daemonize") || opt
== 'd') {
2050 config
.daemonize
= true;
2051 } else if (string_match(optname
, "background") || opt
== 'b') {
2052 config
.background
= true;
2053 } else if (string_match(optname
, "group") || opt
== 'g') {
2054 if (!arg
|| *arg
== '\0') {
2058 if (lttng_is_setuid_setgid()) {
2059 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2062 config_string_set(&config
.tracing_group_name
,
2064 if (!config
.tracing_group_name
.value
) {
2069 } else if (string_match(optname
, "help") || opt
== 'h') {
2070 ret
= utils_show_help(8, "lttng-sessiond", help_msg
);
2072 ERR("Cannot show --help for `lttng-sessiond`");
2075 exit(ret
? EXIT_FAILURE
: EXIT_SUCCESS
);
2076 } else if (string_match(optname
, "version") || opt
== 'V') {
2077 fprintf(stdout
, "%s\n", VERSION
);
2079 } else if (string_match(optname
, "sig-parent") || opt
== 'S') {
2080 config
.sig_parent
= true;
2081 } else if (string_match(optname
, "kconsumerd-err-sock")) {
2082 if (!arg
|| *arg
== '\0') {
2086 if (lttng_is_setuid_setgid()) {
2087 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2088 "--kconsumerd-err-sock");
2090 config_string_set(&config
.kconsumerd_err_unix_sock_path
,
2092 if (!config
.kconsumerd_err_unix_sock_path
.value
) {
2097 } else if (string_match(optname
, "kconsumerd-cmd-sock")) {
2098 if (!arg
|| *arg
== '\0') {
2102 if (lttng_is_setuid_setgid()) {
2103 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2104 "--kconsumerd-cmd-sock");
2106 config_string_set(&config
.kconsumerd_cmd_unix_sock_path
,
2108 if (!config
.kconsumerd_cmd_unix_sock_path
.value
) {
2113 } else if (string_match(optname
, "ustconsumerd64-err-sock")) {
2114 if (!arg
|| *arg
== '\0') {
2118 if (lttng_is_setuid_setgid()) {
2119 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2120 "--ustconsumerd64-err-sock");
2122 config_string_set(&config
.consumerd64_err_unix_sock_path
,
2124 if (!config
.consumerd64_err_unix_sock_path
.value
) {
2129 } else if (string_match(optname
, "ustconsumerd64-cmd-sock")) {
2130 if (!arg
|| *arg
== '\0') {
2134 if (lttng_is_setuid_setgid()) {
2135 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2136 "--ustconsumerd64-cmd-sock");
2138 config_string_set(&config
.consumerd64_cmd_unix_sock_path
,
2140 if (!config
.consumerd64_cmd_unix_sock_path
.value
) {
2145 } else if (string_match(optname
, "ustconsumerd32-err-sock")) {
2146 if (!arg
|| *arg
== '\0') {
2150 if (lttng_is_setuid_setgid()) {
2151 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2152 "--ustconsumerd32-err-sock");
2154 config_string_set(&config
.consumerd32_err_unix_sock_path
,
2156 if (!config
.consumerd32_err_unix_sock_path
.value
) {
2161 } else if (string_match(optname
, "ustconsumerd32-cmd-sock")) {
2162 if (!arg
|| *arg
== '\0') {
2166 if (lttng_is_setuid_setgid()) {
2167 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2168 "--ustconsumerd32-cmd-sock");
2170 config_string_set(&config
.consumerd32_cmd_unix_sock_path
,
2172 if (!config
.consumerd32_cmd_unix_sock_path
.value
) {
2177 } else if (string_match(optname
, "no-kernel")) {
2178 config
.no_kernel
= true;
2179 } else if (string_match(optname
, "quiet") || opt
== 'q') {
2180 config
.quiet
= true;
2181 } else if (string_match(optname
, "verbose") || opt
== 'v') {
2182 /* Verbose level can increase using multiple -v */
2184 /* Value obtained from config file */
2185 config
.verbose
= config_parse_value(arg
);
2187 /* -v used on command line */
2190 /* Clamp value to [0, 3] */
2191 config
.verbose
= config
.verbose
< 0 ? 0 :
2192 (config
.verbose
<= 3 ? config
.verbose
: 3);
2193 } else if (string_match(optname
, "verbose-consumer")) {
2195 config
.verbose_consumer
= config_parse_value(arg
);
2197 config
.verbose_consumer
++;
2199 } else if (string_match(optname
, "consumerd32-path")) {
2200 if (!arg
|| *arg
== '\0') {
2204 if (lttng_is_setuid_setgid()) {
2205 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2206 "--consumerd32-path");
2208 config_string_set(&config
.consumerd32_bin_path
,
2210 if (!config
.consumerd32_bin_path
.value
) {
2215 } else if (string_match(optname
, "consumerd32-libdir")) {
2216 if (!arg
|| *arg
== '\0') {
2220 if (lttng_is_setuid_setgid()) {
2221 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2222 "--consumerd32-libdir");
2224 config_string_set(&config
.consumerd32_lib_dir
,
2226 if (!config
.consumerd32_lib_dir
.value
) {
2231 } else if (string_match(optname
, "consumerd64-path")) {
2232 if (!arg
|| *arg
== '\0') {
2236 if (lttng_is_setuid_setgid()) {
2237 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2238 "--consumerd64-path");
2240 config_string_set(&config
.consumerd64_bin_path
,
2242 if (!config
.consumerd64_bin_path
.value
) {
2247 } else if (string_match(optname
, "consumerd64-libdir")) {
2248 if (!arg
|| *arg
== '\0') {
2252 if (lttng_is_setuid_setgid()) {
2253 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2254 "--consumerd64-libdir");
2256 config_string_set(&config
.consumerd64_lib_dir
,
2258 if (!config
.consumerd64_lib_dir
.value
) {
2263 } else if (string_match(optname
, "pidfile") || opt
== 'p') {
2264 if (!arg
|| *arg
== '\0') {
2268 if (lttng_is_setuid_setgid()) {
2269 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2272 config_string_set(&config
.pid_file_path
, strdup(arg
));
2273 if (!config
.pid_file_path
.value
) {
2278 } else if (string_match(optname
, "agent-tcp-port")) {
2279 if (!arg
|| *arg
== '\0') {
2283 if (lttng_is_setuid_setgid()) {
2284 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2285 "--agent-tcp-port");
2290 v
= strtoul(arg
, NULL
, 0);
2291 if (errno
!= 0 || !isdigit(arg
[0])) {
2292 ERR("Wrong value in --agent-tcp-port parameter: %s", arg
);
2295 if (v
== 0 || v
>= 65535) {
2296 ERR("Port overflow in --agent-tcp-port parameter: %s", arg
);
2299 config
.agent_tcp_port
.begin
= config
.agent_tcp_port
.end
= (int) v
;
2300 DBG3("Agent TCP port set to non default: %i", (int) v
);
2302 } else if (string_match(optname
, "load") || opt
== 'l') {
2303 if (!arg
|| *arg
== '\0') {
2307 if (lttng_is_setuid_setgid()) {
2308 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2311 config_string_set(&config
.load_session_path
, strdup(arg
));
2312 if (!config
.load_session_path
.value
) {
2317 } else if (string_match(optname
, "kmod-probes")) {
2318 if (!arg
|| *arg
== '\0') {
2322 if (lttng_is_setuid_setgid()) {
2323 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2326 config_string_set(&config
.kmod_probes_list
, strdup(arg
));
2327 if (!config
.kmod_probes_list
.value
) {
2332 } else if (string_match(optname
, "extra-kmod-probes")) {
2333 if (!arg
|| *arg
== '\0') {
2337 if (lttng_is_setuid_setgid()) {
2338 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2339 "--extra-kmod-probes");
2341 config_string_set(&config
.kmod_extra_probes_list
,
2343 if (!config
.kmod_extra_probes_list
.value
) {
2348 } else if (string_match(optname
, "config") || opt
== 'f') {
2349 /* This is handled in set_options() thus silent skip. */
2352 /* Unknown option or other error.
2353 * Error is printed by getopt, just return */
2358 if (ret
== -EINVAL
) {
2359 const char *opt_name
= "unknown";
2362 for (i
= 0; i
< sizeof(long_options
) / sizeof(struct option
);
2364 if (opt
== long_options
[i
].val
) {
2365 opt_name
= long_options
[i
].name
;
2370 WARN("Invalid argument provided for option \"%s\", using default value.",
2378 * config_entry_handler_cb used to handle options read from a config file.
2379 * See config_entry_handler_cb comment in common/config/session-config.h for the
2380 * return value conventions.
2382 static int config_entry_handler(const struct config_entry
*entry
, void *unused
)
2386 if (!entry
|| !entry
->name
|| !entry
->value
) {
2391 /* Check if the option is to be ignored */
2392 for (i
= 0; i
< sizeof(config_ignore_options
) / sizeof(char *); i
++) {
2393 if (!strcmp(entry
->name
, config_ignore_options
[i
])) {
2398 for (i
= 0; i
< (sizeof(long_options
) / sizeof(struct option
)) - 1;
2401 /* Ignore if not fully matched. */
2402 if (strcmp(entry
->name
, long_options
[i
].name
)) {
2407 * If the option takes no argument on the command line, we have to
2408 * check if the value is "true". We support non-zero numeric values,
2411 if (!long_options
[i
].has_arg
) {
2412 ret
= config_parse_value(entry
->value
);
2415 WARN("Invalid configuration value \"%s\" for option %s",
2416 entry
->value
, entry
->name
);
2418 /* False, skip boolean config option. */
2423 ret
= set_option(long_options
[i
].val
, entry
->value
, entry
->name
);
2427 WARN("Unrecognized option \"%s\" in daemon configuration file.", entry
->name
);
2434 * daemon configuration loading and argument parsing
2436 static int set_options(int argc
, char **argv
)
2438 int ret
= 0, c
= 0, option_index
= 0;
2439 int orig_optopt
= optopt
, orig_optind
= optind
;
2441 const char *config_path
= NULL
;
2443 optstring
= utils_generate_optstring(long_options
,
2444 sizeof(long_options
) / sizeof(struct option
));
2450 /* Check for the --config option */
2451 while ((c
= getopt_long(argc
, argv
, optstring
, long_options
,
2452 &option_index
)) != -1) {
2456 } else if (c
!= 'f') {
2457 /* if not equal to --config option. */
2461 if (lttng_is_setuid_setgid()) {
2462 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2465 config_path
= utils_expand_path(optarg
);
2467 ERR("Failed to resolve path: %s", optarg
);
2472 ret
= config_get_section_entries(config_path
, config_section_name
,
2473 config_entry_handler
, NULL
);
2476 ERR("Invalid configuration option at line %i", ret
);
2482 /* Reset getopt's global state */
2483 optopt
= orig_optopt
;
2484 optind
= orig_optind
;
2488 * getopt_long() will not set option_index if it encounters a
2491 c
= getopt_long(argc
, argv
, optstring
, long_options
,
2498 * Pass NULL as the long option name if popt left the index
2501 ret
= set_option(c
, optarg
,
2502 option_index
< 0 ? NULL
:
2503 long_options
[option_index
].name
);
2515 * Creates the application socket.
2517 static int init_daemon_socket(void)
2522 old_umask
= umask(0);
2524 /* Create the application unix socket */
2525 apps_sock
= lttcomm_create_unix_sock(config
.apps_unix_sock_path
.value
);
2526 if (apps_sock
< 0) {
2527 ERR("Create unix sock failed: %s", config
.apps_unix_sock_path
.value
);
2532 /* Set the cloexec flag */
2533 ret
= utils_set_fd_cloexec(apps_sock
);
2535 ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
2536 "Continuing but note that the consumer daemon will have a "
2537 "reference to this socket on exec()", apps_sock
);
2540 /* File permission MUST be 666 */
2541 ret
= chmod(config
.apps_unix_sock_path
.value
,
2542 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
| S_IROTH
| S_IWOTH
);
2544 ERR("Set file permissions failed: %s", config
.apps_unix_sock_path
.value
);
2549 DBG3("Session daemon application socket %d created",
2558 * Create lockfile using the rundir and return its fd.
2560 static int create_lockfile(void)
2562 return utils_create_lock_file(config
.lock_file_path
.value
);
2566 * Check if the global socket is available, and if a daemon is answering at the
2567 * other side. If yes, error is returned.
2569 * Also attempts to create and hold the lock file.
2571 static int check_existing_daemon(void)
2575 /* Is there anybody out there ? */
2576 if (lttng_session_daemon_alive()) {
2581 lockfile_fd
= create_lockfile();
2582 if (lockfile_fd
< 0) {
2590 static void sessiond_cleanup_lock_file(void)
2595 * Cleanup lock file by deleting it and finaly closing it which will
2596 * release the file system lock.
2598 if (lockfile_fd
>= 0) {
2599 ret
= remove(config
.lock_file_path
.value
);
2601 PERROR("remove lock file");
2603 ret
= close(lockfile_fd
);
2605 PERROR("close lock file");
2611 * Set the tracing group gid onto the client socket.
2613 * Race window between mkdir and chown is OK because we are going from more
2614 * permissive (root.root) to less permissive (root.tracing).
2616 static int set_permissions(char *rundir
)
2621 gid
= utils_get_group_id(config
.tracing_group_name
.value
);
2623 /* Set lttng run dir */
2624 ret
= chown(rundir
, 0, gid
);
2626 ERR("Unable to set group on %s", rundir
);
2631 * Ensure all applications and tracing group can search the run
2632 * dir. Allow everyone to read the directory, since it does not
2633 * buy us anything to hide its content.
2635 ret
= chmod(rundir
, S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
);
2637 ERR("Unable to set permissions on %s", rundir
);
2641 /* lttng client socket path */
2642 ret
= chown(config
.client_unix_sock_path
.value
, 0, gid
);
2644 ERR("Unable to set group on %s", config
.client_unix_sock_path
.value
);
2648 /* kconsumer error socket path */
2649 ret
= chown(kconsumer_data
.err_unix_sock_path
, 0, 0);
2651 ERR("Unable to set group on %s", kconsumer_data
.err_unix_sock_path
);
2655 /* 64-bit ustconsumer error socket path */
2656 ret
= chown(ustconsumer64_data
.err_unix_sock_path
, 0, 0);
2658 ERR("Unable to set group on %s", ustconsumer64_data
.err_unix_sock_path
);
2662 /* 32-bit ustconsumer compat32 error socket path */
2663 ret
= chown(ustconsumer32_data
.err_unix_sock_path
, 0, 0);
2665 ERR("Unable to set group on %s", ustconsumer32_data
.err_unix_sock_path
);
2669 DBG("All permissions are set");
2675 * Create the lttng run directory needed for all global sockets and pipe.
2677 static int create_lttng_rundir(void)
2681 DBG3("Creating LTTng run directory: %s", config
.rundir
.value
);
2683 ret
= mkdir(config
.rundir
.value
, S_IRWXU
);
2685 if (errno
!= EEXIST
) {
2686 ERR("Unable to create %s", config
.rundir
.value
);
2698 * Setup sockets and directory needed by the consumerds' communication with the
2701 static int set_consumer_sockets(struct consumer_data
*consumer_data
)
2706 switch (consumer_data
->type
) {
2707 case LTTNG_CONSUMER_KERNEL
:
2708 path
= config
.kconsumerd_path
.value
;
2710 case LTTNG_CONSUMER64_UST
:
2711 path
= config
.consumerd64_path
.value
;
2713 case LTTNG_CONSUMER32_UST
:
2714 path
= config
.consumerd32_path
.value
;
2717 ERR("Consumer type unknown");
2723 DBG2("Creating consumer directory: %s", path
);
2725 ret
= mkdir(path
, S_IRWXU
| S_IRGRP
| S_IXGRP
);
2726 if (ret
< 0 && errno
!= EEXIST
) {
2728 ERR("Failed to create %s", path
);
2732 ret
= chown(path
, 0, utils_get_group_id(config
.tracing_group_name
.value
));
2734 ERR("Unable to set group on %s", path
);
2740 /* Create the consumerd error unix socket */
2741 consumer_data
->err_sock
=
2742 lttcomm_create_unix_sock(consumer_data
->err_unix_sock_path
);
2743 if (consumer_data
->err_sock
< 0) {
2744 ERR("Create unix sock failed: %s", consumer_data
->err_unix_sock_path
);
2750 * Set the CLOEXEC flag. Return code is useless because either way, the
2753 ret
= utils_set_fd_cloexec(consumer_data
->err_sock
);
2755 PERROR("utils_set_fd_cloexec");
2756 /* continue anyway */
2759 /* File permission MUST be 660 */
2760 ret
= chmod(consumer_data
->err_unix_sock_path
,
2761 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
);
2763 ERR("Set file permissions failed: %s", consumer_data
->err_unix_sock_path
);
2773 * Signal handler for the daemon
2775 * Simply stop all worker threads, letting main() return gracefully after
2776 * joining all threads and calling cleanup().
2778 static void sighandler(int sig
)
2782 DBG("SIGINT caught");
2786 DBG("SIGTERM caught");
2790 CMM_STORE_SHARED(recv_child_signal
, 1);
2798 * Set up signal handlers for:
2799 * SIGINT, SIGTERM, SIGUSR1, SIGPIPE
2801 static int set_signal_handler(void)
2804 struct sigaction sa
;
2807 if ((ret
= sigemptyset(&sigset
)) < 0) {
2808 PERROR("sigemptyset");
2812 sa
.sa_mask
= sigset
;
2815 sa
.sa_handler
= sighandler
;
2816 if ((ret
= sigaction(SIGTERM
, &sa
, NULL
)) < 0) {
2817 PERROR("sigaction");
2821 if ((ret
= sigaction(SIGINT
, &sa
, NULL
)) < 0) {
2822 PERROR("sigaction");
2826 if ((ret
= sigaction(SIGUSR1
, &sa
, NULL
)) < 0) {
2827 PERROR("sigaction");
2831 sa
.sa_handler
= SIG_IGN
;
2832 if ((ret
= sigaction(SIGPIPE
, &sa
, NULL
)) < 0) {
2833 PERROR("sigaction");
2837 DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
2843 * Raise the open files limit to 65535. This daemon can open a large number of
2844 * file descriptors in order to consume multiple kernel traces.
2846 static void set_ulimit(void)
2851 /* The kernel does not allow an infinite limit for open files */
2852 lim
.rlim_cur
= 65535;
2853 lim
.rlim_max
= 65535;
2855 ret
= setrlimit(RLIMIT_NOFILE
, &lim
);
2857 PERROR("failed to set open files limit");
2861 static int write_pidfile(void)
2863 return utils_create_pid_file(getpid(), config
.pid_file_path
.value
);
2866 static int set_clock_plugin_env(void)
2869 char *env_value
= NULL
;
2871 if (!config
.lttng_ust_clock_plugin
.value
) {
2875 ret
= asprintf(&env_value
, "LTTNG_UST_CLOCK_PLUGIN=%s",
2876 config
.lttng_ust_clock_plugin
.value
);
2882 ret
= putenv(env_value
);
2885 PERROR("putenv of LTTNG_UST_CLOCK_PLUGIN");
2889 DBG("Updated LTTNG_UST_CLOCK_PLUGIN environment variable to \"%s\"",
2890 config
.lttng_ust_clock_plugin
.value
);
2895 static void destroy_all_sessions_and_wait(void)
2897 struct ltt_session
*session
, *tmp
;
2898 struct ltt_session_list
*session_list
;
2900 session_list
= session_get_list();
2901 DBG("Initiating destruction of all sessions");
2903 if (!session_list
) {
2907 session_lock_list();
2908 /* Initiate the destruction of all sessions. */
2909 cds_list_for_each_entry_safe(session
, tmp
,
2910 &session_list
->head
, list
) {
2911 if (!session_get(session
)) {
2915 session_lock(session
);
2916 if (session
->destroyed
) {
2917 goto unlock_session
;
2919 (void) cmd_destroy_session(session
,
2920 notification_thread_handle
);
2922 session_unlock(session
);
2923 session_put(session
);
2925 session_unlock_list();
2927 /* Wait for the destruction of all sessions to complete. */
2928 DBG("Waiting for the destruction of all sessions to complete");
2929 session_list_wait_empty();
2930 DBG("Destruction of all sessions completed");
2936 int main(int argc
, char **argv
)
2938 int ret
= 0, retval
= 0;
2940 const char *env_app_timeout
;
2941 struct lttng_pipe
*ust32_channel_monitor_pipe
= NULL
,
2942 *ust64_channel_monitor_pipe
= NULL
,
2943 *kernel_channel_monitor_pipe
= NULL
;
2944 struct lttng_thread
*ht_cleanup_thread
= NULL
;
2945 struct timer_thread_parameters timer_thread_parameters
;
2946 /* Rotation thread handle. */
2947 struct rotation_thread_handle
*rotation_thread_handle
= NULL
;
2948 /* Queue of rotation jobs populated by the sessiond-timer. */
2949 struct rotation_thread_timer_queue
*rotation_timer_queue
= NULL
;
2950 struct lttng_thread
*client_thread
= NULL
;
2952 init_kernel_workarounds();
2954 rcu_register_thread();
2956 if (set_signal_handler()) {
2958 goto exit_set_signal_handler
;
2961 if (timer_signal_init()) {
2963 goto exit_set_signal_handler
;
2966 page_size
= sysconf(_SC_PAGESIZE
);
2967 if (page_size
< 0) {
2968 PERROR("sysconf _SC_PAGESIZE");
2969 page_size
= LONG_MAX
;
2970 WARN("Fallback page size to %ld", page_size
);
2973 ret
= sessiond_config_init(&config
);
2976 goto exit_set_signal_handler
;
2980 * Init config from environment variables.
2981 * Command line option override env configuration per-doc. Do env first.
2983 sessiond_config_apply_env_config(&config
);
2986 * Parse arguments and load the daemon configuration file.
2988 * We have an exit_options exit path to free memory reserved by
2989 * set_options. This is needed because the rest of sessiond_cleanup()
2990 * depends on ht_cleanup_thread, which depends on lttng_daemonize, which
2991 * depends on set_options.
2994 if (set_options(argc
, argv
)) {
3000 * Resolve all paths received as arguments, configuration option, or
3001 * through environment variable as absolute paths. This is necessary
3002 * since daemonizing causes the sessiond's current working directory
3005 ret
= sessiond_config_resolve_paths(&config
);
3011 lttng_opt_verbose
= config
.verbose
;
3012 lttng_opt_quiet
= config
.quiet
;
3013 kconsumer_data
.err_unix_sock_path
=
3014 config
.kconsumerd_err_unix_sock_path
.value
;
3015 kconsumer_data
.cmd_unix_sock_path
=
3016 config
.kconsumerd_cmd_unix_sock_path
.value
;
3017 ustconsumer32_data
.err_unix_sock_path
=
3018 config
.consumerd32_err_unix_sock_path
.value
;
3019 ustconsumer32_data
.cmd_unix_sock_path
=
3020 config
.consumerd32_cmd_unix_sock_path
.value
;
3021 ustconsumer64_data
.err_unix_sock_path
=
3022 config
.consumerd64_err_unix_sock_path
.value
;
3023 ustconsumer64_data
.cmd_unix_sock_path
=
3024 config
.consumerd64_cmd_unix_sock_path
.value
;
3025 set_clock_plugin_env();
3027 sessiond_config_log(&config
);
3029 if (create_lttng_rundir()) {
3034 /* Abort launch if a session daemon is already running. */
3035 if (check_existing_daemon()) {
3036 ERR("A session daemon is already running.");
3042 if (config
.daemonize
|| config
.background
) {
3045 ret
= lttng_daemonize(&child_ppid
, &recv_child_signal
,
3046 !config
.background
);
3053 * We are in the child. Make sure all other file descriptors are
3054 * closed, in case we are called with more opened file
3055 * descriptors than the standard ones and the lock file.
3057 for (i
= 3; i
< sysconf(_SC_OPEN_MAX
); i
++) {
3058 if (i
== lockfile_fd
) {
3065 if (run_as_create_worker(argv
[0]) < 0) {
3066 goto exit_create_run_as_worker_cleanup
;
3070 * Starting from here, we can create threads. This needs to be after
3071 * lttng_daemonize due to RCU.
3075 * Initialize the health check subsystem. This call should set the
3076 * appropriate time values.
3078 health_sessiond
= health_app_create(NR_HEALTH_SESSIOND_TYPES
);
3079 if (!health_sessiond
) {
3080 PERROR("health_app_create error");
3082 goto exit_health_sessiond_cleanup
;
3085 /* Create thread to clean up RCU hash tables */
3086 ht_cleanup_thread
= launch_ht_cleanup_thread();
3087 if (!ht_cleanup_thread
) {
3089 goto exit_ht_cleanup
;
3092 /* Create thread quit pipe */
3093 if (sessiond_init_thread_quit_pipe()) {
3095 goto exit_init_data
;
3098 /* Check if daemon is UID = 0 */
3099 is_root
= !getuid();
3101 /* Create global run dir with root access */
3103 kernel_channel_monitor_pipe
= lttng_pipe_open(0);
3104 if (!kernel_channel_monitor_pipe
) {
3105 ERR("Failed to create kernel consumer channel monitor pipe");
3107 goto exit_init_data
;
3109 kconsumer_data
.channel_monitor_pipe
=
3110 lttng_pipe_release_writefd(
3111 kernel_channel_monitor_pipe
);
3112 if (kconsumer_data
.channel_monitor_pipe
< 0) {
3114 goto exit_init_data
;
3118 /* Set consumer initial state */
3119 kernel_consumerd_state
= CONSUMER_STOPPED
;
3120 ust_consumerd_state
= CONSUMER_STOPPED
;
3122 ust32_channel_monitor_pipe
= lttng_pipe_open(0);
3123 if (!ust32_channel_monitor_pipe
) {
3124 ERR("Failed to create 32-bit user space consumer channel monitor pipe");
3126 goto exit_init_data
;
3128 ustconsumer32_data
.channel_monitor_pipe
= lttng_pipe_release_writefd(
3129 ust32_channel_monitor_pipe
);
3130 if (ustconsumer32_data
.channel_monitor_pipe
< 0) {
3132 goto exit_init_data
;
3136 * The rotation_thread_timer_queue structure is shared between the
3137 * sessiond timer thread and the rotation thread. The main thread keeps
3138 * its ownership and destroys it when both threads have been joined.
3140 rotation_timer_queue
= rotation_thread_timer_queue_create();
3141 if (!rotation_timer_queue
) {
3143 goto exit_init_data
;
3145 timer_thread_parameters
.rotation_thread_job_queue
=
3146 rotation_timer_queue
;
3148 ust64_channel_monitor_pipe
= lttng_pipe_open(0);
3149 if (!ust64_channel_monitor_pipe
) {
3150 ERR("Failed to create 64-bit user space consumer channel monitor pipe");
3152 goto exit_init_data
;
3154 ustconsumer64_data
.channel_monitor_pipe
= lttng_pipe_release_writefd(
3155 ust64_channel_monitor_pipe
);
3156 if (ustconsumer64_data
.channel_monitor_pipe
< 0) {
3158 goto exit_init_data
;
3162 * Init UST app hash table. Alloc hash table before this point since
3163 * cleanup() can get called after that point.
3165 if (ust_app_ht_alloc()) {
3166 ERR("Failed to allocate UST app hash table");
3168 goto exit_init_data
;
3172 * Initialize agent app hash table. We allocate the hash table here
3173 * since cleanup() can get called after this point.
3175 if (agent_app_ht_alloc()) {
3176 ERR("Failed to allocate Agent app hash table");
3178 goto exit_init_data
;
3182 * These actions must be executed as root. We do that *after* setting up
3183 * the sockets path because we MUST make the check for another daemon using
3184 * those paths *before* trying to set the kernel consumer sockets and init
3188 if (set_consumer_sockets(&kconsumer_data
)) {
3190 goto exit_init_data
;
3193 /* Setup kernel tracer */
3194 if (!config
.no_kernel
) {
3195 init_kernel_tracer();
3196 if (kernel_tracer_fd
>= 0) {
3197 ret
= syscall_init_table();
3199 ERR("Unable to populate syscall table. "
3200 "Syscall tracing won't work "
3201 "for this session daemon.");
3206 /* Set ulimit for open files */
3209 /* init lttng_fd tracking must be done after set_ulimit. */
3212 if (set_consumer_sockets(&ustconsumer64_data
)) {
3214 goto exit_init_data
;
3217 if (set_consumer_sockets(&ustconsumer32_data
)) {
3219 goto exit_init_data
;
3222 /* Setup the needed unix socket */
3223 if (init_daemon_socket()) {
3225 goto exit_init_data
;
3228 /* Set credentials to socket */
3229 if (is_root
&& set_permissions(config
.rundir
.value
)) {
3231 goto exit_init_data
;
3234 /* Get parent pid if -S, --sig-parent is specified. */
3235 if (config
.sig_parent
) {
3239 /* Setup the kernel pipe for waking up the kernel thread */
3240 if (is_root
&& !config
.no_kernel
) {
3241 if (utils_create_pipe_cloexec(kernel_poll_pipe
)) {
3243 goto exit_init_data
;
3247 /* Setup the thread apps communication pipe. */
3248 if (utils_create_pipe_cloexec(apps_cmd_pipe
)) {
3250 goto exit_init_data
;
3253 /* Setup the thread apps notify communication pipe. */
3254 if (utils_create_pipe_cloexec(apps_cmd_notify_pipe
)) {
3256 goto exit_init_data
;
3259 /* Initialize global buffer per UID and PID registry. */
3260 buffer_reg_init_uid_registry();
3261 buffer_reg_init_pid_registry();
3263 /* Init UST command queue. */
3264 cds_wfcq_init(&ust_cmd_queue
.head
, &ust_cmd_queue
.tail
);
3268 /* Check for the application socket timeout env variable. */
3269 env_app_timeout
= getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV
);
3270 if (env_app_timeout
) {
3271 config
.app_socket_timeout
= atoi(env_app_timeout
);
3273 config
.app_socket_timeout
= DEFAULT_APP_SOCKET_RW_TIMEOUT
;
3276 ret
= write_pidfile();
3278 ERR("Error in write_pidfile");
3280 goto exit_init_data
;
3283 /* Initialize communication library */
3285 /* Initialize TCP timeout values */
3286 lttcomm_inet_init();
3288 if (load_session_init_data(&load_info
) < 0) {
3290 goto exit_init_data
;
3292 load_info
->path
= config
.load_session_path
.value
;
3294 /* Create health-check thread. */
3295 if (!launch_health_management_thread()) {
3300 /* notification_thread_data acquires the pipes' read side. */
3301 notification_thread_handle
= notification_thread_handle_create(
3302 ust32_channel_monitor_pipe
,
3303 ust64_channel_monitor_pipe
,
3304 kernel_channel_monitor_pipe
);
3305 if (!notification_thread_handle
) {
3307 ERR("Failed to create notification thread shared data");
3308 goto exit_notification
;
3311 /* Create notification thread. */
3312 if (!launch_notification_thread(notification_thread_handle
)) {
3314 goto exit_notification
;
3317 /* Create timer thread. */
3318 if (!launch_timer_thread(&timer_thread_parameters
)) {
3320 goto exit_notification
;
3323 /* rotation_thread_data acquires the pipes' read side. */
3324 rotation_thread_handle
= rotation_thread_handle_create(
3325 rotation_timer_queue
,
3326 notification_thread_handle
);
3327 if (!rotation_thread_handle
) {
3329 ERR("Failed to create rotation thread shared data");
3334 /* Create rotation thread. */
3335 if (!launch_rotation_thread(rotation_thread_handle
)) {
3340 /* Create thread to manage the client socket */
3341 client_thread
= launch_client_thread();
3342 if (!client_thread
) {
3347 /* Create thread to dispatch registration */
3348 ret
= pthread_create(&dispatch_thread
, default_pthread_attr(),
3349 thread_dispatch_ust_registration
, (void *) NULL
);
3352 PERROR("pthread_create dispatch");
3358 /* Create thread to manage application registration. */
3359 ret
= pthread_create(®_apps_thread
, default_pthread_attr(),
3360 thread_registration_apps
, (void *) NULL
);
3363 PERROR("pthread_create registration");
3369 /* Create thread to manage application socket */
3370 ret
= pthread_create(&apps_thread
, default_pthread_attr(),
3371 thread_manage_apps
, (void *) NULL
);
3374 PERROR("pthread_create apps");
3380 /* Create thread to manage application notify socket */
3381 ret
= pthread_create(&apps_notify_thread
, default_pthread_attr(),
3382 ust_thread_manage_notify
, (void *) NULL
);
3385 PERROR("pthread_create notify");
3388 goto exit_apps_notify
;
3391 /* Create agent registration thread. */
3392 ret
= pthread_create(&agent_reg_thread
, default_pthread_attr(),
3393 agent_thread_manage_registration
, (void *) NULL
);
3396 PERROR("pthread_create agent");
3399 goto exit_agent_reg
;
3402 /* Don't start this thread if kernel tracing is not requested nor root */
3403 if (is_root
&& !config
.no_kernel
) {
3404 /* Create kernel thread to manage kernel event */
3405 ret
= pthread_create(&kernel_thread
, default_pthread_attr(),
3406 thread_manage_kernel
, (void *) NULL
);
3409 PERROR("pthread_create kernel");
3416 /* Create session loading thread. */
3417 ret
= pthread_create(&load_session_thread
, default_pthread_attr(),
3418 thread_load_session
, load_info
);
3421 PERROR("pthread_create load_session_thread");
3424 goto exit_load_session
;
3428 * This is where we start awaiting program completion (e.g. through
3429 * signal that asks threads to teardown).
3432 ret
= pthread_join(load_session_thread
, &status
);
3435 PERROR("pthread_join load_session_thread");
3439 /* Initiate teardown once activity occurs on the quit pipe. */
3440 sessiond_wait_for_quit_pipe(-1U);
3443 * Ensure that the client thread is no longer accepting new commands,
3444 * which could cause new sessions to be created.
3446 if (!lttng_thread_shutdown(client_thread
)) {
3447 ERR("Failed to shutdown the client thread, continuing teardown");
3448 lttng_thread_put(client_thread
);
3449 client_thread
= NULL
;
3452 destroy_all_sessions_and_wait();
3455 if (is_root
&& !config
.no_kernel
) {
3456 ret
= pthread_join(kernel_thread
, &status
);
3459 PERROR("pthread_join");
3465 ret
= pthread_join(agent_reg_thread
, &status
);
3468 PERROR("pthread_join agent");
3473 ret
= pthread_join(apps_notify_thread
, &status
);
3476 PERROR("pthread_join apps notify");
3481 ret
= pthread_join(apps_thread
, &status
);
3484 PERROR("pthread_join apps");
3489 ret
= pthread_join(reg_apps_thread
, &status
);
3492 PERROR("pthread_join");
3498 * Join dispatch thread after joining reg_apps_thread to ensure
3499 * we don't leak applications in the queue.
3501 ret
= pthread_join(dispatch_thread
, &status
);
3504 PERROR("pthread_join");
3511 lttng_thread_list_shutdown_orphans();
3514 if (client_thread
) {
3515 lttng_thread_put(client_thread
);
3519 * Wait for all pending call_rcu work to complete before tearing
3520 * down data structures. call_rcu worker may be trying to
3521 * perform lookups in those structures.
3525 * sessiond_cleanup() is called when no other thread is running, except
3526 * the ht_cleanup thread, which is needed to destroy the hash tables.
3528 rcu_thread_online();
3532 * Ensure all prior call_rcu are done. call_rcu callbacks may push
3533 * hash tables to the ht_cleanup thread. Therefore, we ensure that
3534 * the queue is empty before shutting down the clean-up thread.
3538 if (ht_cleanup_thread
) {
3539 lttng_thread_shutdown(ht_cleanup_thread
);
3540 lttng_thread_put(ht_cleanup_thread
);
3543 rcu_thread_offline();
3544 rcu_unregister_thread();
3546 if (rotation_thread_handle
) {
3547 rotation_thread_handle_destroy(rotation_thread_handle
);
3551 * After the rotation and timer thread have quit, we can safely destroy
3552 * the rotation_timer_queue.
3554 rotation_thread_timer_queue_destroy(rotation_timer_queue
);
3556 * The teardown of the notification system is performed after the
3557 * session daemon's teardown in order to allow it to be notified
3558 * of the active session and channels at the moment of the teardown.
3560 if (notification_thread_handle
) {
3561 notification_thread_handle_destroy(notification_thread_handle
);
3563 lttng_pipe_destroy(ust32_channel_monitor_pipe
);
3564 lttng_pipe_destroy(ust64_channel_monitor_pipe
);
3565 lttng_pipe_destroy(kernel_channel_monitor_pipe
);
3568 health_app_destroy(health_sessiond
);
3569 exit_health_sessiond_cleanup
:
3570 exit_create_run_as_worker_cleanup
:
3573 sessiond_cleanup_lock_file();
3574 sessiond_cleanup_options();
3576 exit_set_signal_handler
: