2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * 2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
32 #include <sys/mount.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
36 #include <sys/types.h>
38 #include <urcu/uatomic.h>
42 #include <common/common.h>
43 #include <common/compat/socket.h>
44 #include <common/compat/getenv.h>
45 #include <common/defaults.h>
46 #include <common/kernel-consumer/kernel-consumer.h>
47 #include <common/futex.h>
48 #include <common/relayd/relayd.h>
49 #include <common/utils.h>
50 #include <common/daemonize.h>
51 #include <common/config/session-config.h>
52 #include <common/dynamic-buffer.h>
53 #include <lttng/event-internal.h>
55 #include "lttng-sessiond.h"
56 #include "buffer-registry.h"
63 #include "kernel-consumer.h"
67 #include "ust-consumer.h"
70 #include "health-sessiond.h"
71 #include "testpoint.h"
72 #include "ust-thread.h"
73 #include "agent-thread.h"
75 #include "load-session-thread.h"
76 #include "notification-thread.h"
77 #include "notification-thread-commands.h"
78 #include "rotation-thread.h"
79 #include "lttng-syscall.h"
81 #include "ht-cleanup.h"
82 #include "sessiond-config.h"
89 static const char *help_msg
=
90 #ifdef LTTNG_EMBED_HELP
91 #include <lttng-sessiond.8.h>
/*
 * Descriptor slot for the daemon's lock file, initialised to -1.
 * NOTE(review): presumably -1 means "no lock file open"; the code that opens
 * and closes it is outside this view -- confirm.
 */
static int lockfile_fd = -1;

/* Set to 1 when a SIGUSR1 signal is received. */
static int recv_child_signal;
/*
 * Command line options.
 *
 * Entries whose `val` is '\0' have no short-option letter; set_option()
 * recognises those by their long name only. The all-zero last entry is the
 * end-of-table marker required by getopt_long().
 */
static const struct option long_options[] = {
	{ "client-sock", required_argument, 0, 'c' },
	{ "apps-sock", required_argument, 0, 'a' },
	{ "kconsumerd-cmd-sock", required_argument, 0, '\0' },
	{ "kconsumerd-err-sock", required_argument, 0, '\0' },
	{ "ustconsumerd32-cmd-sock", required_argument, 0, '\0' },
	{ "ustconsumerd32-err-sock", required_argument, 0, '\0' },
	{ "ustconsumerd64-cmd-sock", required_argument, 0, '\0' },
	{ "ustconsumerd64-err-sock", required_argument, 0, '\0' },
	{ "consumerd32-path", required_argument, 0, '\0' },
	{ "consumerd32-libdir", required_argument, 0, '\0' },
	{ "consumerd64-path", required_argument, 0, '\0' },
	{ "consumerd64-libdir", required_argument, 0, '\0' },
	{ "daemonize", no_argument, 0, 'd' },
	{ "background", no_argument, 0, 'b' },
	{ "sig-parent", no_argument, 0, 'S' },
	{ "help", no_argument, 0, 'h' },
	{ "group", required_argument, 0, 'g' },
	{ "version", no_argument, 0, 'V' },
	{ "quiet", no_argument, 0, 'q' },
	{ "verbose", no_argument, 0, 'v' },
	{ "verbose-consumer", no_argument, 0, '\0' },
	{ "no-kernel", no_argument, 0, '\0' },
	{ "pidfile", required_argument, 0, 'p' },
	{ "agent-tcp-port", required_argument, 0, '\0' },
	{ "config", required_argument, 0, 'f' },
	{ "load", required_argument, 0, 'l' },
	{ "kmod-probes", required_argument, 0, '\0' },
	{ "extra-kmod-probes", required_argument, 0, '\0' },
	{ NULL, 0, 0, 0 }
};
/* Command line options to ignore from configuration file */
static const char *config_ignore_options[] = { "help", "version", "config" };

/*
 * This pipe is used to inform the thread managing application communication
 * that a command is queued and ready to be processed.
 *
 * Both ends start at -1; thread_manage_apps() reads from index 0 and resets
 * both ends to -1 once it has closed the pipe.
 */
static int apps_cmd_pipe[2] = { -1, -1 };

/* Thread handles (only pthread_t objects are declared in this group). */
static pthread_t apps_thread;
static pthread_t apps_notify_thread;
static pthread_t kernel_thread;
static pthread_t agent_reg_thread;
static pthread_t load_session_thread;

/*
 * UST registration command queue. This queue is tied with a futex and uses a N
 * wakers / 1 waiter implemented and detailed in futex.c/.h
 *
 * The thread_registration_apps and thread_dispatch_ust_registration uses this
 * queue along with the wait/wake scheme. The thread_manage_apps receives down
 * the line new application socket and monitors it for any I/O error or clean
 * close that triggers an unregistration of the application.
 */
static struct ust_cmd_queue ust_cmd_queue;

/* Path that init_kernel_tracer() opens to obtain the kernel tracer fd. */
static const char *module_proc_lttng = "/proc/lttng";

/* Load session thread information to operate. */
static struct load_session_thread_data *load_info;

/*
 * Section name to look for in the daemon configuration file.
 */
static const char * const config_section_name = "sessiond";
173 /* Am I root or not. Set to 1 if the daemon is running as root */
/*
 * Ask all threads to stop by notifying the thread quit pipe.
 *
 * A failed notification is only logged; there is nothing to return to the
 * caller.
 */
static void stop_threads(void)
{
	int ret;

	/* Stopping all threads */
	DBG("Terminating all threads");
	ret = sessiond_notify_quit_pipe();
	if (ret < 0) {
		ERR("write error on thread quit pipe");
	}
}
192 * Close every consumer sockets.
194 static void close_consumer_sockets(void)
198 if (kconsumer_data
.err_sock
>= 0) {
199 ret
= close(kconsumer_data
.err_sock
);
201 PERROR("kernel consumer err_sock close");
204 if (ustconsumer32_data
.err_sock
>= 0) {
205 ret
= close(ustconsumer32_data
.err_sock
);
207 PERROR("UST consumerd32 err_sock close");
210 if (ustconsumer64_data
.err_sock
>= 0) {
211 ret
= close(ustconsumer64_data
.err_sock
);
213 PERROR("UST consumerd64 err_sock close");
216 if (kconsumer_data
.cmd_sock
>= 0) {
217 ret
= close(kconsumer_data
.cmd_sock
);
219 PERROR("kernel consumer cmd_sock close");
222 if (ustconsumer32_data
.cmd_sock
>= 0) {
223 ret
= close(ustconsumer32_data
.cmd_sock
);
225 PERROR("UST consumerd32 cmd_sock close");
228 if (ustconsumer64_data
.cmd_sock
>= 0) {
229 ret
= close(ustconsumer64_data
.cmd_sock
);
231 PERROR("UST consumerd64 cmd_sock close");
234 if (kconsumer_data
.channel_monitor_pipe
>= 0) {
235 ret
= close(kconsumer_data
.channel_monitor_pipe
);
237 PERROR("kernel consumer channel monitor pipe close");
240 if (ustconsumer32_data
.channel_monitor_pipe
>= 0) {
241 ret
= close(ustconsumer32_data
.channel_monitor_pipe
);
243 PERROR("UST consumerd32 channel monitor pipe close");
246 if (ustconsumer64_data
.channel_monitor_pipe
>= 0) {
247 ret
= close(ustconsumer64_data
.channel_monitor_pipe
);
249 PERROR("UST consumerd64 channel monitor pipe close");
255 * Wait on consumer process termination.
257 * Need to be called with the consumer data lock held or from a context
258 * ensuring no concurrent access to data (e.g: cleanup).
260 static void wait_consumer(struct consumer_data
*consumer_data
)
265 if (consumer_data
->pid
<= 0) {
269 DBG("Waiting for complete teardown of consumerd (PID: %d)",
271 ret
= waitpid(consumer_data
->pid
, &status
, 0);
273 PERROR("consumerd waitpid pid: %d", consumer_data
->pid
)
274 } else if (!WIFEXITED(status
)) {
275 ERR("consumerd termination with error: %d",
278 consumer_data
->pid
= 0;
282 * Cleanup the session daemon's data structures.
284 static void sessiond_cleanup(void)
287 struct ltt_session_list
*session_list
= session_get_list();
289 DBG("Cleanup sessiond");
292 * Close the thread quit pipe. It has already done its job,
293 * since we are now called.
295 sessiond_close_quit_pipe();
297 ret
= remove(config
.pid_file_path
.value
);
299 PERROR("remove pidfile %s", config
.pid_file_path
.value
);
302 DBG("Removing sessiond and consumerd content of directory %s",
303 config
.rundir
.value
);
306 DBG("Removing %s", config
.pid_file_path
.value
);
307 (void) unlink(config
.pid_file_path
.value
);
309 DBG("Removing %s", config
.agent_port_file_path
.value
);
310 (void) unlink(config
.agent_port_file_path
.value
);
313 DBG("Removing %s", kconsumer_data
.err_unix_sock_path
);
314 (void) unlink(kconsumer_data
.err_unix_sock_path
);
316 DBG("Removing directory %s", config
.kconsumerd_path
.value
);
317 (void) rmdir(config
.kconsumerd_path
.value
);
319 /* ust consumerd 32 */
320 DBG("Removing %s", config
.consumerd32_err_unix_sock_path
.value
);
321 (void) unlink(config
.consumerd32_err_unix_sock_path
.value
);
323 DBG("Removing directory %s", config
.consumerd32_path
.value
);
324 (void) rmdir(config
.consumerd32_path
.value
);
326 /* ust consumerd 64 */
327 DBG("Removing %s", config
.consumerd64_err_unix_sock_path
.value
);
328 (void) unlink(config
.consumerd64_err_unix_sock_path
.value
);
330 DBG("Removing directory %s", config
.consumerd64_path
.value
);
331 (void) rmdir(config
.consumerd64_path
.value
);
333 pthread_mutex_destroy(&session_list
->lock
);
335 wait_consumer(&kconsumer_data
);
336 wait_consumer(&ustconsumer64_data
);
337 wait_consumer(&ustconsumer32_data
);
339 DBG("Cleaning up all agent apps");
340 agent_app_ht_clean();
342 DBG("Closing all UST sockets");
343 ust_app_clean_list();
344 buffer_reg_destroy_registries();
346 if (is_root
&& !config
.no_kernel
) {
347 DBG2("Closing kernel fd");
348 if (kernel_tracer_fd
>= 0) {
349 ret
= close(kernel_tracer_fd
);
354 DBG("Unloading kernel modules");
355 modprobe_remove_lttng_all();
359 close_consumer_sockets();
362 load_session_destroy_data(load_info
);
367 * We do NOT rmdir rundir because there are other processes
368 * using it, for instance lttng-relayd, which can start in
369 * parallel with this teardown.
374 * Cleanup the daemon's option data structures.
376 static void sessiond_cleanup_options(void)
378 DBG("Cleaning up options");
380 sessiond_config_fini(&config
);
382 run_as_destroy_worker();
386 * Update the kernel poll set of all channel fd available over all tracing
387 * session. Add the wakeup pipe at the end of the set.
389 static int update_kernel_poll(struct lttng_poll_event
*events
)
392 struct ltt_kernel_channel
*channel
;
393 struct ltt_session
*session
;
394 const struct ltt_session_list
*session_list
= session_get_list();
396 DBG("Updating kernel poll set");
399 cds_list_for_each_entry(session
, &session_list
->head
, list
) {
400 if (!session_get(session
)) {
403 session_lock(session
);
404 if (session
->kernel_session
== NULL
) {
405 session_unlock(session
);
406 session_put(session
);
410 cds_list_for_each_entry(channel
,
411 &session
->kernel_session
->channel_list
.head
, list
) {
412 /* Add channel fd to the kernel poll set */
413 ret
= lttng_poll_add(events
, channel
->fd
, LPOLLIN
| LPOLLRDNORM
);
415 session_unlock(session
);
416 session_put(session
);
419 DBG("Channel fd %d added to kernel set", channel
->fd
);
421 session_unlock(session
);
423 session_unlock_list();
428 session_unlock_list();
433 * Find the channel fd from 'fd' over all tracing session. When found, check
434 * for new channel stream and send those stream fds to the kernel consumer.
436 * Useful for CPU hotplug feature.
438 static int update_kernel_stream(int fd
)
441 struct ltt_session
*session
;
442 struct ltt_kernel_session
*ksess
;
443 struct ltt_kernel_channel
*channel
;
444 const struct ltt_session_list
*session_list
= session_get_list();
446 DBG("Updating kernel streams for channel fd %d", fd
);
449 cds_list_for_each_entry(session
, &session_list
->head
, list
) {
450 if (!session_get(session
)) {
453 session_lock(session
);
454 if (session
->kernel_session
== NULL
) {
455 session_unlock(session
);
456 session_put(session
);
459 ksess
= session
->kernel_session
;
461 cds_list_for_each_entry(channel
,
462 &ksess
->channel_list
.head
, list
) {
463 struct lttng_ht_iter iter
;
464 struct consumer_socket
*socket
;
466 if (channel
->fd
!= fd
) {
469 DBG("Channel found, updating kernel streams");
470 ret
= kernel_open_channel_stream(channel
);
474 /* Update the stream global counter */
475 ksess
->stream_count_global
+= ret
;
478 * Have we already sent fds to the consumer? If yes, it
479 * means that tracing is started so it is safe to send
480 * our updated stream fds.
482 if (ksess
->consumer_fds_sent
!= 1
483 || ksess
->consumer
== NULL
) {
489 cds_lfht_for_each_entry(ksess
->consumer
->socks
->ht
,
490 &iter
.iter
, socket
, node
.node
) {
491 pthread_mutex_lock(socket
->lock
);
492 ret
= kernel_consumer_send_channel_streams(socket
,
494 session
->output_traces
? 1 : 0);
495 pthread_mutex_unlock(socket
->lock
);
503 session_unlock(session
);
504 session_put(session
);
506 session_unlock_list();
510 session_unlock(session
);
511 session_put(session
);
512 session_unlock_list();
517 * This thread manages events coming from the kernel.
519 * Features supported in this thread:
522 static void *thread_manage_kernel(void *data
)
524 int ret
, i
, pollfd
, update_poll_flag
= 1, err
= -1;
525 uint32_t revents
, nb_fd
;
527 struct lttng_poll_event events
;
529 DBG("[thread] Thread manage kernel started");
531 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_KERNEL
);
534 * This first step of the while is to clean this structure which could free
535 * non NULL pointers so initialize it before the loop.
537 lttng_poll_init(&events
);
539 if (testpoint(sessiond_thread_manage_kernel
)) {
540 goto error_testpoint
;
543 health_code_update();
545 if (testpoint(sessiond_thread_manage_kernel_before_loop
)) {
546 goto error_testpoint
;
550 health_code_update();
552 if (update_poll_flag
== 1) {
553 /* Clean events object. We are about to populate it again. */
554 lttng_poll_clean(&events
);
556 ret
= sessiond_set_thread_pollset(&events
, 2);
558 goto error_poll_create
;
561 ret
= lttng_poll_add(&events
, kernel_poll_pipe
[0], LPOLLIN
);
566 /* This will add the available kernel channel if any. */
567 ret
= update_kernel_poll(&events
);
571 update_poll_flag
= 0;
574 DBG("Thread kernel polling");
576 /* Poll infinite value of time */
579 ret
= lttng_poll_wait(&events
, -1);
580 DBG("Thread kernel return from poll on %d fds",
581 LTTNG_POLL_GETNB(&events
));
585 * Restart interrupted system call.
587 if (errno
== EINTR
) {
591 } else if (ret
== 0) {
592 /* Should not happen since timeout is infinite */
593 ERR("Return value of poll is 0 with an infinite timeout.\n"
594 "This should not have happened! Continuing...");
600 for (i
= 0; i
< nb_fd
; i
++) {
601 /* Fetch once the poll data */
602 revents
= LTTNG_POLL_GETEV(&events
, i
);
603 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
605 health_code_update();
608 /* No activity for this FD (poll implementation). */
612 /* Thread quit pipe has been closed. Killing thread. */
613 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
619 /* Check for data on kernel pipe */
620 if (revents
& LPOLLIN
) {
621 if (pollfd
== kernel_poll_pipe
[0]) {
622 (void) lttng_read(kernel_poll_pipe
[0],
625 * Ret value is useless here, if this pipe gets any actions an
626 * update is required anyway.
628 update_poll_flag
= 1;
632 * New CPU detected by the kernel. Adding kernel stream to
633 * kernel session and updating the kernel consumer
635 ret
= update_kernel_stream(pollfd
);
641 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
642 update_poll_flag
= 1;
645 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
653 lttng_poll_clean(&events
);
656 utils_close_pipe(kernel_poll_pipe
);
657 kernel_poll_pipe
[0] = kernel_poll_pipe
[1] = -1;
660 ERR("Health error occurred in %s", __func__
);
661 WARN("Kernel thread died unexpectedly. "
662 "Kernel tracing can continue but CPU hotplug is disabled.");
664 health_unregister(health_sessiond
);
665 DBG("Kernel thread dying");
670 * Signal pthread condition of the consumer data that the thread.
672 static void signal_consumer_condition(struct consumer_data
*data
, int state
)
674 pthread_mutex_lock(&data
->cond_mutex
);
677 * The state is set before signaling. It can be any value, it's the waiter
678 * job to correctly interpret this condition variable associated to the
679 * consumer pthread_cond.
681 * A value of 0 means that the corresponding thread of the consumer data
682 * was not started. 1 indicates that the thread has started and is ready
683 * for action. A negative value means that there was an error during the
686 data
->consumer_thread_is_ready
= state
;
687 (void) pthread_cond_signal(&data
->cond
);
689 pthread_mutex_unlock(&data
->cond_mutex
);
693 * This thread manages the consumer errors sent back to the session daemon.
695 void *thread_manage_consumer(void *data
)
697 int sock
= -1, i
, ret
, pollfd
, err
= -1, should_quit
= 0;
698 uint32_t revents
, nb_fd
;
699 enum lttcomm_return_code code
;
700 struct lttng_poll_event events
;
701 struct consumer_data
*consumer_data
= data
;
702 struct consumer_socket
*cmd_socket_wrapper
= NULL
;
704 DBG("[thread] Manage consumer started");
706 rcu_register_thread();
709 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_CONSUMER
);
711 health_code_update();
714 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
715 * metadata_sock. Nothing more will be added to this poll set.
717 ret
= sessiond_set_thread_pollset(&events
, 3);
723 * The error socket here is already in a listening state which was done
724 * just before spawning this thread to avoid a race between the consumer
725 * daemon exec trying to connect and the listen() call.
727 ret
= lttng_poll_add(&events
, consumer_data
->err_sock
, LPOLLIN
| LPOLLRDHUP
);
732 health_code_update();
734 /* Infinite blocking call, waiting for transmission */
738 if (testpoint(sessiond_thread_manage_consumer
)) {
742 ret
= lttng_poll_wait(&events
, -1);
746 * Restart interrupted system call.
748 if (errno
== EINTR
) {
756 for (i
= 0; i
< nb_fd
; i
++) {
757 /* Fetch once the poll data */
758 revents
= LTTNG_POLL_GETEV(&events
, i
);
759 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
761 health_code_update();
764 /* No activity for this FD (poll implementation). */
768 /* Thread quit pipe has been closed. Killing thread. */
769 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
775 /* Event on the registration socket */
776 if (pollfd
== consumer_data
->err_sock
) {
777 if (revents
& LPOLLIN
) {
779 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
780 ERR("consumer err socket poll error");
783 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
789 sock
= lttcomm_accept_unix_sock(consumer_data
->err_sock
);
795 * Set the CLOEXEC flag. Return code is useless because either way, the
798 (void) utils_set_fd_cloexec(sock
);
800 health_code_update();
802 DBG2("Receiving code from consumer err_sock");
804 /* Getting status code from kconsumerd */
805 ret
= lttcomm_recv_unix_sock(sock
, &code
,
806 sizeof(enum lttcomm_return_code
));
811 health_code_update();
812 if (code
!= LTTCOMM_CONSUMERD_COMMAND_SOCK_READY
) {
813 ERR("consumer error when waiting for SOCK_READY : %s",
814 lttcomm_get_readable_code(-code
));
818 /* Connect both command and metadata sockets. */
819 consumer_data
->cmd_sock
=
820 lttcomm_connect_unix_sock(
821 consumer_data
->cmd_unix_sock_path
);
822 consumer_data
->metadata_fd
=
823 lttcomm_connect_unix_sock(
824 consumer_data
->cmd_unix_sock_path
);
825 if (consumer_data
->cmd_sock
< 0 || consumer_data
->metadata_fd
< 0) {
826 PERROR("consumer connect cmd socket");
827 /* On error, signal condition and quit. */
828 signal_consumer_condition(consumer_data
, -1);
832 consumer_data
->metadata_sock
.fd_ptr
= &consumer_data
->metadata_fd
;
834 /* Create metadata socket lock. */
835 consumer_data
->metadata_sock
.lock
= zmalloc(sizeof(pthread_mutex_t
));
836 if (consumer_data
->metadata_sock
.lock
== NULL
) {
837 PERROR("zmalloc pthread mutex");
840 pthread_mutex_init(consumer_data
->metadata_sock
.lock
, NULL
);
842 DBG("Consumer command socket ready (fd: %d", consumer_data
->cmd_sock
);
843 DBG("Consumer metadata socket ready (fd: %d)",
844 consumer_data
->metadata_fd
);
847 * Remove the consumerd error sock since we've established a connection.
849 ret
= lttng_poll_del(&events
, consumer_data
->err_sock
);
854 /* Add new accepted error socket. */
855 ret
= lttng_poll_add(&events
, sock
, LPOLLIN
| LPOLLRDHUP
);
860 /* Add metadata socket that is successfully connected. */
861 ret
= lttng_poll_add(&events
, consumer_data
->metadata_fd
,
862 LPOLLIN
| LPOLLRDHUP
);
867 health_code_update();
870 * Transfer the write-end of the channel monitoring and rotate pipe
871 * to the consumer by issuing a SET_CHANNEL_MONITOR_PIPE command.
873 cmd_socket_wrapper
= consumer_allocate_socket(&consumer_data
->cmd_sock
);
874 if (!cmd_socket_wrapper
) {
877 cmd_socket_wrapper
->lock
= &consumer_data
->lock
;
879 ret
= consumer_send_channel_monitor_pipe(cmd_socket_wrapper
,
880 consumer_data
->channel_monitor_pipe
);
885 /* Discard the socket wrapper as it is no longer needed. */
886 consumer_destroy_socket(cmd_socket_wrapper
);
887 cmd_socket_wrapper
= NULL
;
889 /* The thread is completely initialized, signal that it is ready. */
890 signal_consumer_condition(consumer_data
, 1);
892 /* Infinite blocking call, waiting for transmission */
895 health_code_update();
897 /* Exit the thread because the thread quit pipe has been triggered. */
899 /* Not a health error. */
905 ret
= lttng_poll_wait(&events
, -1);
909 * Restart interrupted system call.
911 if (errno
== EINTR
) {
919 for (i
= 0; i
< nb_fd
; i
++) {
920 /* Fetch once the poll data */
921 revents
= LTTNG_POLL_GETEV(&events
, i
);
922 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
924 health_code_update();
927 /* No activity for this FD (poll implementation). */
932 * Thread quit pipe has been triggered, flag that we should stop
933 * but continue the current loop to handle potential data from
936 should_quit
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
938 if (pollfd
== sock
) {
939 /* Event on the consumerd socket */
940 if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)
941 && !(revents
& LPOLLIN
)) {
942 ERR("consumer err socket second poll error");
945 health_code_update();
946 /* Wait for any kconsumerd error */
947 ret
= lttcomm_recv_unix_sock(sock
, &code
,
948 sizeof(enum lttcomm_return_code
));
950 ERR("consumer closed the command socket");
954 ERR("consumer return code : %s",
955 lttcomm_get_readable_code(-code
));
958 } else if (pollfd
== consumer_data
->metadata_fd
) {
959 if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)
960 && !(revents
& LPOLLIN
)) {
961 ERR("consumer err metadata socket second poll error");
964 /* UST metadata requests */
965 ret
= ust_consumer_metadata_request(
966 &consumer_data
->metadata_sock
);
968 ERR("Handling metadata request");
972 /* No need for an else branch all FDs are tested prior. */
974 health_code_update();
980 * We lock here because we are about to close the sockets and some other
981 * thread might be using them so get exclusive access which will abort all
982 * other consumer command by other threads.
984 pthread_mutex_lock(&consumer_data
->lock
);
986 /* Immediately set the consumerd state to stopped */
987 if (consumer_data
->type
== LTTNG_CONSUMER_KERNEL
) {
988 uatomic_set(&kernel_consumerd_state
, CONSUMER_ERROR
);
989 } else if (consumer_data
->type
== LTTNG_CONSUMER64_UST
||
990 consumer_data
->type
== LTTNG_CONSUMER32_UST
) {
991 uatomic_set(&ust_consumerd_state
, CONSUMER_ERROR
);
993 /* Code flow error... */
997 if (consumer_data
->err_sock
>= 0) {
998 ret
= close(consumer_data
->err_sock
);
1002 consumer_data
->err_sock
= -1;
1004 if (consumer_data
->cmd_sock
>= 0) {
1005 ret
= close(consumer_data
->cmd_sock
);
1009 consumer_data
->cmd_sock
= -1;
1011 if (consumer_data
->metadata_sock
.fd_ptr
&&
1012 *consumer_data
->metadata_sock
.fd_ptr
>= 0) {
1013 ret
= close(*consumer_data
->metadata_sock
.fd_ptr
);
1025 unlink(consumer_data
->err_unix_sock_path
);
1026 unlink(consumer_data
->cmd_unix_sock_path
);
1027 pthread_mutex_unlock(&consumer_data
->lock
);
1029 /* Cleanup metadata socket mutex. */
1030 if (consumer_data
->metadata_sock
.lock
) {
1031 pthread_mutex_destroy(consumer_data
->metadata_sock
.lock
);
1032 free(consumer_data
->metadata_sock
.lock
);
1034 lttng_poll_clean(&events
);
1036 if (cmd_socket_wrapper
) {
1037 consumer_destroy_socket(cmd_socket_wrapper
);
1042 ERR("Health error occurred in %s", __func__
);
1044 health_unregister(health_sessiond
);
1045 DBG("consumer thread cleanup completed");
1047 rcu_thread_offline();
1048 rcu_unregister_thread();
1054 * This thread receives application command sockets (FDs) on the
1055 * apps_cmd_pipe and waits (polls) on them until they are closed
1056 * or an error occurs.
1058 * At that point, it flushes the data (tracing and metadata) associated
1059 * with this application and tears down ust app sessions and other
1060 * associated data structures through ust_app_unregister().
1062 * Note that this thread never sends commands to the applications
1063 * through the command sockets; it merely listens for hang-ups
1064 * and errors on those sockets and cleans-up as they occur.
1066 static void *thread_manage_apps(void *data
)
1068 int i
, ret
, pollfd
, err
= -1;
1070 uint32_t revents
, nb_fd
;
1071 struct lttng_poll_event events
;
1073 DBG("[thread] Manage application started");
1075 rcu_register_thread();
1076 rcu_thread_online();
1078 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_APP_MANAGE
);
1080 if (testpoint(sessiond_thread_manage_apps
)) {
1081 goto error_testpoint
;
1084 health_code_update();
1086 ret
= sessiond_set_thread_pollset(&events
, 2);
1088 goto error_poll_create
;
1091 ret
= lttng_poll_add(&events
, apps_cmd_pipe
[0], LPOLLIN
| LPOLLRDHUP
);
1096 if (testpoint(sessiond_thread_manage_apps_before_loop
)) {
1100 health_code_update();
1103 DBG("Apps thread polling");
1105 /* Infinite blocking call, waiting for transmission */
1107 health_poll_entry();
1108 ret
= lttng_poll_wait(&events
, -1);
1109 DBG("Apps thread return from poll on %d fds",
1110 LTTNG_POLL_GETNB(&events
));
1114 * Restart interrupted system call.
1116 if (errno
== EINTR
) {
1124 for (i
= 0; i
< nb_fd
; i
++) {
1125 /* Fetch once the poll data */
1126 revents
= LTTNG_POLL_GETEV(&events
, i
);
1127 pollfd
= LTTNG_POLL_GETFD(&events
, i
);
1129 health_code_update();
1132 /* No activity for this FD (poll implementation). */
1136 /* Thread quit pipe has been closed. Killing thread. */
1137 ret
= sessiond_check_thread_quit_pipe(pollfd
, revents
);
1143 /* Inspect the apps cmd pipe */
1144 if (pollfd
== apps_cmd_pipe
[0]) {
1145 if (revents
& LPOLLIN
) {
1149 size_ret
= lttng_read(apps_cmd_pipe
[0], &sock
, sizeof(sock
));
1150 if (size_ret
< sizeof(sock
)) {
1151 PERROR("read apps cmd pipe");
1155 health_code_update();
1158 * Since this is a command socket (write then read),
1159 * we only monitor the error events of the socket.
1161 ret
= lttng_poll_add(&events
, sock
,
1162 LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
);
1167 DBG("Apps with sock %d added to poll set", sock
);
1168 } else if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
1169 ERR("Apps command pipe error");
1172 ERR("Unknown poll events %u for sock %d", revents
, pollfd
);
1177 * At this point, we know that a registered application made
1178 * the event at poll_wait.
1180 if (revents
& (LPOLLERR
| LPOLLHUP
| LPOLLRDHUP
)) {
1181 /* Removing from the poll set */
1182 ret
= lttng_poll_del(&events
, pollfd
);
1187 /* Socket closed on remote end. */
1188 ust_app_unregister(pollfd
);
1190 ERR("Unexpected poll events %u for sock %d", revents
, pollfd
);
1195 health_code_update();
1201 lttng_poll_clean(&events
);
1204 utils_close_pipe(apps_cmd_pipe
);
1205 apps_cmd_pipe
[0] = apps_cmd_pipe
[1] = -1;
1208 * We don't clean the UST app hash table here since already registered
1209 * applications can still be controlled so let them be until the session
1210 * daemon dies or the applications stop.
1215 ERR("Health error occurred in %s", __func__
);
1217 health_unregister(health_sessiond
);
1218 DBG("Application communication apps thread cleanup complete");
1219 rcu_thread_offline();
1220 rcu_unregister_thread();
1225 * Setup necessary data for kernel tracer action.
1227 static int init_kernel_tracer(void)
1231 /* Modprobe lttng kernel modules */
1232 ret
= modprobe_lttng_control();
1237 /* Open debugfs lttng */
1238 kernel_tracer_fd
= open(module_proc_lttng
, O_RDWR
);
1239 if (kernel_tracer_fd
< 0) {
1240 DBG("Failed to open %s", module_proc_lttng
);
1244 /* Validate kernel version */
1245 ret
= kernel_validate_version(kernel_tracer_fd
, &kernel_tracer_version
,
1246 &kernel_tracer_abi_version
);
1251 ret
= modprobe_lttng_data();
1256 ret
= kernel_supports_ring_buffer_snapshot_sample_positions(
1263 WARN("Kernel tracer does not support buffer monitoring. "
1264 "The monitoring timer of channels in the kernel domain "
1265 "will be set to 0 (disabled).");
1268 DBG("Kernel tracer fd %d", kernel_tracer_fd
);
1272 modprobe_remove_lttng_control();
1273 ret
= close(kernel_tracer_fd
);
1277 kernel_tracer_fd
= -1;
1278 return LTTNG_ERR_KERN_VERSION
;
1281 ret
= close(kernel_tracer_fd
);
1287 modprobe_remove_lttng_control();
1290 WARN("No kernel tracer available");
1291 kernel_tracer_fd
= -1;
1293 return LTTNG_ERR_NEED_ROOT_SESSIOND
;
1295 return LTTNG_ERR_KERN_NA
;
/*
 * Tell whether two strings are equal.
 *
 * Returns 1 when both pointers are non-NULL and the strings compare equal
 * with strcmp(), 0 otherwise (a NULL on either side never matches).
 */
static int string_match(const char *str1, const char *str2)
{
	if (!str1 || !str2) {
		return 0;
	}

	return strcmp(str1, str2) == 0;
}
1305 * Take an option from the getopt output and set it in the right variable to be
1308 * Return 0 on success else a negative value.
1310 static int set_option(int opt
, const char *arg
, const char *optname
)
1314 if (string_match(optname
, "client-sock") || opt
== 'c') {
1315 if (!arg
|| *arg
== '\0') {
1319 if (lttng_is_setuid_setgid()) {
1320 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1321 "-c, --client-sock");
1323 config_string_set(&config
.client_unix_sock_path
,
1325 if (!config
.client_unix_sock_path
.value
) {
1330 } else if (string_match(optname
, "apps-sock") || opt
== 'a') {
1331 if (!arg
|| *arg
== '\0') {
1335 if (lttng_is_setuid_setgid()) {
1336 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1339 config_string_set(&config
.apps_unix_sock_path
,
1341 if (!config
.apps_unix_sock_path
.value
) {
1346 } else if (string_match(optname
, "daemonize") || opt
== 'd') {
1347 config
.daemonize
= true;
1348 } else if (string_match(optname
, "background") || opt
== 'b') {
1349 config
.background
= true;
1350 } else if (string_match(optname
, "group") || opt
== 'g') {
1351 if (!arg
|| *arg
== '\0') {
1355 if (lttng_is_setuid_setgid()) {
1356 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1359 config_string_set(&config
.tracing_group_name
,
1361 if (!config
.tracing_group_name
.value
) {
1366 } else if (string_match(optname
, "help") || opt
== 'h') {
1367 ret
= utils_show_help(8, "lttng-sessiond", help_msg
);
1369 ERR("Cannot show --help for `lttng-sessiond`");
1372 exit(ret
? EXIT_FAILURE
: EXIT_SUCCESS
);
1373 } else if (string_match(optname
, "version") || opt
== 'V') {
1374 fprintf(stdout
, "%s\n", VERSION
);
1376 } else if (string_match(optname
, "sig-parent") || opt
== 'S') {
1377 config
.sig_parent
= true;
1378 } else if (string_match(optname
, "kconsumerd-err-sock")) {
1379 if (!arg
|| *arg
== '\0') {
1383 if (lttng_is_setuid_setgid()) {
1384 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1385 "--kconsumerd-err-sock");
1387 config_string_set(&config
.kconsumerd_err_unix_sock_path
,
1389 if (!config
.kconsumerd_err_unix_sock_path
.value
) {
1394 } else if (string_match(optname
, "kconsumerd-cmd-sock")) {
1395 if (!arg
|| *arg
== '\0') {
1399 if (lttng_is_setuid_setgid()) {
1400 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1401 "--kconsumerd-cmd-sock");
1403 config_string_set(&config
.kconsumerd_cmd_unix_sock_path
,
1405 if (!config
.kconsumerd_cmd_unix_sock_path
.value
) {
1410 } else if (string_match(optname
, "ustconsumerd64-err-sock")) {
1411 if (!arg
|| *arg
== '\0') {
1415 if (lttng_is_setuid_setgid()) {
1416 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1417 "--ustconsumerd64-err-sock");
1419 config_string_set(&config
.consumerd64_err_unix_sock_path
,
1421 if (!config
.consumerd64_err_unix_sock_path
.value
) {
1426 } else if (string_match(optname
, "ustconsumerd64-cmd-sock")) {
1427 if (!arg
|| *arg
== '\0') {
1431 if (lttng_is_setuid_setgid()) {
1432 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1433 "--ustconsumerd64-cmd-sock");
1435 config_string_set(&config
.consumerd64_cmd_unix_sock_path
,
1437 if (!config
.consumerd64_cmd_unix_sock_path
.value
) {
1442 } else if (string_match(optname
, "ustconsumerd32-err-sock")) {
1443 if (!arg
|| *arg
== '\0') {
1447 if (lttng_is_setuid_setgid()) {
1448 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1449 "--ustconsumerd32-err-sock");
1451 config_string_set(&config
.consumerd32_err_unix_sock_path
,
1453 if (!config
.consumerd32_err_unix_sock_path
.value
) {
1458 } else if (string_match(optname
, "ustconsumerd32-cmd-sock")) {
1459 if (!arg
|| *arg
== '\0') {
1463 if (lttng_is_setuid_setgid()) {
1464 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1465 "--ustconsumerd32-cmd-sock");
1467 config_string_set(&config
.consumerd32_cmd_unix_sock_path
,
1469 if (!config
.consumerd32_cmd_unix_sock_path
.value
) {
1474 } else if (string_match(optname
, "no-kernel")) {
1475 config
.no_kernel
= true;
1476 } else if (string_match(optname
, "quiet") || opt
== 'q') {
1477 config
.quiet
= true;
1478 } else if (string_match(optname
, "verbose") || opt
== 'v') {
1479 /* Verbose level can increase using multiple -v */
1481 /* Value obtained from config file */
1482 config
.verbose
= config_parse_value(arg
);
1484 /* -v used on command line */
1487 /* Clamp value to [0, 3] */
1488 config
.verbose
= config
.verbose
< 0 ? 0 :
1489 (config
.verbose
<= 3 ? config
.verbose
: 3);
1490 } else if (string_match(optname
, "verbose-consumer")) {
1492 config
.verbose_consumer
= config_parse_value(arg
);
1494 config
.verbose_consumer
++;
1496 } else if (string_match(optname
, "consumerd32-path")) {
1497 if (!arg
|| *arg
== '\0') {
1501 if (lttng_is_setuid_setgid()) {
1502 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1503 "--consumerd32-path");
1505 config_string_set(&config
.consumerd32_bin_path
,
1507 if (!config
.consumerd32_bin_path
.value
) {
1512 } else if (string_match(optname
, "consumerd32-libdir")) {
1513 if (!arg
|| *arg
== '\0') {
1517 if (lttng_is_setuid_setgid()) {
1518 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1519 "--consumerd32-libdir");
1521 config_string_set(&config
.consumerd32_lib_dir
,
1523 if (!config
.consumerd32_lib_dir
.value
) {
1528 } else if (string_match(optname
, "consumerd64-path")) {
1529 if (!arg
|| *arg
== '\0') {
1533 if (lttng_is_setuid_setgid()) {
1534 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1535 "--consumerd64-path");
1537 config_string_set(&config
.consumerd64_bin_path
,
1539 if (!config
.consumerd64_bin_path
.value
) {
1544 } else if (string_match(optname
, "consumerd64-libdir")) {
1545 if (!arg
|| *arg
== '\0') {
1549 if (lttng_is_setuid_setgid()) {
1550 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1551 "--consumerd64-libdir");
1553 config_string_set(&config
.consumerd64_lib_dir
,
1555 if (!config
.consumerd64_lib_dir
.value
) {
1560 } else if (string_match(optname
, "pidfile") || opt
== 'p') {
1561 if (!arg
|| *arg
== '\0') {
1565 if (lttng_is_setuid_setgid()) {
1566 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1569 config_string_set(&config
.pid_file_path
, strdup(arg
));
1570 if (!config
.pid_file_path
.value
) {
1575 } else if (string_match(optname
, "agent-tcp-port")) {
1576 if (!arg
|| *arg
== '\0') {
1580 if (lttng_is_setuid_setgid()) {
1581 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1582 "--agent-tcp-port");
1587 v
= strtoul(arg
, NULL
, 0);
1588 if (errno
!= 0 || !isdigit(arg
[0])) {
1589 ERR("Wrong value in --agent-tcp-port parameter: %s", arg
);
1592 if (v
== 0 || v
>= 65535) {
1593 ERR("Port overflow in --agent-tcp-port parameter: %s", arg
);
1596 config
.agent_tcp_port
.begin
= config
.agent_tcp_port
.end
= (int) v
;
1597 DBG3("Agent TCP port set to non default: %i", (int) v
);
1599 } else if (string_match(optname
, "load") || opt
== 'l') {
1600 if (!arg
|| *arg
== '\0') {
1604 if (lttng_is_setuid_setgid()) {
1605 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1608 config_string_set(&config
.load_session_path
, strdup(arg
));
1609 if (!config
.load_session_path
.value
) {
1614 } else if (string_match(optname
, "kmod-probes")) {
1615 if (!arg
|| *arg
== '\0') {
1619 if (lttng_is_setuid_setgid()) {
1620 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1623 config_string_set(&config
.kmod_probes_list
, strdup(arg
));
1624 if (!config
.kmod_probes_list
.value
) {
1629 } else if (string_match(optname
, "extra-kmod-probes")) {
1630 if (!arg
|| *arg
== '\0') {
1634 if (lttng_is_setuid_setgid()) {
1635 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1636 "--extra-kmod-probes");
1638 config_string_set(&config
.kmod_extra_probes_list
,
1640 if (!config
.kmod_extra_probes_list
.value
) {
1645 } else if (string_match(optname
, "config") || opt
== 'f') {
1646 /* This is handled in set_options() thus silent skip. */
1649 /* Unknown option or other error.
1650 * Error is printed by getopt, just return */
1655 if (ret
== -EINVAL
) {
1656 const char *opt_name
= "unknown";
1659 for (i
= 0; i
< sizeof(long_options
) / sizeof(struct option
);
1661 if (opt
== long_options
[i
].val
) {
1662 opt_name
= long_options
[i
].name
;
1667 WARN("Invalid argument provided for option \"%s\", using default value.",
1675 * config_entry_handler_cb used to handle options read from a config file.
1676 * See config_entry_handler_cb comment in common/config/session-config.h for the
1677 * return value conventions.
1679 static int config_entry_handler(const struct config_entry
*entry
, void *unused
)
1683 if (!entry
|| !entry
->name
|| !entry
->value
) {
1688 /* Check if the option is to be ignored */
1689 for (i
= 0; i
< sizeof(config_ignore_options
) / sizeof(char *); i
++) {
1690 if (!strcmp(entry
->name
, config_ignore_options
[i
])) {
1695 for (i
= 0; i
< (sizeof(long_options
) / sizeof(struct option
)) - 1;
1698 /* Ignore if not fully matched. */
1699 if (strcmp(entry
->name
, long_options
[i
].name
)) {
1704 * If the option takes no argument on the command line, we have to
1705 * check if the value is "true". We support non-zero numeric values,
1708 if (!long_options
[i
].has_arg
) {
1709 ret
= config_parse_value(entry
->value
);
1712 WARN("Invalid configuration value \"%s\" for option %s",
1713 entry
->value
, entry
->name
);
1715 /* False, skip boolean config option. */
1720 ret
= set_option(long_options
[i
].val
, entry
->value
, entry
->name
);
1724 WARN("Unrecognized option \"%s\" in daemon configuration file.", entry
->name
);
1731 * daemon configuration loading and argument parsing
1733 static int set_options(int argc
, char **argv
)
1735 int ret
= 0, c
= 0, option_index
= 0;
1736 int orig_optopt
= optopt
, orig_optind
= optind
;
1738 const char *config_path
= NULL
;
1740 optstring
= utils_generate_optstring(long_options
,
1741 sizeof(long_options
) / sizeof(struct option
));
1747 /* Check for the --config option */
1748 while ((c
= getopt_long(argc
, argv
, optstring
, long_options
,
1749 &option_index
)) != -1) {
1753 } else if (c
!= 'f') {
1754 /* if not equal to --config option. */
1758 if (lttng_is_setuid_setgid()) {
1759 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
1762 config_path
= utils_expand_path(optarg
);
1764 ERR("Failed to resolve path: %s", optarg
);
1769 ret
= config_get_section_entries(config_path
, config_section_name
,
1770 config_entry_handler
, NULL
);
1773 ERR("Invalid configuration option at line %i", ret
);
1779 /* Reset getopt's global state */
1780 optopt
= orig_optopt
;
1781 optind
= orig_optind
;
1785 * getopt_long() will not set option_index if it encounters a
1788 c
= getopt_long(argc
, argv
, optstring
, long_options
,
1795 * Pass NULL as the long option name if popt left the index
1798 ret
= set_option(c
, optarg
,
1799 option_index
< 0 ? NULL
:
1800 long_options
[option_index
].name
);
1812 * Create lockfile using the rundir and return its fd.
1814 static int create_lockfile(void)
1816 return utils_create_lock_file(config
.lock_file_path
.value
);
1820 * Check if the global socket is available, and if a daemon is answering at the
1821 * other side. If yes, error is returned.
1823 * Also attempts to create and hold the lock file.
1825 static int check_existing_daemon(void)
1829 /* Is there anybody out there ? */
1830 if (lttng_session_daemon_alive()) {
1835 lockfile_fd
= create_lockfile();
1836 if (lockfile_fd
< 0) {
1844 static void sessiond_cleanup_lock_file(void)
1849 * Cleanup lock file by deleting it and finaly closing it which will
1850 * release the file system lock.
1852 if (lockfile_fd
>= 0) {
1853 ret
= remove(config
.lock_file_path
.value
);
1855 PERROR("remove lock file");
1857 ret
= close(lockfile_fd
);
1859 PERROR("close lock file");
1865 * Set the tracing group gid onto the client socket.
1867 * Race window between mkdir and chown is OK because we are going from more
1868 * permissive (root.root) to less permissive (root.tracing).
1870 static int set_permissions(char *rundir
)
1875 gid
= utils_get_group_id(config
.tracing_group_name
.value
);
1877 /* Set lttng run dir */
1878 ret
= chown(rundir
, 0, gid
);
1880 ERR("Unable to set group on %s", rundir
);
1885 * Ensure all applications and tracing group can search the run
1886 * dir. Allow everyone to read the directory, since it does not
1887 * buy us anything to hide its content.
1889 ret
= chmod(rundir
, S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
);
1891 ERR("Unable to set permissions on %s", rundir
);
1895 /* lttng client socket path */
1896 ret
= chown(config
.client_unix_sock_path
.value
, 0, gid
);
1898 ERR("Unable to set group on %s", config
.client_unix_sock_path
.value
);
1902 /* kconsumer error socket path */
1903 ret
= chown(kconsumer_data
.err_unix_sock_path
, 0, 0);
1905 ERR("Unable to set group on %s", kconsumer_data
.err_unix_sock_path
);
1909 /* 64-bit ustconsumer error socket path */
1910 ret
= chown(ustconsumer64_data
.err_unix_sock_path
, 0, 0);
1912 ERR("Unable to set group on %s", ustconsumer64_data
.err_unix_sock_path
);
1916 /* 32-bit ustconsumer compat32 error socket path */
1917 ret
= chown(ustconsumer32_data
.err_unix_sock_path
, 0, 0);
1919 ERR("Unable to set group on %s", ustconsumer32_data
.err_unix_sock_path
);
1923 DBG("All permissions are set");
1929 * Create the lttng run directory needed for all global sockets and pipe.
1931 static int create_lttng_rundir(void)
1935 DBG3("Creating LTTng run directory: %s", config
.rundir
.value
);
1937 ret
= mkdir(config
.rundir
.value
, S_IRWXU
);
1939 if (errno
!= EEXIST
) {
1940 ERR("Unable to create %s", config
.rundir
.value
);
1952 * Setup sockets and directory needed by the consumerds' communication with the
1955 static int set_consumer_sockets(struct consumer_data
*consumer_data
)
1960 switch (consumer_data
->type
) {
1961 case LTTNG_CONSUMER_KERNEL
:
1962 path
= config
.kconsumerd_path
.value
;
1964 case LTTNG_CONSUMER64_UST
:
1965 path
= config
.consumerd64_path
.value
;
1967 case LTTNG_CONSUMER32_UST
:
1968 path
= config
.consumerd32_path
.value
;
1971 ERR("Consumer type unknown");
1977 DBG2("Creating consumer directory: %s", path
);
1979 ret
= mkdir(path
, S_IRWXU
| S_IRGRP
| S_IXGRP
);
1980 if (ret
< 0 && errno
!= EEXIST
) {
1982 ERR("Failed to create %s", path
);
1986 ret
= chown(path
, 0, utils_get_group_id(config
.tracing_group_name
.value
));
1988 ERR("Unable to set group on %s", path
);
1994 /* Create the consumerd error unix socket */
1995 consumer_data
->err_sock
=
1996 lttcomm_create_unix_sock(consumer_data
->err_unix_sock_path
);
1997 if (consumer_data
->err_sock
< 0) {
1998 ERR("Create unix sock failed: %s", consumer_data
->err_unix_sock_path
);
2004 * Set the CLOEXEC flag. Return code is useless because either way, the
2007 ret
= utils_set_fd_cloexec(consumer_data
->err_sock
);
2009 PERROR("utils_set_fd_cloexec");
2010 /* continue anyway */
2013 /* File permission MUST be 660 */
2014 ret
= chmod(consumer_data
->err_unix_sock_path
,
2015 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
);
2017 ERR("Set file permissions failed: %s", consumer_data
->err_unix_sock_path
);
2027 * Signal handler for the daemon
2029 * Simply stop all worker threads, leaving main() return gracefully after
2030 * joining all threads and calling cleanup().
2032 static void sighandler(int sig
)
2036 DBG("SIGINT caught");
2040 DBG("SIGTERM caught");
2044 CMM_STORE_SHARED(recv_child_signal
, 1);
2052 * Setup signal handler for :
2053 * SIGINT, SIGTERM, SIGPIPE
2055 static int set_signal_handler(void)
2058 struct sigaction sa
;
2061 if ((ret
= sigemptyset(&sigset
)) < 0) {
2062 PERROR("sigemptyset");
2066 sa
.sa_mask
= sigset
;
2069 sa
.sa_handler
= sighandler
;
2070 if ((ret
= sigaction(SIGTERM
, &sa
, NULL
)) < 0) {
2071 PERROR("sigaction");
2075 if ((ret
= sigaction(SIGINT
, &sa
, NULL
)) < 0) {
2076 PERROR("sigaction");
2080 if ((ret
= sigaction(SIGUSR1
, &sa
, NULL
)) < 0) {
2081 PERROR("sigaction");
2085 sa
.sa_handler
= SIG_IGN
;
2086 if ((ret
= sigaction(SIGPIPE
, &sa
, NULL
)) < 0) {
2087 PERROR("sigaction");
2091 DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
/*
 * Raise the open files limit (soft and hard) to 65535. This daemon can open
 * a large number of file descriptors in order to consume multiple kernel
 * traces.
 *
 * A failure is logged and otherwise ignored.
 */
static void set_ulimit(void)
{
	/* The kernel does not allow an infinite limit for open files */
	struct rlimit lim = {
		.rlim_cur = 65535,
		.rlim_max = 65535,
	};

	if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
		PERROR("failed to set open files limit");
	}
}
2115 static int write_pidfile(void)
2117 return utils_create_pid_file(getpid(), config
.pid_file_path
.value
);
2120 static int set_clock_plugin_env(void)
2123 char *env_value
= NULL
;
2125 if (!config
.lttng_ust_clock_plugin
.value
) {
2129 ret
= asprintf(&env_value
, "LTTNG_UST_CLOCK_PLUGIN=%s",
2130 config
.lttng_ust_clock_plugin
.value
);
2136 ret
= putenv(env_value
);
2139 PERROR("putenv of LTTNG_UST_CLOCK_PLUGIN");
2143 DBG("Updated LTTNG_UST_CLOCK_PLUGIN environment variable to \"%s\"",
2144 config
.lttng_ust_clock_plugin
.value
);
2149 static void destroy_all_sessions_and_wait(void)
2151 struct ltt_session
*session
, *tmp
;
2152 struct ltt_session_list
*session_list
;
2154 session_list
= session_get_list();
2155 DBG("Initiating destruction of all sessions");
2157 if (!session_list
) {
2161 session_lock_list();
2162 /* Initiate the destruction of all sessions. */
2163 cds_list_for_each_entry_safe(session
, tmp
,
2164 &session_list
->head
, list
) {
2165 if (!session_get(session
)) {
2169 session_lock(session
);
2170 if (session
->destroyed
) {
2171 goto unlock_session
;
2173 (void) cmd_destroy_session(session
,
2174 notification_thread_handle
);
2176 session_unlock(session
);
2177 session_put(session
);
2179 session_unlock_list();
2181 /* Wait for the destruction of all sessions to complete. */
2182 DBG("Waiting for the destruction of all sessions to complete");
2183 session_list_wait_empty();
2184 DBG("Destruction of all sessions completed");
2190 int main(int argc
, char **argv
)
2192 int ret
= 0, retval
= 0;
2194 const char *env_app_timeout
;
2195 struct lttng_pipe
*ust32_channel_monitor_pipe
= NULL
,
2196 *ust64_channel_monitor_pipe
= NULL
,
2197 *kernel_channel_monitor_pipe
= NULL
;
2198 struct lttng_thread
*ht_cleanup_thread
= NULL
;
2199 struct timer_thread_parameters timer_thread_parameters
;
2200 /* Rotation thread handle. */
2201 struct rotation_thread_handle
*rotation_thread_handle
= NULL
;
2202 /* Queue of rotation jobs populated by the sessiond-timer. */
2203 struct rotation_thread_timer_queue
*rotation_timer_queue
= NULL
;
2204 struct lttng_thread
*client_thread
= NULL
;
2206 init_kernel_workarounds();
2208 rcu_register_thread();
2210 if (set_signal_handler()) {
2212 goto exit_set_signal_handler
;
2215 if (timer_signal_init()) {
2217 goto exit_set_signal_handler
;
2220 page_size
= sysconf(_SC_PAGESIZE
);
2221 if (page_size
< 0) {
2222 PERROR("sysconf _SC_PAGESIZE");
2223 page_size
= LONG_MAX
;
2224 WARN("Fallback page size to %ld", page_size
);
2227 ret
= sessiond_config_init(&config
);
2230 goto exit_set_signal_handler
;
2234 * Init config from environment variables.
2235 * Command line option override env configuration per-doc. Do env first.
2237 sessiond_config_apply_env_config(&config
);
2240 * Parse arguments and load the daemon configuration file.
2242 * We have an exit_options exit path to free memory reserved by
2243 * set_options. This is needed because the rest of sessiond_cleanup()
2244 * depends on ht_cleanup_thread, which depends on lttng_daemonize, which
2245 * depends on set_options.
2248 if (set_options(argc
, argv
)) {
2254 * Resolve all paths received as arguments, configuration option, or
2255 * through environment variable as absolute paths. This is necessary
2256 * since daemonizing causes the sessiond's current working directory
2259 ret
= sessiond_config_resolve_paths(&config
);
2265 lttng_opt_verbose
= config
.verbose
;
2266 lttng_opt_quiet
= config
.quiet
;
2267 kconsumer_data
.err_unix_sock_path
=
2268 config
.kconsumerd_err_unix_sock_path
.value
;
2269 kconsumer_data
.cmd_unix_sock_path
=
2270 config
.kconsumerd_cmd_unix_sock_path
.value
;
2271 ustconsumer32_data
.err_unix_sock_path
=
2272 config
.consumerd32_err_unix_sock_path
.value
;
2273 ustconsumer32_data
.cmd_unix_sock_path
=
2274 config
.consumerd32_cmd_unix_sock_path
.value
;
2275 ustconsumer64_data
.err_unix_sock_path
=
2276 config
.consumerd64_err_unix_sock_path
.value
;
2277 ustconsumer64_data
.cmd_unix_sock_path
=
2278 config
.consumerd64_cmd_unix_sock_path
.value
;
2279 set_clock_plugin_env();
2281 sessiond_config_log(&config
);
2283 if (create_lttng_rundir()) {
2288 /* Abort launch if a session daemon is already running. */
2289 if (check_existing_daemon()) {
2290 ERR("A session daemon is already running.");
2296 if (config
.daemonize
|| config
.background
) {
2299 ret
= lttng_daemonize(&child_ppid
, &recv_child_signal
,
2300 !config
.background
);
2307 * We are in the child. Make sure all other file descriptors are
2308 * closed, in case we are called with more opened file
2309 * descriptors than the standard ones and the lock file.
2311 for (i
= 3; i
< sysconf(_SC_OPEN_MAX
); i
++) {
2312 if (i
== lockfile_fd
) {
2319 if (run_as_create_worker(argv
[0]) < 0) {
2320 goto exit_create_run_as_worker_cleanup
;
2324 * Starting from here, we can create threads. This needs to be after
2325 * lttng_daemonize due to RCU.
2329 * Initialize the health check subsystem. This call should set the
2330 * appropriate time values.
2332 health_sessiond
= health_app_create(NR_HEALTH_SESSIOND_TYPES
);
2333 if (!health_sessiond
) {
2334 PERROR("health_app_create error");
2336 goto exit_health_sessiond_cleanup
;
2339 /* Create thread to clean up RCU hash tables */
2340 ht_cleanup_thread
= launch_ht_cleanup_thread();
2341 if (!ht_cleanup_thread
) {
2343 goto exit_ht_cleanup
;
2346 /* Create thread quit pipe */
2347 if (sessiond_init_thread_quit_pipe()) {
2349 goto exit_init_data
;
2352 /* Check if daemon is UID = 0 */
2353 is_root
= !getuid();
2355 /* Create global run dir with root access */
2357 kernel_channel_monitor_pipe
= lttng_pipe_open(0);
2358 if (!kernel_channel_monitor_pipe
) {
2359 ERR("Failed to create kernel consumer channel monitor pipe");
2361 goto exit_init_data
;
2363 kconsumer_data
.channel_monitor_pipe
=
2364 lttng_pipe_release_writefd(
2365 kernel_channel_monitor_pipe
);
2366 if (kconsumer_data
.channel_monitor_pipe
< 0) {
2368 goto exit_init_data
;
2372 /* Set consumer initial state */
2373 kernel_consumerd_state
= CONSUMER_STOPPED
;
2374 ust_consumerd_state
= CONSUMER_STOPPED
;
2376 ust32_channel_monitor_pipe
= lttng_pipe_open(0);
2377 if (!ust32_channel_monitor_pipe
) {
2378 ERR("Failed to create 32-bit user space consumer channel monitor pipe");
2380 goto exit_init_data
;
2382 ustconsumer32_data
.channel_monitor_pipe
= lttng_pipe_release_writefd(
2383 ust32_channel_monitor_pipe
);
2384 if (ustconsumer32_data
.channel_monitor_pipe
< 0) {
2386 goto exit_init_data
;
2390 * The rotation_thread_timer_queue structure is shared between the
2391 * sessiond timer thread and the rotation thread. The main thread keeps
2392 * its ownership and destroys it when both threads have been joined.
2394 rotation_timer_queue
= rotation_thread_timer_queue_create();
2395 if (!rotation_timer_queue
) {
2397 goto exit_init_data
;
2399 timer_thread_parameters
.rotation_thread_job_queue
=
2400 rotation_timer_queue
;
2402 ust64_channel_monitor_pipe
= lttng_pipe_open(0);
2403 if (!ust64_channel_monitor_pipe
) {
2404 ERR("Failed to create 64-bit user space consumer channel monitor pipe");
2406 goto exit_init_data
;
2408 ustconsumer64_data
.channel_monitor_pipe
= lttng_pipe_release_writefd(
2409 ust64_channel_monitor_pipe
);
2410 if (ustconsumer64_data
.channel_monitor_pipe
< 0) {
2412 goto exit_init_data
;
2416 * Init UST app hash table. Alloc hash table before this point since
2417 * cleanup() can get called after that point.
2419 if (ust_app_ht_alloc()) {
2420 ERR("Failed to allocate UST app hash table");
2422 goto exit_init_data
;
2426 * Initialize agent app hash table. We allocate the hash table here
2427 * since cleanup() can get called after this point.
2429 if (agent_app_ht_alloc()) {
2430 ERR("Failed to allocate Agent app hash table");
2432 goto exit_init_data
;
2436 * These actions must be executed as root. We do that *after* setting up
2437 * the sockets path because we MUST make the check for another daemon using
2438 * those paths *before* trying to set the kernel consumer sockets and init
2442 if (set_consumer_sockets(&kconsumer_data
)) {
2444 goto exit_init_data
;
2447 /* Setup kernel tracer */
2448 if (!config
.no_kernel
) {
2449 init_kernel_tracer();
2450 if (kernel_tracer_fd
>= 0) {
2451 ret
= syscall_init_table();
2453 ERR("Unable to populate syscall table. "
2454 "Syscall tracing won't work "
2455 "for this session daemon.");
2460 /* Set ulimit for open files */
2463 /* init lttng_fd tracking must be done after set_ulimit. */
2466 if (set_consumer_sockets(&ustconsumer64_data
)) {
2468 goto exit_init_data
;
2471 if (set_consumer_sockets(&ustconsumer32_data
)) {
2473 goto exit_init_data
;
2476 /* Set credentials to socket */
2477 if (is_root
&& set_permissions(config
.rundir
.value
)) {
2479 goto exit_init_data
;
2482 /* Get parent pid if -S, --sig-parent is specified. */
2483 if (config
.sig_parent
) {
2487 /* Setup the kernel pipe for waking up the kernel thread */
2488 if (is_root
&& !config
.no_kernel
) {
2489 if (utils_create_pipe_cloexec(kernel_poll_pipe
)) {
2491 goto exit_init_data
;
2495 /* Setup the thread apps communication pipe. */
2496 if (utils_create_pipe_cloexec(apps_cmd_pipe
)) {
2498 goto exit_init_data
;
2501 /* Setup the thread apps notify communication pipe. */
2502 if (utils_create_pipe_cloexec(apps_cmd_notify_pipe
)) {
2504 goto exit_init_data
;
2507 /* Initialize global buffer per UID and PID registry. */
2508 buffer_reg_init_uid_registry();
2509 buffer_reg_init_pid_registry();
2511 /* Init UST command queue. */
2512 cds_wfcq_init(&ust_cmd_queue
.head
, &ust_cmd_queue
.tail
);
2516 /* Check for the application socket timeout env variable. */
2517 env_app_timeout
= getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV
);
2518 if (env_app_timeout
) {
2519 config
.app_socket_timeout
= atoi(env_app_timeout
);
2521 config
.app_socket_timeout
= DEFAULT_APP_SOCKET_RW_TIMEOUT
;
2524 ret
= write_pidfile();
2526 ERR("Error in write_pidfile");
2528 goto exit_init_data
;
2531 /* Initialize communication library */
2533 /* Initialize TCP timeout values */
2534 lttcomm_inet_init();
2536 if (load_session_init_data(&load_info
) < 0) {
2538 goto exit_init_data
;
2540 load_info
->path
= config
.load_session_path
.value
;
2542 /* Create health-check thread. */
2543 if (!launch_health_management_thread()) {
2548 /* notification_thread_data acquires the pipes' read side. */
2549 notification_thread_handle
= notification_thread_handle_create(
2550 ust32_channel_monitor_pipe
,
2551 ust64_channel_monitor_pipe
,
2552 kernel_channel_monitor_pipe
);
2553 if (!notification_thread_handle
) {
2555 ERR("Failed to create notification thread shared data");
2556 goto exit_notification
;
2559 /* Create notification thread. */
2560 if (!launch_notification_thread(notification_thread_handle
)) {
2562 goto exit_notification
;
2565 /* Create timer thread. */
2566 if (!launch_timer_thread(&timer_thread_parameters
)) {
2568 goto exit_notification
;
2571 /* rotation_thread_data acquires the pipes' read side. */
2572 rotation_thread_handle
= rotation_thread_handle_create(
2573 rotation_timer_queue
,
2574 notification_thread_handle
);
2575 if (!rotation_thread_handle
) {
2577 ERR("Failed to create rotation thread shared data");
2582 /* Create rotation thread. */
2583 if (!launch_rotation_thread(rotation_thread_handle
)) {
2588 /* Create thread to manage the client socket */
2589 client_thread
= launch_client_thread();
2590 if (!client_thread
) {
2595 if (!launch_ust_dispatch_thread(&ust_cmd_queue
, apps_cmd_pipe
[1],
2596 apps_cmd_notify_pipe
[1])) {
2601 /* Create thread to manage application registration. */
2602 if (!launch_application_registration_thread(&ust_cmd_queue
)) {
2607 /* Create thread to manage application socket */
2608 ret
= pthread_create(&apps_thread
, default_pthread_attr(),
2609 thread_manage_apps
, (void *) NULL
);
2612 PERROR("pthread_create apps");
2618 /* Create thread to manage application notify socket */
2619 ret
= pthread_create(&apps_notify_thread
, default_pthread_attr(),
2620 ust_thread_manage_notify
, (void *) NULL
);
2623 PERROR("pthread_create notify");
2626 goto exit_apps_notify
;
2629 /* Create agent registration thread. */
2630 ret
= pthread_create(&agent_reg_thread
, default_pthread_attr(),
2631 agent_thread_manage_registration
, (void *) NULL
);
2634 PERROR("pthread_create agent");
2637 goto exit_agent_reg
;
2640 /* Don't start this thread if kernel tracing is not requested nor root */
2641 if (is_root
&& !config
.no_kernel
) {
2642 /* Create kernel thread to manage kernel event */
2643 ret
= pthread_create(&kernel_thread
, default_pthread_attr(),
2644 thread_manage_kernel
, (void *) NULL
);
2647 PERROR("pthread_create kernel");
2654 /* Create session loading thread. */
2655 ret
= pthread_create(&load_session_thread
, default_pthread_attr(),
2656 thread_load_session
, load_info
);
2659 PERROR("pthread_create load_session_thread");
2662 goto exit_load_session
;
2666 * This is where we start awaiting program completion (e.g. through
2667 * signal that asks threads to teardown).
2670 ret
= pthread_join(load_session_thread
, &status
);
2673 PERROR("pthread_join load_session_thread");
2677 /* Initiate teardown once activity occurs on the quit pipe. */
2678 sessiond_wait_for_quit_pipe(-1U);
2681 * Ensure that the client thread is no longer accepting new commands,
2682 * which could cause new sessions to be created.
2684 if (!lttng_thread_shutdown(client_thread
)) {
2685 ERR("Failed to shutdown the client thread, continuing teardown");
2686 lttng_thread_put(client_thread
);
2687 client_thread
= NULL
;
2690 destroy_all_sessions_and_wait();
2693 if (is_root
&& !config
.no_kernel
) {
2694 ret
= pthread_join(kernel_thread
, &status
);
2697 PERROR("pthread_join");
2703 ret
= pthread_join(agent_reg_thread
, &status
);
2706 PERROR("pthread_join agent");
2711 ret
= pthread_join(apps_notify_thread
, &status
);
2714 PERROR("pthread_join apps notify");
2719 ret
= pthread_join(apps_thread
, &status
);
2722 PERROR("pthread_join apps");
2731 lttng_thread_list_shutdown_orphans();
2734 if (client_thread
) {
2735 lttng_thread_put(client_thread
);
2739 * Wait for all pending call_rcu work to complete before tearing
2740 * down data structures. call_rcu worker may be trying to
2741 * perform lookups in those structures.
2745 * sessiond_cleanup() is called when no other thread is running, except
2746 * the ht_cleanup thread, which is needed to destroy the hash tables.
2748 rcu_thread_online();
2752 * Ensure all prior call_rcu are done. call_rcu callbacks may push
2753 * hash tables to the ht_cleanup thread. Therefore, we ensure that
2754 * the queue is empty before shutting down the clean-up thread.
2758 if (ht_cleanup_thread
) {
2759 lttng_thread_shutdown(ht_cleanup_thread
);
2760 lttng_thread_put(ht_cleanup_thread
);
2763 rcu_thread_offline();
2764 rcu_unregister_thread();
2766 if (rotation_thread_handle
) {
2767 rotation_thread_handle_destroy(rotation_thread_handle
);
2771 * After the rotation and timer thread have quit, we can safely destroy
2772 * the rotation_timer_queue.
2774 rotation_thread_timer_queue_destroy(rotation_timer_queue
);
2776 * The teardown of the notification system is performed after the
2777 * session daemon's teardown in order to allow it to be notified
2778 * of the active session and channels at the moment of the teardown.
2780 if (notification_thread_handle
) {
2781 notification_thread_handle_destroy(notification_thread_handle
);
2783 lttng_pipe_destroy(ust32_channel_monitor_pipe
);
2784 lttng_pipe_destroy(ust64_channel_monitor_pipe
);
2785 lttng_pipe_destroy(kernel_channel_monitor_pipe
);
2788 health_app_destroy(health_sessiond
);
2789 exit_health_sessiond_cleanup
:
2790 exit_create_run_as_worker_cleanup
:
2793 sessiond_cleanup_lock_file();
2794 sessiond_cleanup_options();
2796 exit_set_signal_handler
: