2 * Copyright (C) 2017 Julien Desfossez <jdesfossez@efficios.com>
3 * Copyright (C) 2018 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 * SPDX-License-Identifier: GPL-2.0-only
10 #include <lttng/trigger/trigger.h>
11 #include <common/error.h>
12 #include <common/config/session-config.h>
13 #include <common/defaults.h>
14 #include <common/utils.h>
15 #include <common/futex.h>
16 #include <common/align.h>
17 #include <common/time.h>
18 #include <common/hashtable/utils.h>
19 #include <sys/eventfd.h>
25 #include <common/kernel-ctl/kernel-ctl.h>
26 #include <lttng/notification/channel-internal.h>
27 #include <lttng/rotate-internal.h>
28 #include <lttng/location-internal.h>
29 #include <lttng/condition/condition-internal.h>
31 #include "rotation-thread.h"
32 #include "lttng-sessiond.h"
33 #include "health-sessiond.h"
38 #include "notification-thread-commands.h"
43 #include <urcu/list.h>
45 struct lttng_notification_channel
*rotate_notification_channel
= NULL
;
47 struct rotation_thread
{
48 struct lttng_poll_event events
;
51 struct rotation_thread_job
{
52 enum rotation_thread_job_type type
;
53 struct ltt_session
*session
;
54 /* List member in struct rotation_thread_timer_queue. */
55 struct cds_list_head head
;
59 * The timer thread enqueues jobs and wakes up the rotation thread.
60 * When the rotation thread wakes up, it empties the queue.
62 struct rotation_thread_timer_queue
{
63 struct lttng_pipe
*event_pipe
;
64 struct cds_list_head list
;
68 struct rotation_thread_handle
{
69 struct rotation_thread_timer_queue
*rotation_timer_queue
;
70 /* Access to the notification thread cmd_queue */
71 struct notification_thread_handle
*notification_thread_handle
;
72 /* Thread-specific quit pipe. */
73 struct lttng_pipe
*quit_pipe
;
77 const char *get_job_type_str(enum rotation_thread_job_type job_type
)
80 case ROTATION_THREAD_JOB_TYPE_CHECK_PENDING_ROTATION
:
81 return "CHECK_PENDING_ROTATION";
82 case ROTATION_THREAD_JOB_TYPE_SCHEDULED_ROTATION
:
83 return "SCHEDULED_ROTATION";
89 struct rotation_thread_timer_queue
*rotation_thread_timer_queue_create(void)
91 struct rotation_thread_timer_queue
*queue
= NULL
;
93 queue
= zmalloc(sizeof(*queue
));
95 PERROR("Failed to allocate timer rotate queue");
99 queue
->event_pipe
= lttng_pipe_open(FD_CLOEXEC
| O_NONBLOCK
);
100 CDS_INIT_LIST_HEAD(&queue
->list
);
101 pthread_mutex_init(&queue
->lock
, NULL
);
106 void rotation_thread_timer_queue_destroy(
107 struct rotation_thread_timer_queue
*queue
)
113 lttng_pipe_destroy(queue
->event_pipe
);
115 pthread_mutex_lock(&queue
->lock
);
116 assert(cds_list_empty(&queue
->list
));
117 pthread_mutex_unlock(&queue
->lock
);
118 pthread_mutex_destroy(&queue
->lock
);
123 * Destroy the thread data previously created by the init function.
125 void rotation_thread_handle_destroy(
126 struct rotation_thread_handle
*handle
)
128 lttng_pipe_destroy(handle
->quit_pipe
);
132 struct rotation_thread_handle
*rotation_thread_handle_create(
133 struct rotation_thread_timer_queue
*rotation_timer_queue
,
134 struct notification_thread_handle
*notification_thread_handle
)
136 struct rotation_thread_handle
*handle
;
138 handle
= zmalloc(sizeof(*handle
));
143 handle
->rotation_timer_queue
= rotation_timer_queue
;
144 handle
->notification_thread_handle
= notification_thread_handle
;
145 handle
->quit_pipe
= lttng_pipe_open(FD_CLOEXEC
);
146 if (!handle
->quit_pipe
) {
153 rotation_thread_handle_destroy(handle
);
158 * Called with the rotation_thread_timer_queue lock held.
159 * Return true if the same timer job already exists in the queue, false if not.
162 bool timer_job_exists(const struct rotation_thread_timer_queue
*queue
,
163 enum rotation_thread_job_type job_type
,
164 struct ltt_session
*session
)
167 struct rotation_thread_job
*job
;
169 cds_list_for_each_entry(job
, &queue
->list
, head
) {
170 if (job
->session
== session
&& job
->type
== job_type
) {
179 void rotation_thread_enqueue_job(struct rotation_thread_timer_queue
*queue
,
180 enum rotation_thread_job_type job_type
,
181 struct ltt_session
*session
)
184 const char dummy
= '!';
185 struct rotation_thread_job
*job
= NULL
;
186 const char *job_type_str
= get_job_type_str(job_type
);
188 pthread_mutex_lock(&queue
->lock
);
189 if (timer_job_exists(queue
, job_type
, session
)) {
191 * This timer job is already pending, we don't need to add
197 job
= zmalloc(sizeof(struct rotation_thread_job
));
199 PERROR("Failed to allocate rotation thread job of type \"%s\" for session \"%s\"",
200 job_type_str
, session
->name
);
203 /* No reason for this to fail as the caller must hold a reference. */
204 (void) session_get(session
);
206 job
->session
= session
;
207 job
->type
= job_type
;
208 cds_list_add_tail(&job
->head
, &queue
->list
);
210 ret
= lttng_write(lttng_pipe_get_writefd(queue
->event_pipe
), &dummy
,
214 * We do not want to block in the timer handler, the job has
215 * been enqueued in the list, the wakeup pipe is probably full,
216 * the job will be processed when the rotation_thread catches
219 if (errno
== EAGAIN
|| errno
== EWOULDBLOCK
) {
221 * Not an error, but would be surprising and indicate
222 * that the rotation thread can't keep up with the
225 DBG("Wake-up pipe of rotation thread job queue is full");
228 PERROR("Failed to wake-up the rotation thread after pushing a job of type \"%s\" for session \"%s\"",
229 job_type_str
, session
->name
);
234 pthread_mutex_unlock(&queue
->lock
);
238 int init_poll_set(struct lttng_poll_event
*poll_set
,
239 struct rotation_thread_handle
*handle
)
244 * Create pollset with size 3:
245 * - rotation thread quit pipe,
246 * - rotation thread timer queue pipe,
247 * - notification channel sock,
249 ret
= lttng_poll_create(poll_set
, 5, LTTNG_CLOEXEC
);
254 ret
= lttng_poll_add(poll_set
,
255 lttng_pipe_get_readfd(handle
->quit_pipe
),
258 ERR("[rotation-thread] Failed to add quit pipe read fd to poll set");
262 ret
= lttng_poll_add(poll_set
,
263 lttng_pipe_get_readfd(handle
->rotation_timer_queue
->event_pipe
),
266 ERR("[rotation-thread] Failed to add rotate_pending fd to poll set");
272 lttng_poll_clean(poll_set
);
277 void fini_thread_state(struct rotation_thread
*state
)
279 lttng_poll_clean(&state
->events
);
280 if (rotate_notification_channel
) {
281 lttng_notification_channel_destroy(rotate_notification_channel
);
286 int init_thread_state(struct rotation_thread_handle
*handle
,
287 struct rotation_thread
*state
)
291 memset(state
, 0, sizeof(*state
));
292 lttng_poll_init(&state
->events
);
294 ret
= init_poll_set(&state
->events
, handle
);
296 ERR("[rotation-thread] Failed to initialize rotation thread poll set");
300 rotate_notification_channel
= lttng_notification_channel_create(
301 lttng_session_daemon_notification_endpoint
);
302 if (!rotate_notification_channel
) {
303 ERR("[rotation-thread] Could not create notification channel");
307 ret
= lttng_poll_add(&state
->events
, rotate_notification_channel
->socket
,
310 ERR("[rotation-thread] Failed to add notification fd to pollset");
319 void check_session_rotation_pending_on_consumers(struct ltt_session
*session
,
320 bool *_rotation_completed
)
323 struct consumer_socket
*socket
;
324 struct cds_lfht_iter iter
;
325 enum consumer_trace_chunk_exists_status exists_status
;
327 bool chunk_exists_on_peer
= false;
328 enum lttng_trace_chunk_status chunk_status
;
330 assert(session
->chunk_being_archived
);
333 * Check for a local pending rotation on all consumers (32-bit
334 * user space, 64-bit user space, and kernel).
337 if (!session
->ust_session
) {
340 cds_lfht_for_each_entry(session
->ust_session
->consumer
->socks
->ht
,
341 &iter
, socket
, node
.node
) {
342 relayd_id
= session
->ust_session
->consumer
->type
== CONSUMER_DST_LOCAL
?
344 session
->ust_session
->consumer
->net_seq_index
;
346 pthread_mutex_lock(socket
->lock
);
347 ret
= consumer_trace_chunk_exists(socket
,
349 session
->id
, session
->chunk_being_archived
,
352 pthread_mutex_unlock(socket
->lock
);
353 ERR("Error occurred while checking rotation status on consumer daemon");
357 if (exists_status
!= CONSUMER_TRACE_CHUNK_EXISTS_STATUS_UNKNOWN_CHUNK
) {
358 pthread_mutex_unlock(socket
->lock
);
359 chunk_exists_on_peer
= true;
362 pthread_mutex_unlock(socket
->lock
);
366 if (!session
->kernel_session
) {
369 cds_lfht_for_each_entry(session
->kernel_session
->consumer
->socks
->ht
,
370 &iter
, socket
, node
.node
) {
371 pthread_mutex_lock(socket
->lock
);
372 relayd_id
= session
->kernel_session
->consumer
->type
== CONSUMER_DST_LOCAL
?
374 session
->kernel_session
->consumer
->net_seq_index
;
376 ret
= consumer_trace_chunk_exists(socket
,
378 session
->id
, session
->chunk_being_archived
,
381 pthread_mutex_unlock(socket
->lock
);
382 ERR("Error occurred while checking rotation status on consumer daemon");
386 if (exists_status
!= CONSUMER_TRACE_CHUNK_EXISTS_STATUS_UNKNOWN_CHUNK
) {
387 pthread_mutex_unlock(socket
->lock
);
388 chunk_exists_on_peer
= true;
391 pthread_mutex_unlock(socket
->lock
);
397 if (!chunk_exists_on_peer
) {
398 uint64_t chunk_being_archived_id
;
400 chunk_status
= lttng_trace_chunk_get_id(
401 session
->chunk_being_archived
,
402 &chunk_being_archived_id
);
403 assert(chunk_status
== LTTNG_TRACE_CHUNK_STATUS_OK
);
404 DBG("[rotation-thread] Rotation of trace archive %" PRIu64
" of session \"%s\" is complete on all consumers",
405 chunk_being_archived_id
,
408 *_rotation_completed
= !chunk_exists_on_peer
;
410 ret
= session_reset_rotation_state(session
,
411 LTTNG_ROTATION_STATE_ERROR
);
413 ERR("Failed to reset rotation state of session \"%s\"",
420 * Check if the last rotation was completed, called with session lock held.
421 * Should only return non-zero in the event of a fatal error. Doing so will
422 * shutdown the thread.
425 int check_session_rotation_pending(struct ltt_session
*session
,
426 struct notification_thread_handle
*notification_thread_handle
)
429 struct lttng_trace_archive_location
*location
;
430 enum lttng_trace_chunk_status chunk_status
;
431 bool rotation_completed
= false;
432 const char *archived_chunk_name
;
433 uint64_t chunk_being_archived_id
;
435 if (!session
->chunk_being_archived
) {
440 chunk_status
= lttng_trace_chunk_get_id(session
->chunk_being_archived
,
441 &chunk_being_archived_id
);
442 assert(chunk_status
== LTTNG_TRACE_CHUNK_STATUS_OK
);
444 DBG("[rotation-thread] Checking for pending rotation on session \"%s\", trace archive %" PRIu64
,
445 session
->name
, chunk_being_archived_id
);
448 * The rotation-pending check timer of a session is launched in
449 * one-shot mode. If the rotation is incomplete, the rotation
450 * thread will re-enable the pending-check timer.
452 * The timer thread can't stop the timer itself since it is involved
453 * in the check for the timer's quiescence.
455 ret
= timer_session_rotation_pending_check_stop(session
);
457 goto check_ongoing_rotation
;
460 check_session_rotation_pending_on_consumers(session
,
461 &rotation_completed
);
462 if (!rotation_completed
||
463 session
->rotation_state
== LTTNG_ROTATION_STATE_ERROR
) {
464 goto check_ongoing_rotation
;
468 * Now we can clear the "ONGOING" state in the session. New
469 * rotations can start now.
471 chunk_status
= lttng_trace_chunk_get_name(session
->chunk_being_archived
,
472 &archived_chunk_name
, NULL
);
473 assert(chunk_status
== LTTNG_TRACE_CHUNK_STATUS_OK
);
474 free(session
->last_archived_chunk_name
);
475 session
->last_archived_chunk_name
= strdup(archived_chunk_name
);
476 if (!session
->last_archived_chunk_name
) {
477 PERROR("Failed to duplicate archived chunk name");
479 session_reset_rotation_state(session
, LTTNG_ROTATION_STATE_COMPLETED
);
481 if (!session
->quiet_rotation
) {
482 location
= session_get_trace_archive_location(session
);
483 ret
= notification_thread_command_session_rotation_completed(
484 notification_thread_handle
,
488 session
->last_archived_chunk_id
.value
,
490 lttng_trace_archive_location_put(location
);
491 if (ret
!= LTTNG_OK
) {
492 ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s",
498 check_ongoing_rotation
:
499 if (session
->rotation_state
== LTTNG_ROTATION_STATE_ONGOING
) {
500 uint64_t chunk_being_archived_id
;
502 chunk_status
= lttng_trace_chunk_get_id(
503 session
->chunk_being_archived
,
504 &chunk_being_archived_id
);
505 assert(chunk_status
== LTTNG_TRACE_CHUNK_STATUS_OK
);
507 DBG("[rotation-thread] Rotation of trace archive %" PRIu64
" is still pending for session %s",
508 chunk_being_archived_id
, session
->name
);
509 ret
= timer_session_rotation_pending_check_start(session
,
510 DEFAULT_ROTATE_PENDING_TIMER
);
512 ERR("Failed to re-enable rotation pending timer");
522 /* Call with the session and session_list locks held. */
524 int launch_session_rotation(struct ltt_session
*session
)
527 struct lttng_rotate_session_return rotation_return
;
529 DBG("[rotation-thread] Launching scheduled time-based rotation on session \"%s\"",
532 ret
= cmd_rotate_session(session
, &rotation_return
, false,
533 LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED
);
534 if (ret
== LTTNG_OK
) {
535 DBG("[rotation-thread] Scheduled time-based rotation successfully launched on session \"%s\"",
538 /* Don't consider errors as fatal. */
539 DBG("[rotation-thread] Scheduled time-based rotation aborted for session %s: %s",
540 session
->name
, lttng_strerror(ret
));
546 int run_job(struct rotation_thread_job
*job
, struct ltt_session
*session
,
547 struct notification_thread_handle
*notification_thread_handle
)
552 case ROTATION_THREAD_JOB_TYPE_SCHEDULED_ROTATION
:
553 ret
= launch_session_rotation(session
);
555 case ROTATION_THREAD_JOB_TYPE_CHECK_PENDING_ROTATION
:
556 ret
= check_session_rotation_pending(session
,
557 notification_thread_handle
);
566 int handle_job_queue(struct rotation_thread_handle
*handle
,
567 struct rotation_thread
*state
,
568 struct rotation_thread_timer_queue
*queue
)
573 struct ltt_session
*session
;
574 struct rotation_thread_job
*job
;
576 /* Take the queue lock only to pop an element from the list. */
577 pthread_mutex_lock(&queue
->lock
);
578 if (cds_list_empty(&queue
->list
)) {
579 pthread_mutex_unlock(&queue
->lock
);
582 job
= cds_list_first_entry(&queue
->list
,
584 cds_list_del(&job
->head
);
585 pthread_mutex_unlock(&queue
->lock
);
588 session
= job
->session
;
590 DBG("[rotation-thread] Session \"%s\" not found",
593 * This is a non-fatal error, and we cannot report it to
594 * the user (timer), so just print the error and
595 * continue the processing.
597 * While the timer thread will purge pending signals for
598 * a session on the session's destruction, it is
599 * possible for a job targeting that session to have
600 * already been queued before it was destroyed.
603 session_put(session
);
604 session_unlock_list();
608 session_lock(session
);
609 ret
= run_job(job
, session
, handle
->notification_thread_handle
);
610 session_unlock(session
);
611 /* Release reference held by the job. */
612 session_put(session
);
613 session_unlock_list();
627 int handle_condition(const struct lttng_condition
*condition
,
628 const struct lttng_evaluation
*evaluation
,
629 struct notification_thread_handle
*notification_thread_handle
)
632 const char *condition_session_name
= NULL
;
633 enum lttng_condition_type condition_type
;
634 enum lttng_condition_status condition_status
;
635 enum lttng_evaluation_status evaluation_status
;
637 struct ltt_session
*session
;
639 condition_type
= lttng_condition_get_type(condition
);
641 if (condition_type
!= LTTNG_CONDITION_TYPE_SESSION_CONSUMED_SIZE
) {
643 ERR("[rotation-thread] Condition type and session usage type are not the same");
647 /* Fetch info to test */
648 condition_status
= lttng_condition_session_consumed_size_get_session_name(
649 condition
, &condition_session_name
);
650 if (condition_status
!= LTTNG_CONDITION_STATUS_OK
) {
651 ERR("[rotation-thread] Session name could not be fetched");
655 evaluation_status
= lttng_evaluation_session_consumed_size_get_consumed_size(evaluation
,
657 if (evaluation_status
!= LTTNG_EVALUATION_STATUS_OK
) {
658 ERR("[rotation-thread] Failed to get evaluation");
664 session
= session_find_by_name(condition_session_name
);
666 DBG("[rotation-thread] Failed to find session while handling notification: session name = `%s`",
667 condition_session_name
);
669 * Not a fatal error: a session can be destroyed before we get
670 * the chance to handle the notification.
673 session_unlock_list();
676 session_lock(session
);
678 ret
= unsubscribe_session_consumed_size_rotation(session
,
679 notification_thread_handle
);
684 ret
= cmd_rotate_session(session
, NULL
, false,
685 LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED
);
686 if (ret
== -LTTNG_ERR_ROTATION_PENDING
) {
687 DBG("Rotate already pending, subscribe to the next threshold value");
688 } else if (ret
!= LTTNG_OK
) {
689 ERR("[rotation-thread] Failed to rotate on size notification with error: %s",
690 lttng_strerror(ret
));
694 ret
= subscribe_session_consumed_size_rotation(session
,
695 consumed
+ session
->rotate_size
,
696 notification_thread_handle
);
698 ERR("[rotation-thread] Failed to subscribe to session consumed size condition");
704 session_unlock(session
);
705 session_put(session
);
706 session_unlock_list();
712 int handle_notification_channel(int fd
,
713 struct rotation_thread_handle
*handle
,
714 struct rotation_thread
*state
)
717 bool notification_pending
;
718 struct lttng_notification
*notification
= NULL
;
719 enum lttng_notification_channel_status status
;
720 const struct lttng_evaluation
*notification_evaluation
;
721 const struct lttng_condition
*notification_condition
;
723 status
= lttng_notification_channel_has_pending_notification(
724 rotate_notification_channel
, ¬ification_pending
);
725 if (status
!= LTTNG_NOTIFICATION_CHANNEL_STATUS_OK
) {
726 ERR("[rotation-thread ]Error occurred while checking for pending notification");
731 if (!notification_pending
) {
736 /* Receive the next notification. */
737 status
= lttng_notification_channel_get_next_notification(
738 rotate_notification_channel
,
742 case LTTNG_NOTIFICATION_CHANNEL_STATUS_OK
:
744 case LTTNG_NOTIFICATION_CHANNEL_STATUS_NOTIFICATIONS_DROPPED
:
745 /* Not an error, we will wait for the next one */
748 case LTTNG_NOTIFICATION_CHANNEL_STATUS_CLOSED
:
749 ERR("Notification channel was closed");
753 /* Unhandled conditions / errors. */
754 ERR("Unknown notification channel status");
759 notification_condition
= lttng_notification_get_condition(notification
);
760 notification_evaluation
= lttng_notification_get_evaluation(notification
);
762 ret
= handle_condition(notification_condition
, notification_evaluation
,
763 handle
->notification_thread_handle
);
766 lttng_notification_destroy(notification
);
771 void *thread_rotation(void *data
)
774 struct rotation_thread_handle
*handle
= data
;
775 struct rotation_thread thread
;
778 DBG("[rotation-thread] Started rotation thread");
779 rcu_register_thread();
781 health_register(health_sessiond
, HEALTH_SESSIOND_TYPE_ROTATION
);
782 health_code_update();
785 ERR("[rotation-thread] Invalid thread context provided");
789 queue_pipe_fd
= lttng_pipe_get_readfd(
790 handle
->rotation_timer_queue
->event_pipe
);
793 ret
= init_thread_state(handle
, &thread
);
802 DBG("[rotation-thread] Entering poll wait");
803 ret
= lttng_poll_wait(&thread
.events
, -1);
804 DBG("[rotation-thread] Poll wait returned (%i)", ret
);
808 * Restart interrupted system call.
810 if (errno
== EINTR
) {
813 ERR("[rotation-thread] Error encountered during lttng_poll_wait (%i)", ret
);
818 for (i
= 0; i
< fd_count
; i
++) {
819 int fd
= LTTNG_POLL_GETFD(&thread
.events
, i
);
820 uint32_t revents
= LTTNG_POLL_GETEV(&thread
.events
, i
);
822 DBG("[rotation-thread] Handling fd (%i) activity (%u)",
825 if (revents
& LPOLLERR
) {
826 ERR("[rotation-thread] Polling returned an error on fd %i", fd
);
830 if (fd
== rotate_notification_channel
->socket
) {
831 ret
= handle_notification_channel(fd
, handle
,
834 ERR("[rotation-thread] Error occurred while handling activity on notification channel socket");
838 /* Job queue or quit pipe activity. */
841 * The job queue is serviced if there is
842 * activity on the quit pipe to ensure it is
843 * flushed and all references held in the queue
846 ret
= handle_job_queue(handle
, &thread
,
847 handle
->rotation_timer_queue
);
849 ERR("[rotation-thread] Failed to handle rotation timer pipe event");
853 if (fd
== queue_pipe_fd
) {
856 ret
= lttng_read(fd
, &buf
, 1);
858 ERR("[rotation-thread] Failed to read from wakeup pipe (fd = %i)", fd
);
862 DBG("[rotation-thread] Quit pipe activity");
870 DBG("[rotation-thread] Exit");
871 fini_thread_state(&thread
);
873 health_unregister(health_sessiond
);
874 rcu_thread_offline();
875 rcu_unregister_thread();
880 bool shutdown_rotation_thread(void *thread_data
)
882 struct rotation_thread_handle
*handle
= thread_data
;
883 const int write_fd
= lttng_pipe_get_writefd(handle
->quit_pipe
);
885 return notify_thread_pipe(write_fd
) == 1;
888 bool launch_rotation_thread(struct rotation_thread_handle
*handle
)
890 struct lttng_thread
*thread
;
892 thread
= lttng_thread_create("Rotation",
894 shutdown_rotation_thread
,
900 lttng_thread_put(thread
);