2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 * SPDX-License-Identifier: GPL-2.0-only
17 #include <sys/socket.h>
18 #include <sys/types.h>
23 #include <bin/lttng-consumerd/health-consumerd.h>
24 #include <common/common.h>
25 #include <common/kernel-ctl/kernel-ctl.h>
26 #include <common/sessiond-comm/sessiond-comm.h>
27 #include <common/sessiond-comm/relayd.h>
28 #include <common/compat/fcntl.h>
29 #include <common/compat/endian.h>
30 #include <common/pipe.h>
31 #include <common/relayd/relayd.h>
32 #include <common/utils.h>
33 #include <common/consumer/consumer-stream.h>
34 #include <common/index/index.h>
35 #include <common/consumer/consumer-timer.h>
36 #include <common/optional.h>
37 #include <common/buffer-view.h>
38 #include <common/consumer/consumer.h>
41 #include "kernel-consumer.h"
43 extern struct lttng_consumer_global_data consumer_data
;
44 extern int consumer_poll_timeout
;
47 * Take a snapshot for a specific fd
49 * Returns 0 on success, < 0 on error
51 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream
*stream
)
54 int infd
= stream
->wait_fd
;
56 ret
= kernctl_snapshot(infd
);
58 * -EAGAIN is not an error, it just means that there is no data to
61 if (ret
!= 0 && ret
!= -EAGAIN
) {
62 PERROR("Getting sub-buffer snapshot.");
69 * Sample consumed and produced positions for a specific fd.
71 * Returns 0 on success, < 0 on error.
73 int lttng_kconsumer_sample_snapshot_positions(
74 struct lttng_consumer_stream
*stream
)
78 return kernctl_snapshot_sample_positions(stream
->wait_fd
);
82 * Get the produced position
84 * Returns 0 on success, < 0 on error
86 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream
*stream
,
90 int infd
= stream
->wait_fd
;
92 ret
= kernctl_snapshot_get_produced(infd
, pos
);
94 PERROR("kernctl_snapshot_get_produced");
101 * Get the consumerd position
103 * Returns 0 on success, < 0 on error
105 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream
*stream
,
109 int infd
= stream
->wait_fd
;
111 ret
= kernctl_snapshot_get_consumed(infd
, pos
);
113 PERROR("kernctl_snapshot_get_consumed");
120 int get_current_subbuf_addr(struct lttng_consumer_stream
*stream
,
124 unsigned long mmap_offset
;
125 const char *mmap_base
= stream
->mmap_base
;
127 ret
= kernctl_get_mmap_read_offset(stream
->wait_fd
, &mmap_offset
);
129 PERROR("Failed to get mmap read offset");
133 *addr
= mmap_base
+ mmap_offset
;
139 * Take a snapshot of all the stream of a channel
140 * RCU read-side lock must be held across this function to ensure existence of
141 * channel. The channel lock must be held by the caller.
143 * Returns 0 on success, < 0 on error
145 static int lttng_kconsumer_snapshot_channel(
146 struct lttng_consumer_channel
*channel
,
147 uint64_t key
, char *path
, uint64_t relayd_id
,
148 uint64_t nb_packets_per_stream
,
149 struct lttng_consumer_local_data
*ctx
)
152 struct lttng_consumer_stream
*stream
;
154 DBG("Kernel consumer snapshot channel %" PRIu64
, key
);
158 /* Splice is not supported yet for channel snapshot. */
159 if (channel
->output
!= CONSUMER_CHANNEL_MMAP
) {
160 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
166 cds_list_for_each_entry(stream
, &channel
->streams
.head
, send_node
) {
167 unsigned long consumed_pos
, produced_pos
;
169 health_code_update();
172 * Lock stream because we are about to change its state.
174 pthread_mutex_lock(&stream
->lock
);
176 assert(channel
->trace_chunk
);
177 if (!lttng_trace_chunk_get(channel
->trace_chunk
)) {
179 * Can't happen barring an internal error as the channel
180 * holds a reference to the trace chunk.
182 ERR("Failed to acquire reference to channel's trace chunk");
186 assert(!stream
->trace_chunk
);
187 stream
->trace_chunk
= channel
->trace_chunk
;
190 * Assign the received relayd ID so we can use it for streaming. The streams
191 * are not visible to anyone so this is OK to change it.
193 stream
->net_seq_idx
= relayd_id
;
194 channel
->relayd_id
= relayd_id
;
195 if (relayd_id
!= (uint64_t) -1ULL) {
196 ret
= consumer_send_relayd_stream(stream
, path
);
198 ERR("sending stream to relayd");
202 ret
= consumer_stream_create_output_files(stream
,
207 DBG("Kernel consumer snapshot stream (%" PRIu64
")",
211 ret
= kernctl_buffer_flush_empty(stream
->wait_fd
);
214 * Doing a buffer flush which does not take into
215 * account empty packets. This is not perfect
216 * for stream intersection, but required as a
217 * fall-back when "flush_empty" is not
218 * implemented by lttng-modules.
220 ret
= kernctl_buffer_flush(stream
->wait_fd
);
222 ERR("Failed to flush kernel stream");
228 ret
= lttng_kconsumer_take_snapshot(stream
);
230 ERR("Taking kernel snapshot");
234 ret
= lttng_kconsumer_get_produced_snapshot(stream
, &produced_pos
);
236 ERR("Produced kernel snapshot position");
240 ret
= lttng_kconsumer_get_consumed_snapshot(stream
, &consumed_pos
);
242 ERR("Consumerd kernel snapshot position");
246 consumed_pos
= consumer_get_consume_start_pos(consumed_pos
,
247 produced_pos
, nb_packets_per_stream
,
248 stream
->max_sb_size
);
250 while ((long) (consumed_pos
- produced_pos
) < 0) {
252 unsigned long len
, padded_len
;
253 const char *subbuf_addr
;
254 struct lttng_buffer_view subbuf_view
;
256 health_code_update();
257 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos
);
259 ret
= kernctl_get_subbuf(stream
->wait_fd
, &consumed_pos
);
261 if (ret
!= -EAGAIN
) {
262 PERROR("kernctl_get_subbuf snapshot");
265 DBG("Kernel consumer get subbuf failed. Skipping it.");
266 consumed_pos
+= stream
->max_sb_size
;
267 stream
->chan
->lost_packets
++;
271 ret
= kernctl_get_subbuf_size(stream
->wait_fd
, &len
);
273 ERR("Snapshot kernctl_get_subbuf_size");
274 goto error_put_subbuf
;
277 ret
= kernctl_get_padded_subbuf_size(stream
->wait_fd
, &padded_len
);
279 ERR("Snapshot kernctl_get_padded_subbuf_size");
280 goto error_put_subbuf
;
283 ret
= get_current_subbuf_addr(stream
, &subbuf_addr
);
285 goto error_put_subbuf
;
288 subbuf_view
= lttng_buffer_view_init(
289 subbuf_addr
, 0, padded_len
);
290 read_len
= lttng_consumer_on_read_subbuffer_mmap(ctx
,
291 stream
, &subbuf_view
,
294 * We write the padded len in local tracefiles but the data len
295 * when using a relay. Display the error but continue processing
296 * to try to release the subbuffer.
298 if (relayd_id
!= (uint64_t) -1ULL) {
299 if (read_len
!= len
) {
300 ERR("Error sending to the relay (ret: %zd != len: %lu)",
304 if (read_len
!= padded_len
) {
305 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
306 read_len
, padded_len
);
310 ret
= kernctl_put_subbuf(stream
->wait_fd
);
312 ERR("Snapshot kernctl_put_subbuf");
315 consumed_pos
+= stream
->max_sb_size
;
318 if (relayd_id
== (uint64_t) -1ULL) {
319 if (stream
->out_fd
>= 0) {
320 ret
= close(stream
->out_fd
);
322 PERROR("Kernel consumer snapshot close out_fd");
328 close_relayd_stream(stream
);
329 stream
->net_seq_idx
= (uint64_t) -1ULL;
331 lttng_trace_chunk_put(stream
->trace_chunk
);
332 stream
->trace_chunk
= NULL
;
333 pthread_mutex_unlock(&stream
->lock
);
341 ret
= kernctl_put_subbuf(stream
->wait_fd
);
343 ERR("Snapshot kernctl_put_subbuf error path");
346 pthread_mutex_unlock(&stream
->lock
);
353 * Read the whole metadata available for a snapshot.
354 * RCU read-side lock must be held across this function to ensure existence of
355 * metadata_channel. The channel lock must be held by the caller.
357 * Returns 0 on success, < 0 on error
359 static int lttng_kconsumer_snapshot_metadata(
360 struct lttng_consumer_channel
*metadata_channel
,
361 uint64_t key
, char *path
, uint64_t relayd_id
,
362 struct lttng_consumer_local_data
*ctx
)
364 int ret
, use_relayd
= 0;
366 struct lttng_consumer_stream
*metadata_stream
;
370 DBG("Kernel consumer snapshot metadata with key %" PRIu64
" at path %s",
375 metadata_stream
= metadata_channel
->metadata_stream
;
376 assert(metadata_stream
);
378 pthread_mutex_lock(&metadata_stream
->lock
);
379 assert(metadata_channel
->trace_chunk
);
380 assert(metadata_stream
->trace_chunk
);
382 /* Flag once that we have a valid relayd for the stream. */
383 if (relayd_id
!= (uint64_t) -1ULL) {
388 ret
= consumer_send_relayd_stream(metadata_stream
, path
);
393 ret
= consumer_stream_create_output_files(metadata_stream
,
401 health_code_update();
403 ret_read
= lttng_consumer_read_subbuffer(metadata_stream
, ctx
, true);
405 if (ret_read
!= -EAGAIN
) {
406 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
411 /* ret_read is negative at this point so we will exit the loop. */
414 } while (ret_read
>= 0);
417 close_relayd_stream(metadata_stream
);
418 metadata_stream
->net_seq_idx
= (uint64_t) -1ULL;
420 if (metadata_stream
->out_fd
>= 0) {
421 ret
= close(metadata_stream
->out_fd
);
423 PERROR("Kernel consumer snapshot metadata close out_fd");
425 * Don't go on error here since the snapshot was successful at this
426 * point but somehow the close failed.
429 metadata_stream
->out_fd
= -1;
430 lttng_trace_chunk_put(metadata_stream
->trace_chunk
);
431 metadata_stream
->trace_chunk
= NULL
;
437 pthread_mutex_unlock(&metadata_stream
->lock
);
438 cds_list_del(&metadata_stream
->send_node
);
439 consumer_stream_destroy(metadata_stream
, NULL
);
440 metadata_channel
->metadata_stream
= NULL
;
446 * Receive command from session daemon and process it.
448 * Return 1 on success else a negative value or 0.
450 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
451 int sock
, struct pollfd
*consumer_sockpoll
)
454 enum lttcomm_return_code ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
455 struct lttcomm_consumer_msg msg
;
457 health_code_update();
459 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
460 if (ret
!= sizeof(msg
)) {
462 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_ERROR_RECV_CMD
);
468 health_code_update();
470 /* Deprecated command */
471 assert(msg
.cmd_type
!= LTTNG_CONSUMER_STOP
);
473 health_code_update();
475 /* relayd needs RCU read-side protection */
478 switch (msg
.cmd_type
) {
479 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
481 /* Session daemon status message are handled in the following call. */
482 consumer_add_relayd_socket(msg
.u
.relayd_sock
.net_index
,
483 msg
.u
.relayd_sock
.type
, ctx
, sock
, consumer_sockpoll
,
484 &msg
.u
.relayd_sock
.sock
, msg
.u
.relayd_sock
.session_id
,
485 msg
.u
.relayd_sock
.relayd_session_id
);
488 case LTTNG_CONSUMER_ADD_CHANNEL
:
490 struct lttng_consumer_channel
*new_channel
;
492 const uint64_t chunk_id
= msg
.u
.channel
.chunk_id
.value
;
494 health_code_update();
496 /* First send a status message before receiving the fds. */
497 ret
= consumer_send_status_msg(sock
, ret_code
);
499 /* Somehow, the session daemon is not responding anymore. */
503 health_code_update();
505 DBG("consumer_add_channel %" PRIu64
, msg
.u
.channel
.channel_key
);
506 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
507 msg
.u
.channel
.session_id
,
508 msg
.u
.channel
.chunk_id
.is_set
?
510 msg
.u
.channel
.pathname
,
512 msg
.u
.channel
.relayd_id
, msg
.u
.channel
.output
,
513 msg
.u
.channel
.tracefile_size
,
514 msg
.u
.channel
.tracefile_count
, 0,
515 msg
.u
.channel
.monitor
,
516 msg
.u
.channel
.live_timer_interval
,
517 msg
.u
.channel
.is_live
,
519 if (new_channel
== NULL
) {
520 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_OUTFD_ERROR
);
523 new_channel
->nb_init_stream_left
= msg
.u
.channel
.nb_init_streams
;
524 switch (msg
.u
.channel
.output
) {
525 case LTTNG_EVENT_SPLICE
:
526 new_channel
->output
= CONSUMER_CHANNEL_SPLICE
;
528 case LTTNG_EVENT_MMAP
:
529 new_channel
->output
= CONSUMER_CHANNEL_MMAP
;
532 ERR("Channel output unknown %d", msg
.u
.channel
.output
);
536 /* Translate and save channel type. */
537 switch (msg
.u
.channel
.type
) {
538 case CONSUMER_CHANNEL_TYPE_DATA
:
539 case CONSUMER_CHANNEL_TYPE_METADATA
:
540 new_channel
->type
= msg
.u
.channel
.type
;
547 health_code_update();
549 if (ctx
->on_recv_channel
!= NULL
) {
550 ret_recv
= ctx
->on_recv_channel(new_channel
);
552 ret
= consumer_add_channel(new_channel
, ctx
);
553 } else if (ret_recv
< 0) {
557 ret
= consumer_add_channel(new_channel
, ctx
);
559 if (msg
.u
.channel
.type
== CONSUMER_CHANNEL_TYPE_DATA
&& !ret
) {
560 int monitor_start_ret
;
562 DBG("Consumer starting monitor timer");
563 consumer_timer_live_start(new_channel
,
564 msg
.u
.channel
.live_timer_interval
);
565 monitor_start_ret
= consumer_timer_monitor_start(
567 msg
.u
.channel
.monitor_timer_interval
);
568 if (monitor_start_ret
< 0) {
569 ERR("Starting channel monitoring timer failed");
575 health_code_update();
577 /* If we received an error in add_channel, we need to report it. */
579 ret
= consumer_send_status_msg(sock
, ret
);
588 case LTTNG_CONSUMER_ADD_STREAM
:
591 struct lttng_pipe
*stream_pipe
;
592 struct lttng_consumer_stream
*new_stream
;
593 struct lttng_consumer_channel
*channel
;
597 * Get stream's channel reference. Needed when adding the stream to the
600 channel
= consumer_find_channel(msg
.u
.stream
.channel_key
);
603 * We could not find the channel. Can happen if cpu hotplug
604 * happens while tearing down.
606 ERR("Unable to find channel key %" PRIu64
, msg
.u
.stream
.channel_key
);
607 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
610 health_code_update();
612 /* First send a status message before receiving the fds. */
613 ret
= consumer_send_status_msg(sock
, ret_code
);
615 /* Somehow, the session daemon is not responding anymore. */
616 goto error_add_stream_fatal
;
619 health_code_update();
621 if (ret_code
!= LTTCOMM_CONSUMERD_SUCCESS
) {
622 /* Channel was not found. */
623 goto error_add_stream_nosignal
;
628 ret
= lttng_consumer_poll_socket(consumer_sockpoll
);
631 goto error_add_stream_fatal
;
634 health_code_update();
636 /* Get stream file descriptor from socket */
637 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
638 if (ret
!= sizeof(fd
)) {
639 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_ERROR_RECV_FD
);
643 health_code_update();
646 * Send status code to session daemon only if the recv works. If the
647 * above recv() failed, the session daemon is notified through the
648 * error socket and the teardown is eventually done.
650 ret
= consumer_send_status_msg(sock
, ret_code
);
652 /* Somehow, the session daemon is not responding anymore. */
653 goto error_add_stream_nosignal
;
656 health_code_update();
658 pthread_mutex_lock(&channel
->lock
);
659 new_stream
= consumer_stream_create(
666 channel
->trace_chunk
,
671 if (new_stream
== NULL
) {
676 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_OUTFD_ERROR
);
679 pthread_mutex_unlock(&channel
->lock
);
680 goto error_add_stream_nosignal
;
683 new_stream
->wait_fd
= fd
;
684 ret
= kernctl_get_max_subbuf_size(new_stream
->wait_fd
,
685 &new_stream
->max_sb_size
);
687 pthread_mutex_unlock(&channel
->lock
);
688 ERR("Failed to get kernel maximal subbuffer size");
689 goto error_add_stream_nosignal
;
692 consumer_stream_update_channel_attributes(new_stream
,
696 * We've just assigned the channel to the stream so increment the
697 * refcount right now. We don't need to increment the refcount for
698 * streams in no monitor because we handle manually the cleanup of
699 * those. It is very important to make sure there is NO prior
700 * consumer_del_stream() calls or else the refcount will be unbalanced.
702 if (channel
->monitor
) {
703 uatomic_inc(&new_stream
->chan
->refcount
);
707 * The buffer flush is done on the session daemon side for the kernel
708 * so no need for the stream "hangup_flush_done" variable to be
709 * tracked. This is important for a kernel stream since we don't rely
710 * on the flush state of the stream to read data. It's not the case for
711 * user space tracing.
713 new_stream
->hangup_flush_done
= 0;
715 health_code_update();
717 pthread_mutex_lock(&new_stream
->lock
);
718 if (ctx
->on_recv_stream
) {
719 ret
= ctx
->on_recv_stream(new_stream
);
721 pthread_mutex_unlock(&new_stream
->lock
);
722 pthread_mutex_unlock(&channel
->lock
);
723 consumer_stream_free(new_stream
);
724 goto error_add_stream_nosignal
;
727 health_code_update();
729 if (new_stream
->metadata_flag
) {
730 channel
->metadata_stream
= new_stream
;
733 /* Do not monitor this stream. */
734 if (!channel
->monitor
) {
735 DBG("Kernel consumer add stream %s in no monitor mode with "
736 "relayd id %" PRIu64
, new_stream
->name
,
737 new_stream
->net_seq_idx
);
738 cds_list_add(&new_stream
->send_node
, &channel
->streams
.head
);
739 pthread_mutex_unlock(&new_stream
->lock
);
740 pthread_mutex_unlock(&channel
->lock
);
744 /* Send stream to relayd if the stream has an ID. */
745 if (new_stream
->net_seq_idx
!= (uint64_t) -1ULL) {
746 ret
= consumer_send_relayd_stream(new_stream
,
747 new_stream
->chan
->pathname
);
749 pthread_mutex_unlock(&new_stream
->lock
);
750 pthread_mutex_unlock(&channel
->lock
);
751 consumer_stream_free(new_stream
);
752 goto error_add_stream_nosignal
;
756 * If adding an extra stream to an already
757 * existing channel (e.g. cpu hotplug), we need
758 * to send the "streams_sent" command to relayd.
760 if (channel
->streams_sent_to_relayd
) {
761 ret
= consumer_send_relayd_streams_sent(
762 new_stream
->net_seq_idx
);
764 pthread_mutex_unlock(&new_stream
->lock
);
765 pthread_mutex_unlock(&channel
->lock
);
766 goto error_add_stream_nosignal
;
770 pthread_mutex_unlock(&new_stream
->lock
);
771 pthread_mutex_unlock(&channel
->lock
);
773 /* Get the right pipe where the stream will be sent. */
774 if (new_stream
->metadata_flag
) {
775 consumer_add_metadata_stream(new_stream
);
776 stream_pipe
= ctx
->consumer_metadata_pipe
;
778 consumer_add_data_stream(new_stream
);
779 stream_pipe
= ctx
->consumer_data_pipe
;
782 /* Visible to other threads */
783 new_stream
->globally_visible
= 1;
785 health_code_update();
787 ret
= lttng_pipe_write(stream_pipe
, &new_stream
, sizeof(new_stream
));
789 ERR("Consumer write %s stream to pipe %d",
790 new_stream
->metadata_flag
? "metadata" : "data",
791 lttng_pipe_get_writefd(stream_pipe
));
792 if (new_stream
->metadata_flag
) {
793 consumer_del_stream_for_metadata(new_stream
);
795 consumer_del_stream_for_data(new_stream
);
797 goto error_add_stream_nosignal
;
800 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64
,
801 new_stream
->name
, fd
, new_stream
->chan
->pathname
, new_stream
->relayd_stream_id
);
804 error_add_stream_nosignal
:
806 error_add_stream_fatal
:
809 case LTTNG_CONSUMER_STREAMS_SENT
:
811 struct lttng_consumer_channel
*channel
;
814 * Get stream's channel reference. Needed when adding the stream to the
817 channel
= consumer_find_channel(msg
.u
.sent_streams
.channel_key
);
820 * We could not find the channel. Can happen if cpu hotplug
821 * happens while tearing down.
823 ERR("Unable to find channel key %" PRIu64
,
824 msg
.u
.sent_streams
.channel_key
);
825 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
828 health_code_update();
831 * Send status code to session daemon.
833 ret
= consumer_send_status_msg(sock
, ret_code
);
834 if (ret
< 0 || ret_code
!= LTTCOMM_CONSUMERD_SUCCESS
) {
835 /* Somehow, the session daemon is not responding anymore. */
836 goto error_streams_sent_nosignal
;
839 health_code_update();
842 * We should not send this message if we don't monitor the
843 * streams in this channel.
845 if (!channel
->monitor
) {
846 goto end_error_streams_sent
;
849 health_code_update();
850 /* Send stream to relayd if the stream has an ID. */
851 if (msg
.u
.sent_streams
.net_seq_idx
!= (uint64_t) -1ULL) {
852 ret
= consumer_send_relayd_streams_sent(
853 msg
.u
.sent_streams
.net_seq_idx
);
855 goto error_streams_sent_nosignal
;
857 channel
->streams_sent_to_relayd
= true;
859 end_error_streams_sent
:
861 error_streams_sent_nosignal
:
864 case LTTNG_CONSUMER_UPDATE_STREAM
:
869 case LTTNG_CONSUMER_DESTROY_RELAYD
:
871 uint64_t index
= msg
.u
.destroy_relayd
.net_seq_idx
;
872 struct consumer_relayd_sock_pair
*relayd
;
874 DBG("Kernel consumer destroying relayd %" PRIu64
, index
);
876 /* Get relayd reference if exists. */
877 relayd
= consumer_find_relayd(index
);
878 if (relayd
== NULL
) {
879 DBG("Unable to find relayd %" PRIu64
, index
);
880 ret_code
= LTTCOMM_CONSUMERD_RELAYD_FAIL
;
884 * Each relayd socket pair has a refcount of stream attached to it
885 * which tells if the relayd is still active or not depending on the
888 * This will set the destroy flag of the relayd object and destroy it
889 * if the refcount reaches zero when called.
891 * The destroy can happen either here or when a stream fd hangs up.
894 consumer_flag_relayd_for_destroy(relayd
);
897 health_code_update();
899 ret
= consumer_send_status_msg(sock
, ret_code
);
901 /* Somehow, the session daemon is not responding anymore. */
907 case LTTNG_CONSUMER_DATA_PENDING
:
910 uint64_t id
= msg
.u
.data_pending
.session_id
;
912 DBG("Kernel consumer data pending command for id %" PRIu64
, id
);
914 ret
= consumer_data_pending(id
);
916 health_code_update();
918 /* Send back returned value to session daemon */
919 ret
= lttcomm_send_unix_sock(sock
, &ret
, sizeof(ret
));
921 PERROR("send data pending ret code");
926 * No need to send back a status message since the data pending
927 * returned value is the response.
931 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL
:
933 struct lttng_consumer_channel
*channel
;
934 uint64_t key
= msg
.u
.snapshot_channel
.key
;
936 channel
= consumer_find_channel(key
);
938 ERR("Channel %" PRIu64
" not found", key
);
939 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
941 pthread_mutex_lock(&channel
->lock
);
942 if (msg
.u
.snapshot_channel
.metadata
== 1) {
943 ret
= lttng_kconsumer_snapshot_metadata(channel
, key
,
944 msg
.u
.snapshot_channel
.pathname
,
945 msg
.u
.snapshot_channel
.relayd_id
, ctx
);
947 ERR("Snapshot metadata failed");
948 ret_code
= LTTCOMM_CONSUMERD_SNAPSHOT_FAILED
;
951 ret
= lttng_kconsumer_snapshot_channel(channel
, key
,
952 msg
.u
.snapshot_channel
.pathname
,
953 msg
.u
.snapshot_channel
.relayd_id
,
954 msg
.u
.snapshot_channel
.nb_packets_per_stream
,
957 ERR("Snapshot channel failed");
958 ret_code
= LTTCOMM_CONSUMERD_SNAPSHOT_FAILED
;
961 pthread_mutex_unlock(&channel
->lock
);
963 health_code_update();
965 ret
= consumer_send_status_msg(sock
, ret_code
);
967 /* Somehow, the session daemon is not responding anymore. */
972 case LTTNG_CONSUMER_DESTROY_CHANNEL
:
974 uint64_t key
= msg
.u
.destroy_channel
.key
;
975 struct lttng_consumer_channel
*channel
;
977 channel
= consumer_find_channel(key
);
979 ERR("Kernel consumer destroy channel %" PRIu64
" not found", key
);
980 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
983 health_code_update();
985 ret
= consumer_send_status_msg(sock
, ret_code
);
987 /* Somehow, the session daemon is not responding anymore. */
988 goto end_destroy_channel
;
991 health_code_update();
993 /* Stop right now if no channel was found. */
995 goto end_destroy_channel
;
999 * This command should ONLY be issued for channel with streams set in
1002 assert(!channel
->monitor
);
1005 * The refcount should ALWAYS be 0 in the case of a channel in no
1008 assert(!uatomic_sub_return(&channel
->refcount
, 1));
1010 consumer_del_channel(channel
);
1011 end_destroy_channel
:
1014 case LTTNG_CONSUMER_DISCARDED_EVENTS
:
1018 struct lttng_consumer_channel
*channel
;
1019 uint64_t id
= msg
.u
.discarded_events
.session_id
;
1020 uint64_t key
= msg
.u
.discarded_events
.channel_key
;
1022 DBG("Kernel consumer discarded events command for session id %"
1023 PRIu64
", channel key %" PRIu64
, id
, key
);
1025 channel
= consumer_find_channel(key
);
1027 ERR("Kernel consumer discarded events channel %"
1028 PRIu64
" not found", key
);
1031 count
= channel
->discarded_events
;
1034 health_code_update();
1036 /* Send back returned value to session daemon */
1037 ret
= lttcomm_send_unix_sock(sock
, &count
, sizeof(count
));
1039 PERROR("send discarded events");
1045 case LTTNG_CONSUMER_LOST_PACKETS
:
1049 struct lttng_consumer_channel
*channel
;
1050 uint64_t id
= msg
.u
.lost_packets
.session_id
;
1051 uint64_t key
= msg
.u
.lost_packets
.channel_key
;
1053 DBG("Kernel consumer lost packets command for session id %"
1054 PRIu64
", channel key %" PRIu64
, id
, key
);
1056 channel
= consumer_find_channel(key
);
1058 ERR("Kernel consumer lost packets channel %"
1059 PRIu64
" not found", key
);
1062 count
= channel
->lost_packets
;
1065 health_code_update();
1067 /* Send back returned value to session daemon */
1068 ret
= lttcomm_send_unix_sock(sock
, &count
, sizeof(count
));
1070 PERROR("send lost packets");
1076 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE
:
1078 int channel_monitor_pipe
;
1080 ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
1081 /* Successfully received the command's type. */
1082 ret
= consumer_send_status_msg(sock
, ret_code
);
1087 ret
= lttcomm_recv_fds_unix_sock(sock
, &channel_monitor_pipe
,
1089 if (ret
!= sizeof(channel_monitor_pipe
)) {
1090 ERR("Failed to receive channel monitor pipe");
1094 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe
);
1095 ret
= consumer_timer_thread_set_channel_monitor_pipe(
1096 channel_monitor_pipe
);
1100 ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
1101 /* Set the pipe as non-blocking. */
1102 ret
= fcntl(channel_monitor_pipe
, F_GETFL
, 0);
1104 PERROR("fcntl get flags of the channel monitoring pipe");
1109 ret
= fcntl(channel_monitor_pipe
, F_SETFL
,
1110 flags
| O_NONBLOCK
);
1112 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1115 DBG("Channel monitor pipe set as non-blocking");
1117 ret_code
= LTTCOMM_CONSUMERD_ALREADY_SET
;
1119 ret
= consumer_send_status_msg(sock
, ret_code
);
1125 case LTTNG_CONSUMER_ROTATE_CHANNEL
:
1127 struct lttng_consumer_channel
*channel
;
1128 uint64_t key
= msg
.u
.rotate_channel
.key
;
1130 DBG("Consumer rotate channel %" PRIu64
, key
);
1132 channel
= consumer_find_channel(key
);
1134 ERR("Channel %" PRIu64
" not found", key
);
1135 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
1138 * Sample the rotate position of all the streams in this channel.
1140 ret
= lttng_consumer_rotate_channel(channel
, key
,
1141 msg
.u
.rotate_channel
.relayd_id
,
1142 msg
.u
.rotate_channel
.metadata
,
1145 ERR("Rotate channel failed");
1146 ret_code
= LTTCOMM_CONSUMERD_ROTATION_FAIL
;
1149 health_code_update();
1151 ret
= consumer_send_status_msg(sock
, ret_code
);
1153 /* Somehow, the session daemon is not responding anymore. */
1154 goto error_rotate_channel
;
1157 /* Rotate the streams that are ready right now. */
1158 ret
= lttng_consumer_rotate_ready_streams(
1161 ERR("Rotate ready streams failed");
1165 error_rotate_channel
:
1168 case LTTNG_CONSUMER_CLEAR_CHANNEL
:
1170 struct lttng_consumer_channel
*channel
;
1171 uint64_t key
= msg
.u
.clear_channel
.key
;
1173 channel
= consumer_find_channel(key
);
1175 DBG("Channel %" PRIu64
" not found", key
);
1176 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
1178 ret
= lttng_consumer_clear_channel(channel
);
1180 ERR("Clear channel failed");
1184 health_code_update();
1186 ret
= consumer_send_status_msg(sock
, ret_code
);
1188 /* Somehow, the session daemon is not responding anymore. */
1194 case LTTNG_CONSUMER_INIT
:
1196 ret_code
= lttng_consumer_init_command(ctx
,
1197 msg
.u
.init
.sessiond_uuid
);
1198 health_code_update();
1199 ret
= consumer_send_status_msg(sock
, ret_code
);
1201 /* Somehow, the session daemon is not responding anymore. */
1206 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK
:
1208 const struct lttng_credentials credentials
= {
1209 .uid
= msg
.u
.create_trace_chunk
.credentials
.value
.uid
,
1210 .gid
= msg
.u
.create_trace_chunk
.credentials
.value
.gid
,
1212 const bool is_local_trace
=
1213 !msg
.u
.create_trace_chunk
.relayd_id
.is_set
;
1214 const uint64_t relayd_id
=
1215 msg
.u
.create_trace_chunk
.relayd_id
.value
;
1216 const char *chunk_override_name
=
1217 *msg
.u
.create_trace_chunk
.override_name
?
1218 msg
.u
.create_trace_chunk
.override_name
:
1220 struct lttng_directory_handle
*chunk_directory_handle
= NULL
;
1223 * The session daemon will only provide a chunk directory file
1224 * descriptor for local traces.
1226 if (is_local_trace
) {
1229 /* Acnowledge the reception of the command. */
1230 ret
= consumer_send_status_msg(sock
,
1231 LTTCOMM_CONSUMERD_SUCCESS
);
1233 /* Somehow, the session daemon is not responding anymore. */
1237 ret
= lttcomm_recv_fds_unix_sock(sock
, &chunk_dirfd
, 1);
1238 if (ret
!= sizeof(chunk_dirfd
)) {
1239 ERR("Failed to receive trace chunk directory file descriptor");
1243 DBG("Received trace chunk directory fd (%d)",
1245 chunk_directory_handle
= lttng_directory_handle_create_from_dirfd(
1247 if (!chunk_directory_handle
) {
1248 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1249 if (close(chunk_dirfd
)) {
1250 PERROR("Failed to close chunk directory file descriptor");
1256 ret_code
= lttng_consumer_create_trace_chunk(
1257 !is_local_trace
? &relayd_id
: NULL
,
1258 msg
.u
.create_trace_chunk
.session_id
,
1259 msg
.u
.create_trace_chunk
.chunk_id
,
1260 (time_t) msg
.u
.create_trace_chunk
1261 .creation_timestamp
,
1262 chunk_override_name
,
1263 msg
.u
.create_trace_chunk
.credentials
.is_set
?
1266 chunk_directory_handle
);
1267 lttng_directory_handle_put(chunk_directory_handle
);
1268 goto end_msg_sessiond
;
1270 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK
:
1272 enum lttng_trace_chunk_command_type close_command
=
1273 msg
.u
.close_trace_chunk
.close_command
.value
;
1274 const uint64_t relayd_id
=
1275 msg
.u
.close_trace_chunk
.relayd_id
.value
;
1276 struct lttcomm_consumer_close_trace_chunk_reply reply
;
1277 char path
[LTTNG_PATH_MAX
];
1279 ret_code
= lttng_consumer_close_trace_chunk(
1280 msg
.u
.close_trace_chunk
.relayd_id
.is_set
?
1283 msg
.u
.close_trace_chunk
.session_id
,
1284 msg
.u
.close_trace_chunk
.chunk_id
,
1285 (time_t) msg
.u
.close_trace_chunk
.close_timestamp
,
1286 msg
.u
.close_trace_chunk
.close_command
.is_set
?
1289 reply
.ret_code
= ret_code
;
1290 reply
.path_length
= strlen(path
) + 1;
1291 ret
= lttcomm_send_unix_sock(sock
, &reply
, sizeof(reply
));
1292 if (ret
!= sizeof(reply
)) {
1295 ret
= lttcomm_send_unix_sock(sock
, path
, reply
.path_length
);
1296 if (ret
!= reply
.path_length
) {
1301 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS
:
1303 const uint64_t relayd_id
=
1304 msg
.u
.trace_chunk_exists
.relayd_id
.value
;
1306 ret_code
= lttng_consumer_trace_chunk_exists(
1307 msg
.u
.trace_chunk_exists
.relayd_id
.is_set
?
1309 msg
.u
.trace_chunk_exists
.session_id
,
1310 msg
.u
.trace_chunk_exists
.chunk_id
);
1311 goto end_msg_sessiond
;
1319 * Return 1 to indicate success since the 0 value can be a socket
1320 * shutdown during the recv() or send() call.
1325 /* This will issue a consumer stop. */
1330 * The returned value here is not useful since either way we'll return 1 to
1331 * the caller because the session daemon socket management is done
1332 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1334 ret
= consumer_send_status_msg(sock
, ret_code
);
1340 health_code_update();
1346 * Sync metadata meaning request them to the session daemon and snapshot to the
1347 * metadata thread can consumer them.
1349 * Metadata stream lock MUST be acquired.
1351 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1352 * is empty or a negative value on error.
1354 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream
*metadata
)
1360 ret
= kernctl_buffer_flush(metadata
->wait_fd
);
1362 ERR("Failed to flush kernel stream");
1366 ret
= kernctl_snapshot(metadata
->wait_fd
);
1368 if (ret
!= -EAGAIN
) {
1369 ERR("Sync metadata, taking kernel snapshot failed.");
1372 DBG("Sync metadata, no new kernel metadata");
1373 /* No new metadata, exit. */
/*
 * Queries the sub-buffer size and the padded sub-buffer size through
 * stream->wait_fd and stores them in subbuf->info.data.
 *
 * NOTE(review): the checks on `ret` and the return statement are not part of
 * this listing; the return convention must be confirmed in the full file.
 */
1383 int extract_common_subbuffer_info(struct lttng_consumer_stream
*stream
,
1384 struct stream_subbuffer
*subbuf
)
/* Written to subbuf->info.data.subbuf_size. */
1388 ret
= kernctl_get_subbuf_size(
1389 stream
->wait_fd
, &subbuf
->info
.data
.subbuf_size
);
/* Written to subbuf->info.data.padded_subbuf_size. */
1394 ret
= kernctl_get_padded_subbuf_size(
1395 stream
->wait_fd
, &subbuf
->info
.data
.padded_subbuf_size
);
/*
 * Sub-buffer info extraction for a metadata stream: runs the common extraction
 * (extract_common_subbuffer_info) and then reads the metadata version into
 * subbuf->info.metadata.version.
 *
 * NOTE(review): error handling between the two calls is not visible in this
 * listing.
 */
1405 int extract_metadata_subbuffer_info(struct lttng_consumer_stream
*stream
,
1406 struct stream_subbuffer
*subbuf
)
1410 ret
= extract_common_subbuffer_info(stream
, subbuf
);
/* Metadata-specific field, queried through stream->wait_fd. */
1415 ret
= kernctl_get_metadata_version(
1416 stream
->wait_fd
, &subbuf
->info
.metadata
.version
);
/*
 * Sub-buffer info extraction for a data stream. After the common extraction
 * (extract_common_subbuffer_info), the following fields of subbuf->info.data
 * are filled through stream->wait_fd, in this order: packet_size,
 * content_size, timestamp_begin, timestamp_end, events_discarded,
 * sequence_number, stream_id and stream_instance_id.
 *
 * sequence_number and stream_instance_id are stored as a value/is_set pair;
 * for both, -ENOTTY is treated separately from other results (see the
 * existing "older LTTng-modules" comments below).
 *
 * NOTE(review): the `if (ret ...)` checks, gotos and the return statement are
 * not part of this listing; confirm the control flow in the full file.
 */
1426 int extract_data_subbuffer_info(struct lttng_consumer_stream
*stream
,
1427 struct stream_subbuffer
*subbuf
)
1431 ret
= extract_common_subbuffer_info(stream
, subbuf
);
1436 ret
= kernctl_get_packet_size(
1437 stream
->wait_fd
, &subbuf
->info
.data
.packet_size
);
1439 PERROR("Failed to get sub-buffer packet size");
1443 ret
= kernctl_get_content_size(
1444 stream
->wait_fd
, &subbuf
->info
.data
.content_size
);
1446 PERROR("Failed to get sub-buffer content size");
1450 ret
= kernctl_get_timestamp_begin(
1451 stream
->wait_fd
, &subbuf
->info
.data
.timestamp_begin
);
1453 PERROR("Failed to get sub-buffer begin timestamp");
1457 ret
= kernctl_get_timestamp_end(
1458 stream
->wait_fd
, &subbuf
->info
.data
.timestamp_end
);
1460 PERROR("Failed to get sub-buffer end timestamp");
1464 ret
= kernctl_get_events_discarded(
1465 stream
->wait_fd
, &subbuf
->info
.data
.events_discarded
);
1467 PERROR("Failed to get sub-buffer events discarded count");
/* Optional field: the value is read here, is_set is assigned further down. */
1471 ret
= kernctl_get_sequence_number(stream
->wait_fd
,
1472 &subbuf
->info
.data
.sequence_number
.value
);
1474 /* May not be supported by older LTTng-modules. */
1475 if (ret
!= -ENOTTY
) {
1476 PERROR("Failed to get sub-buffer sequence number");
1480 subbuf
->info
.data
.sequence_number
.is_set
= true;
1483 ret
= kernctl_get_stream_id(
1484 stream
->wait_fd
, &subbuf
->info
.data
.stream_id
);
1486 PERROR("Failed to get stream id");
/* Optional field, handled like sequence_number above. */
1490 ret
= kernctl_get_instance_id(stream
->wait_fd
,
1491 &subbuf
->info
.data
.stream_instance_id
.value
);
1493 /* May not be supported by older LTTng-modules. */
1494 if (ret
!= -ENOTTY
) {
1495 PERROR("Failed to get stream instance id");
1499 subbuf
->info
.data
.stream_instance_id
.is_set
= true;
/*
 * Shared first step of the get_next_subbuffer_* functions: acquires the next
 * sub-buffer through stream->wait_fd, then invokes the stream's
 * read_subbuffer_ops.extract_subbuffer_info callback.
 *
 * NOTE(review): the callback's argument list, the checks on `ret` and the
 * return statement are not part of this listing.
 */
1506 int get_subbuffer_common(struct lttng_consumer_stream
*stream
,
1507 struct stream_subbuffer
*subbuffer
)
1511 ret
= kernctl_get_next_subbuf(stream
->wait_fd
);
/* Dispatches to whichever extractor is installed on this stream. */
1516 ret
= stream
->read_subbuffer_ops
.extract_subbuffer_info(
/*
 * get_next_subbuffer variant installed for non-mmap channels (see
 * lttng_kconsumer_set_stream_ops): runs get_subbuffer_common() and exposes
 * the sub-buffer as a file descriptor by storing stream->wait_fd in
 * subbuffer->buffer.fd.
 *
 * NOTE(review): the check on `ret` and the return statement are not part of
 * this listing.
 */
1523 int get_next_subbuffer_splice(struct lttng_consumer_stream
*stream
,
1524 struct stream_subbuffer
*subbuffer
)
1528 ret
= get_subbuffer_common(stream
, subbuffer
);
1533 subbuffer
->buffer
.fd
= stream
->wait_fd
;
/*
 * get_next_subbuffer variant installed for CONSUMER_CHANNEL_MMAP channels (see
 * lttng_kconsumer_set_stream_ops): runs get_subbuffer_common(), obtains the
 * address of the current sub-buffer and exposes it as a buffer view in
 * subbuffer->buffer.buffer.
 *
 * NOTE(review): the declaration of `addr`, the checks on `ret` and the return
 * statement are not part of this listing.
 */
1539 int get_next_subbuffer_mmap(struct lttng_consumer_stream
*stream
,
1540 struct stream_subbuffer
*subbuffer
)
1545 ret
= get_subbuffer_common(stream
, subbuffer
);
1550 ret
= get_current_subbuf_addr(stream
, &addr
);
/* The view starts at offset 0 and spans the padded sub-buffer size. */
1555 subbuffer
->buffer
.buffer
= lttng_buffer_view_init(
1556 addr
, 0, subbuffer
->info
.data
.padded_subbuf_size
);
/*
 * Releases the sub-buffer previously acquired on stream->wait_fd by calling
 * kernctl_put_next_subbuf(), and logs a message for the -EFAULT and -EIO
 * results. The `subbuffer` argument is not used in the lines visible here.
 *
 * NOTE(review): the enclosing condition around the error branches and the
 * return statement are not part of this listing.
 */
1562 int put_next_subbuffer(struct lttng_consumer_stream
*stream
,
1563 struct stream_subbuffer
*subbuffer
)
1565 const int ret
= kernctl_put_next_subbuf(stream
->wait_fd
);
1568 if (ret
== -EFAULT
) {
1569 PERROR("Error in unreserving sub buffer");
1570 } else if (ret
== -EIO
) {
1571 /* Should never happen with newer LTTng versions */
1572 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted");
/*
 * Installs the read_subbuffer_ops callbacks of a kernel stream:
 *  - get_next_subbuffer: get_next_subbuffer_mmap when the channel output is
 *    CONSUMER_CHANNEL_MMAP, get_next_subbuffer_splice otherwise;
 *  - extract_subbuffer_info: the metadata or the data extractor, selected by
 *    stream->metadata_flag;
 *  - send_live_beacon: consumer_flush_kernel_index, set when the channel is
 *    live;
 *  - put_next_subbuffer: put_next_subbuffer.
 *
 * NOTE(review): the `else` lines are not part of this listing; the
 * "otherwise" pairing above, and send_live_beacon being set only on the
 * non-metadata path, are inferred from the order of the assignments --
 * confirm in the full file.
 */
1579 static void lttng_kconsumer_set_stream_ops(
1580 struct lttng_consumer_stream
*stream
)
1582 if (stream
->chan
->output
== CONSUMER_CHANNEL_MMAP
) {
1583 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1584 get_next_subbuffer_mmap
;
1586 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1587 get_next_subbuffer_splice
;
1590 if (stream
->metadata_flag
) {
1591 stream
->read_subbuffer_ops
.extract_subbuffer_info
=
1592 extract_metadata_subbuffer_info
;
1594 stream
->read_subbuffer_ops
.extract_subbuffer_info
=
1595 extract_data_subbuffer_info
;
1596 if (stream
->chan
->is_live
) {
1597 stream
->read_subbuffer_ops
.send_live_beacon
=
1598 consumer_flush_kernel_index
;
1602 stream
->read_subbuffer_ops
.put_next_subbuffer
= put_next_subbuffer
;
/*
 * Prepares a newly received kernel stream for consumption:
 *  1. creates the output files when net_seq_idx is (uint64_t) -1ULL, the
 *     channel is monitored and the channel has a trace chunk;
 *  2. when the stream output is LTTNG_EVENT_MMAP, queries the mmap length and
 *     maps the buffer read-only (PROT_READ, MAP_PRIVATE) from stream->wait_fd;
 *  3. installs the read_subbuffer_ops callbacks.
 *
 * A label named error_close_fd is the goto target of the mmap failures; the
 * last visible lines close stream->out_fd when it is >= 0 and reset it to -1.
 *
 * NOTE(review): declarations, the checks on `ret`, the labels and the return
 * statements are not part of this listing; confirm the return values in the
 * full file.
 */
1605 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
1612 * Don't create anything if this is set for streaming or if there is
1613 * no current trace chunk on the parent channel.
1615 if (stream
->net_seq_idx
== (uint64_t) -1ULL && stream
->chan
->monitor
&&
1616 stream
->chan
->trace_chunk
) {
1617 ret
= consumer_stream_create_output_files(stream
, true);
1623 if (stream
->output
== LTTNG_EVENT_MMAP
) {
1624 /* get the len of the mmap region */
1625 unsigned long mmap_len
;
1627 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
1629 PERROR("kernctl_get_mmap_len");
1630 goto error_close_fd
;
1632 stream
->mmap_len
= (size_t) mmap_len
;
/* Read-only private mapping of the whole region, starting at offset 0. */
1634 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
, PROT_READ
,
1635 MAP_PRIVATE
, stream
->wait_fd
, 0);
1636 if (stream
->mmap_base
== MAP_FAILED
) {
1637 PERROR("Error mmaping");
1639 goto error_close_fd
;
1643 lttng_kconsumer_set_stream_ops(stream
);
1645 /* we return 0 to let the library handle the FD internally */
/* Only a valid (>= 0) out_fd is closed; it is then reset to -1. */
1649 if (stream
->out_fd
>= 0) {
1652 err
= close(stream
->out_fd
);
1654 stream
->out_fd
= -1;
1661 * Check if data is still being extracted from the buffers for a specific
1662 * stream. Consumer data lock MUST be acquired before calling this function
1663 * and the stream lock.
1665 * Return 1 if the traced data are still getting read else 0 meaning that the
1666 * data is available for trace viewer reading.
/*
 * Probes the stream for unread data: checks the endpoint status, tries to
 * acquire the next sub-buffer through stream->wait_fd and, per the existing
 * comments, puts it back and reports 1 when data is still there.
 *
 * NOTE(review): the conditions around the kernctl calls, the action taken for
 * a non-active endpoint and the final return are not part of this listing.
 */
1668 int lttng_kconsumer_data_pending(struct lttng_consumer_stream
*stream
)
1674 if (stream
->endpoint_status
!= CONSUMER_ENDPOINT_ACTIVE
) {
1679 ret
= kernctl_get_next_subbuf(stream
->wait_fd
);
1681 /* There is still data so let's put back this subbuffer. */
1682 ret
= kernctl_put_subbuf(stream
->wait_fd
);
1684 ret
= 1; /* Data is pending */
1688 /* Data is NOT pending and ready to be read. */