Fix: relayd: add LPOLLERR to events
[lttng-tools.git] / src / bin / lttng-relayd / live.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _GNU_SOURCE
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/mount.h>
30 #include <sys/resource.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <inttypes.h>
36 #include <urcu/futex.h>
37 #include <urcu/uatomic.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <config.h>
41
42 #include <lttng/lttng.h>
43 #include <common/common.h>
44 #include <common/compat/poll.h>
45 #include <common/compat/socket.h>
46 #include <common/defaults.h>
47 #include <common/futex.h>
48 #include <common/sessiond-comm/sessiond-comm.h>
49 #include <common/sessiond-comm/inet.h>
50 #include <common/sessiond-comm/relayd.h>
51 #include <common/uri.h>
52 #include <common/utils.h>
53
54 #include "cmd.h"
55 #include "live.h"
56 #include "lttng-relayd.h"
57 #include "lttng-viewer.h"
58 #include "utils.h"
59 #include "health-relayd.h"
60
/*
 * URI the live listener socket is created from (see thread_listener()).
 * Freed by cleanup().
 */
61 static struct lttng_uri *live_uri;
62
63 /*
64 * Quit pipe for all threads. This permits a single cancellation point
65 * for all threads when receiving an event on the pipe.
 *
 * stop_threads() writes to the write end ([1]); the read end ([0]) is added
 * to every poll set built by create_thread_poll_set().
66 */
67 static int live_thread_quit_pipe[2] = { -1, -1 };
68
69 /*
70 * This pipe is used to inform the worker thread that a command is queued and
71 * ready to be processed.
 *
 * thread_dispatcher() writes whole struct relay_command objects to the
 * write end ([1]).
72 */
73 static int live_relay_cmd_pipe[2] = { -1, -1 };
74
75 /*
 * Shared between threads: set to 1 by stop_threads(), and polled by the
 * thread_dispatcher() main loop, which exits once it reads a non-zero value.
 */
76 static int live_dispatch_thread_exit;
77
/* Handles of the three live threads. */
78 static pthread_t live_listener_thread;
79 static pthread_t live_dispatcher_thread;
80 static pthread_t live_worker_thread;
81
82 /*
83 * Relay command queue.
84 *
85 * thread_listener() enqueues accepted connections on this queue and
86 * thread_dispatcher() dequeues them.
87 */
88 static struct relay_cmd_queue viewer_cmd_queue;
89
/*
 * Last viewer session id handed out. viewer_connect() pre-increments it to
 * build the id sent to viewers connecting with the command type.
 */
90 static uint64_t last_relay_viewer_session_id;
91
92 /*
93 * Cleanup the daemon
94 */
95 static
96 void cleanup(void)
97 {
98 DBG("Cleaning up");
99
100 free(live_uri);
101 }
102
/*
 * Wake up a thread by writing a single byte to the write end of its
 * notification pipe.
 *
 * Returns the lttng_write() result cast to int; an error is logged when
 * fewer than one byte was written.
 */
static int notify_thread_pipe(int wpipe)
{
	const ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}
	return (int) written;
}
118
119 /*
120 * Stop all threads by closing the thread quit pipe.
121 */
122 static
123 void stop_threads(void)
124 {
125 int ret;
126
127 /* Stopping all threads */
128 DBG("Terminating all live threads");
129 ret = notify_thread_pipe(live_thread_quit_pipe[1]);
130 if (ret < 0) {
131 ERR("write error on thread quit pipe");
132 }
133
134 /* Dispatch thread */
135 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
136 futex_nto1_wake(&viewer_cmd_queue.futex);
137 }
138
139 /*
140 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
141 */
142 static
143 int create_thread_poll_set(struct lttng_poll_event *events, int size)
144 {
145 int ret;
146
147 if (events == NULL || size == 0) {
148 ret = -1;
149 goto error;
150 }
151
152 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
153 if (ret < 0) {
154 goto error;
155 }
156
157 /* Add quit pipe */
158 ret = lttng_poll_add(events, live_thread_quit_pipe[0], LPOLLIN | LPOLLERR);
159 if (ret < 0) {
160 goto error;
161 }
162
163 return 0;
164
165 error:
166 return ret;
167 }
168
169 /*
170 * Check if the thread quit pipe was triggered.
171 *
172 * Return 1 if it was triggered else 0;
173 */
174 static
175 int check_thread_quit_pipe(int fd, uint32_t events)
176 {
177 if (fd == live_thread_quit_pipe[0] && (events & LPOLLIN)) {
178 return 1;
179 }
180
181 return 0;
182 }
183
184 /*
185 * Create and init socket from uri.
186 */
187 static
188 struct lttcomm_sock *init_socket(struct lttng_uri *uri)
189 {
190 int ret;
191 struct lttcomm_sock *sock = NULL;
192
193 sock = lttcomm_alloc_sock_from_uri(uri);
194 if (sock == NULL) {
195 ERR("Allocating socket");
196 goto error;
197 }
198
199 ret = lttcomm_create_sock(sock);
200 if (ret < 0) {
201 goto error;
202 }
203 DBG("Listening on sock %d for live", sock->fd);
204
205 ret = sock->ops->bind(sock);
206 if (ret < 0) {
207 goto error;
208 }
209
210 ret = sock->ops->listen(sock, -1);
211 if (ret < 0) {
212 goto error;
213
214 }
215
216 return sock;
217
218 error:
219 if (sock) {
220 lttcomm_destroy_sock(sock);
221 }
222 return NULL;
223 }
224
225 /*
226 * This thread manages the listening for new connections on the network
 *
 * It creates the live control socket from live_uri, polls it together with
 * the thread quit pipe and, for every accepted viewer connection, enqueues a
 * freshly allocated relay_command on viewer_cmd_queue and wakes the queue
 * futex. Whatever the exit reason, it ends by calling stop_threads().
 *
 * The data argument is not used. Always returns NULL.
227 */
228 static
229 void *thread_listener(void *data)
230 {
/* err stays -1 unless the thread exits through the quit pipe. */
231 int i, ret, pollfd, err = -1;
232 int val = 1;
233 uint32_t revents, nb_fd;
234 struct lttng_poll_event events;
235 struct lttcomm_sock *live_control_sock;
236
237 DBG("[thread] Relay live listener started");
238
239 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
240
241 health_code_update();
242
243 live_control_sock = init_socket(live_uri);
244 if (!live_control_sock) {
245 goto error_sock_control;
246 }
247
248 /*
249 * Pass 2 as size here for the thread quit pipe and the live control socket.
250 */
251 ret = create_thread_poll_set(&events, 2);
252 if (ret < 0) {
253 goto error_create_poll;
254 }
255
256 /* Add the control socket */
257 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
258 if (ret < 0) {
259 goto error_poll_add;
260 }
261
262 lttng_relay_notify_ready();
263
264 while (1) {
265 health_code_update();
266
267 DBG("Listener accepting live viewers connections");
268
269 restart:
270 health_poll_entry();
271 ret = lttng_poll_wait(&events, -1);
272 health_poll_exit();
273 if (ret < 0) {
274 /*
275 * Restart interrupted system call.
276 */
277 if (errno == EINTR) {
278 goto restart;
279 }
280 goto error;
281 }
282 nb_fd = ret;
283
284 DBG("Relay new viewer connection received");
285 for (i = 0; i < nb_fd; i++) {
286 health_code_update();
287
288 /* Fetch once the poll data */
289 revents = LTTNG_POLL_GETEV(&events, i);
290 pollfd = LTTNG_POLL_GETFD(&events, i);
291
292 /* Thread quit pipe was triggered (readable). Killing thread. */
293 ret = check_thread_quit_pipe(pollfd, revents);
294 if (ret) {
295 err = 0;
296 goto exit;
297 }
298
/*
 * An error or hang-up on any polled fd is fatal for this thread: it
 * leaves through the error path with err still set.
 */
299 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
300 ERR("socket poll error");
301 goto error;
302 } else if (revents & LPOLLIN) {
303 /*
304 * Get allocated in this thread, enqueued to a global queue,
305 * dequeued and freed in the worker thread.
306 */
307 struct relay_command *relay_cmd;
308 struct lttcomm_sock *newsock;
309
310 relay_cmd = zmalloc(sizeof(*relay_cmd));
311 if (!relay_cmd) {
312 PERROR("relay command zmalloc");
313 goto error;
314 }
315
316 assert(pollfd == live_control_sock->fd);
317 newsock = live_control_sock->ops->accept(live_control_sock);
318 if (!newsock) {
319 PERROR("accepting control sock");
320 free(relay_cmd);
321 goto error;
322 }
323 DBG("Relay viewer connection accepted socket %d", newsock->fd);
324 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
325 sizeof(int));
326 if (ret < 0) {
327 PERROR("setsockopt inet");
328 lttcomm_destroy_sock(newsock);
329 free(relay_cmd);
330 goto error;
331 }
/* From here on the command owns the accepted socket. */
332 relay_cmd->sock = newsock;
333
334 /*
335 * Lock free enqueue the request.
336 */
337 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
338
339 /*
340 * Wake the dispatch queue futex. Implicit memory
341 * barrier with the exchange in cds_wfq_enqueue.
342 */
343 futex_nto1_wake(&viewer_cmd_queue.futex);
344 }
345 }
346 }
347
/*
 * The labels below fall through one another: each entry point runs its own
 * cleanup step and every step after it.
 */
348 exit:
349 error:
350 error_poll_add:
351 lttng_poll_clean(&events);
352 error_create_poll:
353 if (live_control_sock->fd >= 0) {
354 ret = live_control_sock->ops->close(live_control_sock);
355 if (ret) {
356 PERROR("close");
357 }
358 }
359 lttcomm_destroy_sock(live_control_sock);
360 error_sock_control:
361 if (err) {
362 health_error();
363 DBG("Live viewer listener thread exited with error");
364 }
365 health_unregister(health_relayd);
366 DBG("Live viewer listener thread cleanup complete");
/* The listener going away takes the other live threads down with it. */
367 stop_threads();
368 return NULL;
369 }
370
371 /*
372 * This thread manages the dispatching of the requests to worker threads
373 */
374 static
375 void *thread_dispatcher(void *data)
376 {
377 int err = -1;
378 ssize_t ret;
379 struct cds_wfq_node *node;
380 struct relay_command *relay_cmd = NULL;
381
382 DBG("[thread] Live viewer relay dispatcher started");
383
384 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
385
386 health_code_update();
387
388 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
389 health_code_update();
390
391 /* Atomically prepare the queue futex */
392 futex_nto1_prepare(&viewer_cmd_queue.futex);
393
394 do {
395 health_code_update();
396
397 /* Dequeue commands */
398 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
399 if (node == NULL) {
400 DBG("Woken up but nothing in the live-viewer "
401 "relay command queue");
402 /* Continue thread execution */
403 break;
404 }
405
406 relay_cmd = caa_container_of(node, struct relay_command, node);
407 DBG("Dispatching viewer request waiting on sock %d",
408 relay_cmd->sock->fd);
409
410 /*
411 * Inform worker thread of the new request. This call is blocking
412 * so we can be assured that the data will be read at some point in
413 * time or wait to the end of the world :)
414 */
415 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
416 sizeof(*relay_cmd));
417 free(relay_cmd);
418 if (ret < sizeof(struct relay_command)) {
419 PERROR("write cmd pipe");
420 goto error;
421 }
422 } while (node != NULL);
423
424 /* Futex wait on queue. Blocking call on futex() */
425 health_poll_entry();
426 futex_nto1_wait(&viewer_cmd_queue.futex);
427 health_poll_exit();
428 }
429
430 /* Normal exit, no error */
431 err = 0;
432
433 error:
434 if (err) {
435 health_error();
436 ERR("Health error occurred in %s", __func__);
437 }
438 health_unregister(health_relayd);
439 DBG("Live viewer dispatch thread dying");
440 stop_threads();
441 return NULL;
442 }
443
444 /*
445 * Establish connection with the viewer and check the versions.
446 *
447 * Return 0 on success or else negative value.
448 */
449 static
450 int viewer_connect(struct relay_command *cmd)
451 {
452 int ret;
453 struct lttng_viewer_connect reply, msg;
454
455 assert(cmd);
456
457 cmd->version_check_done = 1;
458
459 health_code_update();
460
461 /* Get version from the other side. */
462 ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
463 if (ret < 0 || ret != sizeof(msg)) {
464 if (ret == 0) {
465 /* Orderly shutdown. Not necessary to print an error. */
466 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
467 } else {
468 ERR("Relay failed to receive the version values.");
469 }
470 ret = -1;
471 goto end;
472 }
473
474 health_code_update();
475
476 reply.major = RELAYD_VERSION_COMM_MAJOR;
477 reply.minor = RELAYD_VERSION_COMM_MINOR;
478
479 /* Major versions must be the same */
480 if (reply.major != be32toh(msg.major)) {
481 DBG("Incompatible major versions (%u vs %u)", reply.major,
482 be32toh(msg.major));
483 ret = -1;
484 goto end;
485 }
486
487 cmd->major = reply.major;
488 /* We adapt to the lowest compatible version */
489 if (reply.minor <= be32toh(msg.minor)) {
490 cmd->minor = reply.minor;
491 } else {
492 cmd->minor = be32toh(msg.minor);
493 }
494
495 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
496 cmd->type = RELAY_VIEWER_COMMAND;
497 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
498 cmd->type = RELAY_VIEWER_NOTIFICATION;
499 } else {
500 ERR("Unknown connection type : %u", be32toh(msg.type));
501 ret = -1;
502 goto end;
503 }
504
505 reply.major = htobe32(reply.major);
506 reply.minor = htobe32(reply.minor);
507 if (cmd->type == RELAY_VIEWER_COMMAND) {
508 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
509 }
510
511 health_code_update();
512
513 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
514 sizeof(struct lttng_viewer_connect), 0);
515 if (ret < 0) {
516 ERR("Relay sending version");
517 }
518
519 health_code_update();
520
521 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
522 ret = 0;
523
524 end:
525 return ret;
526 }
527
528 /*
529 * Send the viewer the list of current sessions.
530 *
531 * Return 0 on success or else a negative value.
532 */
533 static
534 int viewer_list_sessions(struct relay_command *cmd,
535 struct lttng_ht *sessions_ht)
536 {
537 int ret;
538 struct lttng_viewer_list_sessions session_list;
539 unsigned long count;
540 long approx_before, approx_after;
541 struct lttng_ht_node_ulong *node;
542 struct lttng_ht_iter iter;
543 struct lttng_viewer_session send_session;
544 struct relay_session *session;
545
546 DBG("List sessions received");
547
548 if (cmd->version_check_done == 0) {
549 ERR("Trying to list sessions before version check");
550 ret = -1;
551 goto end_no_session;
552 }
553
554 rcu_read_lock();
555 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
556 session_list.sessions_count = htobe32(count);
557
558 health_code_update();
559
560 ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
561 sizeof(session_list), 0);
562 if (ret < 0) {
563 ERR("Relay sending sessions list");
564 goto end_unlock;
565 }
566
567 health_code_update();
568
569 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
570 health_code_update();
571
572 node = lttng_ht_iter_get_node_ulong(&iter);
573 if (!node) {
574 goto end_unlock;
575 }
576 session = caa_container_of(node, struct relay_session, session_n);
577
578 strncpy(send_session.session_name, session->session_name,
579 sizeof(send_session.session_name));
580 strncpy(send_session.hostname, session->hostname,
581 sizeof(send_session.hostname));
582 send_session.id = htobe64(session->id);
583 send_session.live_timer = htobe32(session->live_timer);
584 send_session.clients = htobe32(session->viewer_attached);
585 send_session.streams = htobe32(session->stream_count);
586
587 health_code_update();
588
589 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
590 sizeof(send_session), 0);
591 if (ret < 0) {
592 ERR("Relay sending session info");
593 goto end_unlock;
594 }
595 }
596 health_code_update();
597
598 rcu_read_unlock();
599 ret = 0;
600 goto end;
601
602 end_unlock:
603 rcu_read_unlock();
604
605 end:
606 end_no_session:
607 return ret;
608 }
609
610 /*
611 * Open index file using a given viewer stream.
612 *
613 * Return 0 on success or else a negative value.
614 */
615 static int open_index(struct relay_viewer_stream *stream)
616 {
617 int ret;
618 char fullpath[PATH_MAX];
619 struct ctf_packet_index_file_hdr hdr;
620
621 if (stream->tracefile_count > 0) {
622 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
623 PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
624 stream->channel_name, stream->tracefile_count_current);
625 } else {
626 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
627 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
628 stream->channel_name);
629 }
630 if (ret < 0) {
631 PERROR("snprintf index path");
632 goto error;
633 }
634
635 DBG("Opening index file %s in read only", fullpath);
636 ret = open(fullpath, O_RDONLY);
637 if (ret < 0) {
638 if (errno == ENOENT) {
639 ret = -ENOENT;
640 goto error;
641 } else {
642 PERROR("opening index in read-only");
643 }
644 goto error;
645 }
646 stream->index_read_fd = ret;
647 DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
648
649 ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
650 if (ret < sizeof(hdr)) {
651 PERROR("Reading index header");
652 goto error;
653 }
654 if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) {
655 ERR("Invalid header magic");
656 ret = -1;
657 goto error;
658 }
659 if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR ||
660 be32toh(hdr.index_minor) != CTF_INDEX_MINOR) {
661 ERR("Invalid header version");
662 ret = -1;
663 goto error;
664 }
665 ret = 0;
666
667 error:
668 return ret;
669 }
670
671 /*
672 * Allocate and init a new viewer_stream.
673 *
674 * Copies the values from the stream passed in parameter and insert the new
675 * stream in the viewer_streams_ht.
676 *
677 * MUST be called with rcu_read_lock held.
678 *
679 * Returns 0 on success or a negative value on error.
680 */
681 static
682 int init_viewer_stream(struct relay_stream *stream, int seek_last)
683 {
684 int ret;
685 struct relay_viewer_stream *viewer_stream;
686
687 assert(stream);
688
689 viewer_stream = zmalloc(sizeof(*viewer_stream));
690 if (!viewer_stream) {
691 PERROR("relay viewer stream zmalloc");
692 ret = -1;
693 goto error;
694 }
695 viewer_stream->session_id = stream->session->id;
696 viewer_stream->stream_handle = stream->stream_handle;
697 viewer_stream->path_name = strndup(stream->path_name,
698 LTTNG_VIEWER_PATH_MAX);
699 viewer_stream->channel_name = strndup(stream->channel_name,
700 LTTNG_VIEWER_NAME_MAX);
701 viewer_stream->tracefile_count = stream->tracefile_count;
702 viewer_stream->metadata_flag = stream->metadata_flag;
703 viewer_stream->tracefile_count_last = -1ULL;
704 if (seek_last) {
705 viewer_stream->tracefile_count_current =
706 stream->tracefile_count_current;
707 } else {
708 viewer_stream->tracefile_count_current =
709 stream->oldest_tracefile_id;
710 }
711
712 viewer_stream->ctf_trace = stream->ctf_trace;
713 if (viewer_stream->metadata_flag) {
714 viewer_stream->ctf_trace->viewer_metadata_stream =
715 viewer_stream;
716 }
717 uatomic_inc(&viewer_stream->ctf_trace->refcount);
718
719 lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
720 lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
721
722 viewer_stream->index_read_fd = -1;
723 viewer_stream->read_fd = -1;
724
725 /*
726 * This is to avoid a race between the initialization of this object and
727 * the close of the given stream. If the stream is unable to find this
728 * viewer stream when closing, this copy will at least take the latest
729 * value.
730 * We also need that for the seek_last.
731 */
732 viewer_stream->total_index_received = stream->total_index_received;
733
734 /*
735 * If we never received an index for the current stream, delay
736 * the opening of the index, otherwise open it right now.
737 */
738 if (viewer_stream->tracefile_count_current ==
739 stream->tracefile_count_current &&
740 viewer_stream->total_index_received == 0) {
741 viewer_stream->index_read_fd = -1;
742 } else {
743 ret = open_index(viewer_stream);
744 if (ret < 0) {
745 goto error;
746 }
747 }
748
749 if (seek_last && viewer_stream->index_read_fd > 0) {
750 ret = lseek(viewer_stream->index_read_fd,
751 viewer_stream->total_index_received *
752 sizeof(struct ctf_packet_index),
753 SEEK_CUR);
754 if (ret < 0) {
755 goto error;
756 }
757 viewer_stream->last_sent_index =
758 viewer_stream->total_index_received;
759 }
760
761 ret = 0;
762
763 error:
764 return ret;
765 }
766
767 /*
768 * Rotate a stream to the next tracefile.
769 *
770 * Returns 0 on success, 1 on EOF, a negative value on error.
771 */
772 static
773 int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
774 struct relay_stream *stream)
775 {
776 int ret;
777 uint64_t tracefile_id;
778
779 assert(viewer_stream);
780
781 tracefile_id = (viewer_stream->tracefile_count_current + 1) %
782 viewer_stream->tracefile_count;
783 /*
784 * Detect the last tracefile to open.
785 */
786 if (viewer_stream->tracefile_count_last != -1ULL &&
787 viewer_stream->tracefile_count_last ==
788 viewer_stream->tracefile_count_current) {
789 ret = 1;
790 goto end;
791 }
792
793 if (stream) {
794 pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
795 }
796 /*
797 * The writer and the reader are not working in the same
798 * tracefile, we can read up to EOF, we don't care about the
799 * total_index_received.
800 */
801 if (!stream || (stream->tracefile_count_current != tracefile_id)) {
802 viewer_stream->close_write_flag = 1;
803 } else {
804 /*
805 * We are opening a file that is still open in write, make
806 * sure we limit our reading to the number of indexes
807 * received.
808 */
809 viewer_stream->close_write_flag = 0;
810 if (stream) {
811 viewer_stream->total_index_received =
812 stream->total_index_received;
813 }
814 }
815 viewer_stream->tracefile_count_current = tracefile_id;
816
817 ret = close(viewer_stream->index_read_fd);
818 if (ret < 0) {
819 PERROR("close index file %d",
820 viewer_stream->index_read_fd);
821 }
822 viewer_stream->index_read_fd = -1;
823 ret = close(viewer_stream->read_fd);
824 if (ret < 0) {
825 PERROR("close tracefile %d",
826 viewer_stream->read_fd);
827 }
828 viewer_stream->read_fd = -1;
829
830 pthread_mutex_lock(&viewer_stream->overwrite_lock);
831 viewer_stream->abort_flag = 0;
832 pthread_mutex_unlock(&viewer_stream->overwrite_lock);
833
834 viewer_stream->index_read_fd = -1;
835 viewer_stream->read_fd = -1;
836
837 if (stream) {
838 pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
839 }
840 ret = open_index(viewer_stream);
841 if (ret < 0) {
842 goto error;
843 }
844
845 ret = 0;
846
847 end:
848 error:
849 return ret;
850 }
851
852 /*
853 * Send the viewer the list of current sessions.
854 */
855 static
856 int viewer_attach_session(struct relay_command *cmd,
857 struct lttng_ht *sessions_ht)
858 {
859 int ret, send_streams = 0;
860 uint32_t nb_streams = 0, nb_streams_ready = 0;
861 struct lttng_viewer_attach_session_request request;
862 struct lttng_viewer_attach_session_response response;
863 struct lttng_viewer_stream send_stream;
864 struct relay_stream *stream;
865 struct relay_viewer_stream *viewer_stream;
866 struct lttng_ht_node_ulong *node;
867 struct lttng_ht_node_u64 *node64;
868 struct lttng_ht_iter iter;
869 struct relay_session *session;
870 int seek_last = 0;
871
872 assert(cmd);
873 assert(sessions_ht);
874
875 DBG("Attach session received");
876
877 if (cmd->version_check_done == 0) {
878 ERR("Trying to attach session before version check");
879 ret = -1;
880 goto end_no_session;
881 }
882
883 health_code_update();
884
885 ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
886 if (ret < 0 || ret != sizeof(request)) {
887 if (ret == 0) {
888 /* Orderly shutdown. Not necessary to print an error. */
889 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
890 } else {
891 ERR("Relay failed to receive the attach parameters.");
892 }
893 ret = -1;
894 goto error;
895 }
896
897 health_code_update();
898
899 rcu_read_lock();
900 lttng_ht_lookup(sessions_ht,
901 (void *)((unsigned long) be64toh(request.session_id)), &iter);
902 node = lttng_ht_iter_get_node_ulong(&iter);
903 if (node == NULL) {
904 DBG("Relay session %" PRIu64 " not found",
905 be64toh(request.session_id));
906 response.status = htobe32(VIEWER_ATTACH_UNK);
907 goto send_reply;
908 }
909
910 session = caa_container_of(node, struct relay_session, session_n);
911 if (cmd->session_id == session->id) {
912 /* Same viewer already attached, just send the stream list. */
913 send_streams = 1;
914 response.status = htobe32(VIEWER_ATTACH_OK);
915 } else if (session->viewer_attached != 0) {
916 DBG("Already a viewer attached");
917 response.status = htobe32(VIEWER_ATTACH_ALREADY);
918 goto send_reply;
919 } else if (session->live_timer == 0) {
920 DBG("Not live session");
921 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
922 goto send_reply;
923 } else {
924 session->viewer_attached++;
925 send_streams = 1;
926 response.status = htobe32(VIEWER_ATTACH_OK);
927 cmd->session_id = session->id;
928 cmd->session = session;
929 }
930
931 switch (be32toh(request.seek)) {
932 case VIEWER_SEEK_BEGINNING:
933 /* Default behaviour. */
934 break;
935 case VIEWER_SEEK_LAST:
936 seek_last = 1;
937 break;
938 default:
939 ERR("Wrong seek parameter");
940 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
941 send_streams = 0;
942 goto send_reply;
943 }
944
945 if (send_streams) {
946 /* We should only be there if we have a session to attach to. */
947 assert(session);
948
949 /*
950 * Fill the viewer_streams_ht to count the number of streams
951 * ready to be sent and avoid concurrency issues on the
952 * relay_streams_ht and don't rely on a total session stream count.
953 */
954 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
955 struct relay_viewer_stream *vstream;
956
957 health_code_update();
958
959 node = lttng_ht_iter_get_node_ulong(&iter);
960 if (!node) {
961 continue;
962 }
963 stream = caa_container_of(node, struct relay_stream, stream_n);
964 if (stream->session != cmd->session) {
965 continue;
966 }
967 nb_streams++;
968
969 /*
970 * Don't send streams with no ctf_trace, they are not
971 * ready to be read.
972 */
973 if (!stream->ctf_trace || !stream->viewer_ready) {
974 continue;
975 }
976 nb_streams_ready++;
977
978 vstream = live_find_viewer_stream_by_id(stream->stream_handle);
979 if (!vstream) {
980 ret = init_viewer_stream(stream, seek_last);
981 if (ret < 0) {
982 goto end_unlock;
983 }
984 }
985 }
986
987 /* We must have the same amount of existing stream and ready stream. */
988 if (nb_streams != nb_streams_ready) {
989 nb_streams = 0;
990 }
991 response.streams_count = htobe32(nb_streams);
992 }
993
994 send_reply:
995 health_code_update();
996 ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
997 if (ret < 0) {
998 ERR("Relay sending viewer attach response");
999 goto end_unlock;
1000 }
1001 health_code_update();
1002
1003 /*
1004 * Unknown or empty session, just return gracefully, the viewer knows what
1005 * is happening.
1006 */
1007 if (!send_streams || !nb_streams) {
1008 ret = 0;
1009 goto end_unlock;
1010 }
1011
1012 /* We should only be there if we have a session to attach to. */
1013 assert(session);
1014 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
1015 health_code_update();
1016
1017 node64 = lttng_ht_iter_get_node_u64(&iter);
1018 if (!node64) {
1019 continue;
1020 }
1021 viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
1022 stream_n);
1023 if (viewer_stream->session_id != cmd->session->id) {
1024 continue;
1025 }
1026
1027 send_stream.id = htobe64(viewer_stream->stream_handle);
1028 send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
1029 send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
1030 strncpy(send_stream.path_name, viewer_stream->path_name,
1031 sizeof(send_stream.path_name));
1032 strncpy(send_stream.channel_name, viewer_stream->channel_name,
1033 sizeof(send_stream.channel_name));
1034
1035 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
1036 sizeof(send_stream), 0);
1037 if (ret < 0) {
1038 ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
1039 goto end_unlock;
1040 }
1041 DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
1042 }
1043 ret = 0;
1044
1045 end_unlock:
1046 rcu_read_unlock();
1047 end_no_session:
1048 error:
1049 return ret;
1050 }
1051
1052 /*
1053 * Get viewer stream from stream id.
1054 *
1055 * RCU read side lock MUST be acquired.
1056 */
1057 struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
1058 {
1059 struct lttng_ht_node_u64 *node;
1060 struct lttng_ht_iter iter;
1061 struct relay_viewer_stream *stream = NULL;
1062
1063 lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
1064 node = lttng_ht_iter_get_node_u64(&iter);
1065 if (node == NULL) {
1066 DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
1067 goto end;
1068 }
1069 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1070
1071 end:
1072 return stream;
1073 }
1074
1075 static
1076 void deferred_free_viewer_stream(struct rcu_head *head)
1077 {
1078 struct relay_viewer_stream *stream =
1079 caa_container_of(head, struct relay_viewer_stream, rcu_node);
1080
1081 free(stream->path_name);
1082 free(stream->channel_name);
1083 free(stream);
1084 }
1085
1086 static
1087 void delete_viewer_stream(struct relay_viewer_stream *vstream)
1088 {
1089 int delret;
1090 struct lttng_ht_iter iter;
1091
1092 iter.iter.node = &vstream->stream_n.node;
1093 delret = lttng_ht_del(viewer_streams_ht, &iter);
1094 assert(!delret);
1095 }
1096
/*
 * Release the resources held by a viewer stream.
 *
 * Drops one reference on the stream's ctf_trace, closes read_fd and
 * index_read_fd when they are open, and schedules the stream's memory to be
 * freed through call_rcu(). When the decremented refcount is 1 and the trace
 * still has a viewer metadata stream, that metadata stream is removed from
 * viewer_streams_ht and destroyed as well, then the ctf_trace is freed.
 *
 * vstream itself is not removed from viewer_streams_ht here; the caller
 * visible in this file calls delete_viewer_stream() first.
 */
1097 static
1098 void destroy_viewer_stream(struct relay_viewer_stream *vstream)
1099 {
1100 unsigned long ret_ref;
1101 int ret;
1102
1103 assert(vstream);
/* ret_ref is the trace refcount after this stream's reference is dropped. */
1104 ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1);
/* NOTE(review): ret_ref is unsigned, so this assertion can never fail. */
1105 assert(ret_ref >= 0);
1106
1107 if (vstream->read_fd >= 0) {
1108 ret = close(vstream->read_fd);
1109 if (ret < 0) {
1110 PERROR("close read_fd");
1111 }
1112 }
1113 if (vstream->index_read_fd >= 0) {
1114 ret = close(vstream->index_read_fd);
1115 if (ret < 0) {
1116 PERROR("close index_read_fd");
1117 }
1118 }
1119
1120 /*
1121 * If the only stream left in the HT is the metadata stream,
1122 * we need to remove it because we won't detect a EOF for this
1123 * stream.
 *
 * NOTE(review): this presumably relies on vstream not being the viewer
 * metadata stream itself when the branch is taken; otherwise it would be
 * deleted and destroyed a second time below - confirm against callers.
1124 */
1125 if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) {
1126 delete_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
/*
 * Recursive call: it drops the last reference, so the nested invocation
 * sees a refcount of 0 and does not enter this branch again.
 */
1127 destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
/*
 * NOTE(review): this clears metadata_stream while the test above is on
 * viewer_metadata_stream - confirm which field is intended.
 */
1128 vstream->ctf_trace->metadata_stream = NULL;
1129 DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id);
1130 /*
1131 * The streaming-side is already closed and we can't receive a new
1132 * stream concurrently at this point (since the session is being
1133 * destroyed), so when we detect the refcount equals 0, we are the
1134 * only owners of the ctf_trace and we can free it ourself.
1135 */
1136 free(vstream->ctf_trace);
1137 }
1138
/* Free the stream through call_rcu() rather than directly. */
1139 call_rcu(&vstream->rcu_node, deferred_free_viewer_stream);
1140 }
1141
1142 /*
1143 * Send the next index for a stream.
1144 *
1145 * Return 0 on success or else a negative value.
1146 */
1147 static
1148 int viewer_get_next_index(struct relay_command *cmd,
1149 struct lttng_ht *sessions_ht)
1150 {
1151 int ret;
1152 struct lttng_viewer_get_next_index request_index;
1153 struct lttng_viewer_index viewer_index;
1154 struct ctf_packet_index packet_index;
1155 struct relay_viewer_stream *vstream;
1156 struct relay_stream *rstream;
1157
1158 assert(cmd);
1159 assert(sessions_ht);
1160
1161 DBG("Viewer get next index");
1162
1163 if (cmd->version_check_done == 0) {
1164 ERR("Trying to request index before version check");
1165 ret = -1;
1166 goto end_no_session;
1167 }
1168
1169 health_code_update();
1170 ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
1171 sizeof(request_index), 0);
1172 if (ret < 0 || ret != sizeof(request_index)) {
1173 ret = -1;
1174 ERR("Relay didn't receive the whole packet");
1175 goto end;
1176 }
1177 health_code_update();
1178
1179 rcu_read_lock();
1180 vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
1181 if (!vstream) {
1182 ret = -1;
1183 goto end_unlock;
1184 }
1185
1186 memset(&viewer_index, 0, sizeof(viewer_index));
1187
1188 /*
1189 * The viewer should not ask for index on metadata stream.
1190 */
1191 if (vstream->metadata_flag) {
1192 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1193 goto send_reply;
1194 }
1195
1196 /* First time, we open the index file */
1197 if (vstream->index_read_fd < 0) {
1198 ret = open_index(vstream);
1199 if (ret == -ENOENT) {
1200 /*
1201 * The index is created only when the first data packet arrives, it
1202 * might not be ready at the beginning of the session
1203 */
1204 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1205 goto send_reply;
1206 } else if (ret < 0) {
1207 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1208 goto send_reply;
1209 }
1210 }
1211
1212 rstream = relay_stream_find_by_id(vstream->stream_handle);
1213 if (rstream) {
1214 if (vstream->abort_flag) {
1215 /* Rotate on abort (overwrite). */
1216 DBG("Viewer rotate because of overwrite");
1217 ret = rotate_viewer_stream(vstream, rstream);
1218 if (ret < 0) {
1219 goto end_unlock;
1220 } else if (ret == 1) {
1221 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1222 delete_viewer_stream(vstream);
1223 destroy_viewer_stream(vstream);
1224 goto send_reply;
1225 }
1226 }
1227 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
1228 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1229 if (rstream->beacon_ts_end != -1ULL &&
1230 vstream->last_sent_index == rstream->total_index_received) {
1231 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1232 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1233 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1234 goto send_reply;
1235 /*
1236 * Reader and writer are working in the same tracefile, so we care
1237 * about the number of index received and sent. Otherwise, we read
1238 * up to EOF.
1239 */
1240 } else if (rstream->total_index_received <= vstream->last_sent_index
1241 && !vstream->close_write_flag) {
1242 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1243 /* No new index to send, retry later. */
1244 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1245 goto send_reply;
1246 }
1247 }
1248 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1249 } else if (!rstream && vstream->close_write_flag &&
1250 vstream->total_index_received == vstream->last_sent_index) {
1251 /* Last index sent and current tracefile closed in write */
1252 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1253 delete_viewer_stream(vstream);
1254 destroy_viewer_stream(vstream);
1255 goto send_reply;
1256 } else {
1257 vstream->close_write_flag = 1;
1258 }
1259
1260 if (!vstream->ctf_trace->metadata_received ||
1261 vstream->ctf_trace->metadata_received >
1262 vstream->ctf_trace->metadata_sent) {
1263 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1264 }
1265
1266 pthread_mutex_lock(&vstream->overwrite_lock);
1267 if (vstream->abort_flag) {
1268 /*
1269 * The file is being overwritten by the writer, we cannot
1270 * use it.
1271 */
1272 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1273 pthread_mutex_unlock(&vstream->overwrite_lock);
1274 ret = rotate_viewer_stream(vstream, rstream);
1275 if (ret < 0) {
1276 goto end_unlock;
1277 } else if (ret == 1) {
1278 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1279 delete_viewer_stream(vstream);
1280 destroy_viewer_stream(vstream);
1281 goto send_reply;
1282 }
1283 goto send_reply;
1284 }
1285 ret = lttng_read(vstream->index_read_fd, &packet_index,
1286 sizeof(packet_index));
1287 pthread_mutex_unlock(&vstream->overwrite_lock);
1288 if (ret < sizeof(packet_index)) {
1289 /*
1290 * The tracefile is closed in write, so we read up to EOF.
1291 */
1292 if (vstream->close_write_flag == 1) {
1293 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1294 /* Rotate on normal EOF */
1295 ret = rotate_viewer_stream(vstream, rstream);
1296 if (ret < 0) {
1297 goto end_unlock;
1298 } else if (ret == 1) {
1299 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1300 delete_viewer_stream(vstream);
1301 destroy_viewer_stream(vstream);
1302 goto send_reply;
1303 }
1304 } else {
1305 PERROR("Relay reading index file %d",
1306 vstream->index_read_fd);
1307 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1308 }
1309 goto send_reply;
1310 } else {
1311 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1312 vstream->last_sent_index++;
1313 }
1314
1315 /*
1316 * Indexes are stored in big endian, no need to switch before sending.
1317 */
1318 viewer_index.offset = packet_index.offset;
1319 viewer_index.packet_size = packet_index.packet_size;
1320 viewer_index.content_size = packet_index.content_size;
1321 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1322 viewer_index.timestamp_end = packet_index.timestamp_end;
1323 viewer_index.events_discarded = packet_index.events_discarded;
1324 viewer_index.stream_id = packet_index.stream_id;
1325
1326 send_reply:
1327 viewer_index.flags = htobe32(viewer_index.flags);
1328 health_code_update();
1329 ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
1330 sizeof(viewer_index), 0);
1331 if (ret < 0) {
1332 ERR("Relay index to viewer");
1333 goto end_unlock;
1334 }
1335 health_code_update();
1336
1337 DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
1338 vstream->last_sent_index, vstream->stream_handle);
1339
1340 end_unlock:
1341 rcu_read_unlock();
1342
1343 end_no_session:
1344 end:
1345 return ret;
1346 }
1347
1348 /*
1349 * Send the next index for a stream
1350 *
1351 * Return 0 on success or else a negative value.
1352 */
1353 static
1354 int viewer_get_packet(struct relay_command *cmd)
1355 {
1356 int ret, send_data = 0;
1357 char *data = NULL;
1358 uint32_t len = 0;
1359 ssize_t read_len;
1360 struct lttng_viewer_get_packet get_packet_info;
1361 struct lttng_viewer_trace_packet reply;
1362 struct relay_viewer_stream *stream;
1363
1364 assert(cmd);
1365
1366 DBG2("Relay get data packet");
1367
1368 if (cmd->version_check_done == 0) {
1369 ERR("Trying to get packet before version check");
1370 ret = -1;
1371 goto end;
1372 }
1373
1374 health_code_update();
1375 ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
1376 sizeof(get_packet_info), 0);
1377 if (ret < 0 || ret != sizeof(get_packet_info)) {
1378 ret = -1;
1379 ERR("Relay didn't receive the whole packet");
1380 goto end;
1381 }
1382 health_code_update();
1383
1384 /* From this point on, the error label can be reached. */
1385 memset(&reply, 0, sizeof(reply));
1386
1387 rcu_read_lock();
1388 stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
1389 if (!stream) {
1390 goto error;
1391 }
1392 assert(stream->ctf_trace);
1393
1394 /*
1395 * First time we read this stream, we need open the tracefile, we should
1396 * only arrive here if an index has already been sent to the viewer, so the
1397 * tracefile must exist, if it does not it is a fatal error.
1398 */
1399 if (stream->read_fd < 0) {
1400 char fullpath[PATH_MAX];
1401
1402 if (stream->tracefile_count > 0) {
1403 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1404 stream->channel_name,
1405 stream->tracefile_count_current);
1406 } else {
1407 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1408 stream->channel_name);
1409 }
1410 if (ret < 0) {
1411 goto error;
1412 }
1413 ret = open(fullpath, O_RDONLY);
1414 if (ret < 0) {
1415 PERROR("Relay opening trace file");
1416 goto error;
1417 }
1418 stream->read_fd = ret;
1419 }
1420
1421 if (!stream->ctf_trace->metadata_received ||
1422 stream->ctf_trace->metadata_received >
1423 stream->ctf_trace->metadata_sent) {
1424 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1425 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1426 goto send_reply;
1427 }
1428
1429 len = be32toh(get_packet_info.len);
1430 data = zmalloc(len);
1431 if (!data) {
1432 PERROR("relay data zmalloc");
1433 goto error;
1434 }
1435
1436 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1437 if (ret < 0) {
1438 /*
1439 * If the read fd was closed by the streaming side, the
1440 * abort_flag will be set to 1, otherwise it is an error.
1441 */
1442 if (stream->abort_flag == 0) {
1443 PERROR("lseek");
1444 goto error;
1445 }
1446 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1447 goto send_reply;
1448 }
1449 read_len = lttng_read(stream->read_fd, data, len);
1450 if (read_len < len) {
1451 /*
1452 * If the read fd was closed by the streaming side, the
1453 * abort_flag will be set to 1, otherwise it is an error.
1454 */
1455 if (stream->abort_flag == 0) {
1456 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1457 stream->read_fd,
1458 be64toh(get_packet_info.offset));
1459 goto error;
1460 } else {
1461 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1462 goto send_reply;
1463 }
1464 }
1465 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1466 reply.len = htobe32(len);
1467 send_data = 1;
1468 goto send_reply;
1469
1470 error:
1471 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1472
1473 send_reply:
1474 reply.flags = htobe32(reply.flags);
1475
1476 health_code_update();
1477 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1478 if (ret < 0) {
1479 ERR("Relay data header to viewer");
1480 goto end_unlock;
1481 }
1482 health_code_update();
1483
1484 if (send_data) {
1485 health_code_update();
1486 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1487 if (ret < 0) {
1488 ERR("Relay send data to viewer");
1489 goto end_unlock;
1490 }
1491 health_code_update();
1492 }
1493
1494 DBG("Sent %u bytes for stream %" PRIu64, len,
1495 be64toh(get_packet_info.stream_id));
1496
1497 end_unlock:
1498 free(data);
1499 rcu_read_unlock();
1500
1501 end:
1502 return ret;
1503 }
1504
1505 /*
1506 * Send the session's metadata
1507 *
1508 * Return 0 on success else a negative value.
1509 */
1510 static
1511 int viewer_get_metadata(struct relay_command *cmd)
1512 {
1513 int ret = 0;
1514 ssize_t read_len;
1515 uint64_t len = 0;
1516 char *data = NULL;
1517 struct lttng_viewer_get_metadata request;
1518 struct lttng_viewer_metadata_packet reply;
1519 struct relay_viewer_stream *stream;
1520
1521 assert(cmd);
1522
1523 DBG("Relay get metadata");
1524
1525 if (cmd->version_check_done == 0) {
1526 ERR("Trying to get metadata before version check");
1527 ret = -1;
1528 goto end;
1529 }
1530
1531 health_code_update();
1532 ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
1533 sizeof(request), 0);
1534 if (ret < 0 || ret != sizeof(request)) {
1535 ret = -1;
1536 ERR("Relay didn't receive the whole packet");
1537 goto end;
1538 }
1539 health_code_update();
1540
1541 rcu_read_lock();
1542 stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
1543 if (!stream || !stream->metadata_flag) {
1544 ERR("Invalid metadata stream");
1545 goto error;
1546 }
1547 assert(stream->ctf_trace);
1548 assert(stream->ctf_trace->metadata_sent <=
1549 stream->ctf_trace->metadata_received);
1550
1551 len = stream->ctf_trace->metadata_received -
1552 stream->ctf_trace->metadata_sent;
1553 if (len == 0) {
1554 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1555 goto send_reply;
1556 }
1557
1558 /* first time, we open the metadata file */
1559 if (stream->read_fd < 0) {
1560 char fullpath[PATH_MAX];
1561
1562 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1563 stream->channel_name);
1564 if (ret < 0) {
1565 goto error;
1566 }
1567 ret = open(fullpath, O_RDONLY);
1568 if (ret < 0) {
1569 PERROR("Relay opening metadata file");
1570 goto error;
1571 }
1572 stream->read_fd = ret;
1573 }
1574
1575 reply.len = htobe64(len);
1576 data = zmalloc(len);
1577 if (!data) {
1578 PERROR("viewer metadata zmalloc");
1579 goto error;
1580 }
1581
1582 read_len = lttng_read(stream->read_fd, data, len);
1583 if (read_len < len) {
1584 PERROR("Relay reading metadata file");
1585 goto error;
1586 }
1587 stream->ctf_trace->metadata_sent += read_len;
1588 reply.status = htobe32(VIEWER_METADATA_OK);
1589 goto send_reply;
1590
1591 error:
1592 reply.status = htobe32(VIEWER_METADATA_ERR);
1593
1594 send_reply:
1595 health_code_update();
1596 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1597 if (ret < 0) {
1598 ERR("Relay data header to viewer");
1599 goto end_unlock;
1600 }
1601 health_code_update();
1602
1603 if (len > 0) {
1604 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1605 if (ret < 0) {
1606 ERR("Relay send data to viewer");
1607 goto end_unlock;
1608 }
1609 }
1610
1611 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1612 be64toh(request.stream_id));
1613
1614 DBG("Metadata sent");
1615
1616 end_unlock:
1617 free(data);
1618 rcu_read_unlock();
1619 end:
1620 return ret;
1621 }
1622
1623 /*
1624 * live_relay_unknown_command: send -1 if received unknown command
1625 */
1626 static
1627 void live_relay_unknown_command(struct relay_command *cmd)
1628 {
1629 struct lttcomm_relayd_generic_reply reply;
1630 int ret;
1631
1632 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1633 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
1634 sizeof(struct lttcomm_relayd_generic_reply), 0);
1635 if (ret < 0) {
1636 ERR("Relay sending unknown command");
1637 }
1638 }
1639
1640 /*
1641 * Process the commands received on the control socket
1642 */
1643 static
1644 int process_control(struct lttng_viewer_cmd *recv_hdr,
1645 struct relay_command *cmd, struct lttng_ht *sessions_ht)
1646 {
1647 int ret = 0;
1648
1649 switch (be32toh(recv_hdr->cmd)) {
1650 case VIEWER_CONNECT:
1651 ret = viewer_connect(cmd);
1652 break;
1653 case VIEWER_LIST_SESSIONS:
1654 ret = viewer_list_sessions(cmd, sessions_ht);
1655 break;
1656 case VIEWER_ATTACH_SESSION:
1657 ret = viewer_attach_session(cmd, sessions_ht);
1658 break;
1659 case VIEWER_GET_NEXT_INDEX:
1660 ret = viewer_get_next_index(cmd, sessions_ht);
1661 break;
1662 case VIEWER_GET_PACKET:
1663 ret = viewer_get_packet(cmd);
1664 break;
1665 case VIEWER_GET_METADATA:
1666 ret = viewer_get_metadata(cmd);
1667 break;
1668 default:
1669 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1670 live_relay_unknown_command(cmd);
1671 ret = -1;
1672 goto end;
1673 }
1674
1675 end:
1676 return ret;
1677 }
1678
/*
 * Remove pollfd from the poll set and close it. A close() failure is only
 * logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1693
1694 /*
1695 * Create and add connection to the given hash table.
1696 *
1697 * Return poll add value or else -1 on error.
1698 */
1699 static
1700 int add_connection(int fd, struct lttng_poll_event *events,
1701 struct lttng_ht *relay_connections_ht)
1702 {
1703 int ret;
1704 struct relay_command *relay_connection;
1705
1706 assert(events);
1707 assert(relay_connections_ht);
1708
1709 relay_connection = zmalloc(sizeof(struct relay_command));
1710 if (relay_connection == NULL) {
1711 PERROR("Relay command zmalloc");
1712 goto error;
1713 }
1714
1715 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1716 if (ret < sizeof(*relay_connection)) {
1717 PERROR("read relay cmd pipe");
1718 goto error_read;
1719 }
1720
1721 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1722 (unsigned long) relay_connection->sock->fd);
1723 rcu_read_lock();
1724 lttng_ht_add_unique_ulong(relay_connections_ht,
1725 &relay_connection->sock_n);
1726 rcu_read_unlock();
1727
1728 return lttng_poll_add(events, relay_connection->sock->fd,
1729 LPOLLIN | LPOLLRDHUP);
1730
1731 error_read:
1732 free(relay_connection);
1733 error:
1734 return -1;
1735 }
1736
1737 static
1738 void deferred_free_connection(struct rcu_head *head)
1739 {
1740 struct relay_command *relay_connection =
1741 caa_container_of(head, struct relay_command, rcu_node);
1742
1743 if (relay_connection->session &&
1744 relay_connection->session->viewer_attached > 0) {
1745 relay_connection->session->viewer_attached--;
1746 }
1747 lttcomm_destroy_sock(relay_connection->sock);
1748 free(relay_connection);
1749 }
1750
1751 /*
1752 * Delete all streams for a specific session ID.
1753 */
1754 static
1755 void viewer_del_streams(uint64_t session_id)
1756 {
1757 struct relay_viewer_stream *stream;
1758 struct lttng_ht_iter iter;
1759
1760 rcu_read_lock();
1761 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
1762 stream_n.node) {
1763 health_code_update();
1764
1765 if (stream->session_id != session_id) {
1766 continue;
1767 }
1768
1769 delete_viewer_stream(stream);
1770 assert(stream->ctf_trace);
1771
1772 if (stream->metadata_flag) {
1773 /*
1774 * The metadata viewer stream is destroyed once the refcount on the
1775 * ctf trace goes to 0 in the destroy stream function thus there is
1776 * no explicit call to that function here.
1777 */
1778 stream->ctf_trace->metadata_sent = 0;
1779 stream->ctf_trace->viewer_metadata_stream = NULL;
1780 } else {
1781 destroy_viewer_stream(stream);
1782 }
1783 }
1784 rcu_read_unlock();
1785 }
1786
1787 /*
1788 * Delete and free a connection.
1789 *
1790 * RCU read side lock MUST be acquired.
1791 */
1792 static
1793 void del_connection(struct lttng_ht *relay_connections_ht,
1794 struct lttng_ht_iter *iter, struct relay_command *relay_connection)
1795 {
1796 int ret;
1797
1798 assert(relay_connections_ht);
1799 assert(iter);
1800 assert(relay_connection);
1801
1802 DBG("Cleaning connection of session ID %" PRIu64,
1803 relay_connection->session_id);
1804
1805 ret = lttng_ht_del(relay_connections_ht, iter);
1806 assert(!ret);
1807
1808 viewer_del_streams(relay_connection->session_id);
1809
1810 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1811 }
1812
1813 /*
1814 * This thread does the actual work
1815 */
1816 static
1817 void *thread_worker(void *data)
1818 {
1819 int ret, err = -1;
1820 uint32_t nb_fd;
1821 struct relay_command *relay_connection;
1822 struct lttng_poll_event events;
1823 struct lttng_ht *relay_connections_ht;
1824 struct lttng_ht_node_ulong *node;
1825 struct lttng_ht_iter iter;
1826 struct lttng_viewer_cmd recv_hdr;
1827 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1828 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
1829
1830 DBG("[thread] Live viewer relay worker started");
1831
1832 rcu_register_thread();
1833
1834 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1835
1836 /* table of connections indexed on socket */
1837 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1838 if (!relay_connections_ht) {
1839 goto relay_connections_ht_error;
1840 }
1841
1842 ret = create_thread_poll_set(&events, 2);
1843 if (ret < 0) {
1844 goto error_poll_create;
1845 }
1846
1847 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1848 if (ret < 0) {
1849 goto error;
1850 }
1851
1852 restart:
1853 while (1) {
1854 int i;
1855
1856 health_code_update();
1857
1858 /* Infinite blocking call, waiting for transmission */
1859 DBG3("Relayd live viewer worker thread polling...");
1860 health_poll_entry();
1861 ret = lttng_poll_wait(&events, -1);
1862 health_poll_exit();
1863 if (ret < 0) {
1864 /*
1865 * Restart interrupted system call.
1866 */
1867 if (errno == EINTR) {
1868 goto restart;
1869 }
1870 goto error;
1871 }
1872
1873 nb_fd = ret;
1874
1875 /*
1876 * Process control. The control connection is prioritised so we don't
1877 * starve it with high throughput tracing data on the data
1878 * connection.
1879 */
1880 for (i = 0; i < nb_fd; i++) {
1881 /* Fetch once the poll data */
1882 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1883 int pollfd = LTTNG_POLL_GETFD(&events, i);
1884
1885 health_code_update();
1886
1887 /* Thread quit pipe has been closed. Killing thread. */
1888 ret = check_thread_quit_pipe(pollfd, revents);
1889 if (ret) {
1890 err = 0;
1891 goto exit;
1892 }
1893
1894 /* Inspect the relay cmd pipe for new connection */
1895 if (pollfd == live_relay_cmd_pipe[0]) {
1896 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1897 ERR("Relay live pipe error");
1898 goto error;
1899 } else if (revents & LPOLLIN) {
1900 DBG("Relay live viewer command received");
1901 ret = add_connection(live_relay_cmd_pipe[0],
1902 &events, relay_connections_ht);
1903 if (ret < 0) {
1904 goto error;
1905 }
1906 }
1907 } else if (revents) {
1908 rcu_read_lock();
1909 lttng_ht_lookup(relay_connections_ht,
1910 (void *)((unsigned long) pollfd), &iter);
1911 node = lttng_ht_iter_get_node_ulong(&iter);
1912 if (node == NULL) {
1913 DBG2("Relay viewer sock %d not found", pollfd);
1914 rcu_read_unlock();
1915 goto error;
1916 }
1917 relay_connection = caa_container_of(node, struct relay_command,
1918 sock_n);
1919
1920 if (revents & (LPOLLERR)) {
1921 cleanup_poll_connection(&events, pollfd);
1922 del_connection(relay_connections_ht, &iter,
1923 relay_connection);
1924 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1925 DBG("Viewer socket %d hung up", pollfd);
1926 cleanup_poll_connection(&events, pollfd);
1927 del_connection(relay_connections_ht, &iter,
1928 relay_connection);
1929 } else if (revents & LPOLLIN) {
1930 ret = relay_connection->sock->ops->recvmsg(
1931 relay_connection->sock, &recv_hdr,
1932 sizeof(struct lttng_viewer_cmd),
1933 0);
1934 /* connection closed */
1935 if (ret <= 0) {
1936 cleanup_poll_connection(&events, pollfd);
1937 del_connection(relay_connections_ht, &iter,
1938 relay_connection);
1939 DBG("Viewer control connection closed with %d",
1940 pollfd);
1941 } else {
1942 if (relay_connection->session) {
1943 DBG2("Relay viewer worker receiving data for "
1944 "session: %" PRIu64,
1945 relay_connection->session->id);
1946 }
1947 ret = process_control(&recv_hdr, relay_connection,
1948 sessions_ht);
1949 if (ret < 0) {
1950 /* Clear the session on error. */
1951 cleanup_poll_connection(&events, pollfd);
1952 del_connection(relay_connections_ht, &iter,
1953 relay_connection);
1954 DBG("Viewer connection closed with %d", pollfd);
1955 }
1956 }
1957 }
1958 rcu_read_unlock();
1959 }
1960 }
1961 }
1962
1963 exit:
1964 error:
1965 lttng_poll_clean(&events);
1966
1967 /* empty the hash table and free the memory */
1968 rcu_read_lock();
1969 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
1970 health_code_update();
1971
1972 node = lttng_ht_iter_get_node_ulong(&iter);
1973 if (!node) {
1974 continue;
1975 }
1976
1977 relay_connection = caa_container_of(node, struct relay_command,
1978 sock_n);
1979 del_connection(relay_connections_ht, &iter, relay_connection);
1980 }
1981 rcu_read_unlock();
1982 error_poll_create:
1983 lttng_ht_destroy(relay_connections_ht);
1984 relay_connections_ht_error:
1985 /* Close relay cmd pipes */
1986 utils_close_pipe(live_relay_cmd_pipe);
1987 if (err) {
1988 DBG("Viewer worker thread exited with error");
1989 }
1990 DBG("Viewer worker thread cleanup complete");
1991 if (err) {
1992 health_error();
1993 ERR("Health error occurred in %s", __func__);
1994 }
1995 health_unregister(health_relayd);
1996 stop_threads();
1997 rcu_unregister_thread();
1998 return NULL;
1999 }
2000
2001 /*
2002 * Create the relay command pipe to wake thread_manage_apps.
2003 * Closed in cleanup().
2004 */
2005 static int create_relay_cmd_pipe(void)
2006 {
2007 int ret;
2008
2009 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
2010
2011 return ret;
2012 }
2013
2014 void live_stop_threads(void)
2015 {
2016 int ret;
2017 void *status;
2018
2019 stop_threads();
2020
2021 ret = pthread_join(live_listener_thread, &status);
2022 if (ret != 0) {
2023 PERROR("pthread_join live listener");
2024 goto error; /* join error, exit without cleanup */
2025 }
2026
2027 ret = pthread_join(live_worker_thread, &status);
2028 if (ret != 0) {
2029 PERROR("pthread_join live worker");
2030 goto error; /* join error, exit without cleanup */
2031 }
2032
2033 ret = pthread_join(live_dispatcher_thread, &status);
2034 if (ret != 0) {
2035 PERROR("pthread_join live dispatcher");
2036 goto error; /* join error, exit without cleanup */
2037 }
2038
2039 cleanup();
2040
2041 error:
2042 return;
2043 }
2044
2045 /*
2046 * main
2047 */
2048 int live_start_threads(struct lttng_uri *uri,
2049 struct relay_local_data *relay_ctx, int quit_pipe[2])
2050 {
2051 int ret = 0;
2052 void *status;
2053 int is_root;
2054
2055 assert(uri);
2056 live_uri = uri;
2057
2058 live_thread_quit_pipe[0] = quit_pipe[0];
2059 live_thread_quit_pipe[1] = quit_pipe[1];
2060
2061 /* Check if daemon is UID = 0 */
2062 is_root = !getuid();
2063
2064 if (!is_root) {
2065 if (live_uri->port < 1024) {
2066 ERR("Need to be root to use ports < 1024");
2067 ret = -1;
2068 goto exit;
2069 }
2070 }
2071
2072 /* Setup the thread apps communication pipe. */
2073 if ((ret = create_relay_cmd_pipe()) < 0) {
2074 goto exit;
2075 }
2076
2077 /* Init relay command queue. */
2078 cds_wfq_init(&viewer_cmd_queue.queue);
2079
2080 /* Set up max poll set size */
2081 lttng_poll_set_max_size();
2082
2083 /* Setup the dispatcher thread */
2084 ret = pthread_create(&live_dispatcher_thread, NULL,
2085 thread_dispatcher, (void *) NULL);
2086 if (ret != 0) {
2087 PERROR("pthread_create viewer dispatcher");
2088 goto exit_dispatcher;
2089 }
2090
2091 /* Setup the worker thread */
2092 ret = pthread_create(&live_worker_thread, NULL,
2093 thread_worker, relay_ctx);
2094 if (ret != 0) {
2095 PERROR("pthread_create viewer worker");
2096 goto exit_worker;
2097 }
2098
2099 /* Setup the listener thread */
2100 ret = pthread_create(&live_listener_thread, NULL,
2101 thread_listener, (void *) NULL);
2102 if (ret != 0) {
2103 PERROR("pthread_create viewer listener");
2104 goto exit_listener;
2105 }
2106
2107 ret = 0;
2108 goto end;
2109
2110 exit_listener:
2111 ret = pthread_join(live_listener_thread, &status);
2112 if (ret != 0) {
2113 PERROR("pthread_join live listener");
2114 goto error; /* join error, exit without cleanup */
2115 }
2116
2117 exit_worker:
2118 ret = pthread_join(live_worker_thread, &status);
2119 if (ret != 0) {
2120 PERROR("pthread_join live worker");
2121 goto error; /* join error, exit without cleanup */
2122 }
2123
2124 exit_dispatcher:
2125 ret = pthread_join(live_dispatcher_thread, &status);
2126 if (ret != 0) {
2127 PERROR("pthread_join live dispatcher");
2128 goto error; /* join error, exit without cleanup */
2129 }
2130
2131 exit:
2132 cleanup();
2133
2134 end:
2135 error:
2136 return ret;
2137 }
This page took 0.073259 seconds and 5 git commands to generate.