Fix: relayd: live client fails on clear of multi-domain session
authorJérémie Galarneau <jeremie.galarneau@efficios.com>
Wed, 2 Mar 2022 17:37:39 +0000 (12:37 -0500)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Thu, 3 Mar 2022 21:49:05 +0000 (16:49 -0500)
Observed issue
==============

Two test cases of the clear/test_ust test suite occasionally fail in the
integration jobs testing cross-bitness (32/64) LTTng deployments.

Babeltrace fails with the following error when a clear occurs while a
live client consumes a trace:

  02-28 16:55:03.262 32362 32362 E PLUGIN/SRC.CTF.LTTNG-LIVE/VIEWER lttng_live_recv@viewer-connection.c:198 [lttng-live] Remote side has closed connection
  02-28 16:55:03.262 32362 32362 E PLUGIN/SRC.CTF.LTTNG-LIVE/VIEWER lttng_live_session_get_new_streams@viewer-connection.c:1706 [lttng-live] Error receiving get new streams reply
  02-28 16:55:03.262 32362 32362 E PLUGIN/SRC.CTF.LTTNG-LIVE lttng_live_msg_iter_next@lttng-live.c:1665 [lttng-live] Error preparing the next batch of messages: live-iter-status=LTTNG_LIVE_ITERATOR_STATUS_ERROR
  02-28 16:55:03.262 32362 32362 W LIB/MSG-ITER bt_message_iterator_next@iterator.c:864 Component input port message iterator's "next" method failed: iter-addr=0x55eab7eb1170, iter-upstream-comp-name="lttng-live", iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=SOURCE, iter-upstream-comp-class-name="lttng-live", iter-upstream-comp-class-partial-descr="Connect to an LTTng relay daemon", iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR
  02-28 16:55:03.262 32362 32362 E PLUGIN/FLT.UTILS.MUXER muxer_upstream_msg_iter_next@muxer.c:454 [muxer] Upstream iterator's next method returned an error: status=ERROR
  02-28 16:55:03.262 32362 32362 E PLUGIN/FLT.UTILS.MUXER validate_muxer_upstream_msg_iters@muxer.c:991 [muxer] Cannot validate muxer's upstream message iterator wrapper: muxer-msg-iter-addr=0x55eab7eb1120, muxer-upstream-msg-iter-wrap-addr=0x55eab7eb3a70
  02-28 16:55:03.262 32362 32362 E PLUGIN/FLT.UTILS.MUXER muxer_msg_iter_next@muxer.c:1415 [muxer] Cannot get next message: comp-addr=0x55eab7eb0470, muxer-comp-addr=0x55eab7eb0510, muxer-msg-iter-addr=0x55eab7eb1120, msg-iter-addr=0x55eab7eb0fb0, status=ERROR
  02-28 16:55:03.262 32362 32362 W LIB/MSG-ITER bt_message_iterator_next@iterator.c:864 Component input port message iterator's "next" method failed: iter-addr=0x55eab7eb0fb0, iter-upstream-comp-name="muxer", iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=FILTER, iter-upstream-comp-class-name="muxer", iter-upstream-comp-class-partial-descr="Sort messages from multiple inpu", iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR
  02-28 16:55:03.262 32362 32362 W LIB/GRAPH consume_graph_sink@graph.c:473 Component's "consume" method failed: status=ERROR, comp-addr=0x55eab7eb0760, comp-name="pretty", comp-log-level=WARNING, comp-class-type=SINK, comp-class-name="pretty", comp-class-partial-descr="Pretty-print messages (`text` fo", comp-class-is-frozen=1, comp-class-so-handle-addr=0x55eab7ebd910, comp-class-so-handle-path="/root/workspace/joraj_integration_base_job/deps-64/build/lib/babeltrace2/plugins/babeltrace-plugin-text.so", comp-input-port-count=1, comp-output-port-count=0
  02-28 16:55:03.262 32362 32362 E CLI cmd_run@babeltrace2.c:2548 Graph failed to complete successfully

  ERROR:    [Babeltrace CLI] (babeltrace2.c:2548)
    Graph failed to complete successfully
  CAUSED BY [libbabeltrace2] (graph.c:473)
    Component's "consume" method failed: status=ERROR, comp-addr=0x55eab7eb0760,
    comp-name="pretty", comp-log-level=WARNING, comp-class-type=SINK,
    comp-class-name="pretty", comp-class-partial-descr="Pretty-print messages
    (`text` fo", comp-class-is-frozen=1, comp-class-so-handle-addr=0x55eab7ebd910,
    comp-class-so-handle-path="/root/workspace/joraj_integration_base_job/deps-64/build/lib/babeltrace2/plugins/babeltrace-plugin-text.so",
    comp-input-port-count=1, comp-output-port-count=0
  CAUSED BY [libbabeltrace2] (iterator.c:864)
    Component input port message iterator's "next" method failed:
    iter-addr=0x55eab7eb0fb0, iter-upstream-comp-name="muxer",
    iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=FILTER,
    iter-upstream-comp-class-name="muxer",
    iter-upstream-comp-class-partial-descr="Sort messages from multiple inpu",
    iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR
  CAUSED BY [muxer: 'filter.utils.muxer'] (muxer.c:991)
    Cannot validate muxer's upstream message iterator wrapper:
    muxer-msg-iter-addr=0x55eab7eb1120,
    muxer-upstream-msg-iter-wrap-addr=0x55eab7eb3a70
  CAUSED BY [muxer: 'filter.utils.muxer'] (muxer.c:454)
    Upstream iterator's next method returned an error: status=ERROR
  CAUSED BY [libbabeltrace2] (iterator.c:864)
    Component input port message iterator's "next" method failed:
    iter-addr=0x55eab7eb1170, iter-upstream-comp-name="lttng-live",
    iter-upstream-comp-log-level=WARNING, iter-upstream-comp-class-type=SOURCE,
    iter-upstream-comp-class-name="lttng-live",
    iter-upstream-comp-class-partial-descr="Connect to an LTTng relay daemon",
    iter-upstream-port-type=OUTPUT, iter-upstream-port-name="out", status=ERROR
  CAUSED BY [lttng-live: 'source.ctf.lttng-live'] (lttng-live.c:1665)
    Error preparing the next batch of messages:
    live-iter-status=LTTNG_LIVE_ITERATOR_STATUS_ERROR
  CAUSED BY [lttng-live: 'source.ctf.lttng-live'] (viewer-connection.c:1706)
    Error receiving get new streams reply
  CAUSED BY [lttng-live: 'source.ctf.lttng-live'] (viewer-connection.c:198)
    Remote side has closed connection

Looking at the relay daemon logs, we see the following error:
  DBG1 - 16:55:03.262106718 [32139/32146]: Adding new file "ust/pid/gen-ust-events-32373-20220228-165503/chan_0" to trace chunk "(unnamed)" (in lttng_trace_chunk_add_file() at trace-chunk.cpp:1310)
  PERROR - 16:55:03.262133333 [32139/32146]: Failed to open fs handle to ust/pid/gen-ust-events-32373-20220228-165503/chan_0, open() returned: No such file or directory (in fd_tracker_open_fs_handle() at fd-tracker/fd-tracker.cpp:548)

Cause
=====

Adding more debugging logging allows us to see the following situation
takes place:

- relay thread: Create trace chunk on session 1.
- live thread: get new streams against session 1, returns NO_NEW_STREAMS
  since the session has an 'ongoing_rotation'.
- live thread: get new streams against session 2, sees no rotation
  ongoing and attempts to open `chan_0` when creating a viewer stream

The "ongoing rotation" check was introduced in a7ceb342d and, in a
nutshell, prevents live viewers from creating new viewer streams during
a rotation.

The "ongoing rotation" state is entered when a CREATE_NEW_TRACE_CHUNK
command is issued against a session.

However, this presumes that a relay_session maps 1:1 to a session on the
session daemon's end. This isn't the case as, in multi-domain
scenarios (tracing 32-bit, 64-bit, and kernel events), a single session
daemon session can map to multiple relay_session objects. This is
because the consumer daemons maintain independant connections to the
relay daemon.

To synchronize rotations accross related relay_session instances, the
relay daemon uses the same trace chunk instances accross relay_session
instances. This means that while a trace chunk is created against a
specific relay session, it can be used by other relay_session instances.

To manage shared trace chunks between relay_sessions, the relay daemon
makes use of the trace_chunk_registry. This registry allows
relay_sessions to share trace chunk instances using a unique key tuple:
  - session daemon instance uuid,
  - session daemon session id,
  - trace chunk id.

There is no equivalent mechanism to track the "ongoing_rotation" state
accross relay_sessions originating from the same sessiond session.

In the current scenario, this causes the live client to correctly see
that no new streams are available for session 1 (say, the 32-bit user
space session). Unfortunately, this state is not entered for other
sessions (64-bit and kernel relay sessions). Hence, the viewer succeds
in acquiring new streams from session 2, exposing the race the 'ongoing
rotation' state aims to protect against.

Solution
========

Like the trace chunk instances, the "ongoing rotation" state must be
shared accross relay sessions that originate from the same session
daemon session.

To "emulate" this shared state, session_has_ongoing_rotation() checks
if any relay session originating from the same sessiond session
have an ongoing rotation. If it is the case, we temporarily prevent
live viewers from acquiring new streams.

Known drawbacks
===============

session_has_ongoing_rotation() iterates over all sessions, acquiring
their lock in the process, which is certainly undesirable from a
performance standpoint.

Optimizing this is not a great challenge, but is beyond the scope
of this immediate fix.

Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: I457a32fa497d151ca933c25433c80665268a7c1c

src/bin/lttng-relayd/live.cpp
src/bin/lttng-relayd/session.cpp
src/bin/lttng-relayd/session.h

index 70103302cf36f5e49309917b283da94db41b9d2b..5925642f09837ce7a421c095a3992abc1fc82424 100644 (file)
@@ -464,7 +464,7 @@ static int make_viewer_streams(struct relay_session *relay_session,
                                 * chunk can be used safely.
                                 */
                                if ((relay_stream->ongoing_rotation.is_set ||
-                                                   relay_session->ongoing_rotation) &&
+                                               session_has_ongoing_rotation(relay_session)) &&
                                                relay_stream->trace_chunk) {
                                        viewer_stream_trace_chunk = lttng_trace_chunk_copy(
                                                        relay_stream->trace_chunk);
@@ -1256,7 +1256,7 @@ int viewer_get_new_streams(struct relay_connection *conn)
         * stream, because the chunk can be in an intermediate state
         * due to directory renaming.
         */
-       if (session->ongoing_rotation) {
+       if (session_has_ongoing_rotation(session)) {
                DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
                response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_NO_NEW);
                goto send_reply_unlock;
@@ -1416,7 +1416,7 @@ int viewer_attach_session(struct relay_connection *conn)
         * stream, because the chunk can be in an intermediate state
         * due to directory renaming.
         */
-       if (session->ongoing_rotation) {
+       if (session_has_ongoing_rotation(session)) {
                DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
                send_streams = 0;
                goto send_reply;
@@ -1788,7 +1788,7 @@ int viewer_get_next_index(struct relay_connection *conn)
                goto send_reply;
        }
 
-       if (rstream->trace->session->ongoing_rotation) {
+       if (session_has_ongoing_rotation(rstream->trace->session)) {
                /* Rotation is ongoing, try again later. */
                viewer_index.status = LTTNG_VIEWER_INDEX_RETRY;
                DBG("Client requested index for stream id %" PRIu64" while a session rotation is ongoing, returning status=%s",
index 5fa39663a5a63fc80fbc024509278d88aea988f0..ca8e183e64bbb2ca5118b52217c6228aca522271 100644 (file)
@@ -474,6 +474,91 @@ end:
        return session;
 }
 
+/*
+ * Check if any of the relay sessions originating from the same
+ * session daemon session have the 'ongoing_rotation' state set.
+ *
+ * The caller must hold the lock of session.
+ */
+bool session_has_ongoing_rotation(const struct relay_session *session)
+{
+       bool ongoing_rotation = false;
+       struct lttng_ht_iter iter;
+       struct relay_session *iterated_session;
+
+       ASSERT_LOCKED(session->lock);
+
+       if (!session->id_sessiond.is_set) {
+               /*
+                * The peer that created this session is too old to
+                * support rotations; we can assume no rotations are ongoing.
+                */
+               goto end;
+       }
+
+       if (session->ongoing_rotation) {
+               ongoing_rotation = true;
+               goto end;
+       }
+
+       rcu_read_lock();
+       /*
+        * Sample the 'ongoing_rotation' status of all relay sessions that
+        * originate from the same session daemon session.
+        */
+       cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, iterated_session,
+                       session_n.node) {
+               if (!session_get(iterated_session)) {
+                       continue;
+               }
+
+               if (session == iterated_session) {
+                       /* Skip this session. */
+                       goto next_session_no_unlock;
+               }
+
+               pthread_mutex_lock(&iterated_session->lock);
+
+               if (!iterated_session->id_sessiond.is_set) {
+                       /*
+                        * Session belongs to a peer that doesn't support
+                        * rotations.
+                        */
+                       goto next_session;
+               }
+
+               if (!lttng_uuid_is_equal(session->sessiond_uuid,
+                               iterated_session->sessiond_uuid)) {
+                       /* Sessions do not originate from the same sessiond. */
+                       goto next_session;
+               }
+
+               if (LTTNG_OPTIONAL_GET(session->id_sessiond) !=
+                               LTTNG_OPTIONAL_GET(iterated_session->id_sessiond)) {
+                       /*
+                        * Sessions do not originate from the same sessiond
+                        * session.
+                        */
+                       goto next_session;
+               }
+
+               ongoing_rotation = iterated_session->ongoing_rotation;
+
+next_session:
+               pthread_mutex_unlock(&iterated_session->lock);
+next_session_no_unlock:
+               session_put(iterated_session);
+
+               if (ongoing_rotation) {
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+end:
+       return ongoing_rotation;
+}
+
 static void rcu_destroy_session(struct rcu_head *rcu_head)
 {
        struct relay_session *session =
index 3a9b92436f3aa44601d8b65ea0b050fe06b5bce3..8029dcd8e96f0865083094d2b1c84a0ccb4ae6aa 100644 (file)
@@ -73,7 +73,7 @@ struct relay_session {
 
        struct urcu_ref ref;
 
-       pthread_mutex_t lock;
+       mutable pthread_mutex_t lock;
 
        /* major/minor version used for this session. */
        uint32_t major;
@@ -156,6 +156,8 @@ void session_put(struct relay_session *session);
 int session_close(struct relay_session *session);
 int session_abort(struct relay_session *session);
 
+bool session_has_ongoing_rotation(const struct relay_session *session);
+
 void print_sessions(void);
 
 #endif /* _SESSION_H */
This page took 0.032546 seconds and 4 git commands to generate.