Fix: sessiond: session destruction errors are unreported
author: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Wed, 16 Oct 2019 22:22:32 +0000 (18:22 -0400)
committer: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Wed, 16 Oct 2019 22:22:32 +0000 (18:22 -0400)
The session daemon does not report errors which occur while setting up
a session's destruction, for instance when the implicit rotation or the
rotation to the "null" chunk fails. While the session will be
destroyed (it will no longer appear in session listings), the session
daemon could have failed to destroy it properly and it could be
corrupted/unreadable.

This change reports those errors so that the user does not expect the
session to be readable (although it _could_ be).

This was discovered while investigating another, unrelated, issue.

Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
include/lttng/lttng-error.h
src/bin/lttng-sessiond/cmd.c
src/bin/lttng/commands/destroy.c
src/common/error.c

index e1c6ab0a8de61d4836a8b95e1fd82a3d85b9debc..48e7656cbfbd78c2b5f607a5f23c9dea33f1aeca 100644 (file)
@@ -174,6 +174,7 @@ enum lttng_error_code {
        LTTNG_ERR_TRACE_CHUNK_EXISTS_FAIL_CONSUMER     = 151, /* failed to query consumer for trace chunk existence */
        LTTNG_ERR_INVALID_PROTOCOL                     = 152, /* a protocol error occurred */
        LTTNG_ERR_FILE_CREATION_ERROR                  = 153, /* failed to create a file */
+       LTTNG_ERR_TIMER_STOP_ERROR                     = 154, /* failed to stop timer. */
 
        /* MUST be last element */
        LTTNG_ERR_NR,                           /* Last element */
index c2231f1b07f128de561fd1849ed845c00fc91483..a17cc678ecab2363a95166b82f1c794a15c8ff96 100644 (file)
 struct cmd_destroy_session_reply_context {
        int reply_sock_fd;
        bool implicit_rotation_on_destroy;
+       /*
+        * Indicates whether or not an error occurred while launching the
+        * destruction of a session.
+        */
+       enum lttng_error_code destruction_status;
 };
 
 static enum lttng_error_code wait_on_path(void *path);
@@ -3070,7 +3075,7 @@ void cmd_destroy_session_reply(const struct ltt_session *session,
        struct lttng_trace_archive_location *location = NULL;
        struct lttcomm_lttng_msg llm = {
                .cmd_type = LTTNG_DESTROY_SESSION,
-               .ret_code = LTTNG_OK,
+               .ret_code = reply_context->destruction_status,
                .pid = UINT32_MAX,
                .cmd_header_size =
                        sizeof(struct lttcomm_session_destroy_command_header),
@@ -3152,6 +3157,7 @@ int cmd_destroy_session(struct ltt_session *session,
                int *sock_fd)
 {
        int ret;
+       enum lttng_error_code destruction_last_error = LTTNG_OK;
        struct cmd_destroy_session_reply_context *reply_context = NULL;
 
        if (sock_fd) {
@@ -3176,6 +3182,7 @@ int cmd_destroy_session(struct ltt_session *session,
                        /* Carry on with the destruction of the session. */
                        ERR("Failed to stop session \"%s\" as part of its destruction: %s",
                                        session->name, lttng_strerror(-ret));
+                       destruction_last_error = ret;
                }
        }
 
@@ -3184,6 +3191,7 @@ int cmd_destroy_session(struct ltt_session *session,
                                session)) {
                        ERR("Failed to stop the \"rotation schedule\" timer of session %s",
                                        session->name);
+                       destruction_last_error = LTTNG_ERR_TIMER_STOP_ERROR;
                }
        }
 
@@ -3203,6 +3211,7 @@ int cmd_destroy_session(struct ltt_session *session,
                if (ret != LTTNG_OK) {
                        ERR("Failed to perform an implicit rotation as part of the destruction of session \"%s\": %s",
                                        session->name, lttng_strerror(-ret));
+                       destruction_last_error = -ret;
                }
                 if (reply_context) {
                        reply_context->implicit_rotation_on_destroy = true;
@@ -3221,6 +3230,7 @@ int cmd_destroy_session(struct ltt_session *session,
                if (ret != LTTNG_OK) {
                        ERR("Failed to perform a quiet rotation as part of the destruction of session \"%s\": %s",
                                        session->name, lttng_strerror(-ret));
+                       destruction_last_error = -ret;
                }
        }
 
@@ -3285,6 +3295,7 @@ int cmd_destroy_session(struct ltt_session *session,
         */
        session_destroy(session);
        if (reply_context) {
+               reply_context->destruction_status = destruction_last_error;
                ret = session_add_destroy_notifier(session,
                                cmd_destroy_session_reply,
                                (void *) reply_context);
index cac1e96fd8d1f9655f67adf084833880673027a8..be0261d11b760b67561b52d324334c35f39162e7 100644 (file)
@@ -155,7 +155,7 @@ static int destroy_session(struct lttng_session *session)
                goto error;
        }
        if (ret_code != LTTNG_OK) {
-               ret = -LTTNG_OK;
+               ret = -ret_code;
                goto error;
        }
 
@@ -228,23 +228,27 @@ error:
  */
 static int destroy_all_sessions(struct lttng_session *sessions, int count)
 {
-       int i, ret = CMD_SUCCESS;
+       int i;
+       bool error_occurred = false;
 
+       assert(count >= 0);
        if (count == 0) {
                MSG("No session found, nothing to do.");
-       } else if (count < 0) {
-               ERR("%s", lttng_strerror(ret));
-               goto error;
        }
 
        for (i = 0; i < count; i++) {
-               ret = destroy_session(&sessions[i]);
+               int ret = destroy_session(&sessions[i]);
+
                if (ret < 0) {
-                       goto error;
+                       ERR("%s during the destruction of session \"%s\"",
+                                       lttng_strerror(ret),
+                                       sessions[i].name);
+                       /* Continue to next session. */
+                       error_occurred = true;
                }
        }
-error:
-       return ret;
+
+       return error_occurred ? CMD_ERROR : CMD_SUCCESS;
 }
 
 /*
@@ -350,8 +354,10 @@ int cmd_destroy(int argc, const char **argv)
                                command_ret = destroy_session(&sessions[i]);
                                if (command_ret) {
                                        success = 0;
+                                       ERR("%s during the destruction of session \"%s\"",
+                                                       lttng_strerror(command_ret),
+                                                       sessions[i].name);
                                }
-
                        }
                }
 
index f79827ab277c279bc6caa7ac2b59a03dfbef1416..2e4993fef35e0f32b86fc828609dddfd06a94148 100644 (file)
@@ -219,6 +219,7 @@ static const char *error_string_array[] = {
        [ ERROR_INDEX(LTTNG_ERR_TRACE_CHUNK_EXISTS_FAIL_CONSUMER) ] = "Failed to query consumer for trace chunk existence",
        [ ERROR_INDEX(LTTNG_ERR_INVALID_PROTOCOL) ] = "Protocol error occurred",
        [ ERROR_INDEX(LTTNG_ERR_FILE_CREATION_ERROR) ] = "Failed to create file",
+       [ ERROR_INDEX(LTTNG_ERR_TIMER_STOP_ERROR) ] = "Failed to stop a timer",
 
        /* Last element */
        [ ERROR_INDEX(LTTNG_ERR_NR) ] = "Unknown error code"
This page took 0.032563 seconds and 4 git commands to generate.