From: Jérémie Galarneau Date: Tue, 30 Oct 2018 12:47:52 +0000 (+0100) Subject: Fix: session destruction blocks indefinitely if rotation is ongoing X-Git-Tag: v2.11.0-rc2~190 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=e87d3c18a40fb3d3bee3dff29615636257824e29;p=lttng-tools.git Fix: session destruction blocks indefinitely if rotation is ongoing Issue --- The destruction of an active session can hang indefinitely if it occurs while a rotation is ongoing. This was observed when automatic session rotations were scheduled on a time basis. The destruction of the session causes it to be stopped. The 'stop' command causes the session's timers to be stopped. These timers include the rotation pending check timer. Meanwhile, 'data pending' queries are performed against the session until one of them returns that no data is pending. The 'data pending' check returns that data is pending if a session rotation is ongoing at the moment of the check. Hence, stopping the rotation completion check timer causes the session to remain in the 'session ongoing' state forever and prevents the session destruction from completing. Solution --- The session's rotation schedule timer is correctly stopped when a 'stop' is performed; we don't want new rotations to be issued from this point. However, it is incorrect to stop the 'rotation pending check' timer at this stage if a rotation is ongoing. This commit leaves the 'rotation pending check' timer running, allowing the rotation thread to update the session's rotation state on completion of the rotation. The operations that were performed as part of the stop command, namely renaming the 'current' chunk, are then performed from the context of the rotation thread. Signed-off-by: Jérémie Galarneau --- diff --git a/src/bin/lttng-sessiond/cmd.c b/src/bin/lttng-sessiond/cmd.c index b1bcc10a5..545269525 100644 --- a/src/bin/lttng-sessiond/cmd.c +++ b/src/bin/lttng-sessiond/cmd.c @@ -2716,48 +2716,6 @@ error: return ret; } -static -int rename_active_chunk(struct ltt_session *session) -{ - int ret; - - session->current_archive_id++; - - /* - * The currently active tracing path is now the folder we - * want to rename. - */ - ret = lttng_strncpy(session->rotation_chunk.current_rotate_path, - session->rotation_chunk.active_tracing_path, - sizeof(session->rotation_chunk.current_rotate_path)); - if (ret) { - ERR("Failed to copy active tracing path"); - goto end; - } - - ret = rename_completed_chunk(session, time(NULL)); - if (ret < 0) { - ERR("Failed to rename current rotation's path"); - goto end; - } - - /* - * We just renamed, the folder, we didn't do an actual rotation, so - * the active tracing path is now the renamed folder and we have to - * restore the rotate count. - */ - ret = lttng_strncpy(session->rotation_chunk.active_tracing_path, - session->rotation_chunk.current_rotate_path, - sizeof(session->rotation_chunk.active_tracing_path)); - if (ret) { - ERR("Failed to rename active session chunk tracing path"); - goto end; - } -end: - session->current_archive_id--; - return ret; -} - /* * Command LTTNG_STOP_TRACE processed by the client thread. */ @@ -2782,13 +2740,6 @@ int cmd_stop_trace(struct ltt_session *session) goto error; } - if (session->rotation_pending_check_timer_enabled) { - if (timer_session_rotation_pending_check_stop(session)) { - ERR("Failed to stop the \"rotation pending check\" timer of session %s", - session->name); - } - } - if (session->rotation_schedule_timer_enabled) { if (timer_session_rotation_schedule_timer_stop( session)) { @@ -2797,6 +2748,12 @@ int cmd_stop_trace(struct ltt_session *session) } } + /* + * A rotation is still ongoing. The check timer will continue to wait + * for the rotation to complete. When the rotation finally completes, + * a check will be performed to rename the "active" chunk to the + * expected "timestamp_begin-timestamp_end" format. + */ if (session->current_archive_id > 0 && session->rotation_state != LTTNG_ROTATION_STATE_ONGOING) { ret = rename_active_chunk(session); diff --git a/src/bin/lttng-sessiond/rotate.c b/src/bin/lttng-sessiond/rotate.c index 17d3c51fd..7abfaed64 100644 --- a/src/bin/lttng-sessiond/rotate.c +++ b/src/bin/lttng-sessiond/rotate.c @@ -320,6 +320,47 @@ end: return ret; } +int rename_active_chunk(struct ltt_session *session) +{ + int ret; + + session->current_archive_id++; + + /* + * The currently active tracing path is now the folder we + * want to rename. + */ + ret = lttng_strncpy(session->rotation_chunk.current_rotate_path, + session->rotation_chunk.active_tracing_path, + sizeof(session->rotation_chunk.current_rotate_path)); + if (ret) { + ERR("Failed to copy active tracing path"); + goto end; + } + + ret = rename_completed_chunk(session, time(NULL)); + if (ret < 0) { + ERR("Failed to rename current rotation's path"); + goto end; + } + + /* + * We just renamed, the folder, we didn't do an actual rotation, so + * the active tracing path is now the renamed folder and we have to + * restore the rotate count. + */ + ret = lttng_strncpy(session->rotation_chunk.active_tracing_path, + session->rotation_chunk.current_rotate_path, + sizeof(session->rotation_chunk.active_tracing_path)); + if (ret) { + ERR("Failed to rename active session chunk tracing path"); + goto end; + } +end: + session->current_archive_id--; + return ret; +} + int subscribe_session_consumed_size_rotation(struct ltt_session *session, uint64_t size, struct notification_thread_handle *notification_thread_handle) { diff --git a/src/bin/lttng-sessiond/rotate.h b/src/bin/lttng-sessiond/rotate.h index 9fa58a709..281213080 100644 --- a/src/bin/lttng-sessiond/rotate.h +++ b/src/bin/lttng-sessiond/rotate.h @@ -22,6 +22,7 @@ #include "rotation-thread.h" #include +int rename_active_chunk(struct ltt_session *session); int rename_completed_chunk(struct ltt_session *session, time_t ts); /* diff --git a/src/bin/lttng-sessiond/rotation-thread.c b/src/bin/lttng-sessiond/rotation-thread.c index c0d1cf146..59b712357 100644 --- a/src/bin/lttng-sessiond/rotation-thread.c +++ b/src/bin/lttng-sessiond/rotation-thread.c @@ -598,6 +598,44 @@ int check_session_rotation_pending(struct ltt_session *session, session->name); } + if (!session->active) { + /* + * A stop command was issued during the rotation, it is + * up to the rotation completion check to perform the + * renaming of the last chunk that was produced. + */ + ret = notification_thread_command_session_rotation_ongoing( + notification_thread_handle, + session->name, + session->uid, + session->gid, + session->current_archive_id); + if (ret != LTTNG_OK) { + ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s", + session->name); + } + + ret = rename_active_chunk(session); + if (ret < 0) { + ERR("[rotation-thread] Failed to rename active rotation chunk"); + goto end; + } + + /* Ownership of location is transferred. */ + location = session_get_trace_archive_location(session); + ret = notification_thread_command_session_rotation_completed( + notification_thread_handle, + session->name, + session->uid, + session->gid, + session->current_archive_id, + location); + if (ret != LTTNG_OK) { + ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s", + session->name); + } + } + ret = 0; end: if (session->rotation_state == LTTNG_ROTATION_STATE_ONGOING) {