From: Jonathan Rajotte Date: Fri, 22 May 2020 14:36:46 +0000 (-0400) Subject: Fix: tests: `pgrep -f` flags unrelated process as lttng-sessiond X-Git-Tag: v2.13.0-rc1~505 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=7cb78e2f73ef7bc0cfedef707f47f1c229bb4c43;p=lttng-tools.git Fix: tests: `pgrep -f` flags unrelated process as lttng-sessiond Observed issue ============== lttng-sessiond is not started by start_lttng_sessiond_opt and a vim process is killed on stop_lttng_sessiond_opt. Cause ===== We uses "pgrep -f" with the interested pattern to gather the pids that should be the lttng processes we are interested in. `pgrep -f` yields false positives since it matches against the complete cmdline including parent directory of the command and all arguments. For example, the following will currently match for the sessiond pattern: vim src/bin/lttng-sessiond/notification-thread-internal.h This prevents the launch of sessiond by start_lttng_sessiond_opt and end up killing the vim process on stop_lttng_sessiond_opt. Solution ======== To alleviate this, we propose a two stage lookup. The first stage uses "pgrep -f" yielding potential candidates. The second stage performs grep on the basename of the first field of the /proc/[pid]/cmdline for each pid candidates. The first field of /proc/[pid]/cmdline corresponds to the actual command. We use the basename to ensure that we do not match on the path to the executable. Known drawbacks ========= None References ========== https://review.lttng.org/c/lttng-tools/+/3043 Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau Change-Id: I479ebad27f4965ae16d4442a6fe58ff3157d76fa --- diff --git a/tests/regression/tools/working-directory/test_relayd_working_directory b/tests/regression/tools/working-directory/test_relayd_working_directory index 0a5bad027..24b87a9cc 100755 --- a/tests/regression/tools/working-directory/test_relayd_working_directory +++ b/tests/regression/tools/working-directory/test_relayd_working_directory @@ -67,7 +67,7 @@ function test_relayd_daemon() start_lttng_relayd_opt 1 "-d" "--working-directory $working_dir" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/${pid}/cwd") @@ -88,7 +88,7 @@ function test_relayd_daemon_no_working_dir() start_lttng_relayd_opt 1 "-d" "" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/${pid}/cwd") @@ -111,7 +111,7 @@ function test_relayd_background() start_lttng_relayd_opt 1 "-b" "--working-directory $working_dir" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/${pid}/cwd") @@ -132,7 +132,7 @@ function test_relayd_background_no_working_dir() start_lttng_relayd_opt 1 "-b" "" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/${pid}/cwd") @@ -172,7 +172,7 @@ function test_relayd_debug_permission() ERROR_OUTPUT_DEST=$(mktemp) start_lttng_relayd_opt 1 "-b" "-v --working-dir $working_dir" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/${pid}/cwd") @@ -208,7 +208,7 @@ function test_relayd_failure() test $? -eq "1" ok $? "Expect failure to start lttng-relayd for non-existent working directory" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") if [ -z "$pid" ]; then pass "No lttng-relayd present" else @@ -237,7 +237,7 @@ function test_relayd_env() export LTTNG_RELAYD_WORKING_DIRECTORY=${working_dir} start_lttng_relayd_opt 1 "-b" "" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/$pid/cwd") @@ -264,7 +264,7 @@ function test_relayd_cmdline_overwrite_env() export LTTNG_RELAYD_WORKING_DIRECTORY=${working_dir_env} start_lttng_relayd_opt 1 "-b" "--working-dir ${working_dir_cmdline}" - pid=$(pgrep "$RELAYD_MATCH") + pid=$(lttng_pgrep "$RELAYD_MATCH") ok $? "Found lttng-relayd" cwd=$(readlink "/proc/$pid/cwd") diff --git a/tests/stress/test_multi_sessions_per_uid_10app b/tests/stress/test_multi_sessions_per_uid_10app index 4d4a78241..4aaf1d91a 100755 --- a/tests/stress/test_multi_sessions_per_uid_10app +++ b/tests/stress/test_multi_sessions_per_uid_10app @@ -36,7 +36,7 @@ function enable_channel_per_uid() function check_sessiond() { - if [ -z "$(pgrep --full lt-lttng-sessiond)" ]; then + if [ -z "$(lttng_pgrep lt-lttng-sessiond)" ]; then local str_date=$(date +%H%M%S-%d%m%Y) diag "!!!The session daemon died unexpectedly!!!" @@ -56,7 +56,7 @@ function start_sessiond() BAIL_OUT "*** Kernel too old for session daemon tests ***" fi - if [ -z $(pgrep --full lt-$SESSIOND_BIN) ]; then + if [ -z $(lttng_pgrep lt-$SESSIOND_BIN) ]; then # We have to start it like this so the ulimit -c is used by this # process. Also, we collect any error message printed out. $TESTDIR/../src/bin/lttng-sessiond/$SESSIOND_BIN --quiet --background --consumerd32-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" --consumerd64-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" >$LOG_FILE 2>&1 diff --git a/tests/stress/test_multi_sessions_per_uid_5app_streaming b/tests/stress/test_multi_sessions_per_uid_5app_streaming index f8a0a4755..a0f1898b6 100755 --- a/tests/stress/test_multi_sessions_per_uid_5app_streaming +++ b/tests/stress/test_multi_sessions_per_uid_5app_streaming @@ -37,7 +37,7 @@ function enable_channel_per_uid() function check_sessiond() { - if [ -z "$(pgrep --full lt-lttng-sessiond)" ]; then + if [ -z "$(lttng_pgrep lt-lttng-sessiond)" ]; then local str_date=$(date +%H%M%S-%d%m%Y) diag "!!!The session daemon died unexpectedly!!!" @@ -51,7 +51,7 @@ function check_sessiond() function check_relayd() { - if [ -z "$(pgrep --full lt-lttng-relayd)" ]; then + if [ -z "$(lttng_pgrep lt-lttng-relayd)" ]; then local str_date=$(date +%H%M%S-%d%m%Y) diag "!!!The relay daemon died unexpectedly!!!" @@ -71,7 +71,7 @@ function start_sessiond() BAIL_OUT "*** Kernel too old for session daemon tests ***" fi - if [ -z $(pgrep --full lt-$SESSIOND_BIN) ]; then + if [ -z $(lttng_pgrep lt-$SESSIOND_BIN) ]; then # We have to start it like this so the ulimit -c is used by this # process. Also, we collect any error message printed out. $TESTDIR/../src/bin/lttng-sessiond/$SESSIOND_BIN --quiet --background --consumerd32-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" --consumerd64-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" >$LOG_FILE_SESSIOND 2>&1 @@ -84,7 +84,7 @@ function start_relayd { local opt=$1 - if [ -z $(pgrep --full lt-$RELAYD_BIN) ]; then + if [ -z $(lttng_pgrep lt-$RELAYD_BIN) ]; then $TESTDIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt >$LOG_FILE_RELAYD 2>&1 & ok $? "Start lttng-relayd (opt: \"$opt\")" fi diff --git a/tests/stress/test_multi_sessions_per_uid_5app_streaming_kill_relayd b/tests/stress/test_multi_sessions_per_uid_5app_streaming_kill_relayd index e6ee7df7d..c9a4576b6 100755 --- a/tests/stress/test_multi_sessions_per_uid_5app_streaming_kill_relayd +++ b/tests/stress/test_multi_sessions_per_uid_5app_streaming_kill_relayd @@ -38,7 +38,7 @@ function enable_channel_per_uid() function check_sessiond() { - if [ -z "$(pgrep --full lt-lttng-sessiond)" ]; then + if [ -z "$(lttng_pgrep lt-lttng-sessiond)" ]; then local str_date=$(date +%H%M%S-%d%m%Y) diag "!!!The session daemon died unexpectedly!!!" @@ -58,7 +58,7 @@ function start_sessiond() BAIL_OUT "*** Kernel too old for session daemon tests ***" fi - if [ -z $(pgrep --full lt-$SESSIOND_BIN) ]; then + if [ -z $(lttng_pgrep lt-$SESSIOND_BIN) ]; then # We have to start it like this so the ulimit -c is used by this # process. Also, we collect any error message printed out. #$TESTDIR/../src/bin/lttng-sessiond/$SESSIOND_BIN --quiet --background --consumerd32-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" --consumerd64-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" >$LOG_FILE_SESSIOND 2>&1 @@ -73,7 +73,7 @@ function start_relayd { local opt=$1 - if [ -z $(pgrep --full lt-$RELAYD_BIN) ]; then + if [ -z $(lttng_pgrep lt-$RELAYD_BIN) ]; then $TESTDIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt >$LOG_FILE_RELAYD 2>&1 & ok $? "Start lttng-relayd (opt: \"$opt\")" fi @@ -81,7 +81,7 @@ function start_relayd function check_relayd() { - if [ -z "$(pgrep --full lt-lttng-relayd)" ]; then + if [ -z "$(lttng_pgrep lt-lttng-relayd)" ]; then local str_date=$(date +%H%M%S-%d%m%Y) #diag "Relay daemon died. Starting it again" diff --git a/tests/utils/utils.sh b/tests/utils/utils.sh index d65662e42..1a6ea3c42 100644 --- a/tests/utils/utils.sh +++ b/tests/utils/utils.sh @@ -74,6 +74,42 @@ trap full_cleanup SIGINT SIGTERM trap null_pipes SIGPIPE +# Check pgrep from env, default to pgrep if none +if [ -z "$PGREP" ]; then + PGREP=pgrep +fi + +# Due to the renaming of threads we need to use the full command (pgrep -f) to +# identify the pids for multiple lttng related processes. The problem with "pgrep +# -f" is that it ends up also looking at the arguments. We use a two stage +# lookup. The first one is using "pgrep -f" yielding potential candidate. +# The second on perform grep on the basename of the first field of the +# /proc/pid/cmdline of the previously identified pids. The first field +# correspond to the actual command. +function lttng_pgrep () +{ + local pattern=$1 + local possible_pids + local full_command_no_argument + local command_basename + + possible_pids=$($PGREP -f "$pattern") + if [ -z "$possible_pids" ]; then + return 0 + fi + + while IFS= read -r pid ; do + # /proc/pid/cmdline is null separated. + if full_command_no_argument=$(cut -d '' -f 1 < /proc/"$pid"/cmdline); then + command_basename=$(basename "$full_command_no_argument") + if grep -q "$pattern" <<< "$command_basename"; then + echo "$pid" + fi + fi + done <<< "$possible_pids" + return 0 +} + function print_ok () { # Check if we are a terminal @@ -401,7 +437,7 @@ function start_lttng_relayd_opt() DIR=$(readlink -f "$TESTDIR") - if [ -z $(pgrep -f $RELAYD_MATCH) ]; then + if [ -z $(lttng_pgrep "$RELAYD_MATCH") ]; then # shellcheck disable=SC2086 $DIR/../src/bin/lttng-relayd/$RELAYD_BIN $process_mode $opt 1> $OUTPUT_DEST 2> $ERROR_OUTPUT_DEST #$DIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt -vvv >>/tmp/relayd.log 2>&1 & @@ -450,7 +486,7 @@ function stop_lttng_relayd_opt() local retval=0 local pids= - pids=$(pgrep -f "$RELAYD_MATCH") + pids=$(lttng_pgrep "$RELAYD_MATCH") if [ -z "$pids" ]; then if [ "$withtap" -eq "1" ]; then pass "No relay daemon to kill" @@ -469,7 +505,7 @@ function stop_lttng_relayd_opt() else out=1 while [ -n "$out" ]; do - out=$(pgrep -f "$RELAYD_MATCH") + out=$(lttng_pgrep "$RELAYD_MATCH") if [ -n "$dtimeleft_s" ]; then if [ $dtimeleft_s -lt 0 ]; then out= @@ -547,7 +583,7 @@ function start_lttng_sessiond_opt() : "${LTTNG_SESSION_CONFIG_XSD_PATH="${DIR}/../src/common/config/"}" export LTTNG_SESSION_CONFIG_XSD_PATH - if [ -z "$(pgrep -f "${SESSIOND_MATCH}")" ]; then + if [ -z "$(lttng_pgrep "${SESSIOND_MATCH}")" ]; then # Have a load path ? if [ -n "$load_path" ]; then # shellcheck disable=SC2086 @@ -599,10 +635,10 @@ function stop_lttng_sessiond_opt() local retval=0 local runas_pids= - runas_pids=$(pgrep -f "$RUNAS_MATCH") + runas_pids=$(lttng_pgrep "$RUNAS_MATCH") local pids= - pids=$(pgrep -f "$SESSIOND_MATCH") + pids=$(lttng_pgrep "$SESSIOND_MATCH") if [ -n "$runas_pids" ]; then pids="$pids $runas_pids" @@ -626,7 +662,7 @@ function stop_lttng_sessiond_opt() else out=1 while [ -n "$out" ]; do - out=$(pgrep -f "${SESSIOND_MATCH}") + out=$(lttng_pgrep "${SESSIOND_MATCH}") if [ -n "$dtimeleft_s" ]; then if [ $dtimeleft_s -lt 0 ]; then out= @@ -638,7 +674,7 @@ function stop_lttng_sessiond_opt() done out=1 while [ -n "$out" ]; do - out=$(pgrep -f "$CONSUMERD_MATCH") + out=$(lttng_pgrep "$CONSUMERD_MATCH") if [ -n "$dtimeleft_s" ]; then if [ $dtimeleft_s -lt 0 ]; then out= @@ -692,7 +728,7 @@ function sigstop_lttng_sessiond_opt() return fi - PID_SESSIOND="$(pgrep -f "${SESSIOND_MATCH}") $(pgrep -f "$RUNAS_MATCH")" + PID_SESSIOND="$(lttng_pgrep "${SESSIOND_MATCH}") $(lttng_pgrep "$RUNAS_MATCH")" if [ "$withtap" -eq "1" ]; then diag "Sending SIGSTOP to lt-$SESSIOND_BIN and $SESSIOND_BIN pids: $(echo "$PID_SESSIOND" | tr '\n' ' ')" @@ -706,7 +742,7 @@ function sigstop_lttng_sessiond_opt() else out=1 while [ $out -ne 0 ]; do - pid="$(pgrep -f "$SESSIOND_MATCH")" + pid="$(lttng_pgrep "$SESSIOND_MATCH")" # Wait until state becomes stopped for session # daemon(s). @@ -754,7 +790,7 @@ function stop_lttng_consumerd_opt() local retval=0 - PID_CONSUMERD="$(pgrep -f "$CONSUMERD_MATCH")" + PID_CONSUMERD="$(lttng_pgrep "$CONSUMERD_MATCH")" if [ -z "$PID_CONSUMERD" ]; then if [ "$withtap" -eq "1" ]; then @@ -774,7 +810,7 @@ function stop_lttng_consumerd_opt() else out=1 while [ $out -ne 0 ]; do - pid="$(pgrep -f "$CONSUMERD_MATCH")" + pid="$(lttng_pgrep "$CONSUMERD_MATCH")" # If consumerds are still present check their status. # A zombie status qualifies the consumerd as *killed* @@ -821,7 +857,7 @@ function sigstop_lttng_consumerd_opt() local withtap=$1 local signal=SIGSTOP - PID_CONSUMERD="$(pgrep -f "$CONSUMERD_MATCH")" + PID_CONSUMERD="$(lttng_pgrep "$CONSUMERD_MATCH")" diag "Sending SIGSTOP to $CONSUMERD_BIN pids: $(echo "$PID_CONSUMERD" | tr '\n' ' ')" @@ -837,7 +873,7 @@ function sigstop_lttng_consumerd_opt() else out=1 while [ $out -ne 0 ]; do - pid="$(pgrep -f "$CONSUMERD_MATCH")" + pid="$(lttng_pgrep "$CONSUMERD_MATCH")" # Wait until state becomes stopped for all # consumers.