| 1 | # Copyright (C) 2012 Christian Babeux <christian.babeux@efficios.com> |
| 2 | # Copyright (C) 2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
| 3 | # |
| 4 | # SPDX-License-Identifier: GPL-2.0-only |
| 5 | |
# Shared settings for the health check tests.
#
# The following variables are read by this file but never assigned in it;
# presumably the script that sources this file provides them (TODO confirm):
#   CURDIR, TEST_DESC, SESSION_NAME, SESSIOND_PRELOAD, TEST_SUFFIX, KILL_SIGNAL

# Location of the shared test utilities, expressed relative to CURDIR.
TESTDIR="${CURDIR}/../../.."

# Events and channel enabled when a test needs consumer daemons.
UST_EVENT_NAME="tp:tptest"
KERNEL_EVENT_NAME="sched_switch"
CHANNEL_NAME="testchan"

# Name of the health check program, looked up under CURDIR.
HEALTH_CHECK_BIN="health_check"

# Number of TAP results announced through plan_tests.
NUM_TESTS=106

# Seconds to wait between the two health_check runs of each test.
SLEEP_TIME=30

# Quoted so that a CURDIR containing whitespace does not get word-split.
source "${TESTDIR}/utils/utils.sh"
| 15 | |
# Validate the output captured from the last health_check run and emit
# exactly one TAP result.
#
# Arguments:
#   $1 - message expected on the captured stdout for the thread under test
#        (matched literally, not as a regular expression)
#   $2 - non-zero when a relay daemon takes part in the test
#
# Globals read:
#   STDOUT_PATH - file holding the captured stdout of health_check
#   STDERR_PATH - file holding the captured stderr of health_check
#
# Result:
#   pass "Validation OK" when the expected message is present, or, for
#   relayd tests, when the relayd health query itself failed.
#   Otherwise fail "Validation failure", followed by a dump of both
#   captured files through diag.
function report_errors
{
	local expected_error="$1"
	local check_relayd="${2:-0}"
	local err_no_relayd_match="Error querying relayd health"
	local detected=0
	local line

	# Check for health errors
	# Include inability to contact relayd health as an expected
	# error, since this can happen whenever the relayd shutdown due
	# to an error in any thread.
	if grep -qF -- "${expected_error}" "${STDOUT_PATH}"; then
		detected=1
	elif [ "${check_relayd}" -ne 0 ] &&
			grep -qF -- "${err_no_relayd_match}" "${STDERR_PATH}"; then
		detected=1
	fi

	if [ "${detected}" -eq 1 ]; then
		pass "Validation OK"
		return
	fi

	fail "Validation failure"
	diag "Health returned:"

	# IFS= and -r keep each line verbatim; the "|| [ -n ... ]" part
	# also reports a final line that lacks a trailing newline.
	diag "stdout:"
	while IFS= read -r line || [ -n "${line}" ]; do
		diag "${line}"
	done < "${STDOUT_PATH}"

	diag "stderr:"
	while IFS= read -r line || [ -n "${line}" ]; do
		diag "${line}"
	done < "${STDERR_PATH}"
}
| 50 | |
# Run one health check scenario against a single instrumented thread.
#
# Arguments:
#   $1 - testpoint suffix; the variable "<$2>_<$1>" is exported as 1
#   $2 - thread testpoint name (also shown in diagnostics)
#   $3 - message expected from health_check for that thread
#   $4 - 1 when the validation step requires root
#   $5 - 1 to create a session, enable events and start tracing
#   $6 - 1 to start a relay daemon and query its health too
#
# Globals read:
#   CURDIR, HEALTH_CHECK_BIN, SESSIOND_PRELOAD, SESSION_NAME,
#   UST_EVENT_NAME, KERNEL_EVENT_NAME, CHANNEL_NAME, HEALTH_PATH,
#   TRACE_PATH, STDOUT_PATH, STDERR_PATH, SLEEP_TIME, isroot, KILL_SIGNAL
#
# Globals written:
#   RELAYD_ARGS (array of extra health_check arguments); the exported
#   LTTNG_* / LD_PRELOAD variables are unset again before returning.
function test_health
{
	local test_suffix="$1"
	local test_thread_name="$2"
	local test_thread_error_string="$3"
	local test_needs_root="$4"
	local test_consumerd="$5"
	local test_relayd="$6"
	local testpoint_var="${test_thread_name}_${test_suffix}"

	diag "Test health problem detection with ${test_thread_name}"

	# Set the socket timeout to 5 so the health check detection
	# happens within 25 s
	export LTTNG_NETWORK_SOCKET_TIMEOUT=5
	export LTTNG_RELAYD_HEALTH="${HEALTH_PATH}/test-health"

	# Activate testpoints
	export LTTNG_TESTPOINT_ENABLE=1

	# Activate specific thread test
	export "${testpoint_var}=1"

	# Spawn sessiond with preloaded testpoint override lib
	export LD_PRELOAD="$CURDIR/$SESSIOND_PRELOAD"

	diag "Start session daemon"
	start_lttng_sessiond

	if [ "${test_consumerd}" -eq 1 ]; then
		create_lttng_session_no_output "${SESSION_NAME}"

		diag "With UST consumer daemons"
		enable_ust_lttng_event_ok "${SESSION_NAME}" "${UST_EVENT_NAME}" "${CHANNEL_NAME}"

		skip "${isroot}" "Root access is needed. Skipping kernel consumer health check test." "1" ||
		{
			diag "With kernel consumer daemon"
			lttng_enable_kernel_event "${SESSION_NAME}" "${KERNEL_EVENT_NAME}" "${CHANNEL_NAME}"
		}
		start_lttng_tracing_ok "${SESSION_NAME}"
	fi

	# Kept as an array so that a path containing whitespace reaches
	# health_check as a single argument.
	if [ "${test_relayd}" -eq 1 ]; then
		diag "With relay daemon"
		RELAYD_ARGS=("--relayd-path=${LTTNG_RELAYD_HEALTH}")

		start_lttng_relayd "-o $TRACE_PATH"
	else
		RELAYD_ARGS=()
	fi

	# Check health status, not caring about result
	"${CURDIR}/${HEALTH_CHECK_BIN}" "${RELAYD_ARGS[@]}" \
		> /dev/null

	# Wait
	diag "Check after running for ${SLEEP_TIME} seconds"
	sleep "${SLEEP_TIME}"

	# Check health status
	"${CURDIR}/${HEALTH_CHECK_BIN}" "${RELAYD_ARGS[@]}" \
		> "${STDOUT_PATH}" 2> "${STDERR_PATH}"

	if [ "${test_needs_root}" -eq 1 ]; then
		skip "${isroot}" "Root access needed for test \"${test_thread_name}\"." "1" ||
		{
			report_errors "${test_thread_error_string}" "${test_relayd}"
		}
	else
		report_errors "${test_thread_error_string}" "${test_relayd}"
	fi

	# The signal argument is omitted entirely when KILL_SIGNAL is unset
	# or empty, exactly as an unquoted expansion would do.
	if [ "${test_relayd}" -eq 1 ]; then
		# We may fail to stop relayd here, and this is OK, since
		# it may have been killed voluntarily by testpoint.
		stop_lttng_relayd_notap ${KILL_SIGNAL:+"${KILL_SIGNAL}"}
	fi
	stop_lttng_consumerd ${KILL_SIGNAL:+"${KILL_SIGNAL}"}
	stop_lttng_sessiond ${KILL_SIGNAL:+"${KILL_SIGNAL}"}

	unset LTTNG_TESTPOINT_ENABLE
	unset "${testpoint_var}"
	unset LD_PRELOAD
	unset LTTNG_NETWORK_SOCKET_TIMEOUT
	unset LTTNG_RELAYD_HEALTH
}
| 138 | |
# Announce the TAP plan and print the banner for this test.
plan_tests $NUM_TESTS
print_test_banner "$TEST_DESC"

# The testpoint library is mandatory: foundobj is 1 only when the shared
# object exists as a regular file under CURDIR.
foundobj=0
if [ -f "$CURDIR/$SESSIOND_PRELOAD" ]; then
	foundobj=1
fi

# When skip reports success (i.e. it skipped the whole plan), stop here
# with a zero exit status.
if skip $foundobj "No shared object generated. Skipping all tests." $NUM_TESTS; then
	exit 0
fi
| 150 | |
# Testpoint name of every thread under test. The position of each entry
# matters: ERROR_STRING, NEEDS_ROOT, TEST_CONSUMERD and TEST_RELAYD are
# indexed the same way.

# Session daemon threads
THREAD=(
	LTTNG_SESSIOND_THREAD_MANAGE_CLIENTS
	LTTNG_SESSIOND_THREAD_MANAGE_APPS
	LTTNG_SESSIOND_THREAD_REG_APPS
	LTTNG_SESSIOND_THREAD_HT_CLEANUP
	LTTNG_SESSIOND_THREAD_APP_MANAGE_NOTIFY
	LTTNG_SESSIOND_THREAD_APP_REG_DISPATCH
	LTTNG_SESSIOND_THREAD_MANAGE_KERNEL
)

# Consumer daemon threads
THREAD+=(
	LTTNG_CONSUMERD_THREAD_CHANNEL
	LTTNG_CONSUMERD_THREAD_METADATA
	LTTNG_CONSUMERD_THREAD_METADATA_TIMER
)

# Relay daemon threads
THREAD+=(
	LTTNG_RELAYD_THREAD_DISPATCHER
	LTTNG_RELAYD_THREAD_WORKER
	LTTNG_RELAYD_THREAD_LISTENER
	LTTNG_RELAYD_THREAD_LIVE_DISPATCHER
	LTTNG_RELAYD_THREAD_LIVE_WORKER
	LTTNG_RELAYD_THREAD_LIVE_LISTENER
)
| 170 | |
# Message expected from health_check for each entry of THREAD, in the
# same order. Single quotes keep the embedded double quotes literal.

# Session daemon threads
ERROR_STRING=(
	'Thread "Session daemon command" is not responding in component "sessiond".'
	'Thread "Session daemon application manager" is not responding in component "sessiond".'
	'Thread "Session daemon application registration" is not responding in component "sessiond".'
	'Thread "Session daemon hash table cleanup" is not responding in component "sessiond".'
	'Thread "Session daemon application notification manager" is not responding in component "sessiond".'
	'Thread "Session daemon application registration dispatcher" is not responding in component "sessiond".'
	'Thread "Session daemon kernel" is not responding in component "sessiond".'
)

# Consumer daemon threads (no component suffix is expected here)
ERROR_STRING+=(
	'Thread "Consumer daemon channel" is not responding'
	'Thread "Consumer daemon metadata" is not responding'
	'Thread "Consumer daemon metadata timer" is not responding'
)

# Relay daemon threads
ERROR_STRING+=(
	'Thread "Relay daemon dispatcher" is not responding in component "relayd".'
	'Thread "Relay daemon worker" is not responding in component "relayd".'
	'Thread "Relay daemon listener" is not responding in component "relayd".'
	'Thread "Relay daemon live dispatcher" is not responding in component "relayd".'
	'Thread "Relay daemon live worker" is not responding in component "relayd".'
	'Thread "Relay daemon live listener" is not responding in component "relayd".'
)
| 191 | |
| 192 | # TODO |
| 193 | # "LTTNG_SESSIOND_THREAD_MANAGE_CONSUMER" |
| 194 | # "Thread \"Session daemon manage consumer\" is not responding in component \"sessiond\"." |
| 195 | |
| 196 | # TODO: test kernel consumerd specifically in addition to UST consumerd |
| 197 | |
| 198 | # TODO: need refactoring of consumerd teardown |
| 199 | # "LTTNG_CONSUMERD_THREAD_SESSIOND" |
| 200 | # "Thread \"Consumer daemon session daemon command manager\" is not responding" |
| 201 | |
# TODO: this thread is responsible for closing a file descriptor that
# triggers teardown of the metadata thread. We should revisit teardown of
# consumerd.
| 205 | # "LTTNG_CONSUMERD_THREAD_DATA" |
| 206 | # "Thread \"Consumer daemon data\" is not responding" |
| 207 | |
# 1 when validating the matching THREAD entry requires root, else 0.
# One row per daemon, in THREAD order.
NEEDS_ROOT=(
	0 0 0 0 0 0 1	# session daemon (last entry: kernel thread)
	0 0 0		# consumer daemon
	0 0 0 0 0 0	# relay daemon
)
| 228 | |
# 1 when the matching THREAD entry is tested with a session created and
# tracing started (passed to test_health as its fifth argument), else 0.
# One row per daemon, in THREAD order.
TEST_CONSUMERD=(
	0 0 0 0 0 0 0	# session daemon
	1 1 1		# consumer daemon
	1 1 1 1 1 1	# relay daemon
)
| 249 | |
# 1 when the matching THREAD entry is tested with a relay daemon started
# (passed to test_health as its sixth argument), else 0.
# One row per daemon, in THREAD order.
TEST_RELAYD=(
	0 0 0 0 0 0 0	# session daemon
	0 0 0		# consumer daemon
	1 1 1 1 1 1	# relay daemon
)
| 270 | |
# Scratch locations shared by every test run:
#   STDOUT_PATH / STDERR_PATH - captured output of health_check
#   TRACE_PATH                - output directory handed to the relay daemon
#   HEALTH_PATH               - directory holding the relayd health socket path
STDOUT_PATH=$(mktemp)
STDERR_PATH=$(mktemp)
TRACE_PATH=$(mktemp -d)
HEALTH_PATH=$(mktemp -d)

# isroot is 1 when running with uid 0; it is the condition given to skip
# for the root-only steps.
if [ "$(id -u)" == "0" ]; then
	isroot=1
else
	isroot=0
fi

# Run one health test per THREAD entry; the other arrays are read at the
# same index.
THREAD_COUNT=${#THREAD[@]}
for ((i = 0; i < THREAD_COUNT; i++)); do
	test_health "${TEST_SUFFIX}" \
		"${THREAD[$i]}" \
		"${ERROR_STRING[$i]}" \
		"${NEEDS_ROOT[$i]}" \
		"${TEST_CONSUMERD[$i]}" \
		"${TEST_RELAYD[$i]}"
done

# Quoted, with "--", so that a temporary path containing whitespace or a
# leading dash is removed as a single operand.
rm -rf -- "${HEALTH_PATH}" "${TRACE_PATH}"
rm -f -- "${STDOUT_PATH}" "${STDERR_PATH}"