From f60a888eeff3048d223e7e8300f6c8ec659c3568 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?=
 <jeremie.galarneau@efficios.com>
Date: Fri, 20 Dec 2024 10:02:31 -0500
Subject: [PATCH] Tests: serial runner: fail test on abort (SIGINT and SIGTERM)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The CI kills the test suite using SIGTERM when it remains stuck
for a given time. When this occurs, it is hard to figure out which
test was running.

Catch both SIGTERM and SIGINT signals to gracefully report a test
failure that indicates the top-level test that was running when
the signal was received.

The number of planned tests is extracted to give a "hint" to the
user that the test could not complete.

Signed-off-by: JÃ©rÃ©mie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: Id2aa025b43e9ddc205c784cc5e100fd04b46c3b9
---
 tests/regression/tests.serial | 85 +++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/tests/regression/tests.serial b/tests/regression/tests.serial
index b46cb3fd8..b07421df0 100755
--- a/tests/regression/tests.serial
+++ b/tests/regression/tests.serial
@@ -31,6 +31,8 @@ import shlex
 import shutil
 import subprocess
 import sys
+import signal
+import time
 
 test_utils_import_path = pathlib.Path(__file__).absolute().parents[1] / "utils"
 sys.path.append(str(test_utils_import_path))
@@ -38,10 +40,41 @@ sys.path.append(str(test_utils_import_path))
 import lttngtest
 
 # This is not tap 14 compliant, but should suffice
+TAP_PLAN_LINE_RE = re.compile(r"^1\.\.(\d+)$", re.MULTILINE)
 TAP_OK_LINE_RE = re.compile(r"^ok \d+.*$", re.MULTILINE)
 TAP_SKIP_LINE_RE = re.compile(r"skip \d+.*$", re.MULTILINE)
 TAP_NOT_OK_LINE_RE = re.compile(r"^not ok \d+.*$", re.MULTILINE)
 
+test_being_aborted = False
+current_test_process = None
+
+
+def handle_abort(signum, frame):
+    global test_being_aborted
+
+    signal_name = signal.Signals(signum).name
+    logging.warning("Received {}, aborting current test".format(signal_name))
+    test_being_aborted = True
+
+    if current_test_process:
+        logging.info("Forwarding {} to test process".format(signal_name))
+        try:
+            current_test_process.send_signal(signum)
+            current_test_process.wait(timeout=10)
+        except subprocess.TimeoutExpired:
+            logging.warning(
+                "Test process did not terminate before timeout, sending SIGKILL"
+            )
+            current_test_process.kill()
+            current_test_process.wait()
+    else:
+        logging.debug("No test process to terminate")
+
+
+signal.signal(signal.SIGTERM, handle_abort)
+signal.signal(signal.SIGINT, handle_abort)
+signal.signal(signal.SIGABRT, handle_abort)
+
 
 def run_tests(test_scripts):
     """
@@ -52,16 +85,21 @@ def run_tests(test_scripts):
     tap = lttngtest.TapGenerator(len(test_scripts))
     for test_script in test_scripts:
         stdout, retcode = run_test(test_script)
-        results.append(
-            {
-                "test": test_script,
-                "stdout": stdout,
-                "returncode": retcode,
-                "passed": len(TAP_OK_LINE_RE.findall(stdout)),
-                "failed": len(TAP_NOT_OK_LINE_RE.findall(stdout)),
-                "skipped": len(TAP_SKIP_LINE_RE.findall(stdout)),
-            }
-        )
+        result = {
+            "test": test_script,
+            "stdout": stdout,
+            "returncode": retcode,
+            "passed": len(TAP_OK_LINE_RE.findall(stdout)),
+            "failed": len(TAP_NOT_OK_LINE_RE.findall(stdout)),
+            "skipped": len(TAP_SKIP_LINE_RE.findall(stdout)),
+        }
+        plan_match = TAP_PLAN_LINE_RE.search(stdout)
+        if plan_match:
+            result["planned"] = int(plan_match.group(1))
+        else:
+            result["planned"] = "unknown"
+
+        results.append(result)
         results[-1]["total_test_count"] = (
             results[-1]["passed"] + results[-1]["failed"] + results[-1]["skipped"]
         )
@@ -73,19 +111,27 @@ def run_tests(test_scripts):
             any_failures = any_failures or (retcode != 0 or results[-1]["failed"] != 0)
             tap.test(
                 retcode == 0 and results[-1]["failed"] == 0,
-                "Test script '{}' returned code {}. {} passed, {} failed, {} skipped [total: {}]".format(
+                "Test script '{}' returned code {}. {} passed, {} failed, {} skipped, {} planned [total: {}]".format(
                     test_script,
                     retcode,
                     results[-1]["passed"],
                     results[-1]["failed"],
                     results[-1]["skipped"],
+                    results[-1]["planned"],
                     results[-1]["total_test_count"],
                 ),
             )
+
+        if test_being_aborted:
+            logging.warning("Aborting test suite")
+            break
+
     return not any_failures
 
 
 def run_test(test_script):
+    global current_test_process
+
     stdout = ""
     logging.info("Starting test '{}'".format(test_script))
 
@@ -101,30 +147,31 @@ def run_test(test_script):
         except FileExistsError:
             pass
         test_env["LTTNG_TEST_LOG_DIR"] = str(test_log_dir)
-    process = subprocess.Popen(
+    current_test_process = subprocess.Popen(
         [test_script],
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         env=test_env,
     )
-    while process.returncode is None:
+    while current_test_process.returncode is None:
         try:
-            out, err = process.communicate(timeout=1)
+            out, err = current_test_process.communicate(timeout=1)
             stdout += out.decode("utf-8")
         except subprocess.TimeoutExpired:
             continue
+
+    return_code = current_test_process.returncode
+    current_test_process = None
     logging.info(
-        "Test '{}' terminated with return code {}".format(
-            test_script, process.returncode
-        )
+        "Test '{}' terminated with return code {}".format(test_script, return_code)
     )
     with open(str(test_log), "w") as f:
         logging.debug("Wrote test output to '{}'".format(str(test_log)))
         f.write(stdout)
-    if process.returncode == 0:
+    if return_code == 0:
         if test_log_dir:
             shutil.rmtree(str(test_log_dir))
-    return (stdout, process.returncode)
+    return (stdout, return_code)
 
 
 if __name__ == "__main__":
-- 
2.39.5