From a2092cf3d4bd0df50679344a4c1d0b56b3af4a59 Mon Sep 17 00:00:00 2001 From: Kienan Stewart Date: Thu, 7 Nov 2024 12:20:58 -0500 Subject: [PATCH] Tests: Use serial runner MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Observed issue ============== Running `make check -jN` with N greater than 1 will fail a number of different checks. Cause ===== The majority of the tests aren't designed to work in isolation (e.g. using `LTTNG_HOME` and `LTTNG_RUNDIR`), and some tests that exercise global system resources (e.g. kernel modules) will never work reliably in parallel. Solution ======== Create a serial test runner that can execute all the tests that require global resources while the other tests that are defined as 'safe to parallelize' can be run concurrently. Known drawbacks =============== None. Change-Id: I0b87d2cd5e870ee7f9241ec78390ed96a01efb38 Signed-off-by: Kienan Stewart Signed-off-by: Jérémie Galarneau --- tests/regression/Makefile.am | 45 ++++++---- tests/regression/tests.serial | 149 ++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 17 deletions(-) create mode 100755 tests/regression/tests.serial diff --git a/tests/regression/Makefile.am b/tests/regression/Makefile.am index 9f4ec7ab8..86a351d08 100644 --- a/tests/regression/Makefile.am +++ b/tests/regression/Makefile.am @@ -2,11 +2,13 @@ SUBDIRS = tools -LOG_DRIVER_FLAGS = --merge --post-script $(top_srcdir)/tests/utils/warn_processes.sh +LOG_DRIVER_FLAGS = --merge LOG_DRIVER = env PGREP='$(PGREP)' AM_TAP_AWK='$(AWK)' $(SHELL) \ $(top_srcdir)/tests/utils/tap-driver.sh -TESTS = tools/base-path/test_ust \ +TEST_EXTENSIONS = .serial + +SERIAL_TESTS = tools/base-path/test_ust \ tools/channel/test_channel \ tools/filtering/test_invalid_filter \ tools/filtering/test_unsupported_op \ @@ -14,11 +16,8 @@ TESTS = tools/base-path/test_ust \ tools/streaming/test_kernel \ tools/streaming/test_ust \ tools/health/test_thread_ok \ - 
tools/live/test_early_inactive_app.py \ tools/live/test_kernel \ tools/live/test_lttng_kernel \ - tools/live/test_miss_short_lived_app.py \ - tools/live/test_per_application_leaks.py \ tools/live/test_ust \ tools/live/test_ust_tracefile_count \ tools/live/test_lttng_ust \ @@ -67,12 +66,16 @@ TESTS = tools/base-path/test_ust \ tools/trigger/test_list_triggers_cli \ tools/trigger/test_remove_trigger_cli \ tools/trigger/name/test_trigger_name_backwards_compat \ - tools/trigger/hidden/test_hidden_trigger \ + tools/trigger/hidden/test_hidden_trigger + +TESTS = tools/live/test_early_inactive_app.py \ + tools/live/test_miss_short_lived_app.py \ + tools/live/test_per_application_leaks.py \ tools/context/test_ust.py \ tools/client/test_session_commands.py \ tools/client/test_event_rule_listing.py \ - tools/client/test_bug1373_events_differ_only_by_loglevel \ - tools/client/test_warn_on_shm_too_small.py + tools/client/test_warn_on_shm_too_small.py \ + tests.serial # Only build kernel tests on Linux. 
if IS_LINUX @@ -81,7 +84,7 @@ endif # IS_LINUX if HAVE_LIBLTTNG_UST_CTL SUBDIRS += ust -TESTS += ust/before-after/test_before_after \ +SERIAL_TESTS += ust/before-after/test_before_after \ ust/buffers-pid/test_buffers_pid \ ust/multi-session/test_multi_session \ ust/nprocesses/test_nprocesses \ @@ -99,30 +102,32 @@ TESTS += ust/before-after/test_before_after \ ust/ust-app-ctl-paths/test_path_separators \ ust/ust-app-ctl-paths/test_ust_app_ctl_paths \ ust/ust-constructor/test_ust_constructor_c_dynamic.py \ - ust/ust-constructor/test_ust_constructor_c_static.py \ - ust/ust-constructor/test_ust_constructor_cpp_dynamic.py \ - ust/ust-constructor/test_ust_constructor_cpp_static.py \ + tools/client/test_bug1373_events_differ_only_by_loglevel \ tools/config-directory/test_config.py \ tools/metadata/test_ust \ tools/relayd-grouping/test_ust \ tools/trigger/rate-policy/test_ust_rate_policy +TESTS += ust/ust-constructor/test_ust_constructor_c_static.py \ + ust/ust-constructor/test_ust_constructor_cpp_dynamic.py \ + ust/ust-constructor/test_ust_constructor_cpp_static.py + if TEST_JAVA_JUL_AGENT -TESTS += ust/java-jul/test_java_jul \ +SERIAL_TESTS += ust/java-jul/test_java_jul \ ust/java-jul/test_ust_app_ctl_path_separators.sh endif # TEST_JAVA_JUL_AGENT if TEST_JAVA_LOG4J_AGENT -TESTS += ust/java-log4j/test_java_log4j +SERIAL_TESTS += ust/java-log4j/test_java_log4j endif # TEST_JAVA_LOG4J_AGENT if TEST_JAVA_LOG4J2_AGENT -TESTS += ust/java-log4j2/test_agent_log4j2_domain_log4j \ +SERIAL_TESTS += ust/java-log4j2/test_agent_log4j2_domain_log4j \ ust/java-log4j2/test_agent_log4j2_domain_log4j2 endif # TEST_JAVA_LOG4J2_AGENT if IS_LINUX -TESTS += \ +SERIAL_TESTS += \ kernel/test_all_events \ kernel/test_callstack \ kernel/test_channel \ @@ -142,7 +147,7 @@ endif # IS_LINUX endif # HAVE_LIBLTTNG_UST_CTL if PYTHON_BINDING -TESTS += ust/linking/test_linking \ +SERIAL_TESTS += ust/linking/test_linking \ ust/daemon/test_daemon \ ust/exit-fast/test_exit-fast \ ust/fork/test_fork \ @@ 
-152,6 +157,12 @@ TESTS += ust/linking/test_linking \ ust/type-declarations/test_type_declarations endif +SERIAL_LOG_DRIVER_FLAGS = --merge +SERIAL_LOG_DRIVER = env SERIAL_TESTS='$(SERIAL_TESTS)' PGREP='$(PGREP)' AM_TAP_AWK='$(AWK)' $(SHELL) \ + $(top_srcdir)/tests/utils/tap-driver.sh + +EXTRA_DIST = tests.serial + if PRECIOUS_TESTS .PRECIOUS: $(TEST_LOGS) endif diff --git a/tests/regression/tests.serial b/tests/regression/tests.serial new file mode 100755 index 000000000..6de8422e8 --- /dev/null +++ b/tests/regression/tests.serial @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# +# SPDX-FileCopyrightText: 2024 Kienan Stewart +# SPDX-License-Identifier: GPL-2.0-only +# + +""" +This script runs the tests defined in the environment variable +`SERIAL_TESTS` one at a time in subprocesses. + +For each serial test, `.log` and `.trs` will be produced. The log file +will contain the line-buffered combined stdout and stderr from the test itself. +The trs file will be formatted as an autotools trs file. + +This test cannot know how many subtests are going to be run beforehand, so all +tests are run and the output produced before emitting a TAP plan and creating its +own test and trs log files. + +For each test (that is - a TAP `ok`, `not ok`, and `skip`), a single line following +the rough format of the autotools `make check` wrapper will be output to stdout.
+
+  PASS|SKIP|FAIL: testname NN - Description
+
+"""
+
+import logging
+import os
+import pathlib
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+import time
+
+test_utils_import_path = pathlib.Path(__file__).absolute().parents[1] / "utils"
+sys.path.append(str(test_utils_import_path))
+
+import lttngtest
+
+# This is not tap 14 compliant, but should suffice
+TAP_OK_LINE_RE = re.compile(r"^ok \d+.*$", re.MULTILINE)
+TAP_SKIP_LINE_RE = re.compile(r"skip \d+.*$", re.MULTILINE)
+TAP_NOT_OK_LINE_RE = re.compile(r"^not ok \d+.*$", re.MULTILINE)
+
+
+def run_tests(test_scripts):
+    """
+    Returns True if all the tests pass or skip
+    """
+    any_failures = False
+    results = []
+    tap = lttngtest.TapGenerator(len(test_scripts))
+    for test_script in test_scripts:
+        stdout, retcode = run_test(test_script)
+        results.append(
+            {
+                "test": test_script,
+                "stdout": stdout,
+                "returncode": retcode,
+                "passed": len(TAP_OK_LINE_RE.findall(stdout)),
+                "failed": len(TAP_NOT_OK_LINE_RE.findall(stdout)),
+                "skipped": len(TAP_SKIP_LINE_RE.findall(stdout)),
+            }
+        )
+        results[-1]["total_test_count"] = (
+            results[-1]["passed"] + results[-1]["failed"] + results[-1]["skipped"]
+        )
+        if retcode == 77:
+            # Special code for 'not platform applicable'
+            # See https://www.gnu.org/software/automake/manual/html_node/Scripts_002dbased-Testsuites.html
+            tap.skip("Test script '{}' returned code 77".format(test_script))
+        else:
+            any_failures = any_failures or (retcode != 0 or results[-1]["failed"] != 0)
+            tap.test(
+                retcode == 0 and results[-1]["failed"] == 0,
+                "Test script '{}' returned code {}. {} passed, {} failed, {} skipped [total: {}]".format(
+                    test_script,
+                    retcode,
+                    results[-1]["passed"],
+                    results[-1]["failed"],
+                    results[-1]["skipped"],
+                    results[-1]["total_test_count"],
+                ),
+            )
+    return not any_failures
+
+
+def run_test(test_script):
+    """
+    Runs `test_script` with stderr merged into stdout, writes the output to
+    `<test_script>.log`, and returns an `(output, return code)` tuple.
+    """
+    stdout = ""
+    logging.info("Starting test '{}'".format(test_script))
+
+    # Support logd from the modified tap driver
+    current_test_log_dir = os.environ.get("LTTNG_TEST_LOG_DIR", None)
+    test_env = os.environ.copy()
+    test_log = pathlib.Path("{}.log".format(test_script))
+    test_log_dir = None
+    if current_test_log_dir:
+        test_log_dir = pathlib.Path("{}.log.d".format(test_script))
+        # exist_ok: a directory left behind by an earlier run is reused
+        test_log_dir.mkdir(parents=True, exist_ok=True)
+        test_env["LTTNG_TEST_LOG_DIR"] = str(test_log_dir)
+    process = subprocess.Popen(
+        [test_script],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        env=test_env,
+    )
+    while process.returncode is None:
+        try:
+            out, _ = process.communicate(timeout=1)
+            # Undecodable bytes are replaced so odd output cannot abort the runner
+            stdout += out.decode("utf-8", errors="replace")
+        except subprocess.TimeoutExpired:
+            continue
+    logging.info(
+        "Test '%s' terminated with return code %s", test_script, process.returncode
+    )
+    with open(str(test_log), "w", encoding="utf-8") as f:
+        f.write(stdout)
+        logging.debug("Wrote test output to '{}'".format(str(test_log)))
+    if process.returncode == 0 and test_log_dir:
+        shutil.rmtree(str(test_log_dir))
+    return (stdout, process.returncode)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.DEBUG, format="# [%(created)f] %(levelname)s:%(name)s:%(message)s"
+    )
+
+    tests = shlex.split(os.environ.get("SERIAL_TESTS", ""))
+    skip_tests = shlex.split(os.environ.get("SKIP_SERIAL_TESTS", ""))
+    if skip_tests:
+        logging.debug("Skipped serial tests: {}".format(skip_tests))
+        tests = [test for test in tests if test not in skip_tests]
+    limit_tests = shlex.split(os.environ.get("TESTS", ""))
+    if limit_tests:
+        logging.debug("Limiting tests to: {}".format(limit_tests))
+        tests = [test for test in tests if test in limit_tests]
+
+    logging.info("Serial tests received %d tests", len(tests))
+    logging.debug("Serial tests: {}".format(tests))
+    if not run_tests(tests):
+        sys.exit(1)
-- 
2.39.5