From 21fec189ebfeb433f4d86eb273aac16e1c5b72f8 Mon Sep 17 00:00:00 2001
From: Jonathan Rajotte
Date: Wed, 22 May 2019 11:06:45 -0400
Subject: [PATCH] Add retry on submit for 502 http error

The root cause of the 502 error on submit is still not known.
Retry 10 times with a 5-second sleep between attempts.

Signed-off-by: Jonathan Rajotte
---
 scripts/system-tests/lava2-submit.py | 104 +++++++++++++++------------
 1 file changed, 60 insertions(+), 44 deletions(-)

diff --git a/scripts/system-tests/lava2-submit.py b/scripts/system-tests/lava2-submit.py
index 1001256..75b5a37 100644
--- a/scripts/system-tests/lava2-submit.py
+++ b/scripts/system-tests/lava2-submit.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # Copyright (C) 2016 - Francis Deslauriers
 #
 # This program is free software: you can redistribute it and/or modify
@@ -15,29 +15,27 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import argparse
-import base64
 import json
 import os
 import random
 import sys
 import time
-import yaml
 import xmlrpc.client
-import pprint
 
 from urllib.parse import urljoin
 from urllib.request import urlretrieve
-
-from jinja2 import Environment, FileSystemLoader, meta
+import yaml
+from jinja2 import Environment, FileSystemLoader
 
 USERNAME = 'lava-jenkins'
 HOSTNAME = 'lava-master-02.internal.efficios.com'
 OBJSTORE_URL = "https://obj.internal.efficios.com/lava/results/"
 
 class TestType():
-    baremetal_benchmarks=1
-    baremetal_tests=2
-    kvm_tests=3
-    kvm_fuzzing_tests=4
+    """ Enum-like for test type """
+    baremetal_benchmarks = 1
+    baremetal_tests = 2
+    kvm_tests = 3
+    kvm_fuzzing_tests = 4
     values = {
         'baremetal-benchmarks' : baremetal_benchmarks,
         'baremetal-tests' : baremetal_tests,
@@ -46,6 +44,7 @@ class TestType():
     }
 
 class DeviceType():
+    """ Enum-like for device type """
     x86 = 'x86'
     kvm = 'qemu'
     values = {
@@ -57,21 +56,23 @@ def get_job_bundle_content(server, job):
     try:
         bundle_sha = server.scheduler.job_status(str(job))['bundle_sha1']
         bundle = server.dashboard.get(bundle_sha)
-    except xmlrpc.client.Fault as f:
-        print('Error while fetching results bundle', f.faultString)
-        raise f
+    except xmlrpc.client.Fault as error:
+        print('Error while fetching results bundle', error.faultString)
+        raise error
 
     return json.loads(bundle['content'])
 
-# Parse the results bundle to see the run-tests testcase
-# of the lttng-kernel-tests passed successfully
 def check_job_all_test_cases_state_count(server, job):
+    """
+    Parse the results bundle to see the run-tests testcase
+    of the lttng-kernel-tests passed successfully
+    """
     print("Testcase result:")
     content = server.results.get_testjob_results_yaml(str(job))
     testcases = yaml.load(content)
 
-    passed_tests=0
-    failed_tests=0
+    passed_tests = 0
+    failed_tests = 0
     for testcase in testcases:
         if testcase['result'] != 'pass':
             print("\tFAILED {}\n\t\t See http://{}{}".format(
@@ -79,27 +80,31 @@
                 HOSTNAME,
                 testcase['url']
             ))
-            failed_tests+=1
+            failed_tests += 1
         else:
-            passed_tests+=1
+            passed_tests += 1
     return (passed_tests, failed_tests)
 
-# Get the benchmark results from the objstore
-# save them as CSV files localy
 def fetch_benchmark_results(build_id):
+    """
+    Get the benchmark results from the objstore
+    save them as CSV files locally
+    """
     testcases = ['processed_results_close.csv',
-            'processed_results_ioctl.csv',
-            'processed_results_open_efault.csv',
-            'processed_results_open_enoent.csv',
-            'processed_results_dup_close.csv',
-            'processed_results_raw_syscall_getpid.csv',
-            'processed_results_lttng_test_filter.csv']
+                 'processed_results_ioctl.csv',
+                 'processed_results_open_efault.csv',
+                 'processed_results_open_enoent.csv',
+                 'processed_results_dup_close.csv',
+                 'processed_results_raw_syscall_getpid.csv',
+                 'processed_results_lttng_test_filter.csv']
     for testcase in testcases:
         url = urljoin(OBJSTORE_URL, "{:s}/{:s}".format(build_id, testcase))
         urlretrieve(url, testcase)
 
-# Parse the attachment of the testcase to fetch the stdout of the test suite
 def print_test_output(server, job):
+    """
+    Parse the attachment of the testcase to fetch the stdout of the test suite
+    """
     job_finished, log = server.scheduler.jobs.logs(str(job))
     logs = yaml.load(log.data.decode('ascii'))
     print_line = False
@@ -116,7 +121,10 @@
         if print_line:
             print("{} {}".format(line['dt'], line['msg']))
 
-def get_vlttng_cmd(device, lttng_tools_commit, lttng_ust_commit=None):
+def get_vlttng_cmd(lttng_tools_commit, lttng_ust_commit=None):
+    """
+    Return vlttng cmd to be used in the job template for setup.
+    """
     vlttng_cmd = 'vlttng --jobs=$(nproc) --profile urcu-master' \
             ' --override projects.babeltrace.build-env.PYTHON=python3' \
@@ -163,17 +171,15 @@
     if not args.debug:
         try:
             lava_api_key = os.environ['LAVA2_JENKINS_TOKEN']
-        except Exception as e:
-            print('LAVA2_JENKINS_TOKEN not found in the environment variable. Exiting...', e )
+        except Exception as error:
+            print('LAVA2_JENKINS_TOKEN not found in the environment. Exiting...',
+                  error)
             return -1
 
     jinja_loader = FileSystemLoader(os.path.dirname(os.path.realpath(__file__)))
     jinja_env = Environment(loader=jinja_loader, trim_blocks=True,
-            lstrip_blocks= True)
+                            lstrip_blocks=True)
     jinja_template = jinja_env.get_template('template_lava_job.jinja2')
-    template_source = jinja_env.loader.get_source(jinja_env, 'template_lava_job.jinja2')
-    parsed_content = jinja_env.parse(template_source)
-    undef = meta.find_undeclared_variables(parsed_content)
 
     test_type = TestType.values[args.type]
 
@@ -184,7 +190,7 @@
 
     vlttng_path = '/tmp/virtenv'
 
-    vlttng_cmd = get_vlttng_cmd(device_type, args.tools_commit, args.ust_commit)
+    vlttng_cmd = get_vlttng_cmd(args.tools_commit, args.ust_commit)
 
     context = dict()
     context['DeviceType'] = DeviceType
@@ -216,7 +222,16 @@
     server = xmlrpc.client.ServerProxy('http://%s:%s@%s/RPC2' %
                                        (USERNAME, lava_api_key, HOSTNAME))
 
-    jobid = server.scheduler.submit_job(render)
+    for attempt in range(10):
+        try:
+            jobid = server.scheduler.submit_job(render)
+        except xmlrpc.client.ProtocolError as error:
+            print('Protocol error on submit, sleeping and retrying. Attempt #{}'
+                  .format(attempt))
+            time.sleep(5)
+            continue
+        else:
+            break
 
     print('Lava jobid:{}'.format(jobid))
     print('Lava job URL: http://lava-master-02.internal.efficios.com/scheduler/job/{}'.format(jobid))
@@ -224,15 +239,16 @@
     #Check the status of the job every 30 seconds
     jobstatus = server.scheduler.job_state(jobid)['job_state']
     running = False
-    while jobstatus in ['Submitted','Scheduling','Scheduled','Running']:
+    while jobstatus in ['Submitted', 'Scheduling', 'Scheduled', 'Running']:
         if not running and jobstatus == 'Running':
            print('Job started running')
            running = True
        time.sleep(30)
        try:
            jobstatus = server.scheduler.job_state(jobid)['job_state']
-        except xmlrpc.client.ProtocolError as e:
-            print('Protocol error, retring')
+        except xmlrpc.client.ProtocolError as error:
+            print('Protocol error, retrying')
+            continue
     print('Job ended with {} status.'.format(jobstatus))
 
     if jobstatus != 'Finished':
@@ -243,13 +259,13 @@
     elif test_type is TestType.baremetal_benchmarks:
         fetch_benchmark_results(args.build_id)
 
-    passed, failed=check_job_all_test_cases_state_count(server, jobid)
+    passed, failed = check_job_all_test_cases_state_count(server, jobid)
     print('With {} passed and {} failed Lava test cases.'.format(passed, failed))
 
-    if failed == 0:
-        return 0
-    else:
+    if failed != 0:
         return -1
 
+    return 0
+
 if __name__ == "__main__":
     sys.exit(main())
-- 
2.34.1
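
The bounded retry loop is the heart of this patch, so here is a minimal standalone sketch of the same pattern, factored into a reusable helper. The helper name submit_with_retry, its parameters, and the synthetic 502 in the demo are illustrative assumptions, not code from the patch. One behavioral difference worth noting: if all ten attempts in the patched loop fail, the loop falls through and the following print('Lava jobid:...') raises a NameError because jobid was never assigned; the sketch re-raises the last ProtocolError instead.

# Illustrative sketch only: a reusable version of the retry-on-ProtocolError
# pattern introduced above.
import time
import xmlrpc.client


def submit_with_retry(call, attempts=10, delay=5):
    """Invoke call() until it succeeds, retrying on xmlrpc.client.ProtocolError.

    Mirrors the patch: up to `attempts` tries with `delay` seconds of sleep
    between them, but re-raises the last error once the attempts are
    exhausted instead of falling through with no result.
    """
    for attempt in range(attempts):
        try:
            return call()
        except xmlrpc.client.ProtocolError as error:
            print('Protocol error ({}), sleeping and retrying. Attempt #{}'
                  .format(error.errcode, attempt))
            if attempt == attempts - 1:
                raise
            time.sleep(delay)


if __name__ == '__main__':
    # Demo: a callable that fails twice with a synthetic 502, then succeeds.
    state = {'calls': 0}

    def flaky_submit():
        state['calls'] += 1
        if state['calls'] < 3:
            raise xmlrpc.client.ProtocolError('lava/RPC2', 502, 'Bad Gateway', {})
        return 4242  # stand-in for a LAVA job id

    print('jobid: {}'.format(submit_with_retry(flaky_submit, delay=0)))

In the script itself the equivalent call would be jobid = submit_with_retry(lambda: server.scheduler.submit_job(render)), which keeps the submit path to a single line and makes the failure mode explicit.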