# Source: lttng-ci.git / scripts / babeltrace-benchmark / benchmark.py

#!/usr/bin/python3
# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import os
import tempfile
from statistics import mean
import argparse
import sys
from operator import add

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter

import git
import numpy
import lava_submit

from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError


# Benchmark types; get_benchmark_results() fetches one result file per commit
# for each of them.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket used for every object store download and removal in this file.
DEFAULT_BUCKET = "lava"

# Commits for which launch_jobs() never submits a job. The comment next to
# each hash gives the reason.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789", # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662", # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb", # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad", # Does not build
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b", # Does not build
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde", # Does not build
    "ce67f5614a4db3b2de4d887eca52135b439b4937", # Does not build
    "80aff5efc66679fd934cef433c0e698694748385", # Does not build
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3", # Does not build
    "ae466a6e1b856d96cf5112a371b4df2b732503ec", # Does not build
}

def json_type(string):
    """
    Argparse type for json args.
    We expect a base dictionary.

    Return the decoded dictionary. Raise argparse.ArgumentTypeError when the
    string is not valid JSON or when the top-level JSON value is not a
    dictionary, so that argparse reports the actual problem to the user.
    """
    try:
        passed_json = json.loads(string)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass.
        msg = "%r is not valid JSON: %s" % (string, err)
        raise argparse.ArgumentTypeError(msg) from err
    if not isinstance(passed_json, dict):
        msg = "%r is not a dict" % string
        raise argparse.ArgumentTypeError(msg)
    return passed_json

def graph_get_color(branch):
    """
    Get the color matching the branch.

    The three known branches keep their fixed colors. Any other branch name
    (for example one passed through --overwrite-branches-cutoff) gets a color
    picked from a small fallback palette instead of raising KeyError. The pick
    only depends on the branch name, so it is the same on every run.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    if branch in color:
        return color[branch]

    # Do not use hash() here: str hashes are randomized per process, which
    # would make the colors change between two reports.
    fallback = ["orange", "purple", "brown", "magenta", "olive", "teal"]
    return fallback[sum(branch.encode("utf-8")) % len(fallback)]


def graph_get_title(branch, benchmark_type):
    """
    Build the graph title: the branch name followed by the readable name of
    the benchmark type. Raise KeyError for an unknown benchmark type.
    """
    readable = {"dummy": "Dummy output", "text": "Text output"}[benchmark_type]
    return "{} - {}".format(branch, readable)


def get_client():
    """
    Build the minio client used by the rest of this script.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider reading them from the environment or a config file.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)


def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object <prefix>/<file_name> of the default bucket into
    workdir_name.

    Return the local path of the downloaded file, or None when the object
    store reports that the object does not exist (NoSuchKey).
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path


def delete_file(client, prefix, file_name):
    """
    Remove the object <prefix>/<file_name> from the default bucket.

    A ResponseError is printed and otherwise ignored; a missing object
    (NoSuchKey) is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete, which is the state we want anyway.
        pass


def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commits for the bt_version and
    cutoff, i.e. the commit hashes of the range cutoff..origin/bt_version.
    The list is empty when the range does not contain any commit.

    WARNING: This runs "git fetch" in the repository located at repo_path.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    log = repo.git.log(
        "{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
    )
    # Do not use split("\n") here: it turns an empty log into [""] and the
    # callers would then handle the empty string as a commit hash.
    return log.splitlines()


def parse_result(result_path):
    """
    Load the JSON result file and return a list with one value per sample,
    each value being the user time plus the system time of that sample.
    """
    with open(result_path) as result_file:
        content = json.load(result_file)
        user_times = content["User time (seconds)"]
        system_times = content["System time (seconds)"]
        return [user + system for user, system in zip(user_times, system_times)]


def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark result from a certain commit across all benchmark type.

    Return a (results, benchmark_valid) tuple:
      - results maps each benchmark type to the dataset returned by
        parse_result(). It is None as soon as the result file of one
        benchmark type is missing.
      - benchmark_valid is False when at least one fetched dataset contains
        nothing but zeros.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # The prefix already ends with the benchmark type, do not repeat
            # it in the reported object name.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    return results, benchmark_valid


def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data holds one x position per commit and y_data the matching dataset
    (list of samples) of each commit. Each dataset goes through
    sanitize_dataset(): the kept samples are drawn as dots in the branch color
    and the outliers as black crosses. labels are used as x tick labels.
    latest_values maps a branch name to its latest value (or None); the value
    of every other branch is drawn as a horizontal line.

    Draws on the current matplotlib figure and returns nothing.
    """
    point_x_data = []
    outlier_x_data = []
    point_y_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave some headroom above the highest sample. The y axis always starts
    # at zero, so only the upper limit is computed.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the graph.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot the graph of delta between each sequential commit.

    y_data holds one value per commit. The first point is drawn at 0.0 and
    every other point is the value of its commit minus the value of the
    previous commit. labels are used as x tick labels. latest_values is
    accepted so that all plot functions share a signature; it is not used.

    Draws on the current matplotlib figure and returns nothing.
    """
    local_abs_max = 100

    # Transform y_data to a list of deltas for which the reference is the
    # previous element. The first element has no predecessor.
    local_y_data = [0.0] if y_data else []
    local_y_data.extend(
        current - previous for previous, current in zip(y_data, y_data[1:])
    )

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        largest_delta = abs(max(local_y_data, key=abs))
        # Keep the default range when every delta is zero (e.g. one single
        # commit), otherwise both y limits would be equal.
        if largest_delta > 0:
            local_abs_max = largest_delta * 1.3

    plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(
        graph_get_title(branch, benchmark_type) + " Delta to previous commit",
        fontweight="bold",
    )
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    y_data holds one value per commit; each point is drawn as its difference
    to the first value, in percent of the first value. labels are used as x
    tick labels. latest_values maps a branch name to its latest value (or
    None); the value of every other branch is drawn as a horizontal line using
    the same reference.

    The y axis is centered on zero and covers at least -100% to 100%. It grows
    when a point of the branch falls outside of that range.

    Draws on the current matplotlib figure and returns nothing.
    """
    reference = 0.01
    y_abs_max = 100

    # Keep the fallback reference when there is no data or when the first
    # point is zero: a ratio cannot be computed against zero.
    if y_data and y_data[0]:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # Only grow the default range, never shrink it.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report named report_name.

    branches maps a branch name to its cutoff commit and git_path is the
    location of the git repository used to list the commits. For each
    benchmark type and each branch, three pages are written: the raw values,
    the ratio to the first commit and the delta between sequential commits.
    Commits without a complete and valid result are left out.
    """
    client = get_client()
    branch_results = dict()

    # Fetch the results for each branch.
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # Find the maximum size for a series inside our series dataset.
    # This is used to compute the size of the actual plot (pdf). It does not
    # depend on the benchmark type, compute it once.
    max_len = max((len(results) for results in branch_results.values()), default=0)
    if max_len > 10:
        width = 0.16 * max_len
    else:
        width = 11.69
    figsize = (width, 8.27)

    # The PDF document. The context manager closes it even when a plot fails.
    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            # Gather the comparison value used to draw comparison line
            # between branches.
            latest_values = {}
            for branch, results in branch_results.items():
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                # Each figure is closed once saved, pyplot keeps every
                # figure alive otherwise.
                fig = plt.figure(figsize=figsize, dpi=100)
                plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
                pdf_pages.savefig(fig)
                plt.close(fig)

                # Use the mean of each sanitize dataset here, we do not care for
                # variance for ratio. At least not yet.
                y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                fig = plt.figure(figsize=figsize, dpi=100)
                plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
                pdf_pages.savefig(fig)
                plt.close(fig)

                fig = plt.figure(figsize=figsize, dpi=100)
                plot_delta_between_point(
                    branch, b_type, x_data, y_data, labels, latest_values
                )
                pdf_pages.savefig(fig)
                plt.close(fig)


def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    For each branch of branches (branch name -> cutoff commit), every commit
    that is not listed in invalid_commits and that has no complete result yet
    gets a job. With force, a job is submitted even when results exist.
    wait_for_completion and debug are handed over to lava_submit.submit().
    """
    client = get_client()
    pending = set()
    for branch, cutoff in branches.items():
        candidates = [
            commit
            for commit in get_git_log(branch, cutoff, git_path)
            if commit not in invalid_commits
        ]
        with tempfile.TemporaryDirectory() as workdir:
            for commit in candidates:
                b_results, _ = get_benchmark_results(client, commit, workdir)
                if force or not b_results:
                    pending.add(commit)

    total = len(pending)
    for position, commit in enumerate(pending, start=1):
        print("Job {}/{}".format(position, total))
        lava_submit.submit(
            commit, wait_for_completion=wait_for_completion, debug=debug
        )


def main():
    """
    Parse arguments and execute as needed.

    Return the exit status of the script: 1 when the repository location
    given with --repo-path does not exist, 0 otherwise.
    """
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    # The value is parsed as JSON, hence the double quotes in the example.
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A JSON dictionary of the form "
        '{"branch_name": "commit_hash_cutoff", ...}. Allow custom graphing and '
        "jobs generation.",
        required=False, type=json_type
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0


def sanitize_dataset(dataset):
    """
    Split the dataset using the 1.5 IQR rule [1]: a value is an outlier when
    it lies more than 1.5 times the interquartile range below the first
    quartile or above the third quartile.

    Return a (kept values, outliers) tuple of lists, both in dataset order.
    The kept values give a representative mean without outlier in it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    low = first_quartile - margin
    high = third_quartile + margin

    kept = [value for value in dataset if low <= value <= high]
    rejected = [value for value in dataset if not (low <= value <= high)]
    return kept, rejected


# Only run main() when this file is executed as a script; its return value is
# used as the exit status of the process.
if __name__ == "__main__":
    sys.exit(main())
Commit	Line	Data
5c65bbc2 JR	1	#!/usr/bin/python3
	2	# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
	17	import json
	18	import os
	19	import tempfile
	20	from statistics import mean
	21	import argparse
	22	import sys
	23	from operator import add
	24
	25	import matplotlib.pyplot as plt
	26	from matplotlib.backends.backend_pdf import PdfPages
	27	from matplotlib.ticker import PercentFormatter
	28
	29	import git
	30	import numpy
	31	import lava_submit
	32
	33	from minio import Minio
	34	from minio.error import NoSuchKey
	35	from minio.error import ResponseError
	36
	37
	38	BENCHMARK_TYPES = ["dummy", "text"]
	39	DEFAULT_BUCKET = "lava"
	40
e085717c	41	invalid_commits = {
c19fa307 KS	42	"ec9a9794af488a9accce7708a8b0d8188b498789", # Does not build
	43	"8c99128c640cbce71fb8a6caa15e4c672252b662", # Block on configure
	44	"f3847c753f1b4f12353c38d97b0577d9993d19fb", # Does not build
	45	"e0111295f17ddfcc33ec771a8deac505473a06ad", # Does not build
	46	"d0d4e0ed487ea23aaf0d023513c0a4d86901b79b", # Does not build
	47	"c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde", # Does not build
	48	"ce67f5614a4db3b2de4d887eca52135b439b4937", # Does not build
	49	"80aff5efc66679fd934cef433c0e698694748385", # Does not build
	50	"f4f11e84942d36fcc8a597d226928bce2ccac4b3", # Does not build
	51	"ae466a6e1b856d96cf5112a371b4df2b732503ec", # Does not build
	52	}
5c65bbc2	53
cf595cda JR	54	def json_type(string):
	55	"""
	56	Argpase type for json args.
	57	We expect a base dictionary.
	58	"""
	59	passed_json = json.loads(string)
	60	if not isinstance(passed_json, dict):
	61	msg = "%r is not a dict" % string
	62	raise argparse.ArgumentTypeError(msg)
	63	return passed_json
	64
5c65bbc2 JR	65	def graph_get_color(branch):
	66	"""
	67	Get the color matching the branch.
	68	"""
	69	color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
	70	return color[branch]
	71
	72
	73	def graph_get_title(branch, benchmark_type):
	74	"""
	75	Get title for graph based on benchmark type.
	76	"""
	77	string = {"dummy": "Dummy output", "text": "Text output"}
	78	return "{} - {}".format(branch, string[benchmark_type])
	79
	80
	81	def get_client():
	82	"""
	83	Return minio client configured.
	84	"""
	85	return Minio(
	86	"obj.internal.efficios.com", access_key="jenkins", secret_key="echo123456"
	87	)
	88
	89
	90	def get_file(client, prefix, file_name, workdir_name):
	91	"""
	92	Return the path of the downloaded file.
	93	Return None on error
	94	"""
	95	destination = os.path.join(workdir_name, file_name)
	96	object_name = "{}/{}".format(prefix, file_name)
	97	try:
	98	client.fget_object(DEFAULT_BUCKET, object_name, destination)
	99	except NoSuchKey:
	100	return None
	101
	102	return destination
	103
	104
	105	def delete_file(client, prefix, file_name):
	106	"""
	107	Delete the file on remote.
	108	"""
	109	object_name = "{}/{}".format(prefix, file_name)
	110	try:
	111	client.remove_object(DEFAULT_BUCKET, object_name)
	112	except ResponseError as err:
	113	print(err)
	114	except NoSuchKey:
	115	pass
	116
	117
	118	def get_git_log(bt_version, cutoff, repo_path):
	119	"""
	120	Return an ordered (older to newer) list of commits for the bt_version and
	121	cutoff. WARNING: This changes the git repo HEAD.
	122	"""
	123	repo = git.Repo(repo_path)
	124	repo.git.fetch()
	125	return repo.git.log(
	126	"{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
	127	).split("\n")
	128
129
130	def parse_result(result_path):
131	"""
132	Parse the result file. Return a dataset of User time + System time.
133	"""
134	with open(result_path) as result:
135	parsed_result = json.load(result)
136	return list(
137	map(
138	add,
139	parsed_result["User time (seconds)"],
140	parsed_result["System time (seconds)"],
141	)
142	)
143
144
145	def get_benchmark_results(client, commit, workdir):
146	"""
147	Fetch the benchmark result from a certain commit across all benchmark type.
148	"""
149	results = {}
150	benchmark_valid = True
151	for b_type in BENCHMARK_TYPES:
cdace203	152	prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
5c65bbc2 JR	153	result_file = get_file(client, prefix, commit, workdir)
	154	if not result_file:
	155	"""
	156	Benchmark is either corrupted or not complete.
	157	"""
	158	return None, benchmark_valid
	159	results[b_type] = parse_result(result_file)
	160	if all(i == 0.0 for i in results[b_type]):
	161	benchmark_valid = False
	162	print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
	163	# The dataset is valid return immediately.
	164	return results, benchmark_valid
	165
	166
	167	def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
	168	"""
	169	Plot the graph using the raw value.
	170	"""
	171	point_x_data = []
	172	outlier_x_data = []
	173	point_y_data = []
	174	outlier_y_data = []
	175	for pos in range(len(x_data)):
	176	x = x_data[pos]
	177	valid_points, outliers = sanitize_dataset(y_data[pos])
	178	for y in valid_points:
	179	point_x_data.append(x)
	180	point_y_data.append(y)
	181	for y in outliers:
	182	outlier_x_data.append(x)
	183	outlier_y_data.append(y)
	184
	185	plt.plot(
	186	point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
	187	)
	188	plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")
	189
5c65bbc2 JR	190	ymax = 1
	191	if y_data:
	192	ymin = 0.8 * min([item for sublist in y_data for item in sublist])
	193	ymax = 1.2 * max([item for sublist in y_data for item in sublist])
	194	# Put latest of other branches for reference as horizontal line.
	195	for l_branch, l_result in latest_values.items():
	196	if not l_result or l_branch == branch:
	197	continue
	198	plt.axhline(
	199	y=l_result,
	200	label="Latest {}".format(l_branch),
	201	color=graph_get_color(l_branch),
	202	)
5c65bbc2 JR	203	if l_result >= ymax:
5c65bbc2 JR	204	ymax = 1.2 * l_result
056f7519	205	ax = plt.gca()
925d7893	206	plt.ylim(ymin=0, ymax=ymax)
5c65bbc2 JR	207	plt.xticks(x_data, labels, rotation=90, family="monospace")
	208	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	209	plt.ylabel("User + system time (s)")
	210	plt.xlabel("Latest commits")
	211	plt.legend()
526aab11	212	plt.grid(True)
5c65bbc2	213
056f7519 JR	214	# Put tick on the right side
	215	ax.tick_params(labeltop=False, labelright=True)
	216
5c65bbc2 JR	217	plt.tight_layout()
	218	return
	219
09de7b53 JR	220
	221	def plot_delta_between_point(
	222	branch, benchmark_type, x_data, y_data, labels, latest_values
	223	):
20defd5e JR	224	"""
	225	Plot the graph of delta between each sequential commit.
	226	"""
	227	local_abs_max = 100
	228
	229	# Transform y_data to a list of for which the reference is the first
	230	# element.
	231	local_y_data = []
	232	for pos, y in enumerate(y_data):
	233	if pos == 0:
	234	local_y_data.append(0.0)
	235	continue
	236	local_y_data.append(y - y_data[pos - 1])
	237
	238	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	239
	240	# Get max absolute value to align the y axis with zero in the middle.
	241	if local_y_data:
	242	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	243
	244	plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)
	245
	246	ax = plt.gca()
	247	plt.xticks(x_data, labels, rotation=90, family="monospace")
09de7b53 JR	248	plt.title(
	249	graph_get_title(branch, benchmark_type) + " Delta to previous commit",
	250	fontweight="bold",
	251	)
20defd5e JR	252	plt.ylabel("Seconds")
	253	plt.xlabel("Latest commits")
	254	plt.legend()
526aab11	255	plt.grid(True)
20defd5e JR	256
	257	# Put tick on the right side
	258	ax.tick_params(labeltop=False, labelright=True)
	259
	260	plt.tight_layout()
	261	return
5c65bbc2	262
09de7b53	263
5c65bbc2 JR	264	def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
	265	"""
	266	Plot the graph using a ratio using first point as reference (0%).
	267	"""
	268	reference = 0.01
	269	y_abs_max = 100
	270
	271	if y_data:
	272	reference = y_data[0]
	273
	274	# Transform y_data to a list of ratio for which the reference is the first
	275	# element.
	276	local_y_data = list(map(lambda y: ((y / reference) - 1.0) * 100, y_data))
	277
	278	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	279
	280	# Put latest of other branches for reference as horizontal line.
	281	for l_branch, l_result in latest_values.items():
	282	if not l_result or l_branch == branch:
	283	continue
	284	ratio_l_result = ((l_result / reference) - 1.0) * 100.0
	285	print(
	286	"branch {} branch {} value {} l_result {} reference {}".format(
	287	branch, l_branch, ratio_l_result, l_result, reference
	288	)
	289	)
	290	plt.axhline(
	291	y=ratio_l_result,
	292	label="Latest {}".format(l_branch),
	293	color=graph_get_color(l_branch),
	294	)
	295
	296	# Draw the reference line.
	297	plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")
	298
	299	# Get max absolute value to align the y axis with zero in the middle.
	300	if local_y_data:
	301	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	302	if y_abs_max > 100:
	303	y_abs_max = local_abs_max
	304
	305	plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)
	306
	307	ax = plt.gca()
	308	percent_formatter = PercentFormatter()
	309	ax.yaxis.set_major_formatter(percent_formatter)
	310	ax.yaxis.set_minor_formatter(percent_formatter)
	311	plt.xticks(x_data, labels, rotation=90, family="monospace")
	312	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	313	plt.ylabel("Ratio")
	314	plt.xlabel("Latest commits")
	315	plt.legend()
526aab11	316	plt.grid(True)
5c65bbc2	317
056f7519 JR	318	# Put tick on the right side
	319	ax.tick_params(labeltop=False, labelright=True)
	320
5c65bbc2 JR	321	plt.tight_layout()
	322	return
	323
09de7b53	324
5c65bbc2 JR	325	def generate_graph(branches, report_name, git_path):
	326
	327	# The PDF document
	328	pdf_pages = PdfPages(report_name)
	329
	330	client = get_client()
	331	branch_results = dict()
	332
	333	# Fetch the results for each branch.
	334	for branch, cutoff in branches.items():
	335	commits = get_git_log(branch, cutoff, git_path)
	336	results = []
	337	with tempfile.TemporaryDirectory() as workdir:
	338	for commit in commits:
	339	b_results, valid = get_benchmark_results(client, commit, workdir)
	340	if not b_results or not valid:
	341	continue
	342	results.append((commit, b_results))
	343	branch_results[branch] = results
	344
	345	for b_type in BENCHMARK_TYPES:
	346	latest_values = {}
	347	max_len = 0
	348
	349	# Find the maximum size for a series inside our series dataset.
	350	# This is used later to compute the size of the actual plot (pdf).
	351	# While there gather the comparison value used to draw comparison line
	352	# between branches.
	353	for branch, results in branch_results.items():
	354	max_len = max([max_len, len(results)])
	355	if results:
	356	latest_values[branch] = mean(
	357	sanitize_dataset(results[-1][1][b_type])[0]
	358	)
	359	else:
	360	latest_values[branch] = None
	361
	362	for branch, results in branch_results.items():
	363	# Create a figure instance
	364	if max_len and max_len > 10:
	365	width = 0.16 * max_len
	366	else:
	367	width = 11.69
	368
	369	x_data = list(range(len(results)))
	370	y_data = [c[1][b_type] for c in results]
	371	labels = [c[0][:8] for c in results]
	372
	373	fig = plt.figure(figsize=(width, 8.27), dpi=100)
	374	plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
	375	pdf_pages.savefig(fig)
	376
5c65bbc2 JR	377	# Use the mean of each sanitize dataset here, we do not care for
	378	# variance for ratio. At least not yet.
	379	y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
20defd5e	380	fig = plt.figure(figsize=(width, 8.27), dpi=100)
5c65bbc2 JR	381	plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
	382	pdf_pages.savefig(fig)
	383
20defd5e	384	fig = plt.figure(figsize=(width, 8.27), dpi=100)
09de7b53 JR	385	plot_delta_between_point(
	386	branch, b_type, x_data, y_data, labels, latest_values
	387	)
20defd5e JR	388	pdf_pages.savefig(fig)
20defd5e JR	389
5c65bbc2 JR	390	pdf_pages.close()
	391
	392
d373c66e	393	def launch_jobs(branches, git_path, wait_for_completion, debug, force):
5c65bbc2 JR	394	"""
	395	Lauch jobs for all missing results.
	396	"""
	397	client = get_client()
73fe8ab4	398	commits_to_test = set()
5c65bbc2	399	for branch, cutoff in branches.items():
73fe8ab4	400	commits = [x for x in get_git_log(branch, cutoff, git_path) if x not in invalid_commits]
5c65bbc2 JR	401	with tempfile.TemporaryDirectory() as workdir:
	402	for commit in commits:
	403	b_results = get_benchmark_results(client, commit, workdir)[0]
d373c66e	404	if b_results and not force:
5c65bbc2	405	continue
73fe8ab4 KS	406	commits_to_test.add(commit)
	407	for index, commit in enumerate(commits_to_test):
	408	print("Job {}/{}".format(index+1, len(commits_to_test)))
	409	lava_submit.submit(
	410	commit, wait_for_completion=wait_for_completion, debug=debug
	411	)
5c65bbc2 JR	412
	413
	414	def main():
	415	"""
	416	Parse arguments and execute as needed.
	417	"""
	418	bt_branches = {
	419	"master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
	420	"stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
	421	"stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
	422	}
	423
	424	parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
	425	parser.add_argument(
	426	"--generate-jobs", action="store_true", help="Generate and send jobs"
	427	)
d373c66e JR	428	parser.add_argument(
	429	"--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
	430	)
5c65bbc2 JR	431	parser.add_argument(
	432	"--do-not-wait-on-completion",
	433	action="store_true",
	434	default=False,
	435	help="Wait for the completion of each jobs sent. This is useful"
	436	"for the ci. Otherwise we could end up spaming the lava instance.",
	437	)
	438	parser.add_argument(
	439	"--generate-report",
	440	action="store_true",
	441	help="Generate graphs and save them to pdf",
	442	)
	443	parser.add_argument(
	444	"--report-name", default="report.pdf", help="The name of the pdf report."
	445	)
	446	parser.add_argument(
	447	"--debug", action="store_true", default=False, help="Do not send jobs to lava."
	448	)
	449	parser.add_argument(
	450	"--repo-path", help="The location of the git repo to use.", required=True
	451	)
cf595cda JR	452	parser.add_argument(
	453	"--overwrite-branches-cutoff",
	454	help="A dictionary of the form {"
	455	"'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and"
	456	"jobs generation.",
	457	required=False, type=json_type
	458	)
5c65bbc2 JR	459
	460	args = parser.parse_args()
	461
cf595cda JR	462	if args.overwrite_branches_cutoff:
	463	bt_branches = args.overwrite_branches_cutoff
	464
5c65bbc2 JR	465	if not os.path.exists(args.repo_path):
	466	print("Repository location does not exists.")
	467	return 1
	468
	469	if args.generate_jobs:
	470	print("Launching jobs for:")
d373c66e	471
5c65bbc2 JR	472	for branch, cutoff in bt_branches.items():
5c65bbc2 JR	473	print("\t Branch {} with cutoff {}".format(branch, cutoff))
d373c66e	474
5c65bbc2	475	launch_jobs(
d373c66e JR	476	bt_branches,
	477	args.repo_path,
	478	not args.do_not_wait_on_completion,
	479	args.debug,
	480	args.force_jobs,
5c65bbc2 JR	481	)
	482
	483	if args.generate_report:
	484	print("Generating pdf report ({}) for:".format(args.report_name))
	485	for branch, cutoff in bt_branches.items():
	486	print("\t Branch {} with cutoff {}".format(branch, cutoff))
	487	generate_graph(bt_branches, args.report_name, args.repo_path)
	488
	489	return 0
	490
	491
	492	def sanitize_dataset(dataset):
	493	"""
	494	Use IRQ 1.5 [1] to remove outlier from the dataset. This is useful to get a
	495	representative mean without outlier in it.
	496	[1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
	497	"""
	498	sorted_data = sorted(dataset)
	499	q1, q3 = numpy.percentile(sorted_data, [25, 75])
	500	iqr = q3 - q1
	501	lower_bound = q1 - (1.5 * iqr)
	502	upper_bound = q3 + (1.5 * iqr)
	503	new_dataset = []
	504	outliers = []
	505	for i in dataset:
	506	if lower_bound <= i <= upper_bound:
	507	new_dataset.append(i)
	508	else:
	509	outliers.append(i)
	510	return new_dataset, outliers
	511
	512
	513	if __name__ == "__main__":
	514	sys.exit(main())