[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py

#!/usr/bin/python3
# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import os
import tempfile
from statistics import mean
import argparse
import sys
from operator import add

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter

import git
import numpy
import lava_submit

from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError


# Benchmark flavours. Each name is used as a key of the per-commit result
# dictionaries and as a component of the remote result path.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket used for every object download and removal in this script.
DEFAULT_BUCKET = "lava"


def graph_get_color(branch):
    """
    Map a branch name to the colour used to draw it on the graphs.

    Raises KeyError when the branch has no colour assigned.
    """
    palette = dict(
        [("stable-1.5", "red"), ("stable-2.0", "green"), ("master", "blue")]
    )
    return palette[branch]


def graph_get_title(branch, benchmark_type):
    """
    Build the graph title: the branch name followed by a human readable
    name of the benchmark type.

    Raises KeyError for an unknown benchmark type.
    """
    output_names = {"dummy": "Dummy output", "text": "Text output"}
    output_name = output_names[benchmark_type]
    return "{0} - {1}".format(branch, output_name)


def get_client():
    """
    Return a configured minio client for the benchmark result storage.

    The endpoint and credentials default to the values historically
    hard-coded here, so existing callers are unaffected. Each one can be
    overridden through the environment so that deployments do not have to
    rely on a secret committed to the source tree:

      BENCHMARK_MINIO_ENDPOINT    host of the object storage
      BENCHMARK_MINIO_ACCESS_KEY  access key
      BENCHMARK_MINIO_SECRET_KEY  secret key
    """
    env = os.environ
    return Minio(
        env.get("BENCHMARK_MINIO_ENDPOINT", "obj.internal.efficios.com"),
        access_key=env.get("BENCHMARK_MINIO_ACCESS_KEY", "jenkins"),
        # NOTE(review): default kept for backward compatibility only; prefer
        # providing the secret through the environment.
        secret_key=env.get("BENCHMARK_MINIO_SECRET_KEY", "echo123456"),
    )


def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "<prefix>/<file_name>" from the default bucket into
    workdir_name.

    Return the path of the local copy, or None when the remote object does
    not exist.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_key = "{0}/{1}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_key, local_path)
    except NoSuchKey:
        # Nothing was downloaded, report the absence to the caller.
        local_path = None
    return local_path


def delete_file(client, prefix, file_name):
    """
    Remove the object "<prefix>/<file_name>" from the default bucket.

    A response error is printed and a missing object is silently ignored.
    """
    remote_key = "{0}/{1}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_key)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Already absent on the remote: nothing to do.
        return


def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of full commit hashes for the
    range "<cutoff>..origin/<bt_version>".

    The list is empty when the range contains no commit.

    Side effect: `git fetch` is run in the repository at repo_path before
    the log is computed.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # splitlines() yields [] for an empty log, whereas split("\n") yields
    # [""] and would make callers process a bogus empty commit id.
    return log_output.splitlines()


def parse_result(result_path):
    """
    Load a JSON result file and return, sample by sample, the sum of its
    user time and system time series.

    When the two series differ in length the extra samples are dropped.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)
    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]


def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit for every benchmark type.

    Return a (results, valid) tuple. results maps each benchmark type to
    its parsed dataset, or is None as soon as the result file of one type
    cannot be fetched. valid is False when at least one parsed dataset
    contains nothing but zeros.
    """
    per_type = {}
    is_valid = True
    for kind in BENCHMARK_TYPES:
        remote_dir = "/results/benchmarks/babeltrace/{}/".format(kind)
        local_path = get_file(client, remote_dir, commit, workdir)
        if not local_path:
            # Benchmark is either corrupted or not complete.
            return None, is_valid

        samples = parse_result(local_path)
        per_type[kind] = samples
        if all(sample == 0.0 for sample in samples):
            # A dataset made only of zeros carries no measurement.
            is_valid = False
            print("Invalid benchmark for {}/{}/{}".format(remote_dir, kind, commit))

    # Every benchmark type was fetched and parsed.
    return per_type, is_valid


def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data holds the x position of each commit, y_data the dataset (list of
    samples) of each commit and labels the tick label of each commit.
    Samples flagged by sanitize_dataset are drawn as black "+" outliers.
    latest_values maps a branch name to a reference value; every other
    branch with a truthy value is drawn as a horizontal line.

    Draws on the current matplotlib figure; returns nothing.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        # Every sample of a commit shares the x position of that commit.
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Upper bound of the y axis: 20% above the largest sample, or 1 when
    # there is no sample at all. The lower bound is always 0.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()

def plot_delta_between_point(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot, for each commit, the difference between its value and the value
    of the commit right before it. The first commit has no predecessor and
    is plotted at zero.

    latest_values is not used by this graph.
    """
    # Half height of the y axis used when there is nothing to plot.
    half_range = 100

    # Delta of each point relative to the previous one (0.0 for the first).
    deltas = [
        0.0 if index == 0 else value - y_data[index - 1]
        for index, value in enumerate(y_data)
    ]

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Size the axis on the largest absolute delta so that zero sits in the
    # middle of the graph.
    if deltas:
        largest_delta = max(deltas, key=abs)
        half_range = abs(largest_delta) * 1.3

    plt.ylim(ymin=-half_range, ymax=half_range)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()

    # Also show the tick labels on the right side.
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()

def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    y_data holds one value per commit. Each value is converted to its
    relative difference, in percent, to the first value. latest_values maps
    a branch name to a reference value; every other branch with a truthy
    value is drawn as a horizontal line using the same conversion.

    The y axis is symmetric around 0% and spans at least +/-100%; it is
    widened when a plotted point falls outside that range.

    Draws on the current matplotlib figure; returns nothing.
    """
    # Placeholder reference used only when there is no data to plot.
    reference = 0.01
    # Minimum half height of the y axis, in percent.
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = list(map(lambda y: ((y / reference) - 1.0) * 100, y_data))

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        # Widen the axis only when the data does not fit in the default
        # range. The previous test compared y_abs_max with its own initial
        # value and therefore never widened the axis.
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()

def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report.

    branches maps a branch name to its cutoff commit, report_name is the
    path of the pdf to write and git_path the location of the git repo.

    For each benchmark type and each branch three pages are produced: the
    raw values, the ratio to the first commit and the delta between
    sequential commits.
    """
    # The PDF document. The context manager closes it even when fetching
    # or plotting raises.
    with PdfPages(report_name) as pdf_pages:
        branch_results = _fetch_branch_results(get_client(), branches, git_path)

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot
            # (pdf). While there gather the comparison value used to draw
            # comparison line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Same page width for every branch so the graphs line up.
            width = 0.16 * max_len if max_len > 10 else 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]

                raw_data = [c[1][b_type] for c in results]
                _save_page(
                    pdf_pages, width, plot_raw_value,
                    branch, b_type, x_data, raw_data, labels, latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                _save_page(
                    pdf_pages, width, plot_ratio,
                    branch, b_type, x_data, mean_data, labels, latest_values,
                )
                _save_page(
                    pdf_pages, width, plot_delta_between_point,
                    branch, b_type, x_data, mean_data, labels, latest_values,
                )


def _fetch_branch_results(client, branches, git_path):
    """
    Return a dict mapping each branch to its list of (commit, results)
    tuples, keeping only the commits with complete and valid results.
    """
    branch_results = dict()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results
    return branch_results


def _save_page(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one graph on a new figure, append it to the pdf and close the
    figure so figures do not accumulate in memory.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        plt.close(fig)


def launch_jobs(branches, git_path, wait_for_completion, debug):
    """
    Launch a job for every commit for which no result set could be fetched.

    branches maps a branch name to its cutoff commit. wait_for_completion
    and debug are forwarded untouched to lava_submit.submit().
    """
    client = get_client()
    for branch, cutoff in branches.items():
        candidates = get_git_log(branch, cutoff, git_path)

        with tempfile.TemporaryDirectory() as workdir:
            for commit in candidates:
                existing, _valid = get_benchmark_results(client, commit, workdir)
                if not existing:
                    # No stored result for this commit: submit it.
                    lava_submit.submit(
                        commit, wait_for_completion=wait_for_completion, debug=debug
                    )


def main():
    """
    Parse arguments and execute as needed.

    Return the process exit status: 1 when the repository path does not
    exist, 0 otherwise.
    """
    # Branches to benchmark, each with the commit after which results are
    # collected.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        # The flag disables waiting; the previous help text described the
        # opposite and was missing a space between its two string parts.
        help="Do not wait for the completion of each job sent. Waiting (the "
        "default) is useful for the CI; otherwise we could end up spamming "
        "the LAVA instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )

    args = parser.parse_args()

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        launch_jobs(
            bt_branches, args.repo_path, not args.do_not_wait_on_completion, args.debug
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0


def sanitize_dataset(dataset):
    """
    Split a dataset into regular values and outliers using the 1.5 * IQR
    rule [1]. This is useful to get a representative mean without outlier
    in it.

    Return a (values, outliers) tuple of lists, each keeping the order of
    the input.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    lowest_allowed = first_quartile - margin
    highest_allowed = third_quartile + margin

    kept = []
    rejected = []
    for value in dataset:
        # Route each value to the list matching its side of the fences.
        target = kept if lowest_allowed <= value <= highest_allowed else rejected
        target.append(value)
    return kept, rejected


if __name__ == "__main__":
    sys.exit(main())
Commit	Line	Data
5c65bbc2 JR	1	#!/usr/bin/python3
	2	# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
	17	import json
	18	import os
	19	import tempfile
	20	from statistics import mean
	21	import argparse
	22	import sys
	23	from operator import add
	24
	25	import matplotlib.pyplot as plt
	26	from matplotlib.backends.backend_pdf import PdfPages
	27	from matplotlib.ticker import PercentFormatter
	28
	29	import git
	30	import numpy
	31	import lava_submit
	32
	33	from minio import Minio
	34	from minio.error import NoSuchKey
	35	from minio.error import ResponseError
	36
	37
	38	BENCHMARK_TYPES = ["dummy", "text"]
	39	DEFAULT_BUCKET = "lava"
	40
	41
	42	def graph_get_color(branch):
	43	"""
	44	Get the color matching the branch.
	45	"""
	46	color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
	47	return color[branch]
	48
	49
	50	def graph_get_title(branch, benchmark_type):
	51	"""
	52	Get title for graph based on benchmark type.
	53	"""
	54	string = {"dummy": "Dummy output", "text": "Text output"}
	55	return "{} - {}".format(branch, string[benchmark_type])
	56
	57
	58	def get_client():
	59	"""
	60	Return minio client configured.
	61	"""
	62	return Minio(
	63	"obj.internal.efficios.com", access_key="jenkins", secret_key="echo123456"
	64	)
65
66
67	def get_file(client, prefix, file_name, workdir_name):
68	"""
69	Return the path of the downloaded file.
70	Return None on error
71	"""
72	destination = os.path.join(workdir_name, file_name)
73	object_name = "{}/{}".format(prefix, file_name)
74	try:
75	client.fget_object(DEFAULT_BUCKET, object_name, destination)
76	except NoSuchKey:
77	return None
78
79	return destination
80
81
82	def delete_file(client, prefix, file_name):
83	"""
84	Delete the file on remote.
85	"""
86	object_name = "{}/{}".format(prefix, file_name)
87	try:
88	client.remove_object(DEFAULT_BUCKET, object_name)
89	except ResponseError as err:
90	print(err)
91	except NoSuchKey:
92	pass
93
94
95	def get_git_log(bt_version, cutoff, repo_path):
96	"""
97	Return an ordered (older to newer) list of commits for the bt_version and
98	cutoff. WARNING: This changes the git repo HEAD.
99	"""
100	repo = git.Repo(repo_path)
101	repo.git.fetch()
102	return repo.git.log(
103	"{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
104	).split("\n")
105
106
107	def parse_result(result_path):
108	"""
109	Parse the result file. Return a dataset of User time + System time.
110	"""
111	with open(result_path) as result:
112	parsed_result = json.load(result)
113	return list(
114	map(
115	add,
116	parsed_result["User time (seconds)"],
117	parsed_result["System time (seconds)"],
118	)
119	)
120
121
122	def get_benchmark_results(client, commit, workdir):
123	"""
124	Fetch the benchmark result from a certain commit across all benchmark type.
125	"""
126	results = {}
127	benchmark_valid = True
128	for b_type in BENCHMARK_TYPES:
129	prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
130	result_file = get_file(client, prefix, commit, workdir)
131	if not result_file:
132	"""
133	Benchmark is either corrupted or not complete.
134	"""
135	return None, benchmark_valid
136	results[b_type] = parse_result(result_file)
137	if all(i == 0.0 for i in results[b_type]):
138	benchmark_valid = False
139	print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
140	# The dataset is valid return immediately.
141	return results, benchmark_valid
142
143
144	def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
145	"""
146	Plot the graph using the raw value.
147	"""
148	point_x_data = []
149	outlier_x_data = []
150	point_y_data = []
151	outlier_y_data = []
152	for pos in range(len(x_data)):
153	x = x_data[pos]
154	valid_points, outliers = sanitize_dataset(y_data[pos])
155	for y in valid_points:
156	point_x_data.append(x)
157	point_y_data.append(y)
158	for y in outliers:
159	outlier_x_data.append(x)
160	outlier_y_data.append(y)
161
162	plt.plot(
163	point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
164	)
165	plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")
166
5c65bbc2 JR	167	ymax = 1
	168	if y_data:
	169	ymin = 0.8 * min([item for sublist in y_data for item in sublist])
	170	ymax = 1.2 * max([item for sublist in y_data for item in sublist])
	171	# Put latest of other branches for reference as horizontal line.
	172	for l_branch, l_result in latest_values.items():
	173	if not l_result or l_branch == branch:
	174	continue
	175	plt.axhline(
	176	y=l_result,
	177	label="Latest {}".format(l_branch),
	178	color=graph_get_color(l_branch),
	179	)
5c65bbc2 JR	180	if l_result >= ymax:
5c65bbc2 JR	181	ymax = 1.2 * l_result
056f7519	182	ax = plt.gca()
925d7893	183	plt.ylim(ymin=0, ymax=ymax)
5c65bbc2 JR	184	plt.xticks(x_data, labels, rotation=90, family="monospace")
	185	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	186	plt.ylabel("User + system time (s)")
	187	plt.xlabel("Latest commits")
	188	plt.legend()
	189
056f7519 JR	190	# Put tick on the right side
	191	ax.tick_params(labeltop=False, labelright=True)
	192
5c65bbc2 JR	193	plt.tight_layout()
	194	return
	195
20defd5e JR	196	def plot_delta_between_point(branch, benchmark_type, x_data, y_data, labels, latest_values):
	197	"""
	198	Plot the graph of delta between each sequential commit.
	199	"""
	200	local_abs_max = 100
	201
	202	# Transform y_data to a list of for which the reference is the first
	203	# element.
	204	local_y_data = []
	205	for pos, y in enumerate(y_data):
	206	if pos == 0:
	207	local_y_data.append(0.0)
	208	continue
	209	local_y_data.append(y - y_data[pos - 1])
	210
	211	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	212
	213	# Get max absolute value to align the y axis with zero in the middle.
	214	if local_y_data:
	215	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	216
	217	plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)
	218
	219	ax = plt.gca()
	220	plt.xticks(x_data, labels, rotation=90, family="monospace")
	221	plt.title(graph_get_title(branch, benchmark_type) + " Delta to previous commit", fontweight="bold")
	222	plt.ylabel("Seconds")
	223	plt.xlabel("Latest commits")
	224	plt.legend()
	225
	226	# Put tick on the right side
	227	ax.tick_params(labeltop=False, labelright=True)
	228
	229	plt.tight_layout()
	230	return
5c65bbc2 JR	231
	232	def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
	233	"""
	234	Plot the graph using a ratio using first point as reference (0%).
	235	"""
	236	reference = 0.01
	237	y_abs_max = 100
	238
	239	if y_data:
	240	reference = y_data[0]
	241
	242	# Transform y_data to a list of ratio for which the reference is the first
	243	# element.
	244	local_y_data = list(map(lambda y: ((y / reference) - 1.0) * 100, y_data))
	245
	246	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	247
	248	# Put latest of other branches for reference as horizontal line.
	249	for l_branch, l_result in latest_values.items():
	250	if not l_result or l_branch == branch:
	251	continue
	252	ratio_l_result = ((l_result / reference) - 1.0) * 100.0
	253	print(
	254	"branch {} branch {} value {} l_result {} reference {}".format(
	255	branch, l_branch, ratio_l_result, l_result, reference
	256	)
	257	)
	258	plt.axhline(
	259	y=ratio_l_result,
	260	label="Latest {}".format(l_branch),
	261	color=graph_get_color(l_branch),
	262	)
	263
	264	# Draw the reference line.
	265	plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")
	266
	267	# Get max absolute value to align the y axis with zero in the middle.
	268	if local_y_data:
	269	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	270	if y_abs_max > 100:
	271	y_abs_max = local_abs_max
	272
	273	plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)
	274
	275	ax = plt.gca()
	276	percent_formatter = PercentFormatter()
	277	ax.yaxis.set_major_formatter(percent_formatter)
	278	ax.yaxis.set_minor_formatter(percent_formatter)
	279	plt.xticks(x_data, labels, rotation=90, family="monospace")
	280	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	281	plt.ylabel("Ratio")
	282	plt.xlabel("Latest commits")
	283	plt.legend()
	284
056f7519 JR	285	# Put tick on the right side
	286	ax.tick_params(labeltop=False, labelright=True)
	287
5c65bbc2 JR	288	plt.tight_layout()
	289	return
	290
5c65bbc2 JR	291	def generate_graph(branches, report_name, git_path):
	292
	293	# The PDF document
	294	pdf_pages = PdfPages(report_name)
	295
	296	client = get_client()
	297	branch_results = dict()
	298
	299	# Fetch the results for each branch.
	300	for branch, cutoff in branches.items():
	301	commits = get_git_log(branch, cutoff, git_path)
	302	results = []
	303	with tempfile.TemporaryDirectory() as workdir:
	304	for commit in commits:
	305	b_results, valid = get_benchmark_results(client, commit, workdir)
	306	if not b_results or not valid:
	307	continue
	308	results.append((commit, b_results))
	309	branch_results[branch] = results
	310
	311	for b_type in BENCHMARK_TYPES:
	312	latest_values = {}
	313	max_len = 0
	314
	315	# Find the maximum size for a series inside our series dataset.
	316	# This is used later to compute the size of the actual plot (pdf).
	317	# While there gather the comparison value used to draw comparison line
	318	# between branches.
	319	for branch, results in branch_results.items():
	320	max_len = max([max_len, len(results)])
	321	if results:
	322	latest_values[branch] = mean(
	323	sanitize_dataset(results[-1][1][b_type])[0]
	324	)
	325	else:
	326	latest_values[branch] = None
	327
	328	for branch, results in branch_results.items():
	329	# Create a figure instance
	330	if max_len and max_len > 10:
	331	width = 0.16 * max_len
	332	else:
	333	width = 11.69
	334
	335	x_data = list(range(len(results)))
	336	y_data = [c[1][b_type] for c in results]
	337	labels = [c[0][:8] for c in results]
	338
	339	fig = plt.figure(figsize=(width, 8.27), dpi=100)
	340	plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
	341	pdf_pages.savefig(fig)
	342
5c65bbc2 JR	343	# Use the mean of each sanitize dataset here, we do not care for
	344	# variance for ratio. At least not yet.
	345	y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
20defd5e	346	fig = plt.figure(figsize=(width, 8.27), dpi=100)
5c65bbc2 JR	347	plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
	348	pdf_pages.savefig(fig)
	349
20defd5e JR	350	fig = plt.figure(figsize=(width, 8.27), dpi=100)
	351	plot_delta_between_point(branch, b_type, x_data, y_data, labels, latest_values)
	352	pdf_pages.savefig(fig)
	353
5c65bbc2 JR	354	pdf_pages.close()
	355
	356
	357	def launch_jobs(branches, git_path, wait_for_completion, debug):
	358	"""
	359	Lauch jobs for all missing results.
	360	"""
	361	client = get_client()
	362	for branch, cutoff in branches.items():
	363	commits = get_git_log(branch, cutoff, git_path)
	364
	365	with tempfile.TemporaryDirectory() as workdir:
	366	for commit in commits:
	367	b_results = get_benchmark_results(client, commit, workdir)[0]
	368	if b_results:
	369	continue
	370	lava_submit.submit(
	371	commit, wait_for_completion=wait_for_completion, debug=debug
	372	)
	373
	374
	375	def main():
	376	"""
	377	Parse arguments and execute as needed.
	378	"""
	379	bt_branches = {
	380	"master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
	381	"stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
	382	"stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
	383	}
	384
	385	parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
	386	parser.add_argument(
	387	"--generate-jobs", action="store_true", help="Generate and send jobs"
	388	)
	389	parser.add_argument(
	390	"--do-not-wait-on-completion",
	391	action="store_true",
	392	default=False,
	393	help="Wait for the completion of each jobs sent. This is useful"
	394	"for the ci. Otherwise we could end up spaming the lava instance.",
	395	)
	396	parser.add_argument(
	397	"--generate-report",
	398	action="store_true",
	399	help="Generate graphs and save them to pdf",
	400	)
	401	parser.add_argument(
	402	"--report-name", default="report.pdf", help="The name of the pdf report."
	403	)
	404	parser.add_argument(
	405	"--debug", action="store_true", default=False, help="Do not send jobs to lava."
	406	)
	407	parser.add_argument(
	408	"--repo-path", help="The location of the git repo to use.", required=True
	409	)
	410
	411	args = parser.parse_args()
	412
	413	if not os.path.exists(args.repo_path):
	414	print("Repository location does not exists.")
	415	return 1
	416
	417	if args.generate_jobs:
418	print("Launching jobs for:")
419	for branch, cutoff in bt_branches.items():
420	print("\t Branch {} with cutoff {}".format(branch, cutoff))
421	launch_jobs(
422	bt_branches, args.repo_path, not args.do_not_wait_on_completion, args.debug
423	)
424
425	if args.generate_report:
426	print("Generating pdf report ({}) for:".format(args.report_name))
427	for branch, cutoff in bt_branches.items():
428	print("\t Branch {} with cutoff {}".format(branch, cutoff))
429	generate_graph(bt_branches, args.report_name, args.repo_path)
430
431	return 0
432
433
434	def sanitize_dataset(dataset):
435	"""
436	Use IRQ 1.5 [1] to remove outlier from the dataset. This is useful to get a
437	representative mean without outlier in it.
438	[1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
439	"""
440	sorted_data = sorted(dataset)
441	q1, q3 = numpy.percentile(sorted_data, [25, 75])
442	iqr = q3 - q1
443	lower_bound = q1 - (1.5 * iqr)
444	upper_bound = q3 + (1.5 * iqr)
445	new_dataset = []
446	outliers = []
447	for i in dataset:
448	if lower_bound <= i <= upper_bound:
449	new_dataset.append(i)
450	else:
451	outliers.append(i)
452	return new_dataset, outliers
453
454
455	if __name__ == "__main__":
456	sys.exit(main())