[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py

#!/usr/bin/python3
# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import os
import tempfile
from statistics import mean
import argparse
import sys
from operator import add

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter

import git
import numpy
import lava_submit

from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError


# Benchmark flavours. Each name is used to build the remote results prefix in
# get_benchmark_results() and to pick the graph title in graph_get_title().
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket passed to the object-store client by get_file() and delete_file().
DEFAULT_BUCKET = "lava"


def json_type(string):
    """
    Argparse "type" callable for arguments carrying a JSON document.

    The decoded document must be a JSON object (a Python dict); any other
    top-level JSON value is rejected with argparse.ArgumentTypeError.
    Malformed JSON propagates the exception raised by json.loads().
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)

def graph_get_color(branch):
    """
    Get the color matching the branch.

    The three branches tracked by default keep their fixed colors. Any other
    branch name (for example one supplied through
    --overwrite-branches-cutoff) gets a color picked from a fallback palette
    instead of raising KeyError. The pick depends only on the branch name, so
    a given branch keeps the same color on every graph and on every run.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    if branch in color:
        return color[branch]

    # Named matplotlib colors that do not collide with the fixed ones above.
    fallback = ["orange", "purple", "brown", "magenta", "cyan", "olive"]
    # Sum of the UTF-8 bytes: a cheap hash that is stable across interpreter
    # runs (the built-in hash() of a str is randomized per process).
    return fallback[sum(str(branch).encode("utf-8")) % len(fallback)]


def graph_get_title(branch, benchmark_type):
    """
    Build a graph title: the branch name followed by the human readable
    label of the benchmark type ("dummy" or "text").
    """
    output_labels = {"dummy": "Dummy output", "text": "Text output"}
    output_label = output_labels[benchmark_type]
    return "{name} - {label}".format(name=branch, label=output_label)


def get_client():
    """
    Return minio client configured.

    The endpoint and credentials default to the values historically
    hard-coded here. Each one can be overridden through the environment
    (BENCHMARK_MINIO_ENDPOINT, BENCHMARK_MINIO_ACCESS_KEY,
    BENCHMARK_MINIO_SECRET_KEY) so that credentials can be rotated or
    injected by the CI without editing this file.
    """
    env = os.environ
    return Minio(
        env.get("BENCHMARK_MINIO_ENDPOINT", "obj.internal.efficios.com"),
        access_key=env.get("BENCHMARK_MINIO_ACCESS_KEY", "jenkins"),
        secret_key=env.get("BENCHMARK_MINIO_SECRET_KEY", "echo123456"),
    )


def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "<prefix>/<file_name>" of the default bucket into
    workdir_name.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote (NoSuchKey). Any other client error
    propagates to the caller.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_key = "{prefix}/{name}".format(prefix=prefix, name=file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_key, local_path)
    except NoSuchKey:
        local_path = None
    return local_path


def delete_file(client, prefix, file_name):
    """
    Remove the object "<prefix>/<file_name>" from the default bucket.

    Best effort: a ResponseError is printed and a missing object
    (NoSuchKey) is silently ignored.
    """
    remote_key = "{prefix}/{name}".format(prefix=prefix, name=file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_key)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete on the remote.
        return


def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of the commit hashes listed by
    "git log <cutoff>..origin/<bt_version>".

    The list is empty when the range contains no commit.

    WARNING: This runs "git fetch" in the repository at repo_path before
    listing the commits.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    log_output = repo.git.log(
        "{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
    )
    # str.split("\n") turns an empty log into [""]; callers would then treat
    # the empty string as a commit hash (and even submit a job for it).
    return [line for line in log_output.splitlines() if line]


def parse_result(result_path):
    """
    Load a JSON result file and return, sample by sample, the sum of its
    "User time (seconds)" and "System time (seconds)" series.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)
    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]


def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark result from a certain commit across all benchmark type.

    Return a (results, valid) tuple:
      - results maps each benchmark type to the dataset returned by
        parse_result(). It is None as soon as the result file of one
        benchmark type cannot be downloaded.
      - valid is False when at least one downloaded dataset holds only
        zeros (an empty dataset counts as such).
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # No result file for this type: the benchmark is either
            # corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # prefix already ends with "<b_type>/": do not repeat the type.
            print("Invalid benchmark for {}{}".format(prefix, commit))
    return results, benchmark_valid


def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data: x position of each commit.
    y_data: one dataset (list of samples) per commit, aligned with x_data.
    labels: x tick label of each commit.
    latest_values: reference value per branch. Falsy entries and the entry
        of `branch` itself are skipped, the others are drawn as horizontal
        lines.

    Draws on the current pyplot figure and returns nothing.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, dataset in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(dataset)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave 20% of head room above the highest sample, outliers included.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)
    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result
    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot the graph of delta between each sequential commit.

    y_data holds one value per commit. Each plotted point is the difference
    with the previous commit; the first commit is plotted at 0.
    latest_values is not used by this function.

    Draws on the current pyplot figure and returns nothing.
    """
    local_abs_max = 100

    # Transform y_data to a list of deltas to the previous element. The first
    # element has no predecessor and is used as the zero reference.
    local_y_data = [0.0] if y_data else []
    local_y_data.extend(
        current - previous for previous, current in zip(y_data, y_data[1:])
    )

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Get max absolute value to align the y axis with zero in the middle.
    # Keep the default span when every delta is zero (e.g. a single commit):
    # identical lower and upper limits would give a degenerate axis.
    largest_delta = max((abs(delta) for delta in local_y_data), default=0)
    if largest_delta > 0:
        local_abs_max = largest_delta * 1.3

    plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(
        graph_get_title(branch, benchmark_type) + " Delta to previous commit",
        fontweight="bold",
    )
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    y_data holds one value per commit. Each point is plotted as its
    percentage of variation relative to the first one.
    latest_values: reference value per branch. Falsy entries and the entry
        of `branch` itself are skipped, the others are drawn as horizontal
        lines using the same ratio.

    Draws on the current pyplot figure and returns nothing.
    """
    reference = 0.01
    y_abs_max = 100

    # A zero first point cannot be used as a divisor: keep the default
    # reference in that case instead of raising ZeroDivisionError.
    if y_data and y_data[0]:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The axis spans at least +/-100% and grows when a point falls outside.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def _save_page(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one page with plot_function on a fresh figure, append it to the PDF
    and release the figure.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)


def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report.

    For every benchmark type and every branch, three pages are written: the
    raw values, the ratio to the first commit and the delta between
    sequential commits.

    branches: dict mapping a branch name to its cutoff commit.
    report_name: path of the PDF file to write.
    git_path: location of the git repository used to list the commits.
    """
    client = get_client()
    branch_results = dict()

    # Fetch the results for each branch. Commits without a complete and valid
    # set of results are left out of the graphs.
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # The longest series drives the page width so that every commit label
    # stays legible. Never go below the default width: scaling alone would
    # give a page narrower than 2 inches for just over 10 commits.
    max_len = max((len(results) for results in branch_results.values()), default=0)
    width = max(11.69, 0.16 * max_len)

    # The context manager closes the PDF document even if plotting fails.
    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            # Gather the comparison value used to draw comparison line
            # between branches: the mean of the latest sanitized dataset.
            latest_values = {}
            for branch, results in branch_results.items():
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                _save_page(
                    pdf_pages,
                    width,
                    plot_raw_value,
                    branch,
                    b_type,
                    x_data,
                    y_data,
                    labels,
                    latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                _save_page(
                    pdf_pages,
                    width,
                    plot_ratio,
                    branch,
                    b_type,
                    x_data,
                    y_data,
                    labels,
                    latest_values,
                )
                _save_page(
                    pdf_pages,
                    width,
                    plot_delta_between_point,
                    branch,
                    b_type,
                    x_data,
                    y_data,
                    labels,
                    latest_values,
                )


def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Submit a lava job for every commit of every branch that has no benchmark
    results yet, or for every commit when force is set.
    """
    minio_client = get_client()
    for branch_name, cutoff_commit in branches.items():
        commit_hashes = get_git_log(branch_name, cutoff_commit, git_path)

        with tempfile.TemporaryDirectory() as scratch_dir:
            for commit_hash in commit_hashes:
                existing = get_benchmark_results(
                    minio_client, commit_hash, scratch_dir
                )[0]
                # Results already present are only redone on request.
                if force or not existing:
                    lava_submit.submit(
                        commit_hash,
                        wait_for_completion=wait_for_completion,
                        debug=debug,
                    )


def main():
    """
    Parse arguments and execute as needed.

    Return the process exit status: 1 when the repository path does not
    exist, 0 otherwise.
    """
    # Default branches to process, each with the commit its history is cut at.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        # The value is decoded with json.loads(): the example must use
        # double quotes to be valid JSON.
        help="A JSON dictionary of the form {"
        '"branch_name": "commit_hash_cutoff", ...}. Allow custom graphing and '
        "jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            # The command line flag is the negation of what launch_jobs expects.
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0


def sanitize_dataset(dataset):
    """
    Use IQR 1.5 [1] to remove outlier from the dataset. This is useful to get a
    representative mean without outlier in it.

    Return a (kept, outliers) tuple of lists, each one preserving the order
    of the values in dataset. An empty dataset yields two empty lists.

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    # Percentiles of an empty dataset are undefined; there is nothing to
    # classify anyway. len() is used so that array-like datasets also work.
    if len(dataset) == 0:
        return [], []

    # numpy.percentile does not need its input to be sorted.
    q1, q3 = numpy.percentile(dataset, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)
    new_dataset = []
    outliers = []
    for i in dataset:
        if lower_bound <= i <= upper_bound:
            new_dataset.append(i)
        else:
            outliers.append(i)
    return new_dataset, outliers


# Run main() only when this file is executed as a script, and use its return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Commit	Line	Data
5c65bbc2 JR	1	#!/usr/bin/python3
	2	# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
	17	import json
	18	import os
	19	import tempfile
	20	from statistics import mean
	21	import argparse
	22	import sys
	23	from operator import add
	24
	25	import matplotlib.pyplot as plt
	26	from matplotlib.backends.backend_pdf import PdfPages
	27	from matplotlib.ticker import PercentFormatter
	28
	29	import git
	30	import numpy
	31	import lava_submit
	32
	33	from minio import Minio
	34	from minio.error import NoSuchKey
	35	from minio.error import ResponseError
	36
	37
	38	BENCHMARK_TYPES = ["dummy", "text"]
	39	DEFAULT_BUCKET = "lava"
	40
	41
cf595cda JR	42	def json_type(string):
	43	"""
	44	Argpase type for json args.
	45	We expect a base dictionary.
	46	"""
	47	passed_json = json.loads(string)
	48	if not isinstance(passed_json, dict):
	49	msg = "%r is not a dict" % string
	50	raise argparse.ArgumentTypeError(msg)
	51	return passed_json
	52
5c65bbc2 JR	53	def graph_get_color(branch):
	54	"""
	55	Get the color matching the branch.
	56	"""
	57	color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
	58	return color[branch]
	59
	60
	61	def graph_get_title(branch, benchmark_type):
	62	"""
	63	Get title for graph based on benchmark type.
	64	"""
	65	string = {"dummy": "Dummy output", "text": "Text output"}
	66	return "{} - {}".format(branch, string[benchmark_type])
	67
	68
	69	def get_client():
	70	"""
	71	Return minio client configured.
	72	"""
	73	return Minio(
	74	"obj.internal.efficios.com", access_key="jenkins", secret_key="echo123456"
	75	)
	76
	77
	78	def get_file(client, prefix, file_name, workdir_name):
	79	"""
	80	Return the path of the downloaded file.
	81	Return None on error
	82	"""
	83	destination = os.path.join(workdir_name, file_name)
	84	object_name = "{}/{}".format(prefix, file_name)
	85	try:
	86	client.fget_object(DEFAULT_BUCKET, object_name, destination)
	87	except NoSuchKey:
	88	return None
	89
	90	return destination
	91
	92
	93	def delete_file(client, prefix, file_name):
	94	"""
	95	Delete the file on remote.
	96	"""
	97	object_name = "{}/{}".format(prefix, file_name)
	98	try:
	99	client.remove_object(DEFAULT_BUCKET, object_name)
	100	except ResponseError as err:
	101	print(err)
	102	except NoSuchKey:
	103	pass
	104
	105
	106	def get_git_log(bt_version, cutoff, repo_path):
	107	"""
	108	Return an ordered (older to newer) list of commits for the bt_version and
	109	cutoff. WARNING: This changes the git repo HEAD.
	110	"""
	111	repo = git.Repo(repo_path)
	112	repo.git.fetch()
	113	return repo.git.log(
	114	"{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
	115	).split("\n")
	116
117
118	def parse_result(result_path):
119	"""
120	Parse the result file. Return a dataset of User time + System time.
121	"""
122	with open(result_path) as result:
123	parsed_result = json.load(result)
124	return list(
125	map(
126	add,
127	parsed_result["User time (seconds)"],
128	parsed_result["System time (seconds)"],
129	)
130	)
131
132
133	def get_benchmark_results(client, commit, workdir):
134	"""
135	Fetch the benchmark result from a certain commit across all benchmark type.
136	"""
137	results = {}
138	benchmark_valid = True
139	for b_type in BENCHMARK_TYPES:
140	prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
141	result_file = get_file(client, prefix, commit, workdir)
142	if not result_file:
143	"""
144	Benchmark is either corrupted or not complete.
145	"""
146	return None, benchmark_valid
147	results[b_type] = parse_result(result_file)
148	if all(i == 0.0 for i in results[b_type]):
149	benchmark_valid = False
150	print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
151	# The dataset is valid return immediately.
152	return results, benchmark_valid
153
154
155	def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
156	"""
157	Plot the graph using the raw value.
158	"""
159	point_x_data = []
160	outlier_x_data = []
161	point_y_data = []
162	outlier_y_data = []
163	for pos in range(len(x_data)):
164	x = x_data[pos]
165	valid_points, outliers = sanitize_dataset(y_data[pos])
166	for y in valid_points:
167	point_x_data.append(x)
168	point_y_data.append(y)
169	for y in outliers:
170	outlier_x_data.append(x)
171	outlier_y_data.append(y)
172
173	plt.plot(
174	point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
175	)
176	plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")
177
5c65bbc2 JR	178	ymax = 1
	179	if y_data:
	180	ymin = 0.8 * min([item for sublist in y_data for item in sublist])
	181	ymax = 1.2 * max([item for sublist in y_data for item in sublist])
	182	# Put latest of other branches for reference as horizontal line.
	183	for l_branch, l_result in latest_values.items():
	184	if not l_result or l_branch == branch:
	185	continue
	186	plt.axhline(
	187	y=l_result,
	188	label="Latest {}".format(l_branch),
	189	color=graph_get_color(l_branch),
	190	)
5c65bbc2 JR	191	if l_result >= ymax:
5c65bbc2 JR	192	ymax = 1.2 * l_result
056f7519	193	ax = plt.gca()
925d7893	194	plt.ylim(ymin=0, ymax=ymax)
5c65bbc2 JR	195	plt.xticks(x_data, labels, rotation=90, family="monospace")
	196	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	197	plt.ylabel("User + system time (s)")
	198	plt.xlabel("Latest commits")
	199	plt.legend()
526aab11	200	plt.grid(True)
5c65bbc2	201
056f7519 JR	202	# Put tick on the right side
	203	ax.tick_params(labeltop=False, labelright=True)
	204
5c65bbc2 JR	205	plt.tight_layout()
	206	return
	207
09de7b53 JR	208
	209	def plot_delta_between_point(
	210	branch, benchmark_type, x_data, y_data, labels, latest_values
	211	):
20defd5e JR	212	"""
	213	Plot the graph of delta between each sequential commit.
	214	"""
	215	local_abs_max = 100
	216
	217	# Transform y_data to a list of for which the reference is the first
	218	# element.
	219	local_y_data = []
	220	for pos, y in enumerate(y_data):
	221	if pos == 0:
	222	local_y_data.append(0.0)
	223	continue
	224	local_y_data.append(y - y_data[pos - 1])
	225
	226	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	227
	228	# Get max absolute value to align the y axis with zero in the middle.
	229	if local_y_data:
	230	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	231
	232	plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)
	233
	234	ax = plt.gca()
	235	plt.xticks(x_data, labels, rotation=90, family="monospace")
09de7b53 JR	236	plt.title(
	237	graph_get_title(branch, benchmark_type) + " Delta to previous commit",
	238	fontweight="bold",
	239	)
20defd5e JR	240	plt.ylabel("Seconds")
	241	plt.xlabel("Latest commits")
	242	plt.legend()
526aab11	243	plt.grid(True)
20defd5e JR	244
	245	# Put tick on the right side
	246	ax.tick_params(labeltop=False, labelright=True)
	247
	248	plt.tight_layout()
	249	return
5c65bbc2	250
09de7b53	251
5c65bbc2 JR	252	def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
	253	"""
	254	Plot the graph using a ratio using first point as reference (0%).
	255	"""
	256	reference = 0.01
	257	y_abs_max = 100
	258
	259	if y_data:
	260	reference = y_data[0]
	261
	262	# Transform y_data to a list of ratio for which the reference is the first
	263	# element.
	264	local_y_data = list(map(lambda y: ((y / reference) - 1.0) * 100, y_data))
	265
	266	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	267
	268	# Put latest of other branches for reference as horizontal line.
	269	for l_branch, l_result in latest_values.items():
	270	if not l_result or l_branch == branch:
	271	continue
	272	ratio_l_result = ((l_result / reference) - 1.0) * 100.0
	273	print(
	274	"branch {} branch {} value {} l_result {} reference {}".format(
	275	branch, l_branch, ratio_l_result, l_result, reference
	276	)
	277	)
	278	plt.axhline(
	279	y=ratio_l_result,
	280	label="Latest {}".format(l_branch),
	281	color=graph_get_color(l_branch),
	282	)
	283
	284	# Draw the reference line.
	285	plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")
	286
	287	# Get max absolute value to align the y axis with zero in the middle.
	288	if local_y_data:
	289	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	290	if y_abs_max > 100:
	291	y_abs_max = local_abs_max
	292
	293	plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)
	294
	295	ax = plt.gca()
	296	percent_formatter = PercentFormatter()
	297	ax.yaxis.set_major_formatter(percent_formatter)
	298	ax.yaxis.set_minor_formatter(percent_formatter)
	299	plt.xticks(x_data, labels, rotation=90, family="monospace")
	300	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	301	plt.ylabel("Ratio")
	302	plt.xlabel("Latest commits")
	303	plt.legend()
526aab11	304	plt.grid(True)
5c65bbc2	305
056f7519 JR	306	# Put tick on the right side
	307	ax.tick_params(labeltop=False, labelright=True)
	308
5c65bbc2 JR	309	plt.tight_layout()
	310	return
	311
09de7b53	312
5c65bbc2 JR	313	def generate_graph(branches, report_name, git_path):
	314
	315	# The PDF document
	316	pdf_pages = PdfPages(report_name)
	317
	318	client = get_client()
	319	branch_results = dict()
	320
	321	# Fetch the results for each branch.
	322	for branch, cutoff in branches.items():
	323	commits = get_git_log(branch, cutoff, git_path)
	324	results = []
	325	with tempfile.TemporaryDirectory() as workdir:
	326	for commit in commits:
	327	b_results, valid = get_benchmark_results(client, commit, workdir)
	328	if not b_results or not valid:
	329	continue
	330	results.append((commit, b_results))
	331	branch_results[branch] = results
	332
	333	for b_type in BENCHMARK_TYPES:
	334	latest_values = {}
	335	max_len = 0
	336
	337	# Find the maximum size for a series inside our series dataset.
	338	# This is used later to compute the size of the actual plot (pdf).
	339	# While there gather the comparison value used to draw comparison line
	340	# between branches.
	341	for branch, results in branch_results.items():
	342	max_len = max([max_len, len(results)])
	343	if results:
	344	latest_values[branch] = mean(
	345	sanitize_dataset(results[-1][1][b_type])[0]
	346	)
	347	else:
	348	latest_values[branch] = None
	349
	350	for branch, results in branch_results.items():
	351	# Create a figure instance
	352	if max_len and max_len > 10:
	353	width = 0.16 * max_len
	354	else:
	355	width = 11.69
	356
	357	x_data = list(range(len(results)))
	358	y_data = [c[1][b_type] for c in results]
	359	labels = [c[0][:8] for c in results]
	360
	361	fig = plt.figure(figsize=(width, 8.27), dpi=100)
	362	plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
	363	pdf_pages.savefig(fig)
	364
5c65bbc2 JR	365	# Use the mean of each sanitize dataset here, we do not care for
	366	# variance for ratio. At least not yet.
	367	y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
20defd5e	368	fig = plt.figure(figsize=(width, 8.27), dpi=100)
5c65bbc2 JR	369	plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
	370	pdf_pages.savefig(fig)
	371
20defd5e	372	fig = plt.figure(figsize=(width, 8.27), dpi=100)
09de7b53 JR	373	plot_delta_between_point(
	374	branch, b_type, x_data, y_data, labels, latest_values
	375	)
20defd5e JR	376	pdf_pages.savefig(fig)
20defd5e JR	377
5c65bbc2 JR	378	pdf_pages.close()
	379
	380
d373c66e	381	def launch_jobs(branches, git_path, wait_for_completion, debug, force):
5c65bbc2 JR	382	"""
	383	Lauch jobs for all missing results.
	384	"""
	385	client = get_client()
	386	for branch, cutoff in branches.items():
	387	commits = get_git_log(branch, cutoff, git_path)
	388
	389	with tempfile.TemporaryDirectory() as workdir:
	390	for commit in commits:
	391	b_results = get_benchmark_results(client, commit, workdir)[0]
d373c66e	392	if b_results and not force:
5c65bbc2 JR	393	continue
	394	lava_submit.submit(
	395	commit, wait_for_completion=wait_for_completion, debug=debug
	396	)
	397
	398
	399	def main():
	400	"""
	401	Parse arguments and execute as needed.
	402	"""
	403	bt_branches = {
	404	"master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
	405	"stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
	406	"stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
	407	}
	408
	409	parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
	410	parser.add_argument(
	411	"--generate-jobs", action="store_true", help="Generate and send jobs"
	412	)
d373c66e JR	413	parser.add_argument(
	414	"--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
	415	)
5c65bbc2 JR	416	parser.add_argument(
	417	"--do-not-wait-on-completion",
	418	action="store_true",
	419	default=False,
	420	help="Wait for the completion of each jobs sent. This is useful"
	421	"for the ci. Otherwise we could end up spaming the lava instance.",
	422	)
	423	parser.add_argument(
	424	"--generate-report",
	425	action="store_true",
	426	help="Generate graphs and save them to pdf",
	427	)
	428	parser.add_argument(
	429	"--report-name", default="report.pdf", help="The name of the pdf report."
	430	)
	431	parser.add_argument(
	432	"--debug", action="store_true", default=False, help="Do not send jobs to lava."
	433	)
	434	parser.add_argument(
	435	"--repo-path", help="The location of the git repo to use.", required=True
	436	)
cf595cda JR	437	parser.add_argument(
	438	"--overwrite-branches-cutoff",
	439	help="A dictionary of the form {"
	440	"'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and"
	441	"jobs generation.",
	442	required=False, type=json_type
	443	)
5c65bbc2 JR	444
	445	args = parser.parse_args()
	446
cf595cda JR	447	if args.overwrite_branches_cutoff:
	448	bt_branches = args.overwrite_branches_cutoff
	449
5c65bbc2 JR	450	if not os.path.exists(args.repo_path):
	451	print("Repository location does not exists.")
	452	return 1
	453
	454	if args.generate_jobs:
	455	print("Launching jobs for:")
d373c66e	456
5c65bbc2 JR	457	for branch, cutoff in bt_branches.items():
5c65bbc2 JR	458	print("\t Branch {} with cutoff {}".format(branch, cutoff))
d373c66e	459
5c65bbc2	460	launch_jobs(
d373c66e JR	461	bt_branches,
	462	args.repo_path,
	463	not args.do_not_wait_on_completion,
	464	args.debug,
	465	args.force_jobs,
5c65bbc2 JR	466	)
	467
	468	if args.generate_report:
	469	print("Generating pdf report ({}) for:".format(args.report_name))
	470	for branch, cutoff in bt_branches.items():
	471	print("\t Branch {} with cutoff {}".format(branch, cutoff))
	472	generate_graph(bt_branches, args.report_name, args.repo_path)
	473
	474	return 0
	475
	476
	477	def sanitize_dataset(dataset):
	478	"""
	479	Use IRQ 1.5 [1] to remove outlier from the dataset. This is useful to get a
	480	representative mean without outlier in it.
	481	[1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
	482	"""
	483	sorted_data = sorted(dataset)
	484	q1, q3 = numpy.percentile(sorted_data, [25, 75])
	485	iqr = q3 - q1
	486	lower_bound = q1 - (1.5 * iqr)
	487	upper_bound = q3 + (1.5 * iqr)
	488	new_dataset = []
	489	outliers = []
	490	for i in dataset:
	491	if lower_bound <= i <= upper_bound:
	492	new_dataset.append(i)
	493	else:
	494	outliers.append(i)
	495	return new_dataset, outliers
	496
	497
	498	if __name__ == "__main__":
	499	sys.exit(main())