[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py

#!/usr/bin/python3
# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import os
import tempfile
from statistics import mean
import argparse
import sys
from operator import add

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter

import git
import numpy
import lava_submit

from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError


# Benchmark flavours; each one has its own result prefix on the object store.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket holding the benchmark result objects.
DEFAULT_BUCKET = "lava"

# Commits for which launch_jobs() never submits a job.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789",  # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662",  # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",  # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad",  # Does not build
}

def json_type(string):
    """
    Argparse type for JSON arguments.

    Parse `string` as JSON and return the resulting dictionary.

    Raise argparse.ArgumentTypeError when `string` is not valid JSON or when
    the top-level JSON value is not a dictionary, so that argparse can report
    a meaningful message to the user.
    """
    try:
        passed_json = json.loads(string)
    except json.JSONDecodeError as err:
        # Surface the parser diagnostic instead of a generic "invalid value".
        msg = "%r is not valid JSON: %s" % (string, err)
        raise argparse.ArgumentTypeError(msg) from err
    if not isinstance(passed_json, dict):
        msg = "%r is not a dict" % string
        raise argparse.ArgumentTypeError(msg)
    return passed_json

def graph_get_color(branch):
    """
    Get the color matching the branch.

    The well-known branches keep their fixed color. Any other branch name
    (for example one passed with --overwrite-branches-cutoff) gets a color
    picked deterministically from a fallback palette instead of raising
    KeyError.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    if branch in color:
        return color[branch]
    # The fallback colors overlap neither with the fixed ones above nor with
    # "black", which the plots use for outliers and the reference line.
    fallback = ["purple", "orange", "brown", "magenta", "olive", "teal"]
    # The sum of the code points is stable across runs, unlike hash().
    return fallback[sum(map(ord, branch)) % len(fallback)]


def graph_get_title(branch, benchmark_type):
    """
    Build the graph title: the branch name followed by a human readable
    description of the benchmark type.
    """
    descriptions = {"dummy": "Dummy output", "text": "Text output"}
    description = descriptions[benchmark_type]
    return "{} - {}".format(branch, description)


def get_client():
    """
    Build the minio client used to reach the object store.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded here;
    # consider loading them from the environment or a configuration file.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)


def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object `prefix`/`file_name` from the default bucket into
    `workdir_name`.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path


def delete_file(client, prefix, file_name):
    """
    Remove the object `prefix`/`file_name` from the default bucket.

    A ResponseError is printed; a missing object is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as response_error:
        print(response_error)
    except NoSuchKey:
        # Nothing to delete on the remote.
        return


def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes for the range
    `cutoff`..origin/`bt_version` of the repository located at `repo_path`.

    An empty list is returned when the range contains no commit.

    WARNING: This runs `git fetch` in the repository before reading the log.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    log = repo.git.log(
        "{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
    )
    # An empty log must give an empty list: "".split("\n") would yield [""],
    # and callers would then handle the empty string as if it were a commit.
    return [commit for commit in log.splitlines() if commit]


def parse_result(result_path):
    """
    Load the JSON result file at `result_path` and return a list in which
    each item is the sum of the matching "User time (seconds)" and
    "System time (seconds)" entries.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)
    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]


def get_benchmark_results(client, commit, workdir):
    """
    Fetch the results of `commit` for every benchmark type.

    Return a (results, valid) tuple. `results` maps each benchmark type to
    its parsed dataset, or is None as soon as one result file is missing.
    `valid` is False when at least one parsed dataset contains only zeros.
    """
    datasets = {}
    is_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, is_valid
        dataset = parse_result(result_file)
        datasets[b_type] = dataset
        if any(value != 0.0 for value in dataset):
            continue
        # Only zeros were recorded: flag the commit but keep parsing.
        is_valid = False
        print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
    return datasets, is_valid


def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the title.
    x_data: x position of each commit.
    y_data: one dataset (list of timings) per commit, parallel to x_data.
    labels: x tick labels, parallel to x_data.
    latest_values: mapping of branch name to its latest value; the values of
    the other branches are drawn as horizontal lines, falsy values (None, 0)
    are skipped.

    The graph is drawn on the current pyplot figure.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, dataset in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(dataset)
        # Every point of a commit shares the x position of that commit.
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # The y axis always starts at 0, only the upper limit is computed.
    ymax = 1
    if y_data:
        ymax = 1.2 * max(item for sublist in y_data for item in sublist)
    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Keep the reference line inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result
    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, for every commit, the difference between its value and the value
    of the previous commit. The first commit has no predecessor and is
    plotted at 0.

    `latest_values` is accepted for signature parity with the other plot
    functions but is not used.
    """
    # Delta of each point against its predecessor; 0.0 for the first point.
    deltas = [
        0.0 if index == 0 else value - y_data[index - 1]
        for index, value in enumerate(y_data)
    ]

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Symmetric limits keep zero in the middle of the y axis.
    y_limit = 100
    if deltas:
        y_limit = max(map(abs, deltas)) * 1.3
    plt.ylim(ymin=-y_limit, ymax=y_limit)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Also show the tick labels on the right side.
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the title.
    x_data: x position of each commit.
    y_data: one value per commit, parallel to x_data.
    labels: x tick labels, parallel to x_data.
    latest_values: mapping of branch name to its latest value; the values of
    the other branches are drawn as horizontal lines, falsy values (None, 0)
    are skipped.

    The y axis is symmetric around 0% and spans at least -100% to 100%.
    The graph is drawn on the current pyplot figure.
    """
    # Fallback reference, only used when there is no data point at all.
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        # Grow the axis only when the data does not fit the default range.
        # The comparison must be made on the data maximum: comparing
        # y_abs_max against its own initial value could never be true.
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def _fetch_branch_results(client, branches, git_path):
    """
    Return a dict mapping each branch to a list of (commit, results) tuples,
    keeping only the commits that have complete and valid results.
    """
    branch_results = {}
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results
    return branch_results


def _save_plot(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one graph on a new figure, append it to the pdf and release the
    figure.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)


def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report `report_name`.

    branches: mapping of branch name to the cutoff commit of that branch.
    report_name: path of the pdf file to write.
    git_path: location of the git repository used to list the commits.

    For each benchmark type and each branch, three pages are written: the
    raw values, the ratio against the first commit and the delta between
    sequential commits.
    """
    # The context manager closes the PDF document even if a step fails.
    with PdfPages(report_name) as pdf_pages:
        client = get_client()
        branch_results = _fetch_branch_results(client, branches, git_path)

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Long series get a wider page, otherwise use the default width.
            width = 0.16 * max_len if max_len > 10 else 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                _save_plot(
                    pdf_pages, width, plot_raw_value,
                    branch, b_type, x_data, y_data, labels, latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                _save_plot(
                    pdf_pages, width, plot_ratio,
                    branch, b_type, x_data, y_data, labels, latest_values,
                )
                _save_plot(
                    pdf_pages, width, plot_delta_between_point,
                    branch, b_type, x_data, y_data, labels, latest_values,
                )


def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Submit a lava job for every commit that has no result yet, or for every
    commit when `force` is set. Commits listed in invalid_commits are always
    skipped.
    """
    client = get_client()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)

        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                if commit in invalid_commits:
                    continue
                existing_results, _ = get_benchmark_results(client, commit, workdir)
                if force or not existing_results:
                    lava_submit.submit(
                        commit, wait_for_completion=wait_for_completion, debug=debug
                    )


def main():
    """
    Parse arguments and execute as needed.

    Return the process exit status: 1 when the repository path does not
    exist, 0 otherwise.
    """
    # Default branches with the commit from which each one is benchmarked.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci, otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A JSON dictionary of the form "
        '{"branch_name": "commit_hash_cutoff", ...}. Allow custom graphing '
        "and jobs generation.",
        required=False, type=json_type
    )

    args = parser.parse_args()

    # A user supplied dictionary replaces the default branches entirely.
    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0


def sanitize_dataset(dataset):
    """
    Split the dataset into regular points and outliers using the 1.5 IQR
    rule [1]. This is useful to get a representative mean without outlier
    in it.

    Return a (points, outliers) tuple of lists, each one keeping the order
    of the dataset.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    low = first_quartile - margin
    high = third_quartile + margin
    kept = []
    rejected = []
    for value in dataset:
        target = kept if low <= value <= high else rejected
        target.append(value)
    return kept, rejected


# Run main() only when executed as a script and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
Commit	Line	Data
5c65bbc2 JR	1	#!/usr/bin/python3
	2	# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
	17	import json
	18	import os
	19	import tempfile
	20	from statistics import mean
	21	import argparse
	22	import sys
	23	from operator import add
	24
	25	import matplotlib.pyplot as plt
	26	from matplotlib.backends.backend_pdf import PdfPages
	27	from matplotlib.ticker import PercentFormatter
	28
	29	import git
	30	import numpy
	31	import lava_submit
	32
	33	from minio import Minio
	34	from minio.error import NoSuchKey
	35	from minio.error import ResponseError
	36
	37
	38	BENCHMARK_TYPES = ["dummy", "text"]
	39	DEFAULT_BUCKET = "lava"
	40
e085717c JR	41	invalid_commits = {
	42	"ec9a9794af488a9accce7708a8b0d8188b498789", # Does not build
	43	"8c99128c640cbce71fb8a6caa15e4c672252b662", # Block on configure
	44	"f3847c753f1b4f12353c38d97b0577d9993d19fb", # Does not build
	45	"e0111295f17ddfcc33ec771a8deac505473a06ad", # Does not build
	46	}
5c65bbc2	47
cf595cda JR	48	def json_type(string):
	49	"""
	50	Argpase type for json args.
	51	We expect a base dictionary.
	52	"""
	53	passed_json = json.loads(string)
	54	if not isinstance(passed_json, dict):
	55	msg = "%r is not a dict" % string
	56	raise argparse.ArgumentTypeError(msg)
	57	return passed_json
	58
5c65bbc2 JR	59	def graph_get_color(branch):
	60	"""
	61	Get the color matching the branch.
	62	"""
	63	color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
	64	return color[branch]
	65
	66
	67	def graph_get_title(branch, benchmark_type):
	68	"""
	69	Get title for graph based on benchmark type.
	70	"""
	71	string = {"dummy": "Dummy output", "text": "Text output"}
	72	return "{} - {}".format(branch, string[benchmark_type])
	73
	74
	75	def get_client():
	76	"""
	77	Return minio client configured.
	78	"""
	79	return Minio(
	80	"obj.internal.efficios.com", access_key="jenkins", secret_key="echo123456"
	81	)
	82
	83
	84	def get_file(client, prefix, file_name, workdir_name):
	85	"""
	86	Return the path of the downloaded file.
	87	Return None on error
	88	"""
	89	destination = os.path.join(workdir_name, file_name)
	90	object_name = "{}/{}".format(prefix, file_name)
	91	try:
	92	client.fget_object(DEFAULT_BUCKET, object_name, destination)
	93	except NoSuchKey:
	94	return None
	95
	96	return destination
	97
	98
	99	def delete_file(client, prefix, file_name):
	100	"""
	101	Delete the file on remote.
	102	"""
	103	object_name = "{}/{}".format(prefix, file_name)
	104	try:
	105	client.remove_object(DEFAULT_BUCKET, object_name)
	106	except ResponseError as err:
	107	print(err)
	108	except NoSuchKey:
	109	pass
	110
	111
	112	def get_git_log(bt_version, cutoff, repo_path):
	113	"""
	114	Return an ordered (older to newer) list of commits for the bt_version and
	115	cutoff. WARNING: This changes the git repo HEAD.
	116	"""
	117	repo = git.Repo(repo_path)
	118	repo.git.fetch()
	119	return repo.git.log(
	120	"{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
	121	).split("\n")
	122
123
124	def parse_result(result_path):
125	"""
126	Parse the result file. Return a dataset of User time + System time.
127	"""
128	with open(result_path) as result:
129	parsed_result = json.load(result)
130	return list(
131	map(
132	add,
133	parsed_result["User time (seconds)"],
134	parsed_result["System time (seconds)"],
135	)
136	)
137
138
139	def get_benchmark_results(client, commit, workdir):
140	"""
141	Fetch the benchmark result from a certain commit across all benchmark type.
142	"""
143	results = {}
144	benchmark_valid = True
145	for b_type in BENCHMARK_TYPES:
146	prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
147	result_file = get_file(client, prefix, commit, workdir)
148	if not result_file:
149	"""
150	Benchmark is either corrupted or not complete.
151	"""
152	return None, benchmark_valid
153	results[b_type] = parse_result(result_file)
154	if all(i == 0.0 for i in results[b_type]):
155	benchmark_valid = False
156	print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
157	# The dataset is valid return immediately.
158	return results, benchmark_valid
159
160
161	def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
162	"""
163	Plot the graph using the raw value.
164	"""
165	point_x_data = []
166	outlier_x_data = []
167	point_y_data = []
168	outlier_y_data = []
169	for pos in range(len(x_data)):
170	x = x_data[pos]
171	valid_points, outliers = sanitize_dataset(y_data[pos])
172	for y in valid_points:
173	point_x_data.append(x)
174	point_y_data.append(y)
175	for y in outliers:
176	outlier_x_data.append(x)
177	outlier_y_data.append(y)
178
179	plt.plot(
180	point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
181	)
182	plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")
183
5c65bbc2 JR	184	ymax = 1
	185	if y_data:
	186	ymin = 0.8 * min([item for sublist in y_data for item in sublist])
	187	ymax = 1.2 * max([item for sublist in y_data for item in sublist])
	188	# Put latest of other branches for reference as horizontal line.
	189	for l_branch, l_result in latest_values.items():
	190	if not l_result or l_branch == branch:
	191	continue
	192	plt.axhline(
	193	y=l_result,
	194	label="Latest {}".format(l_branch),
	195	color=graph_get_color(l_branch),
	196	)
5c65bbc2 JR	197	if l_result >= ymax:
5c65bbc2 JR	198	ymax = 1.2 * l_result
056f7519	199	ax = plt.gca()
925d7893	200	plt.ylim(ymin=0, ymax=ymax)
5c65bbc2 JR	201	plt.xticks(x_data, labels, rotation=90, family="monospace")
	202	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	203	plt.ylabel("User + system time (s)")
	204	plt.xlabel("Latest commits")
	205	plt.legend()
526aab11	206	plt.grid(True)
5c65bbc2	207
056f7519 JR	208	# Put tick on the right side
	209	ax.tick_params(labeltop=False, labelright=True)
	210
5c65bbc2 JR	211	plt.tight_layout()
	212	return
	213
09de7b53 JR	214
	215	def plot_delta_between_point(
	216	branch, benchmark_type, x_data, y_data, labels, latest_values
	217	):
20defd5e JR	218	"""
	219	Plot the graph of delta between each sequential commit.
	220	"""
	221	local_abs_max = 100
	222
	223	# Transform y_data to a list of for which the reference is the first
	224	# element.
	225	local_y_data = []
	226	for pos, y in enumerate(y_data):
	227	if pos == 0:
	228	local_y_data.append(0.0)
	229	continue
	230	local_y_data.append(y - y_data[pos - 1])
	231
	232	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	233
	234	# Get max absolute value to align the y axis with zero in the middle.
	235	if local_y_data:
	236	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	237
	238	plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)
	239
	240	ax = plt.gca()
	241	plt.xticks(x_data, labels, rotation=90, family="monospace")
09de7b53 JR	242	plt.title(
	243	graph_get_title(branch, benchmark_type) + " Delta to previous commit",
	244	fontweight="bold",
	245	)
20defd5e JR	246	plt.ylabel("Seconds")
	247	plt.xlabel("Latest commits")
	248	plt.legend()
526aab11	249	plt.grid(True)
20defd5e JR	250
	251	# Put tick on the right side
	252	ax.tick_params(labeltop=False, labelright=True)
	253
	254	plt.tight_layout()
	255	return
5c65bbc2	256
09de7b53	257
5c65bbc2 JR	258	def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
	259	"""
	260	Plot the graph using a ratio using first point as reference (0%).
	261	"""
	262	reference = 0.01
	263	y_abs_max = 100
	264
	265	if y_data:
	266	reference = y_data[0]
	267
	268	# Transform y_data to a list of ratio for which the reference is the first
	269	# element.
	270	local_y_data = list(map(lambda y: ((y / reference) - 1.0) * 100, y_data))
	271
	272	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	273
	274	# Put latest of other branches for reference as horizontal line.
	275	for l_branch, l_result in latest_values.items():
	276	if not l_result or l_branch == branch:
	277	continue
	278	ratio_l_result = ((l_result / reference) - 1.0) * 100.0
	279	print(
	280	"branch {} branch {} value {} l_result {} reference {}".format(
	281	branch, l_branch, ratio_l_result, l_result, reference
	282	)
	283	)
	284	plt.axhline(
	285	y=ratio_l_result,
	286	label="Latest {}".format(l_branch),
	287	color=graph_get_color(l_branch),
	288	)
	289
	290	# Draw the reference line.
	291	plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")
	292
	293	# Get max absolute value to align the y axis with zero in the middle.
	294	if local_y_data:
	295	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	296	if y_abs_max > 100:
	297	y_abs_max = local_abs_max
	298
	299	plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)
	300
	301	ax = plt.gca()
	302	percent_formatter = PercentFormatter()
	303	ax.yaxis.set_major_formatter(percent_formatter)
	304	ax.yaxis.set_minor_formatter(percent_formatter)
	305	plt.xticks(x_data, labels, rotation=90, family="monospace")
	306	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	307	plt.ylabel("Ratio")
	308	plt.xlabel("Latest commits")
	309	plt.legend()
526aab11	310	plt.grid(True)
5c65bbc2	311
056f7519 JR	312	# Put tick on the right side
	313	ax.tick_params(labeltop=False, labelright=True)
	314
5c65bbc2 JR	315	plt.tight_layout()
	316	return
	317
09de7b53	318
5c65bbc2 JR	319	def generate_graph(branches, report_name, git_path):
	320
	321	# The PDF document
	322	pdf_pages = PdfPages(report_name)
	323
	324	client = get_client()
	325	branch_results = dict()
	326
	327	# Fetch the results for each branch.
	328	for branch, cutoff in branches.items():
	329	commits = get_git_log(branch, cutoff, git_path)
	330	results = []
	331	with tempfile.TemporaryDirectory() as workdir:
	332	for commit in commits:
	333	b_results, valid = get_benchmark_results(client, commit, workdir)
	334	if not b_results or not valid:
	335	continue
	336	results.append((commit, b_results))
	337	branch_results[branch] = results
	338
	339	for b_type in BENCHMARK_TYPES:
	340	latest_values = {}
	341	max_len = 0
	342
	343	# Find the maximum size for a series inside our series dataset.
	344	# This is used later to compute the size of the actual plot (pdf).
	345	# While there gather the comparison value used to draw comparison line
	346	# between branches.
	347	for branch, results in branch_results.items():
	348	max_len = max([max_len, len(results)])
	349	if results:
	350	latest_values[branch] = mean(
	351	sanitize_dataset(results[-1][1][b_type])[0]
	352	)
	353	else:
	354	latest_values[branch] = None
	355
	356	for branch, results in branch_results.items():
	357	# Create a figure instance
	358	if max_len and max_len > 10:
	359	width = 0.16 * max_len
	360	else:
	361	width = 11.69
	362
	363	x_data = list(range(len(results)))
	364	y_data = [c[1][b_type] for c in results]
	365	labels = [c[0][:8] for c in results]
	366
	367	fig = plt.figure(figsize=(width, 8.27), dpi=100)
	368	plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
	369	pdf_pages.savefig(fig)
	370
5c65bbc2 JR	371	# Use the mean of each sanitize dataset here, we do not care for
	372	# variance for ratio. At least not yet.
	373	y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
20defd5e	374	fig = plt.figure(figsize=(width, 8.27), dpi=100)
5c65bbc2 JR	375	plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
	376	pdf_pages.savefig(fig)
	377
20defd5e	378	fig = plt.figure(figsize=(width, 8.27), dpi=100)
09de7b53 JR	379	plot_delta_between_point(
	380	branch, b_type, x_data, y_data, labels, latest_values
	381	)
20defd5e JR	382	pdf_pages.savefig(fig)
20defd5e JR	383
5c65bbc2 JR	384	pdf_pages.close()
	385
	386
d373c66e	387	def launch_jobs(branches, git_path, wait_for_completion, debug, force):
5c65bbc2 JR	388	"""
	389	Lauch jobs for all missing results.
	390	"""
	391	client = get_client()
	392	for branch, cutoff in branches.items():
	393	commits = get_git_log(branch, cutoff, git_path)
	394
	395	with tempfile.TemporaryDirectory() as workdir:
	396	for commit in commits:
e085717c JR	397	if commit in invalid_commits:
e085717c JR	398	continue
5c65bbc2	399	b_results = get_benchmark_results(client, commit, workdir)[0]
d373c66e	400	if b_results and not force:
5c65bbc2 JR	401	continue
	402	lava_submit.submit(
	403	commit, wait_for_completion=wait_for_completion, debug=debug
	404	)
	405
	406
	407	def main():
	408	"""
	409	Parse arguments and execute as needed.
	410	"""
	411	bt_branches = {
	412	"master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
	413	"stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
	414	"stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
	415	}
	416
	417	parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
	418	parser.add_argument(
	419	"--generate-jobs", action="store_true", help="Generate and send jobs"
	420	)
d373c66e JR	421	parser.add_argument(
	422	"--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
	423	)
5c65bbc2 JR	424	parser.add_argument(
	425	"--do-not-wait-on-completion",
	426	action="store_true",
	427	default=False,
	428	help="Wait for the completion of each jobs sent. This is useful"
	429	"for the ci. Otherwise we could end up spaming the lava instance.",
	430	)
	431	parser.add_argument(
	432	"--generate-report",
	433	action="store_true",
	434	help="Generate graphs and save them to pdf",
	435	)
	436	parser.add_argument(
	437	"--report-name", default="report.pdf", help="The name of the pdf report."
	438	)
	439	parser.add_argument(
	440	"--debug", action="store_true", default=False, help="Do not send jobs to lava."
	441	)
	442	parser.add_argument(
	443	"--repo-path", help="The location of the git repo to use.", required=True
	444	)
cf595cda JR	445	parser.add_argument(
	446	"--overwrite-branches-cutoff",
	447	help="A dictionary of the form {"
	448	"'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and"
	449	"jobs generation.",
	450	required=False, type=json_type
	451	)
5c65bbc2 JR	452
	453	args = parser.parse_args()
	454
cf595cda JR	455	if args.overwrite_branches_cutoff:
	456	bt_branches = args.overwrite_branches_cutoff
	457
5c65bbc2 JR	458	if not os.path.exists(args.repo_path):
	459	print("Repository location does not exists.")
	460	return 1
	461
	462	if args.generate_jobs:
	463	print("Launching jobs for:")
d373c66e	464
5c65bbc2 JR	465	for branch, cutoff in bt_branches.items():
5c65bbc2 JR	466	print("\t Branch {} with cutoff {}".format(branch, cutoff))
d373c66e	467
5c65bbc2	468	launch_jobs(
d373c66e JR	469	bt_branches,
	470	args.repo_path,
	471	not args.do_not_wait_on_completion,
	472	args.debug,
	473	args.force_jobs,
5c65bbc2 JR	474	)
	475
	476	if args.generate_report:
	477	print("Generating pdf report ({}) for:".format(args.report_name))
	478	for branch, cutoff in bt_branches.items():
	479	print("\t Branch {} with cutoff {}".format(branch, cutoff))
	480	generate_graph(bt_branches, args.report_name, args.repo_path)
	481
	482	return 0
	483
	484
	485	def sanitize_dataset(dataset):
	486	"""
	487	Use IRQ 1.5 [1] to remove outlier from the dataset. This is useful to get a
	488	representative mean without outlier in it.
	489	[1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
	490	"""
	491	sorted_data = sorted(dataset)
	492	q1, q3 = numpy.percentile(sorted_data, [25, 75])
	493	iqr = q3 - q1
	494	lower_bound = q1 - (1.5 * iqr)
	495	upper_bound = q3 + (1.5 * iqr)
	496	new_dataset = []
	497	outliers = []
	498	for i in dataset:
	499	if lower_bound <= i <= upper_bound:
	500	new_dataset.append(i)
	501	else:
	502	outliers.append(i)
	503	return new_dataset, outliers
	504
	505
	506	if __name__ == "__main__":
	507	sys.exit(main())