[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py

#!/usr/bin/python3
# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import os
import tempfile
from statistics import mean
import argparse
import sys
from operator import add

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter

import git
import numpy
import lava_submit

from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError


# Benchmark flavours. Each name is used both as the last path component of the
# remote result prefix and as a key of the per-commit result dictionary.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket name passed to every object store call made by this script.
DEFAULT_BUCKET = "lava"

# Commits for which no benchmark job must be launched (see launch_jobs).
# Every hash appears exactly once.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789", # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662", # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb", # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad", # Does not build
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b", # Does not build
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde", # Does not build
    "ce67f5614a4db3b2de4d887eca52135b439b4937", # Does not build
    "80aff5efc66679fd934cef433c0e698694748385", # Does not build
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3", # Does not build
    "ae466a6e1b856d96cf5112a371b4df2b732503ec", # Does not build
    "ade5c95e2a4f90f839f222fc1a66175b3b199922", # Configuration fails
    "30341532906d62808e9d66fb115f5edb4e6f5706", # Configuration fails
    "006c5ffb42f32e802136e3c27a63accb59b4d6c4", # Does not build
    "88488ff5bdcd7679ff1f04fe6cff0d24b4f8fc0c", # Does not build
    # Other errors
    "7c7301d5827bd10ec7c34da7ffc5fe74e5047d38",
    "a0df3abf88616cb0799f87f4eb57c54268e63448",
    "b7045dd71bc0524ad6b5db96df365e98e237d395",
    "cf7b259eaa602abcef308d2b5dd8e6c9ee995d8b",
    "90a55a4ef47cac7b568f5f0a8a78bd760f82d23c",
    "baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
    "af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
    "fe748379adbd385efdfc7acae9c2340fb8b7d717",
    "929627965e33e06dc77254d81e8ec1d66cc06590",
    "48a0e52c4632a60cd43423f2f34f10de350bf868",
    "b7fa35fce415b33207a9eba111069ed31ef122a0",
    "828c8a25785e0cedaeb6987256a4dfc3c43b982f",
    "213489680861e4d796173513effac7023312ec2d",
    "430a5ccbbd15782501ca56bb148f3850126277ad",
    "629d19044c43b195498d0a4e002906c54b6186d5",
    "c423217ed1640b4152739f7e5613775d46c25050",
    # Elfutils
    "776a2a252c9875caa1e8b4f41cb8cc12c79611c3",
    "435aa29aff0527d36aafa1b657ae70b9db5f9ea5",
    "95651695473495501fc6b2c4a1cf6a78cfb3cd6a",
    "e0748fb2ba8994c136bcc0b67d3044f09841cf8e",
    "9e632b22e1310fe773edc32ab08a60602f4b2861",
    "271fb6907a6f4705a1c799d925394243eae51d68",
    "328342cd737582216dc7b8b7d558b2a1bf8ea5e8",
    "ae5c1a4481be68fae027910b141354c1d86daa64",
    "e6938018975e45d35dab5fef795fe7344eef7d62",
    "e015bae2ef343b30c890eebb9182a8be13d12ed0",
    "5e8a0751ae0c418a615025d1da10bc84f91b3d97",
    "887d26fa0fd0ae0c5c15e4b885473c4cdc0bf078",
    "e97fe75eac59fc39a6e4f3c4f9f3301835a0315e",
    "8b130e7f1d6a41fb5c64a014c15246ba74b79470",
    "f4f8f79893b18199b38edc3330093a9403c4c737",
}

def json_type(string):
    """
    Argparse ``type=`` callable for options carrying a JSON document.

    Return the decoded value when the top-level JSON element is an object
    (a Python dict); raise argparse.ArgumentTypeError otherwise.
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)

def graph_get_color(branch):
    """
    Get the color matching the branch.

    The three well-known branches keep their fixed colors. Any other branch
    name (for example one supplied through --overwrite-branches-cutoff) gets a
    fallback color instead of raising KeyError. The fallback is derived from
    the name itself so the same branch always gets the same color.
    """
    known_colors = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    if branch in known_colors:
        return known_colors[branch]

    # Stay away from red/green/blue (known branches) and black (used for
    # outliers and the reference line).
    fallback_colors = ["orange", "purple", "brown", "magenta", "cyan", "olive"]
    # hash() of a str is salted per process, so use a plain byte sum to get a
    # choice that is stable from one run to the next.
    index = sum(str(branch).encode("utf-8")) % len(fallback_colors)
    return fallback_colors[index]


def graph_get_title(branch, benchmark_type):
    """
    Build the graph title: "<branch> - <readable benchmark type>".

    Raise KeyError when benchmark_type is neither "dummy" nor "text".
    """
    readable_types = {"dummy": "Dummy output", "text": "Text output"}
    readable = readable_types[benchmark_type]
    return "{} - {}".format(branch, readable)


def get_client():
    """
    Build the Minio client used by this script.

    The endpoint and the credentials are the literal values below.
    """
    # NOTE(review): credentials are hard-coded in the source; consider loading
    # them from the environment or from a secret store.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)


def get_file(client, prefix, file_name, workdir_name):
    """
    Download "<prefix>/<file_name>" from DEFAULT_BUCKET into workdir_name.

    Return the local path of the downloaded file, or None when the client
    raises NoSuchKey. Any other exception propagates to the caller.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path


def delete_file(client, prefix, file_name):
    """
    Remove "<prefix>/<file_name>" from DEFAULT_BUCKET.

    A ResponseError is printed and otherwise ignored; NoSuchKey is silently
    ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete.
        return


def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes for the range
    "<cutoff>..origin/<bt_version>".

    An empty range yields an empty list.

    WARNING: This runs `git fetch` in the repository at repo_path before
    reading the log.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    log_output = repo.git.log(
        "{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
    )
    # "".split("\n") is [""], which callers would treat as one commit with an
    # empty id. splitlines() returns [] for an empty log.
    return log_output.splitlines()


def parse_result(result_path):
    """
    Load the JSON result file and return, as a list, the element-wise sum of
    its "User time (seconds)" and "System time (seconds)" series.
    """
    with open(result_path) as result_file:
        content = json.load(result_file)
    user_times = content["User time (seconds)"]
    system_times = content["System time (seconds)"]
    return [add(user, system) for user, system in zip(user_times, system_times)]


def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit across all benchmark types.

    Return a (results, valid) tuple:
      - results maps each benchmark type to its parsed dataset. It is None as
        soon as one result file cannot be fetched.
      - valid is False when at least one fetched dataset contains only zeros
        (an empty dataset counts as only zeros).
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # prefix already ends with the benchmark type: print the actual
            # object name instead of repeating the type.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    return results, benchmark_valid


def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data: x position of each commit.
    y_data: for each commit, the list of raw measurements.
    labels: x tick label of each commit.
    latest_values: mapping of branch name to a reference value; the values of
    the other branches are drawn as horizontal lines (falsy values are
    skipped).
    """
    point_x_data = []
    outlier_x_data = []
    point_y_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # The y axis always starts at 0; only the upper bound depends on the data.
    ymax = 1
    if y_data:
        ymax = 1.2 * max(item for sublist in y_data for item in sublist)
    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result
    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, for every commit, the difference between its value and the value of
    the commit right before it. The first commit is plotted at 0.

    latest_values is not used by this function.
    """
    # Delta of each point against its predecessor; the first point has none.
    deltas = [
        0.0 if index == 0 else value - y_data[index - 1]
        for index, value in enumerate(y_data)
    ]

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Symmetric y axis so that zero sits in the middle: 1.3 times the largest
    # absolute delta, or 100 when there is no data at all.
    if deltas:
        half_range = abs(max(deltas, key=abs)) * 1.3
    else:
        half_range = 100
    plt.ylim(ymin=half_range * -1, ymax=half_range)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Also label the ticks on the right-hand side.
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
    return


def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    x_data: x position of each commit.
    y_data: one value per commit; y_data[0] is the reference.
    labels: x tick label of each commit.
    latest_values: mapping of branch name to a reference value; the values of
    the other branches are drawn as horizontal lines (falsy values are
    skipped).
    """
    # Fallback reference, only used when there is no data point at all.
    reference = y_data[0] if y_data else 0.01
    y_abs_max = 100

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The axis spans at least +/-100 % and grows when a point lies further
    # away, so that no point is cut off.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        y_abs_max = max(y_abs_max, local_abs_max)

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()


def _add_page(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one graph on a fresh figure, append it to the PDF and release the
    figure.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)


def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report.

    branches: mapping of branch name to cutoff commit.
    report_name: path of the PDF file to write.
    git_path: location of the git repository used to list the commits.

    For each benchmark type and each branch, three pages are produced: raw
    values, ratio to the first commit, and delta to the previous commit. The
    PDF is opened only once all results have been fetched, and it is closed
    even when plotting fails.
    """
    client = get_client()
    branch_results = dict()

    # Fetch the results for each branch.
    for branch, cutoff in branches.items():
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in get_git_log(branch, cutoff, git_path):
                if not commit:
                    # An empty log may be reported as a single empty id.
                    continue
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # The longest series decides the page width shared by all the graphs.
    max_len = max((len(results) for results in branch_results.values()), default=0)
    width = 0.16 * max_len if max_len > 10 else 11.69

    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            # Comparison value of each branch: mean of the latest commit's
            # dataset with its outliers removed.
            latest_values = {}
            for branch, results in branch_results.items():
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                _add_page(
                    pdf_pages, width, plot_raw_value,
                    branch, b_type, x_data, y_data, labels, latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                _add_page(
                    pdf_pages, width, plot_ratio,
                    branch, b_type, x_data, y_data, labels, latest_values,
                )
                _add_page(
                    pdf_pages, width, plot_delta_between_point,
                    branch, b_type, x_data, y_data, labels, latest_values,
                )


def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    branches: mapping of branch name to cutoff commit.
    git_path: location of the git repository used to list the commits.
    wait_for_completion, debug: forwarded to lava_submit.submit().
    force: when true, submit a job for every candidate commit, whether or not
    results already exist.

    Commits listed in invalid_commits are never submitted. A commit shared by
    several branches is submitted once.
    """
    client = get_client()
    commits_to_test = set()
    for branch, cutoff in branches.items():
        commits = [
            commit
            for commit in get_git_log(branch, cutoff, git_path)
            # An empty log may be reported as a single empty id: drop it.
            if commit and commit not in invalid_commits
        ]
        if force:
            # Existing results do not matter, so do not download them.
            commits_to_test.update(commits)
            continue
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                if get_benchmark_results(client, commit, workdir)[0]:
                    continue
                commits_to_test.add(commit)
    for index, commit in enumerate(commits_to_test, start=1):
        print("Job {}/{}".format(index, len(commits_to_test)))
        lava_submit.submit(
            commit, wait_for_completion=wait_for_completion, debug=debug
        )


def main():
    """
    Parse arguments and execute as needed.

    Return the process exit status: 1 when --repo-path does not exist, 0
    otherwise.
    """
    # Default branches and, for each, the commit from which the log starts.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A dictionary of the form {"
        "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and "
        "jobs generation.",
        required=False, type=json_type
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exist.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            # The command line flag is negative, the parameter is positive.
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0


def sanitize_dataset(dataset):
    """
    Split the dataset with the 1.5 * IQR rule [1].

    Return a (kept, outliers) tuple of lists, each in the order of the input.
    A value is kept when it lies within
    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], bounds included. This is useful to get
    a representative mean without outliers in it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    low = first_quartile - margin
    high = third_quartile + margin

    kept = []
    rejected = []
    for value in dataset:
        target = kept if low <= value <= high else rejected
        target.append(value)
    return kept, rejected


# When run as a script, execute main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
Commit	Line	Data
5c65bbc2 JR	1	#!/usr/bin/python3
	2	# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
	17	import json
	18	import os
	19	import tempfile
	20	from statistics import mean
	21	import argparse
	22	import sys
	23	from operator import add
	24
	25	import matplotlib.pyplot as plt
	26	from matplotlib.backends.backend_pdf import PdfPages
	27	from matplotlib.ticker import PercentFormatter
	28
	29	import git
	30	import numpy
	31	import lava_submit
	32
	33	from minio import Minio
	34	from minio.error import NoSuchKey
	35	from minio.error import ResponseError
	36
	37
	38	BENCHMARK_TYPES = ["dummy", "text"]
	39	DEFAULT_BUCKET = "lava"
	40
e085717c	41	invalid_commits = {
c19fa307 KS	42	"ec9a9794af488a9accce7708a8b0d8188b498789", # Does not build
	43	"8c99128c640cbce71fb8a6caa15e4c672252b662", # Block on configure
	44	"f3847c753f1b4f12353c38d97b0577d9993d19fb", # Does not build
	45	"e0111295f17ddfcc33ec771a8deac505473a06ad", # Does not build
	46	"d0d4e0ed487ea23aaf0d023513c0a4d86901b79b", # Does not build
	47	"c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde", # Does not build
	48	"ce67f5614a4db3b2de4d887eca52135b439b4937", # Does not build
	49	"80aff5efc66679fd934cef433c0e698694748385", # Does not build
	50	"f4f11e84942d36fcc8a597d226928bce2ccac4b3", # Does not build
	51	"ae466a6e1b856d96cf5112a371b4df2b732503ec", # Does not build
b2f18af4 KS	52	"ade5c95e2a4f90f839f222fc1a66175b3b199922", # Configuration fails
	53	"30341532906d62808e9d66fb115f5edb4e6f5706", # Configuration fails
	54	"006c5ffb42f32e802136e3c27a63accb59b4d6c4", # Does not build
	55	"88488ff5bdcd7679ff1f04fe6cff0d24b4f8fc0c", # Does not build
	56	# Other errors
	57	"7c7301d5827bd10ec7c34da7ffc5fe74e5047d38",
	58	"a0df3abf88616cb0799f87f4eb57c54268e63448",
	59	"b7045dd71bc0524ad6b5db96df365e98e237d395",
	60	"cf7b259eaa602abcef308d2b5dd8e6c9ee995d8b",
	61	"90a55a4ef47cac7b568f5f0a8a78bd760f82d23c",
	62	"baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
	63	"af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
	64	"fe748379adbd385efdfc7acae9c2340fb8b7d717",
	65	"baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
	66	"af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
	67	"fe748379adbd385efdfc7acae9c2340fb8b7d717",
	68	"929627965e33e06dc77254d81e8ec1d66cc06590",
	69	"48a0e52c4632a60cd43423f2f34f10de350bf868",
	70	"b7fa35fce415b33207a9eba111069ed31ef122a0",
	71	"828c8a25785e0cedaeb6987256a4dfc3c43b982f",
	72	"213489680861e4d796173513effac7023312ec2d",
	73	"430a5ccbbd15782501ca56bb148f3850126277ad",
	74	"629d19044c43b195498d0a4e002906c54b6186d5",
	75	"c423217ed1640b4152739f7e5613775d46c25050",
	76	# Elfutils
	77	"776a2a252c9875caa1e8b4f41cb8cc12c79611c3",
	78	"435aa29aff0527d36aafa1b657ae70b9db5f9ea5",
	79	"95651695473495501fc6b2c4a1cf6a78cfb3cd6a",
	80	"e0748fb2ba8994c136bcc0b67d3044f09841cf8e",
	81	"9e632b22e1310fe773edc32ab08a60602f4b2861",
	82	"271fb6907a6f4705a1c799d925394243eae51d68",
	83	"328342cd737582216dc7b8b7d558b2a1bf8ea5e8",
	84	"ae5c1a4481be68fae027910b141354c1d86daa64",
	85	"e6938018975e45d35dab5fef795fe7344eef7d62",
	86	"e015bae2ef343b30c890eebb9182a8be13d12ed0",
	87	"5e8a0751ae0c418a615025d1da10bc84f91b3d97",
	88	"887d26fa0fd0ae0c5c15e4b885473c4cdc0bf078",
	89	"e97fe75eac59fc39a6e4f3c4f9f3301835a0315e",
	90	"8b130e7f1d6a41fb5c64a014c15246ba74b79470",
	91	"f4f8f79893b18199b38edc3330093a9403c4c737",
c19fa307	92	}
5c65bbc2	93
cf595cda JR	94	def json_type(string):
	95	"""
	96	Argpase type for json args.
	97	We expect a base dictionary.
	98	"""
	99	passed_json = json.loads(string)
	100	if not isinstance(passed_json, dict):
	101	msg = "%r is not a dict" % string
	102	raise argparse.ArgumentTypeError(msg)
	103	return passed_json
	104
5c65bbc2 JR	105	def graph_get_color(branch):
	106	"""
	107	Get the color matching the branch.
	108	"""
	109	color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
	110	return color[branch]
	111
	112
	113	def graph_get_title(branch, benchmark_type):
	114	"""
	115	Get title for graph based on benchmark type.
	116	"""
	117	string = {"dummy": "Dummy output", "text": "Text output"}
	118	return "{} - {}".format(branch, string[benchmark_type])
	119
	120
	121	def get_client():
	122	"""
	123	Return minio client configured.
	124	"""
	125	return Minio(
	126	"obj.internal.efficios.com", access_key="jenkins", secret_key="echo123456"
	127	)
	128
	129
	130	def get_file(client, prefix, file_name, workdir_name):
	131	"""
	132	Return the path of the downloaded file.
	133	Return None on error
	134	"""
	135	destination = os.path.join(workdir_name, file_name)
	136	object_name = "{}/{}".format(prefix, file_name)
	137	try:
	138	client.fget_object(DEFAULT_BUCKET, object_name, destination)
	139	except NoSuchKey:
	140	return None
	141
	142	return destination
	143
	144
	145	def delete_file(client, prefix, file_name):
	146	"""
	147	Delete the file on remote.
	148	"""
	149	object_name = "{}/{}".format(prefix, file_name)
	150	try:
	151	client.remove_object(DEFAULT_BUCKET, object_name)
	152	except ResponseError as err:
	153	print(err)
	154	except NoSuchKey:
	155	pass
	156
	157
	158	def get_git_log(bt_version, cutoff, repo_path):
	159	"""
	160	Return an ordered (older to newer) list of commits for the bt_version and
	161	cutoff. WARNING: This changes the git repo HEAD.
	162	"""
	163	repo = git.Repo(repo_path)
	164	repo.git.fetch()
	165	return repo.git.log(
	166	"{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
	167	).split("\n")
	168
169
170	def parse_result(result_path):
171	"""
172	Parse the result file. Return a dataset of User time + System time.
173	"""
174	with open(result_path) as result:
175	parsed_result = json.load(result)
176	return list(
177	map(
178	add,
179	parsed_result["User time (seconds)"],
180	parsed_result["System time (seconds)"],
181	)
182	)
183
184
185	def get_benchmark_results(client, commit, workdir):
186	"""
187	Fetch the benchmark result from a certain commit across all benchmark type.
188	"""
189	results = {}
190	benchmark_valid = True
191	for b_type in BENCHMARK_TYPES:
cdace203	192	prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
5c65bbc2 JR	193	result_file = get_file(client, prefix, commit, workdir)
	194	if not result_file:
	195	"""
	196	Benchmark is either corrupted or not complete.
	197	"""
	198	return None, benchmark_valid
	199	results[b_type] = parse_result(result_file)
	200	if all(i == 0.0 for i in results[b_type]):
	201	benchmark_valid = False
	202	print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
	203	# The dataset is valid return immediately.
	204	return results, benchmark_valid
	205
	206
	207	def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
	208	"""
	209	Plot the graph using the raw value.
	210	"""
	211	point_x_data = []
	212	outlier_x_data = []
	213	point_y_data = []
	214	outlier_y_data = []
	215	for pos in range(len(x_data)):
	216	x = x_data[pos]
	217	valid_points, outliers = sanitize_dataset(y_data[pos])
	218	for y in valid_points:
	219	point_x_data.append(x)
	220	point_y_data.append(y)
	221	for y in outliers:
	222	outlier_x_data.append(x)
	223	outlier_y_data.append(y)
	224
	225	plt.plot(
	226	point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
	227	)
	228	plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")
	229
5c65bbc2 JR	230	ymax = 1
	231	if y_data:
	232	ymin = 0.8 * min([item for sublist in y_data for item in sublist])
	233	ymax = 1.2 * max([item for sublist in y_data for item in sublist])
	234	# Put latest of other branches for reference as horizontal line.
	235	for l_branch, l_result in latest_values.items():
	236	if not l_result or l_branch == branch:
	237	continue
	238	plt.axhline(
	239	y=l_result,
	240	label="Latest {}".format(l_branch),
	241	color=graph_get_color(l_branch),
	242	)
5c65bbc2 JR	243	if l_result >= ymax:
5c65bbc2 JR	244	ymax = 1.2 * l_result
056f7519	245	ax = plt.gca()
925d7893	246	plt.ylim(ymin=0, ymax=ymax)
5c65bbc2 JR	247	plt.xticks(x_data, labels, rotation=90, family="monospace")
	248	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	249	plt.ylabel("User + system time (s)")
	250	plt.xlabel("Latest commits")
	251	plt.legend()
526aab11	252	plt.grid(True)
5c65bbc2	253
056f7519 JR	254	# Put tick on the right side
	255	ax.tick_params(labeltop=False, labelright=True)
	256
5c65bbc2 JR	257	plt.tight_layout()
	258	return
	259
09de7b53 JR	260
	261	def plot_delta_between_point(
	262	branch, benchmark_type, x_data, y_data, labels, latest_values
	263	):
20defd5e JR	264	"""
	265	Plot the graph of delta between each sequential commit.
	266	"""
	267	local_abs_max = 100
	268
	269	# Transform y_data to a list of for which the reference is the first
	270	# element.
	271	local_y_data = []
	272	for pos, y in enumerate(y_data):
	273	if pos == 0:
	274	local_y_data.append(0.0)
	275	continue
	276	local_y_data.append(y - y_data[pos - 1])
	277
	278	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	279
	280	# Get max absolute value to align the y axis with zero in the middle.
	281	if local_y_data:
	282	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	283
	284	plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)
	285
	286	ax = plt.gca()
	287	plt.xticks(x_data, labels, rotation=90, family="monospace")
09de7b53 JR	288	plt.title(
	289	graph_get_title(branch, benchmark_type) + " Delta to previous commit",
	290	fontweight="bold",
	291	)
20defd5e JR	292	plt.ylabel("Seconds")
	293	plt.xlabel("Latest commits")
	294	plt.legend()
526aab11	295	plt.grid(True)
20defd5e JR	296
	297	# Put tick on the right side
	298	ax.tick_params(labeltop=False, labelright=True)
	299
	300	plt.tight_layout()
	301	return
5c65bbc2	302
09de7b53	303
5c65bbc2 JR	304	def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
	305	"""
	306	Plot the graph using a ratio using first point as reference (0%).
	307	"""
	308	reference = 0.01
	309	y_abs_max = 100
	310
	311	if y_data:
	312	reference = y_data[0]
	313
	314	# Transform y_data to a list of ratio for which the reference is the first
	315	# element.
	316	local_y_data = list(map(lambda y: ((y / reference) - 1.0) * 100, y_data))
	317
	318	plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))
	319
	320	# Put latest of other branches for reference as horizontal line.
	321	for l_branch, l_result in latest_values.items():
	322	if not l_result or l_branch == branch:
	323	continue
	324	ratio_l_result = ((l_result / reference) - 1.0) * 100.0
	325	print(
	326	"branch {} branch {} value {} l_result {} reference {}".format(
	327	branch, l_branch, ratio_l_result, l_result, reference
	328	)
	329	)
	330	plt.axhline(
	331	y=ratio_l_result,
	332	label="Latest {}".format(l_branch),
	333	color=graph_get_color(l_branch),
	334	)
	335
	336	# Draw the reference line.
	337	plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")
	338
	339	# Get max absolute value to align the y axis with zero in the middle.
	340	if local_y_data:
	341	local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
	342	if y_abs_max > 100:
	343	y_abs_max = local_abs_max
	344
	345	plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)
	346
	347	ax = plt.gca()
	348	percent_formatter = PercentFormatter()
	349	ax.yaxis.set_major_formatter(percent_formatter)
	350	ax.yaxis.set_minor_formatter(percent_formatter)
	351	plt.xticks(x_data, labels, rotation=90, family="monospace")
	352	plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
	353	plt.ylabel("Ratio")
	354	plt.xlabel("Latest commits")
	355	plt.legend()
526aab11	356	plt.grid(True)
5c65bbc2	357
056f7519 JR	358	# Put tick on the right side
	359	ax.tick_params(labeltop=False, labelright=True)
	360
5c65bbc2 JR	361	plt.tight_layout()
	362	return
	363
09de7b53	364
def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report for the given branches.

    For every benchmark type in BENCHMARK_TYPES and every branch, three pages
    are appended to the report, drawn respectively by plot_raw_value(),
    plot_ratio() and plot_delta_between_point().

    branches: dict mapping a branch name to its cutoff commit; both are
        handed to get_git_log() together with git_path.
    report_name: path of the PDF file to write.
    git_path: location of the git repository passed to get_git_log().
    """
    # The PDF document. The context manager closes (finalizes) the file even
    # when fetching or plotting raises.
    with PdfPages(report_name) as pdf_pages:
        client = get_client()
        branch_results = {}

        # Fetch the results for each branch.
        for branch, cutoff in branches.items():
            commits = get_git_log(branch, cutoff, git_path)
            results = []
            with tempfile.TemporaryDirectory() as workdir:
                for commit in commits:
                    b_results, valid = get_benchmark_results(client, commit, workdir)
                    if not b_results or not valid:
                        continue
                    results.append((commit, b_results))
            branch_results[branch] = results

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # The page size only depends on the longest series, so compute it
            # once per benchmark type instead of once per branch.
            width = 0.16 * max_len if max_len > 10 else 11.69
            figsize = (width, 8.27)

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                # Each figure is closed as soon as it has been written to the
                # PDF; otherwise pyplot keeps every figure alive until exit.
                fig = plt.figure(figsize=figsize, dpi=100)
                plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
                pdf_pages.savefig(fig)
                plt.close(fig)

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                fig = plt.figure(figsize=figsize, dpi=100)
                plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
                pdf_pages.savefig(fig)
                plt.close(fig)

                fig = plt.figure(figsize=figsize, dpi=100)
                plot_delta_between_point(
                    branch, b_type, x_data, y_data, labels, latest_values
                )
                pdf_pages.savefig(fig)
                plt.close(fig)
	431
	432
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Submit a job for every commit that has no benchmark results yet.

    Commits listed in invalid_commits are never submitted. When force is
    set, commits are submitted even if results already exist for them.
    """
    client = get_client()
    pending = set()

    for branch, cutoff in branches.items():
        candidates = [
            sha
            for sha in get_git_log(branch, cutoff, git_path)
            if sha not in invalid_commits
        ]
        with tempfile.TemporaryDirectory() as workdir:
            for sha in candidates:
                existing = get_benchmark_results(client, sha, workdir)[0]
                if not existing or force:
                    pending.add(sha)

    # The set removes duplicates, so a commit reachable from several
    # branches is only submitted once.
    total = len(pending)
    for position, sha in enumerate(pending, start=1):
        print("Job {}/{}".format(position, total))
        lava_submit.submit(
            sha, wait_for_completion=wait_for_completion, debug=debug
        )
5c65bbc2 JR	452
	453
	454	def main():
	455	"""
	456	Parse arguments and execute as needed.
	457	"""
	458	bt_branches = {
	459	"master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
	460	"stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
	461	"stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
	462	}
	463
	464	parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
	465	parser.add_argument(
	466	"--generate-jobs", action="store_true", help="Generate and send jobs"
	467	)
d373c66e JR	468	parser.add_argument(
	469	"--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
	470	)
5c65bbc2 JR	471	parser.add_argument(
	472	"--do-not-wait-on-completion",
	473	action="store_true",
	474	default=False,
	475	help="Wait for the completion of each jobs sent. This is useful"
	476	"for the ci. Otherwise we could end up spaming the lava instance.",
	477	)
	478	parser.add_argument(
	479	"--generate-report",
	480	action="store_true",
	481	help="Generate graphs and save them to pdf",
	482	)
	483	parser.add_argument(
	484	"--report-name", default="report.pdf", help="The name of the pdf report."
	485	)
	486	parser.add_argument(
	487	"--debug", action="store_true", default=False, help="Do not send jobs to lava."
	488	)
	489	parser.add_argument(
	490	"--repo-path", help="The location of the git repo to use.", required=True
	491	)
cf595cda JR	492	parser.add_argument(
	493	"--overwrite-branches-cutoff",
	494	help="A dictionary of the form {"
	495	"'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and"
	496	"jobs generation.",
	497	required=False, type=json_type
	498	)
5c65bbc2 JR	499
	500	args = parser.parse_args()
	501
cf595cda JR	502	if args.overwrite_branches_cutoff:
	503	bt_branches = args.overwrite_branches_cutoff
	504
5c65bbc2 JR	505	if not os.path.exists(args.repo_path):
	506	print("Repository location does not exists.")
	507	return 1
	508
	509	if args.generate_jobs:
	510	print("Launching jobs for:")
d373c66e	511
5c65bbc2 JR	512	for branch, cutoff in bt_branches.items():
5c65bbc2 JR	513	print("\t Branch {} with cutoff {}".format(branch, cutoff))
d373c66e	514
5c65bbc2	515	launch_jobs(
d373c66e JR	516	bt_branches,
	517	args.repo_path,
	518	not args.do_not_wait_on_completion,
	519	args.debug,
	520	args.force_jobs,
5c65bbc2 JR	521	)
	522
	523	if args.generate_report:
	524	print("Generating pdf report ({}) for:".format(args.report_name))
	525	for branch, cutoff in bt_branches.items():
	526	print("\t Branch {} with cutoff {}".format(branch, cutoff))
	527	generate_graph(bt_branches, args.report_name, args.repo_path)
	528
	529	return 0
	530
	531
	532	def sanitize_dataset(dataset):
	533	"""
	534	Use IRQ 1.5 [1] to remove outlier from the dataset. This is useful to get a
	535	representative mean without outlier in it.
	536	[1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
	537	"""
	538	sorted_data = sorted(dataset)
	539	q1, q3 = numpy.percentile(sorted_data, [25, 75])
	540	iqr = q3 - q1
	541	lower_bound = q1 - (1.5 * iqr)
	542	upper_bound = q3 + (1.5 * iqr)
	543	new_dataset = []
	544	outliers = []
	545	for i in dataset:
	546	if lower_bound <= i <= upper_bound:
	547	new_dataset.append(i)
	548	else:
	549	outliers.append(i)
	550	return new_dataset, outliers
	551
	552
	553	if __name__ == "__main__":
	554	sys.exit(main())