bt benchmark: Normalize indentation around invalid_commits
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
CommitLineData
5c65bbc2
JR
1#!/usr/bin/python3
2# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import json
18import os
19import tempfile
20from statistics import mean
21import argparse
22import sys
23from operator import add
24
25import matplotlib.pyplot as plt
26from matplotlib.backends.backend_pdf import PdfPages
27from matplotlib.ticker import PercentFormatter
28
29import git
30import numpy
31import lava_submit
32
33from minio import Minio
34from minio.error import NoSuchKey
35from minio.error import ResponseError
36
37
# Benchmark flavours; one result file per flavour is looked up for each commit.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket of the object store in which the results are read and deleted.
DEFAULT_BUCKET = "lava"

# Commits that are filtered out before jobs are launched.
invalid_commits = {
    # Block on configure.
    "8c99128c640cbce71fb8a6caa15e4c672252b662",
    # Does not build.
    "ec9a9794af488a9accce7708a8b0d8188b498789",
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",
    "e0111295f17ddfcc33ec771a8deac505473a06ad",
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b",
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde",
    "ce67f5614a4db3b2de4d887eca52135b439b4937",
    "80aff5efc66679fd934cef433c0e698694748385",
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3",
    "ae466a6e1b856d96cf5112a371b4df2b732503ec",
}
5c65bbc2 53
cf595cda
JR
def json_type(string):
    """
    Argparse type for JSON arguments.

    Parse `string` as JSON and return the resulting dictionary.
    Raise argparse.ArgumentTypeError when `string` is not valid JSON or when
    its top-level value is not a dictionary.
    """
    try:
        passed_json = json.loads(string)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError. Convert it so that the user
        # gets an explicit message rather than argparse's generic one.
        msg = "%r is not valid JSON: %s" % (string, err)
        raise argparse.ArgumentTypeError(msg) from err
    if not isinstance(passed_json, dict):
        msg = "%r is not a dict" % string
        raise argparse.ArgumentTypeError(msg)
    return passed_json
64
5c65bbc2
JR
def graph_get_color(branch):
    """
    Get the color matching the branch.

    Branches without a dedicated color (for example custom branches passed
    on the command line) are drawn in gray instead of raising a KeyError.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    return color.get(branch, "gray")
71
72
def graph_get_title(branch, benchmark_type):
    """
    Build a graph title: the branch name followed by the readable name of the
    benchmark type.
    """
    readable_names = {"dummy": "Dummy output", "text": "Text output"}
    output_name = readable_names[benchmark_type]
    return "{} - {}".format(branch, output_name)
79
80
def get_client():
    """
    Build and return the configured minio client.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded here.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)
88
89
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object `prefix`/`file_name` of the default bucket into
    `workdir_name`.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        return None
    else:
        return local_path
103
104
def delete_file(client, prefix, file_name):
    """
    Remove the object `prefix`/`file_name` from the default bucket.

    A response error is printed; a missing object is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as response_error:
        print(response_error)
    except NoSuchKey:
        # Nothing to delete.
        return
116
117
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes for the range
    `cutoff`..origin/`bt_version`.

    The repository located at `repo_path` is fetched first. An empty list is
    returned when the range contains no commit.

    NOTE(review): this function used to warn that it changes the git repo
    HEAD; only a fetch is performed here -- confirm before relying on it.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # "".split("\n") yields [""], which reported an empty log as one commit
    # with an empty hash. Drop empty lines so that no bogus commit is
    # returned.
    return [commit for commit in log_output.splitlines() if commit]
128
129
def parse_result(result_path):
    """
    Load the JSON result file and return a dataset holding, for each entry,
    the sum of its user time and of its system time.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)
        user_times = timings["User time (seconds)"]
        system_times = timings["System time (seconds)"]
        return [user + system for user, system in zip(user_times, system_times)]
143
144
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of `commit` across all benchmark types.

    Return a (results, valid) tuple:
    - `results` maps each benchmark type to its parsed dataset. It is None
      as soon as the result file of one benchmark type cannot be fetched.
    - `valid` is False when at least one of the fetched datasets contains
      only zeros.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # `prefix` already ends with the benchmark type: do not repeat it
            # so that the printed path matches the remote object name.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    return results, benchmark_valid
165
166
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    `y_data` holds one list of samples per entry of `x_data`. Samples kept
    by sanitize_dataset() are drawn as points, the rejected ones as
    outliers. The latest value of every other branch found in
    `latest_values` is drawn as a horizontal reference line.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave some headroom above the highest sample. The lower limit is always
    # zero, so no minimum is computed.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure that the reference line is inside the plotted range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
219
09de7b53
JR
220
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, for each commit, the difference between its value and the value of
    the commit that precedes it.
    """
    # The first commit has no predecessor: its delta is zero.
    deltas = [
        0.0 if pos == 0 else value - y_data[pos - 1]
        for pos, value in enumerate(y_data)
    ]

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Use a symmetric range so that zero sits in the middle of the y axis.
    half_range = 100
    if deltas:
        half_range = abs(max(deltas, key=abs)) * 1.3
    plt.ylim(ymin=half_range * -1, ymax=half_range)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
5c65bbc2 262
09de7b53 263
5c65bbc2
JR
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    The latest value of every other branch found in `latest_values` is drawn
    as a horizontal line, expressed as a ratio of the same reference.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The range is never smaller than +/-100%; it is widened only when a
    # point would otherwise fall outside of it. The previous test compared
    # y_abs_max to its own initial value and could never be true.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        y_abs_max = max(y_abs_max, local_abs_max)

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
323
09de7b53 324
5c65bbc2
JR
def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report `report_name`.

    `branches` maps a branch name to its cutoff commit and `git_path` is the
    location of the git repository. For each benchmark type and each branch,
    three pages are produced: raw values, ratio to the first commit and
    delta to the previous commit.
    """
    client = get_client()
    branch_results = {}

    # Fetch the results for each branch.
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # The context manager closes the PDF document even if plotting fails.
    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Widen the page when there are many commits to display.
            if max_len > 10:
                width = 0.16 * max_len
            else:
                width = 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]
                raw_data = [c[1][b_type] for c in results]
                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_data = [mean(sanitize_dataset(samples)[0]) for samples in raw_data]

                pages = (
                    (plot_raw_value, raw_data),
                    (plot_ratio, mean_data),
                    (plot_delta_between_point, mean_data),
                )
                for plot_page, y_data in pages:
                    fig = plt.figure(figsize=(width, 8.27), dpi=100)
                    plot_page(branch, b_type, x_data, y_data, labels, latest_values)
                    pdf_pages.savefig(fig)
                    # Release the figure: pyplot keeps every figure alive
                    # until it is explicitly closed.
                    plt.close(fig)
391
392
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    Commits listed in invalid_commits are never submitted. A commit that
    already has results is submitted only when `force` is set.
    """
    client = get_client()
    pending = set()
    for branch, cutoff in branches.items():
        candidates = [
            commit
            for commit in get_git_log(branch, cutoff, git_path)
            if commit not in invalid_commits
        ]
        with tempfile.TemporaryDirectory() as workdir:
            for commit in candidates:
                existing = get_benchmark_results(client, commit, workdir)[0]
                if force or not existing:
                    pending.add(commit)

    total = len(pending)
    for job_number, commit in enumerate(pending, start=1):
        print("Job {}/{}".format(job_number, total))
        lava_submit.submit(
            commit, wait_for_completion=wait_for_completion, debug=debug
        )
5c65bbc2
JR
412
413
def main():
    """
    Parse arguments and execute as needed.

    Return 0 on success, or 1 when the path passed with --repo-path does not
    exist.
    """
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        # The flag disables waiting; the help text used to describe the
        # opposite and was missing a space between its two parts.
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A dictionary of the form {"
        "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and "
        "jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    # Custom branches fully replace the default ones.
    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
490
491
def sanitize_dataset(dataset):
    """
    Use the 1.5 * IQR rule [1] to remove outliers from the dataset. This is
    useful to get a representative mean without outlier in it.

    Return a (new_dataset, outliers) tuple of lists. Both lists keep the
    order of `dataset`. An empty dataset yields two empty lists.

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    # Materialize once so that any iterable can be walked twice.
    values = list(dataset)
    if not values:
        # Percentiles are undefined for an empty dataset.
        return [], []

    # numpy.percentile does not require a sorted input.
    q1, q3 = numpy.percentile(values, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)

    new_dataset = []
    outliers = []
    for value in values:
        if lower_bound <= value <= upper_bound:
            new_dataset.append(value)
        else:
            outliers.append(value)
    return new_dataset, outliers
511
512
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
This page took 0.04669 seconds and 4 git commands to generate.