Add invalid commit skip for babeltrace benchmark
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
CommitLineData
5c65bbc2
JR
1#!/usr/bin/python3
2# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import json
18import os
19import tempfile
20from statistics import mean
21import argparse
22import sys
23from operator import add
24
25import matplotlib.pyplot as plt
26from matplotlib.backends.backend_pdf import PdfPages
27from matplotlib.ticker import PercentFormatter
28
29import git
30import numpy
31import lava_submit
32
33from minio import Minio
34from minio.error import NoSuchKey
35from minio.error import ResponseError
36
37
# Benchmark flavours; the flavour name is part of the remote result prefix.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket used for every object fetched from or removed on the remote.
DEFAULT_BUCKET = "lava"

# Commits that are skipped when launching jobs.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789",  # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662",  # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",  # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad",  # Does not build
}
5c65bbc2 47
cf595cda
JR
def json_type(string):
    """
    Argparse type for JSON arguments.

    Parse `string` as JSON and return the resulting dictionary.

    Raise argparse.ArgumentTypeError when `string` is not valid JSON or when
    the top-level JSON value is not a dictionary.
    """
    try:
        passed_json = json.loads(string)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass. Convert it so that
        # argparse reports what is actually wrong with the argument.
        msg = "%r is not valid JSON: %s" % (string, err)
        raise argparse.ArgumentTypeError(msg) from err

    if not isinstance(passed_json, dict):
        msg = "%r is not a dict" % string
        raise argparse.ArgumentTypeError(msg)
    return passed_json
58
5c65bbc2
JR
def graph_get_color(branch):
    """
    Get the color matching the branch.

    The known branches each have a fixed color. Any other branch name (for
    example one passed through --overwrite-branches-cutoff) gets a neutral
    fallback color instead of raising KeyError.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    return color.get(branch, "gray")
65
66
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title from the branch name and the benchmark type.

    Raise KeyError when benchmark_type is neither "dummy" nor "text".
    """
    output_names = {"dummy": "Dummy output", "text": "Text output"}
    output_name = output_names[benchmark_type]
    return "{} - {}".format(branch, output_name)
73
74
def get_client():
    """
    Build and return the configured Minio client.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider moving them to configuration or the environment.
    endpoint = "obj.internal.efficios.com"
    return Minio(endpoint, access_key="jenkins", secret_key="echo123456")
82
83
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "<prefix>/<file_name>" from the default bucket into
    workdir_name.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote (NoSuchKey).
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        return None
    else:
        return local_path
97
98
def delete_file(client, prefix, file_name):
    """
    Remove the object "<prefix>/<file_name>" from the default bucket.

    A ResponseError is printed and otherwise ignored. A missing object
    (NoSuchKey) is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete.
        return
110
111
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes for the
    "<cutoff>..origin/<bt_version>" range.

    The list is empty when the range holds no commit.
    WARNING: This runs `git fetch` in the repository located at repo_path.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    log_output = repo.git.log(
        "{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
    )
    # split("\n") would return [""] for an empty log, which callers would
    # then handle as a commit with an empty hash. splitlines() returns [].
    return log_output.splitlines()
122
123
def parse_result(result_path):
    """
    Load the JSON result file and return the element-wise sum of its
    "User time (seconds)" and "System time (seconds)" lists.

    The returned list is as long as the shorter of the two lists.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)

    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
137
138
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit across all benchmark types.

    Return a (results, valid) tuple:
      - results maps each benchmark type to its parsed dataset. It is None
        as soon as the result file of one benchmark type cannot be fetched.
      - valid is False when at least one parsed dataset contains only zeros
        (an empty dataset also counts as such).
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # The prefix already holds the benchmark type: do not repeat it.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    # Every benchmark type has a result file for this commit.
    return results, benchmark_valid
159
160
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw values, on the current pyplot figure.

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the graph title.
    x_data: x position of each commit.
    y_data: one list of samples per commit, parallel to x_data.
    labels: x tick label of each commit, parallel to x_data.
    latest_values: maps a branch name to its latest value. The value of
        every other branch is drawn as a horizontal line; falsy values are
        skipped.

    Samples reported as outliers by sanitize_dataset() are drawn with a
    distinct marker.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave some headroom above the highest sample. Keep the default when
    # there is no sample at all, max() would raise on an empty sequence.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the plotted range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
213
09de7b53
JR
214
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot the graph of delta between each sequential commit, on the current
    pyplot figure.

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the graph title.
    x_data: x position of each commit.
    y_data: one value per commit, parallel to x_data.
    labels: x tick label of each commit, parallel to x_data.
    latest_values: not used by this plot.
    """
    local_abs_max = 100

    # Transform y_data to a list of differences with the previous element.
    # The first element has no predecessor: its delta is 0.
    local_y_data = []
    if y_data:
        local_y_data = [0.0]
        local_y_data.extend(
            current - previous for previous, current in zip(y_data, y_data[1:])
        )

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        peak = abs(max(local_y_data, key=abs))
        # Keep the default range when every delta is 0 (a single commit for
        # example), otherwise both limits would be set to 0.
        if peak > 0:
            local_abs_max = peak * 1.3

    plt.ylim(ymin=local_abs_max * -1, ymax=local_abs_max)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(
        graph_get_title(branch, benchmark_type) + " Delta to previous commit",
        fontweight="bold",
    )
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
5c65bbc2 256
09de7b53 257
5c65bbc2
JR
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio with the first point as reference (0%), on
    the current pyplot figure.

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the graph title.
    x_data: x position of each commit.
    y_data: one value per commit, parallel to x_data.
    labels: x tick label of each commit, parallel to x_data.
    latest_values: maps a branch name to its latest value. The value of
        every other branch is drawn as a horizontal line; falsy values are
        skipped.

    The y axis is symmetric around 0% and spans at least +/-100%.
    """
    reference = 0.01
    y_abs_max = 100

    # Keep the fallback reference when there is no data or when the first
    # point is 0, so that the divisions below are always defined.
    if y_data and y_data[0]:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        # Widen the default +/-100% window only when the data exceeds it.
        # The comparison has to be done on the computed maximum: comparing
        # the default against itself never widens the window.
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
317
09de7b53 318
5c65bbc2
JR
def _fetch_branch_results(client, branches, git_path):
    """
    Return a dict mapping each branch to its list of (commit, results)
    tuples, older to newer, keeping only complete and valid benchmarks.
    """
    branch_results = dict()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                # Guard against an empty hash (e.g. from an empty git log).
                if not commit:
                    continue
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results
    return branch_results


def _render_page(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one plot on a new figure, append it to the PDF, then close it.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)


def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report of the benchmark results.

    branches: maps a branch name to its cutoff commit.
    report_name: path of the PDF document to write.
    git_path: path of the git repository used to list the commits.

    For each benchmark type and each branch, three pages are written: the
    raw values, the ratio to the first commit and the delta between
    sequential commits.
    """
    # The PDF document. It is closed even when fetching or plotting fails.
    with PdfPages(report_name) as pdf_pages:
        client = get_client()

        # Fetch the results for each branch.
        branch_results = _fetch_branch_results(client, branches, git_path)

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # The width only depends on the longest series: compute it once.
            if max_len > 10:
                width = 0.16 * max_len
            else:
                width = 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                raw_y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                _render_page(
                    pdf_pages,
                    width,
                    plot_raw_value,
                    branch,
                    b_type,
                    x_data,
                    raw_y_data,
                    labels,
                    latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_y_data = [
                    mean(sanitize_dataset(c[1][b_type])[0]) for c in results
                ]
                _render_page(
                    pdf_pages,
                    width,
                    plot_ratio,
                    branch,
                    b_type,
                    x_data,
                    mean_y_data,
                    labels,
                    latest_values,
                )
                _render_page(
                    pdf_pages,
                    width,
                    plot_delta_between_point,
                    branch,
                    b_type,
                    x_data,
                    mean_y_data,
                    labels,
                    latest_values,
                )
385
386
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    branches: maps a branch name to its cutoff commit.
    git_path: path of the git repository used to list the commits.
    wait_for_completion: forwarded to lava_submit.submit().
    debug: forwarded to lava_submit.submit().
    force: when true, submit a job even for commits that already have
        results.

    Commits listed in invalid_commits are never submitted.
    """
    client = get_client()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)

        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                # Guard against an empty hash (e.g. from an empty git log):
                # it has no result on the remote and would be submitted.
                if not commit or commit in invalid_commits:
                    continue
                b_results = get_benchmark_results(client, commit, workdir)[0]
                if b_results and not force:
                    continue
                lava_submit.submit(
                    commit, wait_for_completion=wait_for_completion, debug=debug
                )
405
406
def main():
    """
    Parse arguments and execute as needed.

    Return 0 on success and 1 when the repository path does not exist.
    """
    # Default branches, each with the cutoff commit that bounds its history.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A dictionary of the form {"
        "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and "
        "jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
483
484
def sanitize_dataset(dataset):
    """
    Use the 1.5 * IQR rule [1] to remove outliers from the dataset. This is
    useful to get a representative mean without outlier in it.

    Return a (valid_points, outliers) tuple of lists, both in the order of
    the input. An empty dataset yields two empty lists.

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    data = list(dataset)
    if not data:
        # There is no percentile to compute on an empty dataset.
        return [], []

    # numpy.percentile() does not need a sorted input.
    q1, q3 = numpy.percentile(data, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)

    new_dataset = []
    outliers = []
    for value in data:
        if lower_bound <= value <= upper_bound:
            new_dataset.append(value)
        else:
            outliers.append(value)
    return new_dataset, outliers
504
505
if __name__ == "__main__":
    # Use the return value of main() as the process exit status.
    raise SystemExit(main())
This page took 0.042612 seconds and 4 git commands to generate.