Fix: missing "fs" to template variable
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
CommitLineData
5c65bbc2
JR
1#!/usr/bin/python3
2# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import json
18import os
19import tempfile
20from statistics import mean
21import argparse
22import sys
23from operator import add
24
25import matplotlib.pyplot as plt
26from matplotlib.backends.backend_pdf import PdfPages
27from matplotlib.ticker import PercentFormatter
28
29import git
30import numpy
31import lava_submit
32
33from minio import Minio
34from minio.error import NoSuchKey
35from minio.error import ResponseError
36
37
# Name of the object-store bucket used for every download and removal done
# by this script.
DEFAULT_BUCKET = "lava"

# Benchmark flavours handled by this script; each one has its own result
# prefix on the object store and its own set of graphs in the report.
BENCHMARK_TYPES = ["dummy", "text"]
40
41
cf595cda
JR
def json_type(string):
    """
    Argparse type callable for JSON arguments.

    Decode ``string`` as JSON and return the decoded value when its top level
    is a dictionary. Any other top-level JSON value is rejected with an
    argparse.ArgumentTypeError.
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)
52
5c65bbc2
JR
def graph_get_color(branch):
    """
    Get the color matching the branch.

    The three well-known branches keep their fixed colors. Any other branch
    name (for instance one passed through --overwrite-branches-cutoff) gets a
    color picked from a small fallback palette instead of raising KeyError.
    The pick is derived from a checksum of the name so that a given branch
    always gets the same color.
    """
    known_colors = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    if branch in known_colors:
        return known_colors[branch]

    # Local import: only needed for the fallback path.
    import zlib

    # The palette avoids the colors of the known branches and black, which
    # the graphs use for outliers and the reference line.
    fallback_palette = ("orange", "purple", "brown", "magenta", "olive", "teal")
    checksum = zlib.crc32(str(branch).encode("utf-8"))
    return fallback_palette[checksum % len(fallback_palette)]
59
60
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title: the branch name followed by a human readable
    name for the benchmark type.

    An unknown benchmark type raises KeyError.
    """
    output_names = dict(dummy="Dummy output", text="Text output")
    readable_type = output_names[benchmark_type]
    return "{0} - {1}".format(branch, readable_type)
67
68
def get_client():
    """
    Build and return the Minio client used to reach the object store.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider sourcing them from the environment or a config file.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)
76
77
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "<prefix>/<file_name>" from DEFAULT_BUCKET into
    workdir_name.

    Return the local path of the downloaded file, or None when the client
    raises NoSuchKey for that object.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_key = "{0}/{1}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_key, local_path)
    except NoSuchKey:
        local_path = None
    return local_path
91
92
def delete_file(client, prefix, file_name):
    """
    Remove the object "<prefix>/<file_name>" from DEFAULT_BUCKET.

    This is best effort: a ResponseError is printed and otherwise ignored,
    and a missing object (NoSuchKey) is silently accepted.
    """
    remote_key = "{0}/{1}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_key)
    except ResponseError as error:
        # Report the failure but do not abort the caller.
        print(error)
    except NoSuchKey:
        # Nothing to delete.
        return
104
105
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes found in
    origin/<bt_version> but not reachable from cutoff.

    Side effect: a fetch is performed on the repository at repo_path before
    the log is listed.

    An empty list is returned when there is no commit past the cutoff.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # splitlines() gives [] for an empty output, whereas split("\n") would
    # give [""] and make callers process a bogus empty commit hash.
    return log_output.splitlines()
116
117
def parse_result(result_path):
    """
    Load the JSON result file at result_path and return one value per
    sample: the user time plus the system time of that sample.

    The two series are paired element by element; pairing stops at the end
    of the shorter one.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)
    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
131
132
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit for every benchmark type.

    Return a (results, valid) tuple. ``results`` maps each benchmark type to
    its parsed dataset, or is None as soon as one result file cannot be
    downloaded. ``valid`` becomes False when the dataset of at least one
    benchmark type contains nothing but zeros.
    """
    per_type = {}
    is_valid = True
    for kind in BENCHMARK_TYPES:
        remote_prefix = "/results/benchmarks/babeltrace/{}/".format(kind)
        local_file = get_file(client, remote_prefix, commit, workdir)
        if not local_file:
            # Benchmark is either corrupted or not complete.
            return None, is_valid

        samples = parse_result(local_file)
        per_type[kind] = samples
        if not any(value != 0.0 for value in samples):
            is_valid = False
            print("Invalid benchmark for {}/{}/{}".format(remote_prefix, kind, commit))

    return per_type, is_valid
153
154
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the title.
    x_data: x position of each commit.
    y_data: for each commit, the list of raw samples.
    labels: x tick label of each commit.
    latest_values: maps a branch name to its latest reference value (or a
    falsy value when there is none); other branches are drawn as horizontal
    lines for comparison.

    Draws on the current pyplot figure and returns nothing.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        # Every sample of a commit shares the x position of that commit.
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave 20% of headroom above the highest sample. Guard on the flattened
    # samples so that a dataset made only of empty series keeps the default.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line fits inside the graph.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
207
09de7b53
JR
208
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, for each commit, the difference between its value and the value of
    the commit right before it. The first commit has no predecessor and is
    plotted at 0.0.

    latest_values is accepted for signature parity with the other plot
    functions and is not used here.
    """
    # Pair each value with its predecessor.
    deltas = [current - previous for previous, current in zip(y_data, y_data[1:])]
    if y_data:
        deltas.insert(0, 0.0)

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Symmetric y axis (zero in the middle), sized from the largest absolute
    # delta plus 30% of margin.
    half_range = 100
    if deltas:
        half_range = abs(max(deltas, key=abs)) * 1.3
    plt.ylim(ymin=-half_range, ymax=half_range)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Also label the ticks on the right side.
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
5c65bbc2 250
09de7b53 251
5c65bbc2
JR
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    branch: name of the branch being plotted.
    benchmark_type: benchmark type, used for the title.
    x_data: x position of each commit.
    y_data: one value per commit; the first one is the reference.
    labels: x tick label of each commit.
    latest_values: maps a branch name to its latest reference value (or a
    falsy value when there is none); other branches are drawn as horizontal
    lines for comparison.

    The y axis spans at least -100% to +100% and is widened symmetrically
    when a plotted point falls outside that range.
    Draws on the current pyplot figure and returns nothing.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        # Widen the axis only when the data does not fit in the default
        # range. The comparison must be on the data maximum: comparing the
        # default against itself would never widen the axis.
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
311
09de7b53 312
5c65bbc2
JR
def _save_plot_page(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one graph on a fresh figure, append it to the PDF and release the
    figure so that figures do not pile up in memory.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        plt.close(fig)


def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report.

    branches: maps a branch name to its cutoff commit.
    report_name: path of the PDF file to write.
    git_path: path of the git repository used to list the commits.

    For each benchmark type and each branch, three pages are written: the
    raw values, the ratio against the first commit, and the delta between
    consecutive commits. Commits without complete and valid results are
    skipped.
    """
    # The PDF document. The context manager closes it even when fetching or
    # plotting fails.
    with PdfPages(report_name) as pdf_pages:
        client = get_client()
        branch_results = dict()

        # Fetch the results for each branch.
        for branch, cutoff in branches.items():
            commits = get_git_log(branch, cutoff, git_path)
            results = []
            with tempfile.TemporaryDirectory() as workdir:
                for commit in commits:
                    b_results, valid = get_benchmark_results(client, commit, workdir)
                    if not b_results or not valid:
                        continue
                    results.append((commit, b_results))
            branch_results[branch] = results

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Page width in inches: grows with the longest series, with a
            # default of 11.69 (presumably A4 landscape together with the
            # 8.27 height) for short series.
            if max_len > 10:
                width = 0.16 * max_len
            else:
                width = 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]

                raw_y_data = [c[1][b_type] for c in results]
                _save_plot_page(
                    pdf_pages,
                    width,
                    plot_raw_value,
                    branch,
                    b_type,
                    x_data,
                    raw_y_data,
                    labels,
                    latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_y_data = [
                    mean(sanitize_dataset(c[1][b_type])[0]) for c in results
                ]
                _save_plot_page(
                    pdf_pages,
                    width,
                    plot_ratio,
                    branch,
                    b_type,
                    x_data,
                    mean_y_data,
                    labels,
                    latest_values,
                )
                _save_plot_page(
                    pdf_pages,
                    width,
                    plot_delta_between_point,
                    branch,
                    b_type,
                    x_data,
                    mean_y_data,
                    labels,
                    latest_values,
                )
379
380
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch a job for every commit that has no benchmark results yet.

    When force is true, a job is submitted for every commit, whether or not
    results already exist.
    """
    client = get_client()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)

        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                existing, _valid = get_benchmark_results(client, commit, workdir)
                if force or not existing:
                    lava_submit.submit(
                        commit, wait_for_completion=wait_for_completion, debug=debug
                    )
397
398
def main():
    """
    Parse arguments and execute as needed.

    Return 0 on success and 1 when the repository path does not exist; the
    value is meant to be used as the process exit status.
    """
    # Default branches with their cutoff commit; can be replaced with
    # --overwrite-branches-cutoff.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A dictionary of the form {"
        "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and "
        "jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            # The command line flag is negative, launch_jobs expects the
            # positive form.
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
475
476
def sanitize_dataset(dataset):
    """
    Use the 1.5 x IQR rule [1] to split the outliers out of the dataset. This
    is useful to get a representative mean without outlier in it.

    Return a (kept values, outliers) tuple of lists, both in the order of the
    input. An empty dataset yields two empty lists.

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    # Materialize once: the values are read twice (percentiles, then split).
    values = list(dataset)
    if not values:
        # No percentile can be computed on an empty dataset.
        return [], []

    q1, q3 = numpy.percentile(values, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)

    new_dataset = []
    outliers = []
    for value in values:
        if lower_bound <= value <= upper_bound:
            new_dataset.append(value)
        else:
            outliers.append(value)
    return new_dataset, outliers
496
497
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
This page took 0.041256 seconds and 4 git commands to generate.