bt benchmark: add delta plot
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
CommitLineData
5c65bbc2
JR
1#!/usr/bin/python3
2# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import json
18import os
19import tempfile
20from statistics import mean
21import argparse
22import sys
23from operator import add
24
25import matplotlib.pyplot as plt
26from matplotlib.backends.backend_pdf import PdfPages
27from matplotlib.ticker import PercentFormatter
28
29import git
30import numpy
31import lava_submit
32
33from minio import Minio
34from minio.error import NoSuchKey
35from minio.error import ResponseError
36
37
# Benchmark types for which results are fetched and plotted.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket used for every object download/removal done by this script.
DEFAULT_BUCKET = "lava"
40
41
def graph_get_color(branch):
    """
    Return the plot color associated with a branch name.

    Raise KeyError when the branch has no color assigned.
    """
    branch_colors = {
        "stable-1.5": "red",
        "stable-2.0": "green",
        "master": "blue",
    }
    return branch_colors[branch]
48
49
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title from the branch name and the benchmark type.

    Raise KeyError when the benchmark type is not known.
    """
    output_names = {"dummy": "Dummy output", "text": "Text output"}
    output_name = output_names[benchmark_type]
    return "{} - {}".format(branch, output_name)
56
57
def get_client():
    """
    Build and return the Minio client used by this script.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider moving the secret out of the repository.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)
65
66
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "prefix/file_name" from the default bucket into
    workdir_name.

    Return the local path of the downloaded file, or None when the object
    does not exist (NoSuchKey).
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        return None
    else:
        return local_path
80
81
def delete_file(client, prefix, file_name):
    """
    Remove the object "prefix/file_name" from the default bucket.

    A ResponseError is printed and otherwise ignored; a missing object
    (NoSuchKey) is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete.
        return
93
94
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes in the range
    "<cutoff>..origin/<bt_version>".

    Return an empty list when the range contains no commit.
    WARNING: This runs `git fetch` in the repository located at repo_path.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # An empty log must give an empty list: "".split("\n") would return [""],
    # i.e. a bogus empty commit id, while splitlines() returns [].
    return log_output.splitlines()
105
106
def parse_result(result_path):
    """
    Load the JSON result file and return a dataset where each entry is the
    sum of the matching "User time (seconds)" and "System time (seconds)"
    entries.
    """
    with open(result_path) as result_file:
        content = json.load(result_file)
    user_times = content["User time (seconds)"]
    system_times = content["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
120
121
def get_benchmark_results(client, commit, workdir):
    """
    Fetch and parse the result of a commit for every benchmark type.

    Return a (results, valid) tuple. results maps each benchmark type to its
    dataset, or is None as soon as one result file cannot be fetched. valid is
    False when at least one fetched dataset contains only zeros.
    """
    datasets = {}
    is_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, is_valid
        dataset = parse_result(result_file)
        datasets[b_type] = dataset
        if any(value != 0.0 for value in dataset):
            continue
        # Only zeros (or no sample at all): flag the benchmark as invalid.
        is_valid = False
        print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
    return datasets, is_valid
142
143
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data holds the x position of each commit, y_data the list of samples of
    each commit and labels the x tick label of each commit. Samples flagged as
    outliers by sanitize_dataset() are drawn with a distinct black marker.
    latest_values maps a branch name to its latest value (or None); the value
    of every other branch is drawn as a horizontal reference line.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    ymax = 1
    if y_data:
        # Keep some headroom above the highest sample.
        ymax = 1.2 * max(item for sublist in y_data for item in sublist)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the plot.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    # Use bottom/top: the ymin/ymax keyword aliases are not accepted by
    # recent matplotlib versions.
    plt.ylim(bottom=0, top=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
195
20defd5e
JR
def plot_delta_between_point(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph of delta between each sequential commit.

    x_data holds the x position of each commit, y_data one value per commit
    and labels the x tick label of each commit. The first point has no
    predecessor and is plotted as a delta of 0. latest_values is accepted to
    share the signature of the other plot functions and is not used.
    """
    local_abs_max = 100

    # Transform y_data into the list of differences with the previous element.
    local_y_data = []
    if y_data:
        local_y_data.append(0.0)
        local_y_data.extend(
            current - previous for previous, current in zip(y_data, y_data[1:])
        )

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3

    # Use bottom/top: the ymin/ymax keyword aliases are not accepted by
    # recent matplotlib versions.
    plt.ylim(bottom=-local_abs_max, top=local_abs_max)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(
        graph_get_title(branch, benchmark_type) + " Delta to previous commit",
        fontweight="bold",
    )
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
5c65bbc2
JR
231
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    x_data holds the x position of each commit, y_data one value per commit
    and labels the x tick label of each commit. latest_values maps a branch
    name to its latest value (or None); the value of every other branch is
    drawn, as a ratio to the reference, as a horizontal line.
    The y axis is symmetric around 0% and spans at least -100% to 100%.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        # Grow the axis when a point falls outside of the default range. The
        # comparison must be done on local_abs_max: y_abs_max always starts at
        # 100, so testing it against 100 would never widen the axis.
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    # Use bottom/top: the ymin/ymax keyword aliases are not accepted by
    # recent matplotlib versions.
    plt.ylim(bottom=-y_abs_max, top=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
290
5c65bbc2
JR
def _fetch_branch_results(client, branches, git_path):
    """
    Return a dict mapping each branch to its list of (commit, results) tuples,
    keeping only the commits with complete and valid benchmark results.
    """
    branch_results = {}
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results
    return branch_results


def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report named report_name.

    branches maps a branch name to its cutoff commit and git_path is the
    location of the git repository used to list the commits. For each
    benchmark type and each branch, three pages are produced: raw values,
    ratio to the first commit and delta to the previous commit.
    """
    # The PDF document. Used as a context manager so that the document is
    # closed even when fetching or plotting fails.
    with PdfPages(report_name) as pdf_pages:
        client = get_client()

        # Fetch the results for each branch.
        branch_results = _fetch_branch_results(client, branches, git_path)

        # Find the maximum size for a series inside our series dataset.
        # This is used to compute the size of the actual plot (pdf).
        max_len = max(
            (len(results) for results in branch_results.values()), default=0
        )
        width = 0.16 * max_len if max_len > 10 else 11.69

        for b_type in BENCHMARK_TYPES:
            # Gather the comparison value used to draw comparison line between
            # branches.
            latest_values = {}
            for branch, results in branch_results.items():
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]
                raw_y_data = [c[1][b_type] for c in results]
                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_y_data = [
                    mean(sanitize_dataset(c[1][b_type])[0]) for c in results
                ]

                pages = (
                    (plot_raw_value, raw_y_data),
                    (plot_ratio, mean_y_data),
                    (plot_delta_between_point, mean_y_data),
                )
                for plot_function, y_data in pages:
                    fig = plt.figure(figsize=(width, 8.27), dpi=100)
                    try:
                        plot_function(
                            branch, b_type, x_data, y_data, labels, latest_values
                        )
                        pdf_pages.savefig(fig)
                    finally:
                        # Release the figure once saved; otherwise every page
                        # of the report stays alive in pyplot.
                        plt.close(fig)
355
356
def launch_jobs(branches, git_path, wait_for_completion, debug):
    """
    Launch a job for every commit with missing results.

    branches maps a branch name to its cutoff commit and git_path is the
    location of the git repository used to list the commits.
    wait_for_completion and debug are forwarded to lava_submit.submit().
    """
    client = get_client()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)

        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, _ = get_benchmark_results(client, commit, workdir)
                if not b_results:
                    lava_submit.submit(
                        commit, wait_for_completion=wait_for_completion, debug=debug
                    )
373
374
def main():
    """
    Parse arguments and execute as needed.

    Return the exit status of the program: 1 when the repository path does
    not exist, 0 otherwise.
    """
    # Branch name -> cutoff commit (exclusive start of the benchmarked range).
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        # The flag disables waiting; the previous text described the opposite
        # and was missing a space between its two concatenated parts.
        help="Do not wait for the completion of each job sent. Waiting (the "
        "default) is useful for the ci. Otherwise we could end up spamming "
        "the lava instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )

    args = parser.parse_args()

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        launch_jobs(
            bt_branches, args.repo_path, not args.do_not_wait_on_completion, args.debug
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
432
433
def sanitize_dataset(dataset):
    """
    Split the dataset in two lists using the 1.5 * IQR rule [1]: the values
    inside the bounds and the outliers. Both lists keep the order of the
    dataset. This is useful to get a representative mean without outlier in
    it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    low = first_quartile - margin
    high = third_quartile + margin

    kept = []
    rejected = []
    for value in dataset:
        target = kept if low <= value <= high else rejected
        target.append(value)
    return kept, rejected
453
454
if __name__ == "__main__":
    # Use the value returned by main() as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
This page took 0.039001 seconds and 4 git commands to generate.