# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import json
import os
import tempfile
from operator import add
from statistics import mean

import matplotlib.pyplot as plt
import numpy
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter
from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError

# NOTE(review): the code below also uses `git` (git.Repo); confirm that its
# import is present, it is not part of the lines visible here.
# Benchmark flavours; each one has its own result prefix on the object store
# and its own set of graphs in the report.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket used for every object fetched from or removed on the object store.
DEFAULT_BUCKET = "lava"
42 "ec9a9794af488a9accce7708a8b0d8188b498789", # Does not build
43 "8c99128c640cbce71fb8a6caa15e4c672252b662", # Block on configure
44 "f3847c753f1b4f12353c38d97b0577d9993d19fb", # Does not build
45 "e0111295f17ddfcc33ec771a8deac505473a06ad", # Does not build
46 "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b", # Does not build
47 "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde", # Does not build
48 "ce67f5614a4db3b2de4d887eca52135b439b4937", # Does not build
49 "80aff5efc66679fd934cef433c0e698694748385", # Does not build
50 "f4f11e84942d36fcc8a597d226928bce2ccac4b3", # Does not build
51 "ae466a6e1b856d96cf5112a371b4df2b732503ec", # Does not build
def json_type(string):
    """
    Argparse type for JSON arguments.

    Decode `string` as JSON and return the decoded value.
    We expect a base dictionary: any other top-level JSON value is refused
    with argparse.ArgumentTypeError. A malformed document makes json.loads
    raise json.JSONDecodeError, which is a ValueError subclass.
    """
    passed_json = json.loads(string)
    if not isinstance(passed_json, dict):
        msg = "%r is not a dict" % string
        raise argparse.ArgumentTypeError(msg)
    # A `type=` callable has to hand the converted value back to argparse.
    return passed_json
def graph_get_color(branch):
    """
    Get the color matching the branch.

    Only "stable-1.5", "stable-2.0" and "master" have a color assigned; any
    other branch name raises KeyError.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    return color[branch]
def graph_get_title(branch, benchmark_type):
    """
    Get title for graph based on benchmark type.

    The title has the form "<branch> - <benchmark description>". A
    benchmark_type other than "dummy" or "text" raises KeyError.
    """
    descriptions = {"dummy": "Dummy output", "text": "Text output"}
    return "{} - {}".format(branch, descriptions[benchmark_type])
83 Return minio client configured.
86 "obj.internal.efficios.com", access_key
="jenkins", secret_key
="echo123456"
def get_file(client, prefix, file_name, workdir_name):
    """
    Return the path of the downloaded file.

    The object "<prefix>/<file_name>" of DEFAULT_BUCKET is written to
    <workdir_name>/<file_name>. Return None when the object is not present
    on the remote.
    """
    destination = os.path.join(workdir_name, file_name)
    object_name = "{}/{}".format(prefix, file_name)
    # NOTE(review): the failure path is reconstructed. get_benchmark_results
    # treats a falsy return as "no result for this commit" and NoSuchKey is
    # imported by this file; confirm this is the intended handling.
    try:
        client.fget_object(DEFAULT_BUCKET, object_name, destination)
    except NoSuchKey:
        return None
    return destination
105 def delete_file(client
, prefix
, file_name
):
107 Delete the file on remote.
109 object_name
= "{}/{}".format(prefix
, file_name
)
111 client
.remove_object(DEFAULT_BUCKET
, object_name
)
112 except ResponseError
as err
:
118 def get_git_log(bt_version
, cutoff
, repo_path
):
120 Return an ordered (older to newer) list of commits for the bt_version and
121 cutoff. WARNING: This changes the git repo HEAD.
123 repo
= git
.Repo(repo_path
)
126 "{}..origin/{}".format(cutoff
, bt_version
), "--pretty=format:%H", "--reverse"
def parse_result(result_path):
    """
    Parse the result file. Return a dataset of User time + System time.

    The file is read as a JSON document. Its "User time (seconds)" and
    "System time (seconds)" entries are added pairwise; the returned list is
    as long as the shorter of the two. A missing entry raises KeyError.
    """
    with open(result_path) as result:
        parsed_result = json.load(result)
    # The document is fully decoded at this point, no need to keep the file
    # open while the sums are computed.
    return list(
        map(
            add,
            parsed_result["User time (seconds)"],
            parsed_result["System time (seconds)"],
        )
    )
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark result from a certain commit across all benchmark type.

    Return a (results, benchmark_valid) tuple:
      - results maps each entry of BENCHMARK_TYPES to the dataset returned by
        parse_result, or is None as soon as one result file cannot be
        fetched.
      - benchmark_valid is False when at least one fetched dataset contains
        only 0.0 values.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            # Keep going: the remaining types are still fetched so that the
            # caller gets a complete results dictionary.
            benchmark_valid = False
            print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
    # Every benchmark type has a result file; validity is reported separately.
    return results, benchmark_valid
167 def plot_raw_value(branch
, benchmark_type
, x_data
, y_data
, labels
, latest_values
):
169 Plot the graph using the raw value.
175 for pos
in range(len(x_data
)):
177 valid_points
, outliers
= sanitize_dataset(y_data
[pos
])
178 for y
in valid_points
:
179 point_x_data
.append(x
)
180 point_y_data
.append(y
)
182 outlier_x_data
.append(x
)
183 outlier_y_data
.append(y
)
186 point_x_data
, point_y_data
, "o", label
=branch
, color
=graph_get_color(branch
)
188 plt
.plot(outlier_x_data
, outlier_y_data
, "+", label
="outlier", color
="black")
192 ymin
= 0.8 * min([item
for sublist
in y_data
for item
in sublist
])
193 ymax
= 1.2 * max([item
for sublist
in y_data
for item
in sublist
])
194 # Put latest of other branches for reference as horizontal line.
195 for l_branch
, l_result
in latest_values
.items():
196 if not l_result
or l_branch
== branch
:
200 label
="Latest {}".format(l_branch
),
201 color
=graph_get_color(l_branch
),
204 ymax
= 1.2 * l_result
206 plt
.ylim(ymin
=0, ymax
=ymax
)
207 plt
.xticks(x_data
, labels
, rotation
=90, family
="monospace")
208 plt
.title(graph_get_title(branch
, benchmark_type
), fontweight
="bold")
209 plt
.ylabel("User + system time (s)")
210 plt
.xlabel("Latest commits")
214 # Put tick on the right side
215 ax
.tick_params(labeltop
=False, labelright
=True)
221 def plot_delta_between_point(
222 branch
, benchmark_type
, x_data
, y_data
, labels
, latest_values
225 Plot the graph of delta between each sequential commit.
229 # Transform y_data to a list of for which the reference is the first
232 for pos
, y
in enumerate(y_data
):
234 local_y_data
.append(0.0)
236 local_y_data
.append(y
- y_data
[pos
- 1])
238 plt
.plot(x_data
, local_y_data
, "o", label
=branch
, color
=graph_get_color(branch
))
240 # Get max absolute value to align the y axis with zero in the middle.
242 local_abs_max
= abs(max(local_y_data
, key
=abs)) * 1.3
244 plt
.ylim(ymin
=local_abs_max
* -1, ymax
=local_abs_max
)
247 plt
.xticks(x_data
, labels
, rotation
=90, family
="monospace")
249 graph_get_title(branch
, benchmark_type
) + " Delta to previous commit",
252 plt
.ylabel("Seconds")
253 plt
.xlabel("Latest commits")
257 # Put tick on the right side
258 ax
.tick_params(labeltop
=False, labelright
=True)
264 def plot_ratio(branch
, benchmark_type
, x_data
, y_data
, labels
, latest_values
):
266 Plot the graph using a ratio using first point as reference (0%).
272 reference
= y_data
[0]
274 # Transform y_data to a list of ratio for which the reference is the first
276 local_y_data
= list(map(lambda y
: ((y
/ reference
) - 1.0) * 100, y_data
))
278 plt
.plot(x_data
, local_y_data
, "o", label
=branch
, color
=graph_get_color(branch
))
280 # Put latest of other branches for reference as horizontal line.
281 for l_branch
, l_result
in latest_values
.items():
282 if not l_result
or l_branch
== branch
:
284 ratio_l_result
= ((l_result
/ reference
) - 1.0) * 100.0
286 "branch {} branch {} value {} l_result {} reference {}".format(
287 branch
, l_branch
, ratio_l_result
, l_result
, reference
292 label
="Latest {}".format(l_branch
),
293 color
=graph_get_color(l_branch
),
296 # Draw the reference line.
297 plt
.axhline(y
=0, label
="Reference (leftmost point)", linestyle
="-", color
="Black")
299 # Get max absolute value to align the y axis with zero in the middle.
301 local_abs_max
= abs(max(local_y_data
, key
=abs)) * 1.3
303 y_abs_max
= local_abs_max
305 plt
.ylim(ymin
=y_abs_max
* -1, ymax
=y_abs_max
)
308 percent_formatter
= PercentFormatter()
309 ax
.yaxis
.set_major_formatter(percent_formatter
)
310 ax
.yaxis
.set_minor_formatter(percent_formatter
)
311 plt
.xticks(x_data
, labels
, rotation
=90, family
="monospace")
312 plt
.title(graph_get_title(branch
, benchmark_type
), fontweight
="bold")
314 plt
.xlabel("Latest commits")
318 # Put tick on the right side
319 ax
.tick_params(labeltop
=False, labelright
=True)
325 def generate_graph(branches
, report_name
, git_path
):
328 pdf_pages
= PdfPages(report_name
)
330 client
= get_client()
331 branch_results
= dict()
333 # Fetch the results for each branch.
334 for branch
, cutoff
in branches
.items():
335 commits
= get_git_log(branch
, cutoff
, git_path
)
337 with tempfile
.TemporaryDirectory() as workdir
:
338 for commit
in commits
:
339 b_results
, valid
= get_benchmark_results(client
, commit
, workdir
)
340 if not b_results
or not valid
:
342 results
.append((commit
, b_results
))
343 branch_results
[branch
] = results
345 for b_type
in BENCHMARK_TYPES
:
349 # Find the maximum size for a series inside our series dataset.
350 # This is used later to compute the size of the actual plot (pdf).
351 # While there gather the comparison value used to draw comparison line
353 for branch
, results
in branch_results
.items():
354 max_len
= max([max_len
, len(results
)])
356 latest_values
[branch
] = mean(
357 sanitize_dataset(results
[-1][1][b_type
])[0]
360 latest_values
[branch
] = None
362 for branch
, results
in branch_results
.items():
363 # Create a figure instance
364 if max_len
and max_len
> 10:
365 width
= 0.16 * max_len
369 x_data
= list(range(len(results
)))
370 y_data
= [c
[1][b_type
] for c
in results
]
371 labels
= [c
[0][:8] for c
in results
]
373 fig
= plt
.figure(figsize
=(width
, 8.27), dpi
=100)
374 plot_raw_value(branch
, b_type
, x_data
, y_data
, labels
, latest_values
)
375 pdf_pages
.savefig(fig
)
377 # Use the mean of each sanitize dataset here, we do not care for
378 # variance for ratio. At least not yet.
379 y_data
= [mean(sanitize_dataset(c
[1][b_type
])[0]) for c
in results
]
380 fig
= plt
.figure(figsize
=(width
, 8.27), dpi
=100)
381 plot_ratio(branch
, b_type
, x_data
, y_data
, labels
, latest_values
)
382 pdf_pages
.savefig(fig
)
384 fig
= plt
.figure(figsize
=(width
, 8.27), dpi
=100)
385 plot_delta_between_point(
386 branch
, b_type
, x_data
, y_data
, labels
, latest_values
388 pdf_pages
.savefig(fig
)
393 def launch_jobs(branches
, git_path
, wait_for_completion
, debug
, force
):
395 Lauch jobs for all missing results.
397 client
= get_client()
398 commits_to_test
= set()
399 for branch
, cutoff
in branches
.items():
400 commits
= [x
for x
in get_git_log(branch
, cutoff
, git_path
) if x
not in invalid_commits
]
401 with tempfile
.TemporaryDirectory() as workdir
:
402 for commit
in commits
:
403 b_results
= get_benchmark_results(client
, commit
, workdir
)[0]
404 if b_results
and not force
:
406 commits_to_test
.add(commit
)
407 for index
, commit
in enumerate(commits_to_test
):
408 print("Job {}/{}".format(index
+1, len(commits_to_test
)))
410 commit
, wait_for_completion
=wait_for_completion
, debug
=debug
416 Parse arguments and execute as needed.
419 "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
420 "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
421 "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
424 parser
= argparse
.ArgumentParser(description
="Babeltrace benchmark utility")
426 "--generate-jobs", action
="store_true", help="Generate and send jobs"
429 "--force-jobs", action
="store_true", help="Force the queueing of jobs to lava"
432 "--do-not-wait-on-completion",
435 help="Wait for the completion of each jobs sent. This is useful"
436 "for the ci. Otherwise we could end up spaming the lava instance.",
441 help="Generate graphs and save them to pdf",
444 "--report-name", default
="report.pdf", help="The name of the pdf report."
447 "--debug", action
="store_true", default
=False, help="Do not send jobs to lava."
450 "--repo-path", help="The location of the git repo to use.", required
=True
453 "--overwrite-branches-cutoff",
454 help="A dictionary of the form {"
455 "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and"
457 required
=False, type=json_type
460 args
= parser
.parse_args()
462 if args
.overwrite_branches_cutoff
:
463 bt_branches
= args
.overwrite_branches_cutoff
465 if not os
.path
.exists(args
.repo_path
):
466 print("Repository location does not exists.")
469 if args
.generate_jobs
:
470 print("Launching jobs for:")
472 for branch
, cutoff
in bt_branches
.items():
473 print("\t Branch {} with cutoff {}".format(branch
, cutoff
))
478 not args
.do_not_wait_on_completion
,
483 if args
.generate_report
:
484 print("Generating pdf report ({}) for:".format(args
.report_name
))
485 for branch
, cutoff
in bt_branches
.items():
486 print("\t Branch {} with cutoff {}".format(branch
, cutoff
))
487 generate_graph(bt_branches
, args
.report_name
, args
.repo_path
)
def sanitize_dataset(dataset):
    """
    Use 1.5 x IQR [1] to remove outlier from the dataset. This is useful to
    get a representative mean without outlier in it.

    Return a (new_dataset, outliers) tuple of lists. A value is kept in
    new_dataset when it lies within [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] and is
    put in outliers otherwise. Both lists keep the order of the input. An
    empty dataset yields two empty lists.

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    # numpy.percentile has no meaningful answer for an empty input.
    if len(dataset) == 0:
        return [], []

    # numpy.percentile does not require a sorted input.
    q1, q3 = numpy.percentile(dataset, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)

    new_dataset = []
    outliers = []
    for i in dataset:
        if lower_bound <= i <= upper_bound:
            new_dataset.append(i)
        else:
            outliers.append(i)
    return new_dataset, outliers
513 if __name__
== "__main__":