# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import argparse
import json
import os
import tempfile
from operator import add
from statistics import mean

import matplotlib.pyplot as plt
import numpy
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import PercentFormatter
from minio import Minio
from minio.error import NoSuchKey
from minio.error import ResponseError


# Benchmark flavours; results are fetched once per type for every commit.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket on the object store that holds the benchmark results.
DEFAULT_BUCKET = "lava"

# Commits that are filtered out before jobs are launched. Kept as a set since
# it is only used for membership tests; entries that were listed twice are
# kept once.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789",  # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662",  # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",  # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad",  # Does not build
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b",  # Does not build
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde",  # Does not build
    "ce67f5614a4db3b2de4d887eca52135b439b4937",  # Does not build
    "80aff5efc66679fd934cef433c0e698694748385",  # Does not build
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3",  # Does not build
    "ae466a6e1b856d96cf5112a371b4df2b732503ec",  # Does not build
    "ade5c95e2a4f90f839f222fc1a66175b3b199922",  # Configuration fails
    "30341532906d62808e9d66fb115f5edb4e6f5706",  # Configuration fails
    "006c5ffb42f32e802136e3c27a63accb59b4d6c4",  # Does not build
    "88488ff5bdcd7679ff1f04fe6cff0d24b4f8fc0c",  # Does not build
    # No failure reason is recorded next to the entries below.
    "7c7301d5827bd10ec7c34da7ffc5fe74e5047d38",
    "a0df3abf88616cb0799f87f4eb57c54268e63448",
    "b7045dd71bc0524ad6b5db96df365e98e237d395",
    "cf7b259eaa602abcef308d2b5dd8e6c9ee995d8b",
    "90a55a4ef47cac7b568f5f0a8a78bd760f82d23c",
    "baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
    "af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
    "fe748379adbd385efdfc7acae9c2340fb8b7d717",
    "929627965e33e06dc77254d81e8ec1d66cc06590",
    "48a0e52c4632a60cd43423f2f34f10de350bf868",
    "b7fa35fce415b33207a9eba111069ed31ef122a0",
    "828c8a25785e0cedaeb6987256a4dfc3c43b982f",
    "213489680861e4d796173513effac7023312ec2d",
    "430a5ccbbd15782501ca56bb148f3850126277ad",
    "629d19044c43b195498d0a4e002906c54b6186d5",
    "c423217ed1640b4152739f7e5613775d46c25050",
    "776a2a252c9875caa1e8b4f41cb8cc12c79611c3",
    "435aa29aff0527d36aafa1b657ae70b9db5f9ea5",
    "95651695473495501fc6b2c4a1cf6a78cfb3cd6a",
    "e0748fb2ba8994c136bcc0b67d3044f09841cf8e",
    "9e632b22e1310fe773edc32ab08a60602f4b2861",
    "271fb6907a6f4705a1c799d925394243eae51d68",
    "328342cd737582216dc7b8b7d558b2a1bf8ea5e8",
    "ae5c1a4481be68fae027910b141354c1d86daa64",
    "e6938018975e45d35dab5fef795fe7344eef7d62",
    "e015bae2ef343b30c890eebb9182a8be13d12ed0",
    "5e8a0751ae0c418a615025d1da10bc84f91b3d97",
    "887d26fa0fd0ae0c5c15e4b885473c4cdc0bf078",
    "e97fe75eac59fc39a6e4f3c4f9f3301835a0315e",
    "8b130e7f1d6a41fb5c64a014c15246ba74b79470",
    "f4f8f79893b18199b38edc3330093a9403c4c737",
}


def json_type(string):
    """
    Argparse type for json args.
    We expect a base dictionary.

    Return the decoded dictionary. Raise argparse.ArgumentTypeError when the
    decoded value is not a dict. A malformed document is reported by
    json.loads() itself.
    """
    passed_json = json.loads(string)
    if not isinstance(passed_json, dict):
        msg = "%r is not a dict" % string
        raise argparse.ArgumentTypeError(msg)
    # argparse stores whatever the type callable returns, so hand back the
    # decoded dictionary.
    return passed_json


def graph_get_color(branch):
    """
    Get the color matching the branch.

    Only "stable-1.5", "stable-2.0" and "master" are known; any other branch
    name raises KeyError.
    """
    color = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    return color[branch]


def graph_get_title(branch, benchmark_type):
    """
    Get title for graph based on benchmark type.

    The title has the form "<branch> - <benchmark description>". An unknown
    benchmark type raises KeyError.
    """
    string = {"dummy": "Dummy output", "text": "Text output"}
    return "{} - {}".format(branch, string[benchmark_type])


def get_client():
    """
    Return minio client configured.
    """
    # NOTE(review): the endpoint and credentials are hard-coded in the source;
    # consider moving them to configuration or the environment.
    return Minio(
        "obj.internal.efficios.com", access_key="jenkins", secret_key="echo123456"
    )


def get_file(client, prefix, file_name, workdir_name):
    """
    Return the path of the downloaded file.
    Return None when the object does not exist on the remote.

    The object "<prefix>/<file_name>" is fetched from DEFAULT_BUCKET and
    written to "<workdir_name>/<file_name>".
    """
    destination = os.path.join(workdir_name, file_name)
    object_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, object_name, destination)
    except NoSuchKey:
        # A missing object is an expected outcome (no result stored yet), so
        # report it to the caller instead of failing.
        return None
    return destination


def delete_file(client, prefix, file_name):
    """
    Delete the file on remote.

    The object "<prefix>/<file_name>" is removed from DEFAULT_BUCKET. A
    ResponseError from the object store is printed and not propagated.
    """
    object_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, object_name)
    except ResponseError as err:
        # Best-effort cleanup: surface the error but keep going.
        print(err)


158 def get_git_log(bt_version
, cutoff
, repo_path
):
160 Return an ordered (older to newer) list of commits for the bt_version and
161 cutoff. WARNING: This changes the git repo HEAD.
163 repo
= git
.Repo(repo_path
)
166 "{}..origin/{}".format(cutoff
, bt_version
), "--pretty=format:%H", "--reverse"
def parse_result(result_path):
    """
    Parse the result file. Return a dataset of User time + System time.

    The file is a JSON document holding two lists, "User time (seconds)" and
    "System time (seconds)". The returned list is their element-wise sum; when
    the lists differ in length the extra entries of the longer one are
    ignored.
    """
    with open(result_path) as result:
        parsed_result = json.load(result)
    user_times = parsed_result["User time (seconds)"]
    system_times = parsed_result["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]


def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark result from a certain commit across all benchmark type.

    Return a (results, benchmark_valid) tuple. results maps each benchmark
    type to its parsed dataset, or is None as soon as the result file of one
    type cannot be fetched. benchmark_valid is False when the dataset of at
    least one type contains only 0.0 values.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
    # Every benchmark type was fetched; validity is reported alongside.
    return results, benchmark_valid


207 def plot_raw_value(branch
, benchmark_type
, x_data
, y_data
, labels
, latest_values
):
209 Plot the graph using the raw value.
215 for pos
in range(len(x_data
)):
217 valid_points
, outliers
= sanitize_dataset(y_data
[pos
])
218 for y
in valid_points
:
219 point_x_data
.append(x
)
220 point_y_data
.append(y
)
222 outlier_x_data
.append(x
)
223 outlier_y_data
.append(y
)
226 point_x_data
, point_y_data
, "o", label
=branch
, color
=graph_get_color(branch
)
228 plt
.plot(outlier_x_data
, outlier_y_data
, "+", label
="outlier", color
="black")
232 ymin
= 0.8 * min([item
for sublist
in y_data
for item
in sublist
])
233 ymax
= 1.2 * max([item
for sublist
in y_data
for item
in sublist
])
234 # Put latest of other branches for reference as horizontal line.
235 for l_branch
, l_result
in latest_values
.items():
236 if not l_result
or l_branch
== branch
:
240 label
="Latest {}".format(l_branch
),
241 color
=graph_get_color(l_branch
),
244 ymax
= 1.2 * l_result
246 plt
.ylim(ymin
=0, ymax
=ymax
)
247 plt
.xticks(x_data
, labels
, rotation
=90, family
="monospace")
248 plt
.title(graph_get_title(branch
, benchmark_type
), fontweight
="bold")
249 plt
.ylabel("User + system time (s)")
250 plt
.xlabel("Latest commits")
254 # Put tick on the right side
255 ax
.tick_params(labeltop
=False, labelright
=True)
261 def plot_delta_between_point(
262 branch
, benchmark_type
, x_data
, y_data
, labels
, latest_values
265 Plot the graph of delta between each sequential commit.
269 # Transform y_data to a list of for which the reference is the first
272 for pos
, y
in enumerate(y_data
):
274 local_y_data
.append(0.0)
276 local_y_data
.append(y
- y_data
[pos
- 1])
278 plt
.plot(x_data
, local_y_data
, "o", label
=branch
, color
=graph_get_color(branch
))
280 # Get max absolute value to align the y axis with zero in the middle.
282 local_abs_max
= abs(max(local_y_data
, key
=abs)) * 1.3
284 plt
.ylim(ymin
=local_abs_max
* -1, ymax
=local_abs_max
)
287 plt
.xticks(x_data
, labels
, rotation
=90, family
="monospace")
289 graph_get_title(branch
, benchmark_type
) + " Delta to previous commit",
292 plt
.ylabel("Seconds")
293 plt
.xlabel("Latest commits")
297 # Put tick on the right side
298 ax
.tick_params(labeltop
=False, labelright
=True)
304 def plot_ratio(branch
, benchmark_type
, x_data
, y_data
, labels
, latest_values
):
306 Plot the graph using a ratio using first point as reference (0%).
312 reference
= y_data
[0]
314 # Transform y_data to a list of ratio for which the reference is the first
316 local_y_data
= list(map(lambda y
: ((y
/ reference
) - 1.0) * 100, y_data
))
318 plt
.plot(x_data
, local_y_data
, "o", label
=branch
, color
=graph_get_color(branch
))
320 # Put latest of other branches for reference as horizontal line.
321 for l_branch
, l_result
in latest_values
.items():
322 if not l_result
or l_branch
== branch
:
324 ratio_l_result
= ((l_result
/ reference
) - 1.0) * 100.0
326 "branch {} branch {} value {} l_result {} reference {}".format(
327 branch
, l_branch
, ratio_l_result
, l_result
, reference
332 label
="Latest {}".format(l_branch
),
333 color
=graph_get_color(l_branch
),
336 # Draw the reference line.
337 plt
.axhline(y
=0, label
="Reference (leftmost point)", linestyle
="-", color
="Black")
339 # Get max absolute value to align the y axis with zero in the middle.
341 local_abs_max
= abs(max(local_y_data
, key
=abs)) * 1.3
343 y_abs_max
= local_abs_max
345 plt
.ylim(ymin
=y_abs_max
* -1, ymax
=y_abs_max
)
348 percent_formatter
= PercentFormatter()
349 ax
.yaxis
.set_major_formatter(percent_formatter
)
350 ax
.yaxis
.set_minor_formatter(percent_formatter
)
351 plt
.xticks(x_data
, labels
, rotation
=90, family
="monospace")
352 plt
.title(graph_get_title(branch
, benchmark_type
), fontweight
="bold")
354 plt
.xlabel("Latest commits")
358 # Put tick on the right side
359 ax
.tick_params(labeltop
=False, labelright
=True)
365 def generate_graph(branches
, report_name
, git_path
):
368 pdf_pages
= PdfPages(report_name
)
370 client
= get_client()
371 branch_results
= dict()
373 # Fetch the results for each branch.
374 for branch
, cutoff
in branches
.items():
375 commits
= get_git_log(branch
, cutoff
, git_path
)
377 with tempfile
.TemporaryDirectory() as workdir
:
378 for commit
in commits
:
379 b_results
, valid
= get_benchmark_results(client
, commit
, workdir
)
380 if not b_results
or not valid
:
382 results
.append((commit
, b_results
))
383 branch_results
[branch
] = results
385 for b_type
in BENCHMARK_TYPES
:
389 # Find the maximum size for a series inside our series dataset.
390 # This is used later to compute the size of the actual plot (pdf).
391 # While there gather the comparison value used to draw comparison line
393 for branch
, results
in branch_results
.items():
394 max_len
= max([max_len
, len(results
)])
396 latest_values
[branch
] = mean(
397 sanitize_dataset(results
[-1][1][b_type
])[0]
400 latest_values
[branch
] = None
402 for branch
, results
in branch_results
.items():
403 # Create a figure instance
404 if max_len
and max_len
> 10:
405 width
= 0.16 * max_len
409 x_data
= list(range(len(results
)))
410 y_data
= [c
[1][b_type
] for c
in results
]
411 labels
= [c
[0][:8] for c
in results
]
413 fig
= plt
.figure(figsize
=(width
, 8.27), dpi
=100)
414 plot_raw_value(branch
, b_type
, x_data
, y_data
, labels
, latest_values
)
415 pdf_pages
.savefig(fig
)
417 # Use the mean of each sanitize dataset here, we do not care for
418 # variance for ratio. At least not yet.
419 y_data
= [mean(sanitize_dataset(c
[1][b_type
])[0]) for c
in results
]
420 fig
= plt
.figure(figsize
=(width
, 8.27), dpi
=100)
421 plot_ratio(branch
, b_type
, x_data
, y_data
, labels
, latest_values
)
422 pdf_pages
.savefig(fig
)
424 fig
= plt
.figure(figsize
=(width
, 8.27), dpi
=100)
425 plot_delta_between_point(
426 branch
, b_type
, x_data
, y_data
, labels
, latest_values
428 pdf_pages
.savefig(fig
)
433 def launch_jobs(branches
, git_path
, wait_for_completion
, debug
, force
):
435 Lauch jobs for all missing results.
437 client
= get_client()
438 commits_to_test
= set()
439 for branch
, cutoff
in branches
.items():
440 commits
= [x
for x
in get_git_log(branch
, cutoff
, git_path
) if x
not in invalid_commits
]
441 with tempfile
.TemporaryDirectory() as workdir
:
442 for commit
in commits
:
443 b_results
= get_benchmark_results(client
, commit
, workdir
)[0]
444 if b_results
and not force
:
446 commits_to_test
.add(commit
)
447 for index
, commit
in enumerate(commits_to_test
):
448 print("Job {}/{}".format(index
+1, len(commits_to_test
)))
450 commit
, wait_for_completion
=wait_for_completion
, debug
=debug
456 Parse arguments and execute as needed.
459 "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
460 "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
461 "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
464 parser
= argparse
.ArgumentParser(description
="Babeltrace benchmark utility")
466 "--generate-jobs", action
="store_true", help="Generate and send jobs"
469 "--force-jobs", action
="store_true", help="Force the queueing of jobs to lava"
472 "--do-not-wait-on-completion",
475 help="Wait for the completion of each jobs sent. This is useful"
476 "for the ci. Otherwise we could end up spaming the lava instance.",
481 help="Generate graphs and save them to pdf",
484 "--report-name", default
="report.pdf", help="The name of the pdf report."
487 "--debug", action
="store_true", default
=False, help="Do not send jobs to lava."
490 "--repo-path", help="The location of the git repo to use.", required
=True
493 "--overwrite-branches-cutoff",
494 help="A dictionary of the form {"
495 "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and"
497 required
=False, type=json_type
500 args
= parser
.parse_args()
502 if args
.overwrite_branches_cutoff
:
503 bt_branches
= args
.overwrite_branches_cutoff
505 if not os
.path
.exists(args
.repo_path
):
506 print("Repository location does not exists.")
509 if args
.generate_jobs
:
510 print("Launching jobs for:")
512 for branch
, cutoff
in bt_branches
.items():
513 print("\t Branch {} with cutoff {}".format(branch
, cutoff
))
518 not args
.do_not_wait_on_completion
,
523 if args
.generate_report
:
524 print("Generating pdf report ({}) for:".format(args
.report_name
))
525 for branch
, cutoff
in bt_branches
.items():
526 print("\t Branch {} with cutoff {}".format(branch
, cutoff
))
527 generate_graph(bt_branches
, args
.report_name
, args
.repo_path
)
def sanitize_dataset(dataset):
    """
    Use IQR 1.5 [1] to remove outlier from the dataset. This is useful to get a
    representative mean without outlier in it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers

    Return a (new_dataset, outliers) tuple of lists, both in ascending order.
    Values inside [q1 - 1.5 * iqr, q3 + 1.5 * iqr] go to new_dataset, the
    others to outliers. An empty dataset yields two empty lists.
    """
    sorted_data = sorted(dataset)
    if not sorted_data:
        # Percentiles are undefined for an empty dataset.
        return [], []

    q1, q3 = numpy.percentile(sorted_data, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)

    new_dataset = []
    outliers = []
    for i in sorted_data:
        if lower_bound <= i <= upper_bound:
            new_dataset.append(i)
        else:
            outliers.append(i)
    return new_dataset, outliers


553 if __name__
== "__main__":