Commit | Line | Data |
---|---|---|
5c65bbc2 JR |
1 | #!/usr/bin/python3 |
2 | # Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com> | |
3 | # | |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | import json | |
18 | import os | |
19 | import tempfile | |
20 | from statistics import mean | |
21 | import argparse | |
22 | import sys | |
23 | from operator import add | |
24 | ||
25 | import matplotlib.pyplot as plt | |
26 | from matplotlib.backends.backend_pdf import PdfPages | |
27 | from matplotlib.ticker import PercentFormatter | |
28 | ||
29 | import git | |
30 | import numpy | |
31 | import lava_submit | |
32 | ||
33 | from minio import Minio | |
34 | from minio.error import NoSuchKey | |
35 | from minio.error import ResponseError | |
36 | ||
37 | ||
# Benchmark flavours; one result file per commit is looked up for each of them.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket of the object store in which the result files are kept.
DEFAULT_BUCKET = "lava"

# Commits for which no benchmark job is ever submitted (see launch_jobs).
# Each hash is listed exactly once.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789",  # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662",  # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",  # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad",  # Does not build
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b",  # Does not build
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde",  # Does not build
    "ce67f5614a4db3b2de4d887eca52135b439b4937",  # Does not build
    "80aff5efc66679fd934cef433c0e698694748385",  # Does not build
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3",  # Does not build
    "ae466a6e1b856d96cf5112a371b4df2b732503ec",  # Does not build
    "ade5c95e2a4f90f839f222fc1a66175b3b199922",  # Configuration fails
    "30341532906d62808e9d66fb115f5edb4e6f5706",  # Configuration fails
    "006c5ffb42f32e802136e3c27a63accb59b4d6c4",  # Does not build
    "88488ff5bdcd7679ff1f04fe6cff0d24b4f8fc0c",  # Does not build
    # Other errors
    "7c7301d5827bd10ec7c34da7ffc5fe74e5047d38",
    "a0df3abf88616cb0799f87f4eb57c54268e63448",
    "b7045dd71bc0524ad6b5db96df365e98e237d395",
    "cf7b259eaa602abcef308d2b5dd8e6c9ee995d8b",
    "90a55a4ef47cac7b568f5f0a8a78bd760f82d23c",
    "baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
    "af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
    "fe748379adbd385efdfc7acae9c2340fb8b7d717",
    "929627965e33e06dc77254d81e8ec1d66cc06590",
    "48a0e52c4632a60cd43423f2f34f10de350bf868",
    "b7fa35fce415b33207a9eba111069ed31ef122a0",
    "828c8a25785e0cedaeb6987256a4dfc3c43b982f",
    "213489680861e4d796173513effac7023312ec2d",
    "430a5ccbbd15782501ca56bb148f3850126277ad",
    "629d19044c43b195498d0a4e002906c54b6186d5",
    "c423217ed1640b4152739f7e5613775d46c25050",
    # Elfutils
    "776a2a252c9875caa1e8b4f41cb8cc12c79611c3",
    "435aa29aff0527d36aafa1b657ae70b9db5f9ea5",
    "95651695473495501fc6b2c4a1cf6a78cfb3cd6a",
    "e0748fb2ba8994c136bcc0b67d3044f09841cf8e",
    "9e632b22e1310fe773edc32ab08a60602f4b2861",
    "271fb6907a6f4705a1c799d925394243eae51d68",
    "328342cd737582216dc7b8b7d558b2a1bf8ea5e8",
    "ae5c1a4481be68fae027910b141354c1d86daa64",
    "e6938018975e45d35dab5fef795fe7344eef7d62",
    "e015bae2ef343b30c890eebb9182a8be13d12ed0",
    "5e8a0751ae0c418a615025d1da10bc84f91b3d97",
    "887d26fa0fd0ae0c5c15e4b885473c4cdc0bf078",
    "e97fe75eac59fc39a6e4f3c4f9f3301835a0315e",
    "8b130e7f1d6a41fb5c64a014c15246ba74b79470",
    "f4f8f79893b18199b38edc3330093a9403c4c737",
}
5c65bbc2 | 93 | |
cf595cda JR |
def json_type(string):
    """
    Argparse ``type=`` callback that decodes a JSON command-line value.

    The decoded document must be a JSON object (a Python dict); any other
    top-level JSON value is rejected with ``argparse.ArgumentTypeError``.
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)
104 | ||
5c65bbc2 JR |
def graph_get_color(branch):
    """
    Return the plot color assigned to ``branch``.

    Raises KeyError for a branch without an assigned color.
    """
    palette = {
        "master": "blue",
        "stable-2.0": "green",
        "stable-1.5": "red",
    }
    return palette[branch]
111 | ||
112 | ||
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title: "<branch> - <benchmark type description>".

    Raises KeyError for an unknown benchmark type.
    """
    descriptions = {
        "text": "Text output",
        "dummy": "Dummy output",
    }
    description = descriptions[benchmark_type]
    return "{} - {}".format(branch, description)
119 | ||
120 | ||
def get_client():
    """
    Build and return the Minio client used to reach the result store.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider moving them to the environment or a config file.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)
128 | ||
129 | ||
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "<prefix>/<file_name>" of the default bucket into
    ``workdir_name``.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path
143 | ||
144 | ||
def delete_file(client, prefix, file_name):
    """
    Remove the object "<prefix>/<file_name>" from the default bucket.

    A ResponseError is printed and otherwise ignored; a missing object is
    silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete on the remote.
        return
156 | ||
157 | ||
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return the list of commit hashes, ordered from older to newer, that are
    reachable from ``origin/<bt_version>`` but not from ``cutoff``.

    Side effect: a ``git fetch`` is run in the repository at ``repo_path``
    before the log is computed.

    Returns an empty list when the range contains no commit.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # str.split("\n") on an empty output returns [""], i.e. a single bogus
    # empty commit id; splitlines() returns [] in that case.
    return log_output.splitlines()
168 | ||
169 | ||
def parse_result(result_path):
    """
    Load the JSON result file at ``result_path`` and return, run by run, the
    sum of its "User time (seconds)" and "System time (seconds)" series.
    """
    with open(result_path) as result_file:
        measurements = json.load(result_file)
    user_times = measurements["User time (seconds)"]
    system_times = measurements["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
183 | ||
184 | ||
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of ``commit`` for every benchmark type.

    Result files are downloaded into ``workdir``.

    Returns a ``(results, valid)`` tuple:
      - ``results`` maps each benchmark type to its list of
        "user + system" times, or is None as soon as one result file is
        missing on the remote;
      - ``valid`` is False when at least one of the datasets fetched so far
        only holds 0.0 values.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # The prefix already ends with the benchmark type: do not print
            # it a second time.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    return results, benchmark_valid
205 | ||
206 | ||
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    branch: branch being plotted (selects the color and the title).
    benchmark_type: benchmark type, used for the title.
    x_data: x positions, one per commit.
    y_data: one list of samples per x position.
    labels: x tick labels, one per x position.
    latest_values: maps a branch name to its latest value (or None); the
        values of the other branches are drawn as horizontal lines.

    The plot is drawn on the current pyplot figure.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave 20% of head room above the highest sample (outliers included).
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line is inside the plotted range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    # "bottom"/"top" are the supported names of the y limits; the
    # "ymin"/"ymax" keyword aliases are rejected by recent matplotlib.
    plt.ylim(bottom=0, top=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
259 | ||
09de7b53 JR |
260 | |
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot the graph of delta between each sequential commit.

    branch: branch being plotted (selects the color and the title).
    benchmark_type: benchmark type, used for the title.
    x_data: x positions, one per commit.
    y_data: one value per x position.
    labels: x tick labels, one per x position.
    latest_values: not used by this plot.

    The plot is drawn on the current pyplot figure.
    """
    local_abs_max = 100

    # Transform y_data to a list of differences with the previous element.
    # The first element has no predecessor: its delta is 0.
    local_y_data = [0.0] if y_data else []
    local_y_data.extend(
        current - previous for previous, current in zip(y_data, y_data[1:])
    )

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3

    # "bottom"/"top" are the supported names of the y limits; the
    # "ymin"/"ymax" keyword aliases are rejected by recent matplotlib.
    plt.ylim(bottom=-local_abs_max, top=local_abs_max)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(
        graph_get_title(branch, benchmark_type) + " Delta to previous commit",
        fontweight="bold",
    )
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
5c65bbc2 | 302 | |
09de7b53 | 303 | |
5c65bbc2 JR |
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    branch: branch being plotted (selects the color and the title).
    benchmark_type: benchmark type, used for the title.
    x_data: x positions, one per commit.
    y_data: one value per x position; y_data[0] is the reference.
    labels: x tick labels, one per x position.
    latest_values: maps a branch name to its latest value (or None); the
        values of the other branches are drawn as horizontal lines, also
        expressed as a ratio to the reference.

    The plot is drawn on the current pyplot figure.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The axis spans at least +/-100% and grows when a point, with a 30%
    # margin, does not fit in that range.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    # "bottom"/"top" are the supported names of the y limits; the
    # "ymin"/"ymax" keyword aliases are rejected by recent matplotlib.
    plt.ylim(bottom=-y_abs_max, top=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
363 | ||
09de7b53 | 364 | |
5c65bbc2 JR |
def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report ``report_name``.

    branches: maps a branch name to its cutoff commit.
    report_name: path of the pdf file to write.
    git_path: path of the git repository used to list the commits.

    For each benchmark type and each branch, three pages are produced: the
    raw values, the ratio to the first commit and the delta between
    sequential commits. Commits without a complete and valid set of results
    are left out.
    """
    # The PDF document. The context manager closes it even when fetching or
    # plotting fails.
    with PdfPages(report_name) as pdf_pages:
        client = get_client()
        branch_results = {}

        # Fetch the results for each branch.
        for branch, cutoff in branches.items():
            commits = get_git_log(branch, cutoff, git_path)
            results = []
            with tempfile.TemporaryDirectory() as workdir:
                for commit in commits:
                    b_results, valid = get_benchmark_results(client, commit, workdir)
                    if not b_results or not valid:
                        continue
                    results.append((commit, b_results))
            branch_results[branch] = results

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Every page of a benchmark type shares the same size: 0.16 inch
            # per commit once there are more than 10 of them, 11.69 inches
            # otherwise.
            if max_len > 10:
                width = 0.16 * max_len
            else:
                width = 11.69
            figsize = (width, 8.27)

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                y_data = [c[1][b_type] for c in results]
                labels = [c[0][:8] for c in results]

                fig = plt.figure(figsize=figsize, dpi=100)
                plot_raw_value(branch, b_type, x_data, y_data, labels, latest_values)
                pdf_pages.savefig(fig)
                # Release the figure once saved, otherwise every page stays
                # alive in pyplot until the end of the process.
                plt.close(fig)

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                y_data = [mean(sanitize_dataset(c[1][b_type])[0]) for c in results]
                fig = plt.figure(figsize=figsize, dpi=100)
                plot_ratio(branch, b_type, x_data, y_data, labels, latest_values)
                pdf_pages.savefig(fig)
                plt.close(fig)

                fig = plt.figure(figsize=figsize, dpi=100)
                plot_delta_between_point(
                    branch, b_type, x_data, y_data, labels, latest_values
                )
                pdf_pages.savefig(fig)
                plt.close(fig)
431 | ||
432 | ||
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    branches: maps a branch name to its cutoff commit.
    git_path: path of the git repository used to list the commits.
    wait_for_completion: forwarded to lava_submit.submit().
    debug: forwarded to lava_submit.submit().
    force: when true, also submit the commits that already have results.

    Commits listed in ``invalid_commits`` are never submitted. A commit
    shared by several branches is submitted once.
    """
    client = get_client()
    # A dict is used as an insertion-ordered set (Python 3.7+), so that jobs
    # are submitted in the order in which commits were discovered.
    commits_to_test = {}
    for branch, cutoff in branches.items():
        commits = [
            commit
            for commit in get_git_log(branch, cutoff, git_path)
            # An empty git range can produce an empty commit id: skip it
            # instead of submitting a job for it.
            if commit and commit not in invalid_commits
        ]
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results = get_benchmark_results(client, commit, workdir)[0]
                if b_results and not force:
                    continue
                commits_to_test[commit] = None

    total = len(commits_to_test)
    for index, commit in enumerate(commits_to_test, start=1):
        print("Job {}/{}".format(index, total))
        lava_submit.submit(
            commit, wait_for_completion=wait_for_completion, debug=debug
        )
5c65bbc2 JR |
452 | |
453 | ||
def main():
    """
    Parse arguments and execute as needed.

    Returns 0 on success and 1 when the repository path does not exist; the
    value is meant to be used as the process exit status.
    """
    # Default branches and their cutoff commit; can be replaced as a whole
    # with --overwrite-branches-cutoff.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting (the "
        "default) is useful for the ci, otherwise we could end up spamming "
        "the lava instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        # The value is decoded with json.loads(): keys and values must use
        # double quotes.
        help='A JSON object of the form {"branch_name": "commit_hash_cutoff", '
        "...}. Allow custom graphing and jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
530 | ||
531 | ||
def sanitize_dataset(dataset):
    """
    Split ``dataset`` into regular values and outliers using the 1.5 x IQR
    rule [1]. This is useful to get a representative mean without outlier in
    it.

    Returns a ``(values, outliers)`` tuple of lists; both keep the order of
    ``dataset``.

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    spread = third_quartile - first_quartile
    low = first_quartile - (1.5 * spread)
    high = third_quartile + (1.5 * spread)

    kept = []
    rejected = []
    for value in dataset:
        bucket = kept if low <= value <= high else rejected
        bucket.append(value)
    return kept, rejected
551 | ||
552 | ||
if __name__ == "__main__":
    # Use the value returned by main() as the process exit status.
    exit_status = main()
    sys.exit(exit_status)