| 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Copyright (C) 2017 Francis Deslauriers <francis.deslauriers@efficios.com> |
| 4 | # |
| 5 | # SPDX-License-Identifier: LGPL-2.1-only |
| 6 | # |
| 7 | |
| 8 | import sys |
| 9 | import bisect |
| 10 | import subprocess |
| 11 | import re |
| 12 | |
def addr2line(executable, addr):
    """
    Uses binutils' addr2line to get the function containing a given address.

    Runs `addr2line -e EXECUTABLE --functions --addresses ADDR` and parses
    its standard output line by line.

    Parameters:
        executable: path of the binary the address belongs to.
        addr: address to resolve, as a string (e.g. '0x4005d0').

    Returns:
        A list holding the name of the function reported for `addr`. The
        list is empty when addr2line prints nothing or reports the function
        as unresolved ('??').

    Raises:
        subprocess.CalledProcessError: if addr2line exits with a non-zero
            status (the command is run with check=True).
    """
    cmd = [
        'addr2line',
        '-e', executable,
        # Print function names
        '--functions',
        # Print the address before each function name
        '--addresses', addr,
    ]

    status = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)

    output_lines = status.stdout.decode('utf-8').splitlines()

    # With the options above, addr2line prints one record of three lines
    # per address:
    #   - address
    #   - function name
    #   - source location
    # The function name is picked by line position rather than by
    # whitespace-separated token, because the source location may itself
    # contain spaces (e.g. "file.c:12 (discriminator 3)").
    function_names = output_lines[1::3]

    # Filter out unresolved functions
    return [name for name in function_names if '??' not in name]
| 37 | |
def extract_user_func_names(executable, raw_callstack):
    """
    Resolve the addresses of a userspace callstack to function names.

    `raw_callstack` is the callstack text taken from the Babeltrace CLI
    output. Every space-separated token starting with '0x' (commas are
    dropped first) is handed to addr2line() together with `executable`.
    This assumes that the binary has not changed since the execution.

    Returns the set of all function names reported for those addresses.
    """
    tokens = raw_callstack.replace(',', '').split(' ')

    function_names = set()
    for token in tokens:
        # Anything not starting with '0x' is not a recorded address
        if not token.startswith('0x'):
            continue
        function_names.update(addr2line(executable, token))

    return function_names
| 55 | |
def extract_kernel_func_names(raw_callstack, kallsyms_path='/proc/kallsyms'):
    """
    Given a callstack from the Babeltrace CLI output, returns a set
    containing the name of the functions.

    Uses the kallsyms file to find the symbol associated with an address:
    each address is mapped to the symbol with the greatest start address
    that is lower than or equal to it. This function should only be used
    if the user is root or has access to /proc/kallsyms.

    Parameters:
        raw_callstack: callstack text; every space-separated token starting
            with '0x' (commas are dropped first) is treated as a
            hexadecimal address.
        kallsyms_path: path of the symbol table to read. Each line must
            hold at least three whitespace-separated fields, with the
            hexadecimal address first and the symbol name third.

    Returns:
        The set of symbol names found for the addresses. Addresses lower
        than every known symbol are ignored.

    Raises:
        OSError: if `kallsyms_path` cannot be opened.
        ValueError: if an address is not valid hexadecimal.
    """
    recorded_callstack = set()

    # Read the symbol table as (address, symbol) pairs
    symbols = []
    with open(kallsyms_path) as kallsyms_f:
        for line in kallsyms_f:
            line_tokens = line.split()
            # Skip blank or truncated lines instead of failing on them
            if len(line_tokens) < 3:
                continue
            symbols.append((int(line_tokens[0], 16), line_tokens[2]))

    # Sort on the address only; the sort is stable, so symbols sharing an
    # address keep their order of appearance in the file.
    symbols.sort(key=lambda entry: entry[0])
    # Keep the addresses in a separate sorted list to easily bisect the
    # closest address of a symbol.
    addresses = [address for address, _ in symbols]

    # Remove commas and split on spaces
    for token in raw_callstack.replace(',', '').split(' '):
        if not token.startswith('0x'):
            continue

        # Number of symbols starting at or before the address
        loc = bisect.bisect(addresses, int(token, 16))
        if loc == 0:
            # The address is below the first symbol (or the table is
            # empty). Indexing with loc - 1 would wrap around to the last
            # symbol and report a bogus name.
            continue

        recorded_callstack.add(symbols[loc - 1][1])

    return recorded_callstack
| 92 | |
# Regexes capturing the text between the square brackets of the
# callstack_user and callstack_kernel contexts of an event line.
# Raw strings are used so that the backslashes reach the regex engine
# untouched, without relying on Python's handling of unknown string escapes.
user_cs_rexp = r'.*callstack_user\ \=\ \[(.*)\]\ .*\}, \{.*\}'
kernel_cs_rexp = r'.*callstack_kernel\ \=\ \[(.*)\]\ .*\}, \{.*\}'
| 96 | |
def main():
    """
    Reads a line from stdin and expects it to be a well-formed Babeltrace
    CLI output containing a callstack context of the domain passed as
    argument.

    Command line: (--kernel | --user EXE) FUNC-NAMES

    Exits with status 0 when every FUNC-NAME is found in the callstack
    recorded in the event line.

    Raises:
        ValueError: if too few arguments are given.
        re.error: if no callstack context is found in the event line
            (including when stdin is empty).
        Exception: if the domain option is neither `--kernel` nor `--user`,
            or if an expected function is missing from the callstack.
    """
    if len(sys.argv) <= 2:
        print(sys.argv)
        raise ValueError('USAGE: ./{} (--kernel|--user EXE) FUNC-NAMES'.format(sys.argv[0]))

    # The domain option must match exactly: a substring test would accept
    # arguments such as '-k', '--' or '' as a valid domain.
    argc = 1
    executable = None
    domain = sys.argv[argc]
    if domain == '--kernel':
        rexp = kernel_cs_rexp
        cs_type = 'kernel'
    elif domain == '--user':
        # If the `--user` option is passed, save the next argument as the
        # path to the executable
        rexp = user_cs_rexp
        cs_type = 'user'
        argc += 1
        executable = sys.argv[argc]
    else:
        raise Exception('Unknown domain')

    argc += 1

    # The remaining arguments are the function names that are expected to
    # be found in the callstack of the current event
    expected_callstack = set(sys.argv[argc:])

    # Read the tested line from STDIN. At end of file readline() returns
    # an empty string, which fails the match below with a clear error.
    event_line = sys.stdin.readline()

    # Extract the callstack context of the event
    m = re.match(rexp, event_line)

    # If there is no match, exit with error
    if m is None:
        raise re.error('Callstack not found in event line')

    raw_callstack = str(m.group(1))
    if cs_type == 'user':
        recorded_callstack = extract_user_func_names(executable, raw_callstack)
    else:
        recorded_callstack = extract_kernel_func_names(raw_callstack)

    # Verify that all expected functions are present in the callstack
    if expected_callstack - recorded_callstack:
        raise Exception('Expected function name not found in recorded callstack')

    sys.exit(0)
| 160 | |
# Run the check only when this file is executed as a script
if __name__ == "__main__":
    main()