| 1 | #!/usr/bin/python3 |
| 2 | # |
| 3 | # Copyright (C) 2017 Francis Deslauriers <francis.deslauriers@efficios.com> |
| 4 | # |
| 5 | # SPDX-License-Identifier: LGPL-2.1-only |
| 6 | # |
| 7 | |
| 8 | import sys |
| 9 | import bisect |
| 10 | import subprocess |
| 11 | import re |
| 12 | |
def addr2line(executable, addr):
    """
    Uses binutils' addr2line to get the function containing a given address.

    Parameters:
        executable: path of the binary the address belongs to.
        addr: address to resolve, as a string (e.g. '0x4005a6').

    Returns:
        A list holding the name of the function containing `addr`. The list
        is empty when addr2line fails, prints less output than expected, or
        reports the function as unknown ('??').

    Raises:
        OSError: if the addr2line program itself cannot be executed.
    """
    cmd = ['addr2line']

    cmd += ['-e', executable]

    # Print function names
    cmd += ['--functions']

    # Print the address before the function name
    cmd += ['--addresses', addr]

    # Run the argument vector directly, without a shell, so that spaces or
    # shell metacharacters in `executable` or `addr` are passed verbatim
    # instead of being interpreted. Only stdout is parsed; diagnostics
    # written to stderr can no longer be mistaken for a function name.
    result = subprocess.run(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    if result.returncode != 0:
        return []

    # The output ends with the function name line followed by the
    # 'file:line' location line. Working on whole lines (rather than on
    # whitespace-separated words) keeps the result correct when the source
    # path contains spaces.
    lines = result.stdout.splitlines()
    if len(lines) < 2:
        return []

    fcts = [lines[-2].strip()]

    # addr2line prints '??' when it cannot determine the function
    return [f for f in fcts if f and '??' not in f]
| 35 | |
def extract_user_func_names(executable, raw_callstack):
    """
    Resolve the userspace callstack printed by the Babeltrace CLI into the
    set of function names it goes through.

    Every address of `raw_callstack` is looked up in `executable` using
    addr2line(), so the result is only meaningful if the binary is the same
    one that was running when the callstack was recorded.
    """
    # Drop the commas, then cut the callstack on single spaces
    tokens = raw_callstack.replace(',', '').split(' ')

    resolved = set()
    for token in tokens:
        # Only the tokens starting with '0x' are recorded addresses
        if not token.startswith('0x'):
            continue
        resolved.update(addr2line(executable, token))

    return resolved
| 53 | |
def extract_kernel_func_names(raw_callstack, kallsyms_path='/proc/kallsyms'):
    """
    Given a callstack from the Babeltrace CLI output, returns a set
    containing the name of the functions.
    Uses the /proc/kallsyms procfile to find the symbol associated with an
    address. This function should only be used if the user is root or has
    access to /proc/kallsyms.

    Parameters:
        raw_callstack: comma/space separated callstack; only the elements
            starting with '0x' are treated as addresses.
        kallsyms_path: file to read the symbol table from. Each line is
            parsed as '<hex address> <type> <symbol> ...'. Defaults to
            '/proc/kallsyms'.

    Returns:
        The set of symbols whose start address is the closest one at or
        below each recorded address. Addresses lower than every known
        symbol are ignored.

    Raises:
        OSError: if `kallsyms_path` cannot be opened.
        ValueError: if an address is not valid hexadecimal.
    """
    # Load the symbol table as (address, symbol) pairs
    symbols = []
    with open(kallsyms_path) as kallsyms_f:
        for line in kallsyms_f:
            line_tokens = line.split()
            # Skip blank or truncated lines instead of failing on them
            if len(line_tokens) < 3:
                continue
            symbols.append((int(line_tokens[0], 16), line_tokens[2]))

    # Sort on the address only: the sort is stable, so symbols sharing an
    # address keep the order they have in the file.
    symbols.sort(key=lambda entry: entry[0])
    # Keep the addresses in a separate sorted list to easily bisect the
    # closest symbol of an address.
    addresses = [address for address, _ in symbols]

    recorded_callstack = set()

    # Remove commas and split on spaces
    for addr in raw_callstack.replace(',', '').split(' '):
        if not addr.startswith('0x'):
            continue

        # bisect() returns the number of symbols starting at or below the
        # address, so the containing symbol is the one just before `loc`.
        loc = bisect.bisect(addresses, int(addr, 16))
        if loc == 0:
            # No symbol starts at or below this address (or the table is
            # empty). Indexing with loc - 1 == -1 would wrongly wrap around
            # to the last symbol of the table.
            continue
        recorded_callstack.add(symbols[loc - 1][1])

    return recorded_callstack
| 90 | |
# Regular expressions capturing, as group 1, the content of the
# callstack_user and callstack_kernel context arrays of an event line.
# They are written as raw strings so that the backslash escapes reach the
# `re` module untouched rather than relying on Python passing unrecognized
# string escape sequences through (which triggers a warning).
user_cs_rexp = r'.*callstack_user\ \=\ \[(.*)\]\ .*\}, \{.*\}'
kernel_cs_rexp = r'.*callstack_kernel\ \=\ \[(.*)\]\ .*\}, \{.*\}'
| 94 | |
def main():
    """
    Reads a line from stdin and expects it to be a well-formed Babeltrace
    CLI output containing a callstack context of the domain passed as
    argument, then checks that every expected function name appears in
    that callstack.

    Command line: (--kernel | --user EXE) FUNC-NAMES

    Raises:
        ValueError: if too few arguments are given.
        Exception: if the domain option is neither '--kernel' nor '--user',
            or if an expected function is missing from the recorded
            callstack.
        re.error: if no callstack context is found in the event line
            (including when stdin is empty).

    Exits with status 0 when all expected functions are found.
    """
    if len(sys.argv) <= 2:
        print(sys.argv)
        raise ValueError('USAGE: ./{} (--kernel|--user EXE) FUNC-NAMES'.format(sys.argv[0]))

    # The domain option must match exactly. A substring test would also
    # accept arguments such as 'kernel', '-' or the empty string.
    argc = 1
    executable = None
    domain = sys.argv[argc]
    if domain == '--kernel':
        rexp = kernel_cs_rexp
        cs_type = 'kernel'
    elif domain == '--user':
        rexp = user_cs_rexp
        cs_type = 'user'
        # If the `--user` option is passed, save the next argument as the
        # path to the executable
        argc += 1
        executable = sys.argv[argc]
    else:
        raise Exception('Unknown domain')

    argc += 1

    # The remaining arguments are the function names that are expected to
    # be found in the callstack of the event
    expected_callstack = set(sys.argv[argc:])

    # Read the tested line from STDIN. readline() returns an empty string
    # on end-of-file, which simply fails to match below.
    event_line = sys.stdin.readline()

    # Extract the callstack context of the event
    m = re.match(rexp, event_line)

    # If there is no match, exit with error
    if m is None:
        raise re.error('Callstack not found in event line')

    raw_callstack = m.group(1)
    if cs_type == 'user':
        recorded_callstack = extract_user_func_names(executable, raw_callstack)
    else:
        recorded_callstack = extract_kernel_func_names(raw_callstack)

    # Verify that all expected functions are present in the callstack and
    # report the ones that are not
    missing = expected_callstack - recorded_callstack
    if missing:
        raise Exception(
            'Expected function name not found in recorded callstack: {}'.format(
                ', '.join(sorted(missing))))

    sys.exit(0)


# Run the check only when executed as a script, not when imported
if __name__ == '__main__':
    main()