From: Julien Desfossez
Date: Sat, 30 Apr 2016 15:09:27 +0000 (-0400)
Subject: Extract the FD sets in select and pselect6
X-Git-Tag: v2.9.0-rc1~72
X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=29751f7ce6ed098d4181309b4cf977e837fad526;p=lttng-modules.git

Extract the FD sets in select and pselect6

Instead of extracting the user-space pointers of the 3 fd_set, we now
extract the bitmask of the FDs in the sets (in, out, ex) in the form of
an array of uint8_t (1024 FDs is the limit in the kernel).

In this example, we select in input FDs 5 to 19 (0xFFFF0), it returns
that one FD is ready: FD 12 (0x1000).

syscall_entry_select: {
  n = 20,
  _fdset_in_length = 3, fdset_in = [ [0] = 0xF0, [1] = 0xFF, [2] = 0xF ],
  _fdset_out_length = 0, fdset_out = [ ],
  _fdset_ex_length = 0, fdset_ex = [ ],
  tvp = 0
}

syscall_exit_select: {
  ret = 1,
  _fdset_in_length = 3, fdset_in = [ [0] = 0x0, [1] = 0x10, [2] = 0x0 ],
  _fdset_out_length = 0, fdset_out = [ ],
  _fdset_ex_length = 0, fdset_ex = [ ],
  tvp = 0
}

Signed-off-by: Julien Desfossez
Signed-off-by: Mathieu Desnoyers
---

diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h b/instrumentation/syscalls/headers/syscalls_pointers_override.h
index bf5c632e..d116052f 100644
--- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
+++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
@@ -53,4 +53,277 @@ SC_LTTNG_TRACEPOINT_EVENT(pipe2,
 	)
 )
 
+/*
+ * Local variables shared by the select and pselect6 events: kernel-side
+ * copies of the three fd sets, their length in bytes and in unsigned
+ * longs, and the overflow flag reported in the event payload.
+ */
+#define LTTNG_SYSCALL_SELECT_locvar \
+	unsigned long *fds_in, *fds_out, *fds_ex; \
+	unsigned long nr_bytes, nr_ulong; \
+	uint8_t overflow;
+
+/*
+ * Passed to TP_code_pre(). Validates n (and ret on syscall exit), computes
+ * nr_bytes/nr_ulong, then copies each non-NULL fd set from user-space with
+ * the nofault helper. Any failure jumps to "error", which resets nr_bytes
+ * and nr_ulong to 0 so the fd set fields are emitted as empty sequences.
+ */
+#define LTTNG_SYSCALL_SELECT_code_pre \
+	sc_inout( \
+	{ \
+		int err; \
+		unsigned int n_in_bytes; \
+		\
+		tp_locvar->fds_in = NULL; \
+		tp_locvar->fds_out = NULL; \
+		tp_locvar->fds_ex = NULL; \
+		tp_locvar->overflow = 0; \
+		\
+		sc_out( \
+			if (ret <= 0) \
+				goto error; \
+		) \
+		\
+		if (n <= 0) \
+			goto error; \
+		\
+		/* On error or bogus input, don't copy anything. */ \
+		if (n > __FD_SETSIZE) \
+			goto error; \
+		\
+		n_in_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE); \
+		\
+		/* \
+		 * Limit atomic memory allocation to one page, since n \
+		 * is limited to 1024 and the smallest page size on Linux \
+		 * is 4k, this should not happen, don't try to make it work. \
+		 */ \
+		if (n_in_bytes > PAGE_SIZE) { \
+			WARN_ON_ONCE(1); \
+			/* Inform the user that we did not output everything. */ \
+			tp_locvar->overflow = 1; \
+			goto error; \
+		} else { \
+			tp_locvar->nr_bytes = n_in_bytes; \
+			tp_locvar->nr_ulong = DIV_ROUND_UP(n_in_bytes, \
+					sizeof(unsigned long)); \
+		} \
+		\
+		if (inp) { \
+			tp_locvar->fds_in = kmalloc( \
+					tp_locvar->nr_ulong * sizeof(unsigned long), \
+					GFP_ATOMIC | GFP_NOWAIT); \
+			if (!tp_locvar->fds_in) \
+				goto error; \
+			\
+			err = lib_ring_buffer_copy_from_user_check_nofault( \
+					tp_locvar->fds_in, inp, \
+					tp_locvar->nr_ulong * sizeof(unsigned long)); \
+			if (err != 0) \
+				goto error; \
+		} \
+		if (outp) { \
+			tp_locvar->fds_out = kmalloc( \
+					tp_locvar->nr_ulong * sizeof(unsigned long), \
+					GFP_ATOMIC | GFP_NOWAIT); \
+			if (!tp_locvar->fds_out) \
+				goto error; \
+			\
+			err = lib_ring_buffer_copy_from_user_check_nofault( \
+					tp_locvar->fds_out, outp, \
+					tp_locvar->nr_ulong * sizeof(unsigned long)); \
+			if (err != 0) \
+				goto error; \
+		} \
+		if (exp) { \
+			tp_locvar->fds_ex = kmalloc( \
+					tp_locvar->nr_ulong * sizeof(unsigned long), \
+					GFP_ATOMIC | GFP_NOWAIT); \
+			if (!tp_locvar->fds_ex) \
+				goto error; \
+			\
+			err = lib_ring_buffer_copy_from_user_check_nofault( \
+					tp_locvar->fds_ex, exp, \
+					tp_locvar->nr_ulong * sizeof(unsigned long)); \
+			if (err != 0) \
+				goto error; \
+		} \
+		goto end; \
+		\
+error: \
+		tp_locvar->nr_bytes = 0; \
+		tp_locvar->nr_ulong = 0; \
+end:		; /* Label at end of compound statement. */ \
+	} \
+	)
+
+/*
+ * Emit the user-space fd set "input" as a sequence of uint8_t called
+ * "name", ordered from FD 0 upward. On little-endian, the bytes of each
+ * unsigned long are already in that order, so they are read at increasing
+ * offsets. A NULL "input" produces an empty sequence.
+ */
+#define LTTNG_SYSCALL_SELECT_fds_field_LE(name, input) \
+	ctf_custom_field( \
+		ctf_custom_type( \
+			.atype = atype_sequence, \
+			.u.sequence.length_type = __type_integer( \
+				uint8_t, 0, 0, 0, __BYTE_ORDER, 10, none), \
+			.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0, \
+				__BYTE_ORDER, 16, none), \
+		), \
+		name, \
+		ctf_custom_code( \
+			unsigned int src; \
+			unsigned int nr_bytes_out = 0; \
+			\
+			if (input) { \
+				ctf_integer_type(uint8_t, tp_locvar->nr_bytes) \
+				ctf_align(uint8_t) \
+			} else { \
+				ctf_integer_type(uint8_t, 0) \
+				ctf_align(uint8_t) \
+				goto skip_##name; \
+			} \
+			\
+			for (src = 0; src < tp_locvar->nr_ulong; src++) { \
+				int dst; \
+				for (dst = 0; dst < sizeof(long); dst++) { \
+					if (nr_bytes_out++ >= tp_locvar->nr_bytes) { \
+						goto skip_##name; \
+					} \
+					ctf_user_integer_type(uint8_t, \
+						((uint8_t __user *) (input->fds_bits + src))[dst]); \
+				} \
+			} \
+			skip_##name: ; \
+		) \
+	)
+
+/*
+ * Big-endian counterpart of LTTNG_SYSCALL_SELECT_fds_field_LE: the least
+ * significant byte of each unsigned long is at the highest address, so
+ * the bytes of each word are read at decreasing offsets, from
+ * sizeof(long) - 1 (the last byte of the word) down to 0.
+ */
+#define LTTNG_SYSCALL_SELECT_fds_field_BE(name, input) \
+	ctf_custom_field( \
+		ctf_custom_type( \
+			.atype = atype_sequence, \
+			.u.sequence.length_type = __type_integer( \
+				uint8_t, 0, 0, 0, __BYTE_ORDER, 10, none), \
+			.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0, \
+				__BYTE_ORDER, 16, none), \
+		), \
+		name, \
+		ctf_custom_code( \
+			unsigned int src, nr_bytes_out = 0; \
+			\
+			if (input) { \
+				ctf_integer_type(uint8_t, tp_locvar->nr_bytes) \
+				ctf_align(uint8_t) \
+			} else { \
+				ctf_integer_type(uint8_t, 0) \
+				ctf_align(uint8_t) \
+				goto skip_##name; \
+			} \
+			\
+			for (src = 0; src < tp_locvar->nr_ulong; src++) { \
+				int dst; \
+				for (dst = sizeof(long) - 1; dst >= 0; dst--) { \
+					if (nr_bytes_out++ >= tp_locvar->nr_bytes) { \
+						goto skip_##name; \
+					} \
+					ctf_user_integer_type(uint8_t, \
+						((uint8_t __user *) (input->fds_bits + src))[dst]); \
+				} \
+			} \
+			skip_##name: ; \
+		) \
+	)
+
+/* Passed to TP_code_post(): release the fd set copies (NULL-safe). */
+#define LTTNG_SYSCALL_SELECT_code_post \
+	kfree(tp_locvar->fds_in); \
+	kfree(tp_locvar->fds_out); \
+	kfree(tp_locvar->fds_ex);
+
+/* select: trace the fd set contents rather than the user pointers. */
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
+#define OVERRIDE_32_select
+#define OVERRIDE_64_select
+SC_LTTNG_TRACEPOINT_EVENT_CODE(select,
+	TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
+		fd_set __user *exp, struct timeval *tvp),
+	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp),
+	TP_locvar(
+		LTTNG_SYSCALL_SELECT_locvar
+	),
+	TP_code_pre(
+		LTTNG_SYSCALL_SELECT_code_pre
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, n, n))
+		sc_inout(ctf_integer(uint8_t, overflow, tp_locvar->overflow))
+		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
+
+		sc_inout(
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(exceptfds, exp)
+#else
+			LTTNG_SYSCALL_SELECT_fds_field_BE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(exceptfds, exp)
+#endif
+		)
+	),
+	TP_code_post(
+		LTTNG_SYSCALL_SELECT_code_post
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
+
+/* pselect6: same fd set extraction as select, plus the sig pointer. */
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define OVERRIDE_32_pselect6
+#define OVERRIDE_64_pselect6
+SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
+	TP_PROTO(sc_exit(long ret,) int n, fd_set __user * inp, fd_set __user * outp,
+		fd_set __user * exp, struct timeval __user * tvp, void __user * sig),
+	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp, sig),
+	TP_locvar(
+		LTTNG_SYSCALL_SELECT_locvar
+	),
+	TP_code_pre(
+		LTTNG_SYSCALL_SELECT_code_pre
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, n, n))
+		sc_inout(ctf_integer(uint8_t, overflow, tp_locvar->overflow))
+		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
+		sc_in(ctf_integer_hex(void *, sig, sig))
+
+		sc_inout(
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(exceptfds, exp)
+#else
+			LTTNG_SYSCALL_SELECT_fds_field_BE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(exceptfds, exp)
+#endif
+		)
+	),
+	TP_code_post(
+		LTTNG_SYSCALL_SELECT_code_post
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
+
 #endif /* CREATE_SYSCALL_TABLE */