From c337ddc219f608d4d35f461bdc9d2246324d6708 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 15 Feb 2012 21:02:00 -0500 Subject: [PATCH] Implement state dump Signed-off-by: Mathieu Desnoyers --- Makefile | 3 + .../events/lttng-module/lttng-statedump.h | 167 ++++++++ lttng-events.c | 5 + lttng-events.h | 2 + lttng-probes.c | 4 +- lttng-statedump-impl.c | 366 ++++++++++++++++++ probes/Makefile | 2 + probes/lttng-events.h | 19 +- probes/lttng-probe-statedump.c | 28 ++ wrapper/irqdesc.c | 44 +++ wrapper/irqdesc.h | 19 + 11 files changed, 656 insertions(+), 3 deletions(-) create mode 100644 instrumentation/events/lttng-module/lttng-statedump.h create mode 100644 lttng-statedump-impl.c create mode 100644 probes/lttng-probe-statedump.c create mode 100644 wrapper/irqdesc.c create mode 100644 wrapper/irqdesc.h diff --git a/Makefile b/Makefile index 2cfc6901..dfa0792b 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,9 @@ lttng-tracer-objs := lttng-events.o lttng-abi.o \ lttng-context-vppid.o lttng-calibrate.o \ wrapper/random.o +obj-m += lttng-statedump.o +lttng-statedump-objs := lttng-statedump-impl.o wrapper/irqdesc.o + ifneq ($(CONFIG_HAVE_SYSCALL_TRACEPOINTS),) lttng-tracer-objs += lttng-syscalls.o endif diff --git a/instrumentation/events/lttng-module/lttng-statedump.h b/instrumentation/events/lttng-module/lttng-statedump.h new file mode 100644 index 00000000..50411880 --- /dev/null +++ b/instrumentation/events/lttng-module/lttng-statedump.h @@ -0,0 +1,167 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lttng_statedump + +#if !defined(_TRACE_LTTNG_STATEDUMP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LTTNG_STATEDUMP_H + +#include +#include +#include +#include +#include +#include + +TRACE_EVENT(lttng_statedump_start, + TP_PROTO(struct lttng_session *session), + TP_ARGS(session), + TP_STRUCT__entry( + ), + TP_fast_assign( + ), + TP_printk("") +) + +TRACE_EVENT(lttng_statedump_end, + TP_PROTO(struct lttng_session *session), + TP_ARGS(session), + TP_STRUCT__entry( + ), + TP_fast_assign( + ), + TP_printk("") +) + +TRACE_EVENT(lttng_statedump_process_state, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, + int type, int mode, int submode, int status), + TP_ARGS(session, p, type, mode, submode, status), + TP_STRUCT__entry( + __field(pid_t, tid) + __field(pid_t, vtid) + __field(pid_t, pid) + __field(pid_t, vpid) + __field(pid_t, ppid) + __field(pid_t, vppid) + __array_text(char, name, TASK_COMM_LEN) + __field(int, type) + __field(int, mode) + __field(int, submode) + __field(int, status) + ), + TP_fast_assign( + tp_assign(tid, p->pid) + tp_assign(vtid, !p->nsproxy ? 0 : task_pid_vnr(p)) + tp_assign(pid, p->tgid) + tp_assign(vpid, !p->nsproxy ? 0 : task_tgid_vnr(p)) + tp_assign(ppid, + ({ + pid_t ret; + + rcu_read_lock(); + ret = task_tgid_nr(p->real_parent); + rcu_read_unlock(); + ret; + })) + tp_assign(vppid, + ({ + struct task_struct *parent; + pid_t ret; + + rcu_read_lock(); + parent = rcu_dereference(current->real_parent); + if (!parent->nsproxy) + ret = 0; + else + ret = task_tgid_nr(parent); + rcu_read_unlock(); + ret; + })) + tp_memcpy(name, p->comm, TASK_COMM_LEN) + tp_assign(type, type) + tp_assign(mode, mode) + tp_assign(submode, submode) + tp_assign(status, status) + ), + TP_printk("") +) + +TRACE_EVENT(lttng_statedump_file_descriptor, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, int fd, const char *filename), + TP_ARGS(session, p, fd, filename), + TP_STRUCT__entry( + __field(pid_t, pid) + __field(int, fd) + __string(filename, filename) + ), + TP_fast_assign( + tp_assign(pid, p->tgid) + tp_assign(fd, p->tgid) + tp_strcpy(filename, filename) + ), + TP_printk("") +) + +TRACE_EVENT(lttng_statedump_vm_map, + TP_PROTO(struct lttng_session *session, + struct task_struct *p, struct vm_area_struct *map, + unsigned long inode), + TP_ARGS(session, p, map, inode), + TP_STRUCT__entry( + __field(pid_t, pid) + __field_hex(unsigned long, start) + __field_hex(unsigned long, end) + __field_hex(unsigned long, flags) + __field(unsigned long, inode) + __field(unsigned long, pgoff) + ), + TP_fast_assign( + tp_assign(pid, p->tgid) + tp_assign(start, map->vm_start) + tp_assign(end, map->vm_end) + tp_assign(flags, map->vm_flags) + tp_assign(inode, inode) + tp_assign(pgoff, map->vm_pgoff << PAGE_SHIFT) + ), + TP_printk("") +) + +TRACE_EVENT(lttng_statedump_network_interface, + TP_PROTO(struct lttng_session *session, + struct net_device *dev, struct in_ifaddr *ifa), + TP_ARGS(session, dev, ifa), + TP_STRUCT__entry( + __string(name, dev->name) + __field_network_hex(uint32_t, address_ipv4) + ), + TP_fast_assign( + tp_strcpy(name, dev->name) + tp_assign(address_ipv4, ifa ? ifa->ifa_address : 0U) + ), + TP_printk("") +) + +/* Called with desc->lock held */ +TRACE_EVENT(lttng_statedump_interrupt, + TP_PROTO(struct lttng_session *session, + unsigned int irq, const char *chip_name, + struct irqaction *action), + TP_ARGS(session, irq, chip_name, action), + TP_STRUCT__entry( + __field(unsigned int, irq) + __string(name, chip_name) + __string(action, action->name) + ), + TP_fast_assign( + tp_assign(irq, irq) + tp_strcpy(name, chip_name) + tp_strcpy(action, action->name) + ), + TP_printk("") +) + +#endif /* _TRACE_LTTNG_STATEDUMP_H */ + +/* This part must be outside protection */ +#include "../../../probes/define_trace.h" diff --git a/lttng-events.c b/lttng-events.c index a01d0af8..a3758229 100644 --- a/lttng-events.c +++ b/lttng-events.c @@ -113,6 +113,11 @@ int lttng_session_enable(struct lttng_session *session) ACCESS_ONCE(session->active) = 1; ACCESS_ONCE(session->been_active) = 1; ret = _lttng_session_metadata_statedump(session); + if (ret) { + ACCESS_ONCE(session->active) = 0; + goto end; + } + ret = lttng_statedump_start(session); if (ret) ACCESS_ONCE(session->active) = 0; end: diff --git a/lttng-events.h b/lttng-events.h index 9c652c88..35806a91 100644 --- a/lttng-events.h +++ b/lttng-events.h @@ -353,6 +353,8 @@ int lttng_add_perf_counter_to_ctx(uint32_t type, } #endif +extern int lttng_statedump_start(struct lttng_session *session); + #ifdef CONFIG_KPROBES int lttng_kprobes_register(const char *name, const char *symbol_name, diff --git a/lttng-probes.c b/lttng-probes.c index 97dcc359..15e05bef 100644 --- a/lttng-probes.c +++ b/lttng-probes.c @@ -132,9 +132,9 @@ int tp_list_show(struct seq_file *m, void *p) const struct lttng_event_desc *probe_desc = p; /* - * Don't export lttng internal events (metadata). + * Don't export lttng internal event: lttng_metadata. */ - if (!strncmp(probe_desc->name, "lttng_", sizeof("lttng_") - 1)) + if (!strcmp(probe_desc->name, "lttng_metadata")) return 0; seq_printf(m, "event { name = %s; };\n", probe_desc->name); diff --git a/lttng-statedump-impl.c b/lttng-statedump-impl.c new file mode 100644 index 00000000..cfcf4feb --- /dev/null +++ b/lttng-statedump-impl.c @@ -0,0 +1,366 @@ +/* + * Linux Trace Toolkit Next Generation Kernel State Dump + * + * Copyright 2005 Jean-Hugues Deschenes + * Copyright 2006-2012 Mathieu Desnoyers + * + * Changes: + * Eric Clement: Add listing of network IP interface + * 2006, 2007 Mathieu Desnoyers Fix kernel threads + * Various updates + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lttng-events.h" +#include "wrapper/irqdesc.h" + +#ifdef CONFIG_GENERIC_HARDIRQS +#include +#endif + +/* Define the tracepoints, but do not build the probes */ +#define CREATE_TRACE_POINTS +#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module +#define TRACE_INCLUDE_FILE lttng-statedump +#include "instrumentation/events/lttng-module/lttng-statedump.h" + +/* + * Protected by the trace lock. + */ +static struct delayed_work cpu_work[NR_CPUS]; +static DECLARE_WAIT_QUEUE_HEAD(statedump_wq); +static atomic_t kernel_threads_to_run; + +enum lttng_thread_type { + LTTNG_USER_THREAD = 0, + LTTNG_KERNEL_THREAD = 1, +}; + +enum lttng_execution_mode { + LTTNG_USER_MODE = 0, + LTTNG_SYSCALL = 1, + LTTNG_TRAP = 2, + LTTNG_IRQ = 3, + LTTNG_SOFTIRQ = 4, + LTTNG_MODE_UNKNOWN = 5, +}; + +enum lttng_execution_submode { + LTTNG_NONE = 0, + LTTNG_UNKNOWN = 1, +}; + +enum lttng_process_status { + LTTNG_UNNAMED = 0, + LTTNG_WAIT_FORK = 1, + LTTNG_WAIT_CPU = 2, + LTTNG_EXIT = 3, + LTTNG_ZOMBIE = 4, + LTTNG_WAIT = 5, + LTTNG_RUN = 6, + LTTNG_DEAD = 7, +}; + +#ifdef CONFIG_INET +static +void lttng_enumerate_device(struct lttng_session *session, + struct net_device *dev) +{ + struct in_device *in_dev; + struct in_ifaddr *ifa; + + if (dev->flags & IFF_UP) { + in_dev = in_dev_get(dev); + if (in_dev) { + for (ifa = in_dev->ifa_list; ifa != NULL; + ifa = ifa->ifa_next) { + trace_lttng_statedump_network_interface( + session, dev, ifa); + } + in_dev_put(in_dev); + } + } else { + trace_lttng_statedump_network_interface( + session, dev, NULL); + } +} + +static +int lttng_enumerate_network_ip_interface(struct lttng_session *session) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + for_each_netdev(&init_net, dev) + lttng_enumerate_device(session, dev); + read_unlock(&dev_base_lock); + + return 0; +} +#else /* CONFIG_INET */ +static inline +int lttng_enumerate_network_ip_interface(struct lttng_session *session) +{ + return 0; +} +#endif /* CONFIG_INET */ + + +static +void lttng_enumerate_task_fd(struct lttng_session *session, + struct task_struct *p, char *tmp) +{ + struct fdtable *fdt; + struct file *filp; + unsigned int i; + const unsigned char *path; + + task_lock(p); + if (!p->files) + goto unlock_task; + spin_lock(&p->files->file_lock); + fdt = files_fdtable(p->files); + for (i = 0; i < fdt->max_fds; i++) { + filp = fcheck_files(p->files, i); + if (!filp) + continue; + path = d_path(&filp->f_path, tmp, PAGE_SIZE); + /* Make sure we give at least some info */ + trace_lttng_statedump_file_descriptor(session, p, i, + IS_ERR(path) ? + filp->f_dentry->d_name.name : + path); + } + spin_unlock(&p->files->file_lock); +unlock_task: + task_unlock(p); +} + +static +int lttng_enumerate_file_descriptors(struct lttng_session *session) +{ + struct task_struct *p; + char *tmp = (char *) __get_free_page(GFP_KERNEL); + + /* Enumerate active file descriptors */ + rcu_read_lock(); + for_each_process(p) + lttng_enumerate_task_fd(session, p, tmp); + rcu_read_unlock(); + free_page((unsigned long) tmp); + return 0; +} + +static +void lttng_enumerate_task_vm_maps(struct lttng_session *session, + struct task_struct *p) +{ + struct mm_struct *mm; + struct vm_area_struct *map; + unsigned long ino; + + /* get_task_mm does a task_lock... */ + mm = get_task_mm(p); + if (!mm) + return; + + map = mm->mmap; + if (map) { + down_read(&mm->mmap_sem); + while (map) { + if (map->vm_file) + ino = map->vm_file->f_dentry->d_inode->i_ino; + else + ino = 0; + trace_lttng_statedump_vm_map(session, p, map, ino); + map = map->vm_next; + } + up_read(&mm->mmap_sem); + } + mmput(mm); +} + +static +int lttng_enumerate_vm_maps(struct lttng_session *session) +{ + struct task_struct *p; + + rcu_read_lock(); + for_each_process(p) + lttng_enumerate_task_vm_maps(session, p); + rcu_read_unlock(); + return 0; +} + +#ifdef CONFIG_GENERIC_HARDIRQS +static +void lttng_list_interrupts(struct lttng_session *session) +{ + unsigned int irq; + unsigned long flags = 0; + struct irq_desc *desc; + +#define irq_to_desc wrapper_irq_to_desc + /* needs irq_desc */ + for_each_irq_desc(irq, desc) { + struct irqaction *action; + const char *irq_chip_name = + irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip"; + + local_irq_save(flags); + raw_spin_lock(&desc->lock); + for (action = desc->action; action; action = action->next) { + trace_lttng_statedump_interrupt(session, + irq, irq_chip_name, action); + } + raw_spin_unlock(&desc->lock); + local_irq_restore(flags); + } +#undef irq_to_desc +} +#else +static inline +void list_interrupts(struct lttng_session *session) +{ +} +#endif + +static +int lttng_enumerate_process_states(struct lttng_session *session) +{ + struct task_struct *g, *p; + + rcu_read_lock(); + for_each_process(g) { + p = g; + do { + enum lttng_execution_mode mode = + LTTNG_MODE_UNKNOWN; + enum lttng_execution_submode submode = + LTTNG_UNKNOWN; + enum lttng_process_status status; + enum lttng_thread_type type; + + task_lock(p); + if (p->exit_state == EXIT_ZOMBIE) + status = LTTNG_ZOMBIE; + else if (p->exit_state == EXIT_DEAD) + status = LTTNG_DEAD; + else if (p->state == TASK_RUNNING) { + /* Is this a forked child that has not run yet? */ + if (list_empty(&p->rt.run_list)) + status = LTTNG_WAIT_FORK; + else + /* + * All tasks are considered as wait_cpu; + * the viewer will sort out if the task + * was really running at this time. + */ + status = LTTNG_WAIT_CPU; + } else if (p->state & + (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { + /* Task is waiting for something to complete */ + status = LTTNG_WAIT; + } else + status = LTTNG_UNNAMED; + submode = LTTNG_NONE; + + /* + * Verification of t->mm is to filter out kernel + * threads; Viewer will further filter out if a + * user-space thread was in syscall mode or not. + */ + if (p->mm) + type = LTTNG_USER_THREAD; + else + type = LTTNG_KERNEL_THREAD; + trace_lttng_statedump_process_state(session, + p, type, mode, submode, status); + task_unlock(p); + } while_each_thread(g, p); + } + rcu_read_unlock(); + + return 0; +} + +static +void lttng_statedump_work_func(struct work_struct *work) +{ + if (atomic_dec_and_test(&kernel_threads_to_run)) + /* If we are the last thread, wake up do_lttng_statedump */ + wake_up(&statedump_wq); +} + +static +int do_lttng_statedump(struct lttng_session *session) +{ + int cpu; + + printk(KERN_DEBUG "LTT state dump thread start\n"); + trace_lttng_statedump_start(session); + lttng_enumerate_process_states(session); + lttng_enumerate_file_descriptors(session); + lttng_enumerate_vm_maps(session); + lttng_list_interrupts(session); + lttng_enumerate_network_ip_interface(session); + + /* TODO lttng_dump_idt_table(session); */ + /* TODO lttng_dump_softirq_vec(session); */ + /* TODO lttng_list_modules(session); */ + /* TODO lttng_dump_swap_files(session); */ + + /* + * Fire off a work queue on each CPU. Their sole purpose in life + * is to guarantee that each CPU has been in a state where is was in + * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). + */ + get_online_cpus(); + atomic_set(&kernel_threads_to_run, num_online_cpus()); + for_each_online_cpu(cpu) { + INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func); + schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); + } + /* Wait for all threads to run */ + __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) != 0)); + put_online_cpus(); + /* Our work is done */ + printk(KERN_DEBUG "LTT state dump end\n"); + trace_lttng_statedump_end(session); + return 0; +} + +/* + * Called with session mutex held. + */ +int lttng_statedump_start(struct lttng_session *session) +{ + printk(KERN_DEBUG "LTTng: state dump begin\n"); + return do_lttng_statedump(session); +} +EXPORT_SYMBOL_GPL(lttng_statedump_start); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Jean-Hugues Deschenes"); +MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Statedump"); diff --git a/probes/Makefile b/probes/Makefile index 2b61ce5f..ff6b9291 100644 --- a/probes/Makefile +++ b/probes/Makefile @@ -13,6 +13,8 @@ obj-m += lttng-probe-lttng.o obj-m += lttng-probe-sched.o obj-m += lttng-probe-irq.o +obj-m += lttng-probe-statedump.o + ifneq ($(CONFIG_KVM),) obj-m += lttng-probe-kvm.o endif diff --git a/probes/lttng-events.h b/probes/lttng-events.h index d269490e..cc65656d 100644 --- a/probes/lttng-events.h +++ b/probes/lttng-events.h @@ -592,6 +592,17 @@ __assign_##dest##_2: \ #undef TP_fast_assign #define TP_fast_assign(args...) args +/* + * For state dump, check that "session" argument (mandatory) matches the + * session this event belongs to. Ensures that we write state dump data only + * into the started session, not into all sessions. + */ +#ifdef TP_SESSION_CHECK +#define _TP_SESSION_CHECK(session, csession) (session == csession) +#else /* TP_SESSION_CHECK */ +#define _TP_SESSION_CHECK(session, csession) 1 +#endif /* TP_SESSION_CHECK */ + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(_name, _proto, _args, _tstruct, _assign, _print) \ static void __event_probe__##_name(void *__data, _proto) \ @@ -605,8 +616,12 @@ static void __event_probe__##_name(void *__data, _proto) \ struct __event_typemap__##_name __typemap; \ int __ret; \ \ - if (0) \ + if (0) { \ (void) __dynamic_len_idx; /* don't warn if unused */ \ + (void) __typemap; /* don't warn if unused */ \ + } \ + if (!_TP_SESSION_CHECK(session, __chan->session)) \ + return; \ if (unlikely(!ACCESS_ONCE(__chan->session->active))) \ return; \ if (unlikely(!ACCESS_ONCE(__chan->enabled))) \ @@ -638,6 +653,8 @@ static void __event_probe__##_name(void *__data) \ size_t __event_len, __event_align; \ int __ret; \ \ + if (!_TP_SESSION_CHECK(session, __chan->session)) \ + return; \ if (unlikely(!ACCESS_ONCE(__chan->session->active))) \ return; \ if (unlikely(!ACCESS_ONCE(__chan->enabled))) \ diff --git a/probes/lttng-probe-statedump.c b/probes/lttng-probe-statedump.c new file mode 100644 index 00000000..328e8ed3 --- /dev/null +++ b/probes/lttng-probe-statedump.c @@ -0,0 +1,28 @@ +/* + * probes/lttng-probe-statedump.c + * + * Copyright 2010 (c) - Mathieu Desnoyers + * + * LTTng statedump probes. + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include "../lttng-events.h" + +/* + * Create LTTng tracepoint probes. + */ +#define LTTNG_PACKAGE_BUILD +#define CREATE_TRACE_POINTS +#define TP_SESSION_CHECK +#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module +#define TRACE_INCLUDE_FILE lttng-statedump + +#include "../instrumentation/events/lttng-module/lttng-statedump.h" + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers "); +MODULE_DESCRIPTION("LTTng statedump probes"); diff --git a/wrapper/irqdesc.c b/wrapper/irqdesc.c new file mode 100644 index 00000000..a71981e7 --- /dev/null +++ b/wrapper/irqdesc.c @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2011 Mathieu Desnoyers (mathieu.desnoyers@efficios.com) + * + * wrapper around irq_to_desc. Using KALLSYMS to get its address when + * available, else we need to have a kernel that exports this function to GPL + * modules. + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#ifdef CONFIG_KALLSYMS + +#include +#include +#include +#include "kallsyms.h" +#include "irqdesc.h" + +static +struct irq_desc *(*irq_to_desc_sym)(unsigned int irq); + +struct irq_desc *wrapper_irq_to_desc(unsigned int irq) +{ + if (!irq_to_desc_sym) + irq_to_desc_sym = (void *) kallsyms_lookup_funcptr("irq_to_desc"); + if (irq_to_desc_sym) { + return irq_to_desc_sym(irq); + } else { + printk(KERN_WARNING "LTTng: irq_to_desc symbol lookup failed.\n"); + return NULL; + } +} + +#else + +#include +#include + +struct irq_desc *wrapper_irq_to_desc(unsigned int irq) +{ + return irq_to_desc(irq); +} + +#endif diff --git a/wrapper/irqdesc.h b/wrapper/irqdesc.h new file mode 100644 index 00000000..4670fc64 --- /dev/null +++ b/wrapper/irqdesc.h @@ -0,0 +1,19 @@ +#ifndef _LTTNG_WRAPPER_IRQDESC_H +#define _LTTNG_WRAPPER_IRQDESC_H + +/* + * Copyright (C) 2011 Mathieu Desnoyers (mathieu.desnoyers@efficios.com) + * + * wrapper around irq_to_desc. Using KALLSYMS to get its address when + * available, else we need to have a kernel that exports this function to GPL + * modules. + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include + +struct irq_desc *wrapper_irq_to_desc(unsigned int irq); + +#endif /* _LTTNG_WRAPPER_IRQDESC_H */ -- 2.34.1