Commit | Line | Data |
---|---|---|
1c8284eb MD |
1 | /* |
2 | * Linux Trace Toolkit Kernel State Dump | |
3 | * | |
4 | * Copyright 2005 - | |
5 | * Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca> | |
6 | * | |
7 | * Changes: | |
8 | * Eric Clement: Add listing of network IP interface | |
9 | * 2006, 2007 Mathieu Desnoyers Fix kernel threads | |
10 | * Various updates | |
11 | * | |
12 | * Dual LGPL v2.1/GPL v2 license. | |
13 | */ | |
14 | ||
15 | #include <linux/init.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/netlink.h> | |
18 | #include <linux/inet.h> | |
19 | #include <linux/ip.h> | |
20 | #include <linux/kthread.h> | |
21 | #include <linux/proc_fs.h> | |
22 | #include <linux/file.h> | |
23 | #include <linux/interrupt.h> | |
24 | #include <linux/irqnr.h> | |
25 | #include <linux/cpu.h> | |
26 | #include <linux/netdevice.h> | |
27 | #include <linux/inetdevice.h> | |
28 | #include <linux/sched.h> | |
29 | #include <linux/mm.h> | |
30 | #include <linux/marker.h> | |
31 | #include <linux/fdtable.h> | |
32 | #include <linux/swap.h> | |
33 | #include <linux/wait.h> | |
34 | #include <linux/mutex.h> | |
35 | ||
36 | #include "ltt-tracer.h" | |
37 | ||
38 | #ifdef CONFIG_GENERIC_HARDIRQS | |
39 | #include <linux/irq.h> | |
40 | #endif | |
41 | ||
42 | #define NB_PROC_CHUNK 20 | |
43 | ||
/*
 * Protected by the trace lock.
 *
 * cpu_work:              one delayed work item per possible CPU, queued by
 *                        do_ltt_statedump() on every online CPU.
 * statedump_wq:          wait queue do_ltt_statedump() sleeps on; woken by
 *                        ltt_statedump_work_func().
 * kernel_threads_to_run: number of per-CPU work items that have not run yet.
 */
static struct delayed_work cpu_work[NR_CPUS];
static DECLARE_WAIT_QUEUE_HEAD(statedump_wq);
static atomic_t kernel_threads_to_run;
50 | ||
/*
 * Do-nothing callback. It is the default value of
 * ltt_dump_kprobes_table_cb, so that do_ltt_statedump() can always call the
 * pointer without a NULL check.
 */
static void empty_cb(void *call_data)
{
}

/* Serializes updates to, and invocation of, ltt_dump_kprobes_table_cb. */
static DEFINE_MUTEX(statedump_cb_mutex);
/* Current kprobes table dump callback; empty_cb when none is registered. */
static void (*ltt_dump_kprobes_table_cb)(void *call_data) = empty_cb;
57 | ||
/*
 * Thread type reported in the process_state event. A task with an mm is
 * reported as a user thread, a task without one as a kernel thread.
 */
enum lttng_thread_type {
	LTTNG_USER_THREAD = 0,
	LTTNG_KERNEL_THREAD = 1,
};
62 | ||
/*
 * Execution mode reported in the process_state event. This file only ever
 * emits LTTNG_MODE_UNKNOWN; the other values are presumably part of the
 * numbering shared with the trace viewer -- TODO confirm.
 */
enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};
71 | ||
/*
 * Execution submode reported in the process_state event. The state dump
 * always ends up reporting LTTNG_NONE.
 */
enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};
76 | ||
/*
 * Process status reported in the process_state event. See
 * ltt_enumerate_process_states() for how task state is mapped onto these
 * values; LTTNG_EXIT and LTTNG_RUN are not emitted by this file.
 */
enum lttng_process_status {
	LTTNG_UNNAMED = 0,
	LTTNG_WAIT_FORK = 1,
	LTTNG_WAIT_CPU = 2,
	LTTNG_EXIT = 3,
	LTTNG_ZOMBIE = 4,
	LTTNG_WAIT = 5,
	LTTNG_RUN = 6,
	LTTNG_DEAD = 7,
};
87 | ||
88 | #ifdef CONFIG_INET | |
89 | static void ltt_enumerate_device(struct ltt_probe_private_data *call_data, | |
90 | struct net_device *dev) | |
91 | { | |
92 | struct in_device *in_dev; | |
93 | struct in_ifaddr *ifa; | |
94 | ||
95 | if (dev->flags & IFF_UP) { | |
96 | in_dev = in_dev_get(dev); | |
97 | if (in_dev) { | |
98 | for (ifa = in_dev->ifa_list; ifa != NULL; | |
99 | ifa = ifa->ifa_next) | |
100 | __trace_mark(0, netif_state, | |
101 | network_ipv4_interface, | |
102 | call_data, | |
103 | "name %s address #n4u%lu up %d", | |
104 | dev->name, | |
105 | (unsigned long)ifa->ifa_address, | |
106 | 0); | |
107 | in_dev_put(in_dev); | |
108 | } | |
109 | } else | |
110 | __trace_mark(0, netif_state, network_ip_interface, | |
111 | call_data, "name %s address #n4u%lu up %d", | |
112 | dev->name, 0UL, 0); | |
113 | } | |
114 | ||
115 | static inline int | |
116 | ltt_enumerate_network_ip_interface(struct ltt_probe_private_data *call_data) | |
117 | { | |
118 | struct net_device *dev; | |
119 | ||
120 | read_lock(&dev_base_lock); | |
121 | for_each_netdev(&init_net, dev) | |
122 | ltt_enumerate_device(call_data, dev); | |
123 | read_unlock(&dev_base_lock); | |
124 | ||
125 | return 0; | |
126 | } | |
127 | #else /* CONFIG_INET */ | |
/*
 * Stub used when CONFIG_INET is not set: there is no IPv4 interface state
 * to dump. Always returns 0.
 */
static inline int
ltt_enumerate_network_ip_interface(struct ltt_probe_private_data *call_data)
{
	return 0;
}
133 | #endif /* CONFIG_INET */ | |
134 | ||
135 | ||
136 | static inline void | |
137 | ltt_enumerate_task_fd(struct ltt_probe_private_data *call_data, | |
138 | struct task_struct *t, char *tmp) | |
139 | { | |
140 | struct fdtable *fdt; | |
141 | struct file *filp; | |
142 | unsigned int i; | |
143 | const unsigned char *path; | |
144 | ||
145 | if (!t->files) | |
146 | return; | |
147 | ||
148 | spin_lock(&t->files->file_lock); | |
149 | fdt = files_fdtable(t->files); | |
150 | for (i = 0; i < fdt->max_fds; i++) { | |
151 | filp = fcheck_files(t->files, i); | |
152 | if (!filp) | |
153 | continue; | |
154 | path = d_path(&filp->f_path, tmp, PAGE_SIZE); | |
155 | /* Make sure we give at least some info */ | |
156 | __trace_mark(0, fd_state, file_descriptor, call_data, | |
157 | "filename %s pid %d fd %u", | |
158 | (IS_ERR(path))?(filp->f_dentry->d_name.name):(path), | |
159 | t->pid, i); | |
160 | } | |
161 | spin_unlock(&t->files->file_lock); | |
162 | } | |
163 | ||
164 | static inline int | |
165 | ltt_enumerate_file_descriptors(struct ltt_probe_private_data *call_data) | |
166 | { | |
167 | struct task_struct *t = &init_task; | |
168 | char *tmp = (char *)__get_free_page(GFP_KERNEL); | |
169 | ||
170 | /* Enumerate active file descriptors */ | |
171 | do { | |
172 | read_lock(&tasklist_lock); | |
173 | if (t != &init_task) | |
174 | atomic_dec(&t->usage); | |
175 | t = next_task(t); | |
176 | atomic_inc(&t->usage); | |
177 | read_unlock(&tasklist_lock); | |
178 | task_lock(t); | |
179 | ltt_enumerate_task_fd(call_data, t, tmp); | |
180 | task_unlock(t); | |
181 | } while (t != &init_task); | |
182 | free_page((unsigned long)tmp); | |
183 | return 0; | |
184 | } | |
185 | ||
186 | static inline void | |
187 | ltt_enumerate_task_vm_maps(struct ltt_probe_private_data *call_data, | |
188 | struct task_struct *t) | |
189 | { | |
190 | struct mm_struct *mm; | |
191 | struct vm_area_struct *map; | |
192 | unsigned long ino; | |
193 | ||
194 | /* get_task_mm does a task_lock... */ | |
195 | mm = get_task_mm(t); | |
196 | if (!mm) | |
197 | return; | |
198 | ||
199 | map = mm->mmap; | |
200 | if (map) { | |
201 | down_read(&mm->mmap_sem); | |
202 | while (map) { | |
203 | if (map->vm_file) | |
204 | ino = map->vm_file->f_dentry->d_inode->i_ino; | |
205 | else | |
206 | ino = 0; | |
207 | __trace_mark(0, vm_state, vm_map, call_data, | |
208 | "pid %d start %lu end %lu flags %lu " | |
209 | "pgoff %lu inode %lu", | |
210 | t->pid, map->vm_start, map->vm_end, | |
211 | map->vm_flags, map->vm_pgoff << PAGE_SHIFT, | |
212 | ino); | |
213 | map = map->vm_next; | |
214 | } | |
215 | up_read(&mm->mmap_sem); | |
216 | } | |
217 | mmput(mm); | |
218 | } | |
219 | ||
220 | static inline int | |
221 | ltt_enumerate_vm_maps(struct ltt_probe_private_data *call_data) | |
222 | { | |
223 | struct task_struct *t = &init_task; | |
224 | ||
225 | do { | |
226 | read_lock(&tasklist_lock); | |
227 | if (t != &init_task) | |
228 | atomic_dec(&t->usage); | |
229 | t = next_task(t); | |
230 | atomic_inc(&t->usage); | |
231 | read_unlock(&tasklist_lock); | |
232 | ltt_enumerate_task_vm_maps(call_data, t); | |
233 | } while (t != &init_task); | |
234 | return 0; | |
235 | } | |
236 | ||
237 | #ifdef CONFIG_GENERIC_HARDIRQS | |
238 | static inline void list_interrupts(struct ltt_probe_private_data *call_data) | |
239 | { | |
240 | unsigned int irq; | |
241 | unsigned long flags = 0; | |
242 | struct irq_desc *desc; | |
243 | ||
244 | /* needs irq_desc */ | |
245 | for_each_irq_desc(irq, desc) { | |
246 | struct irqaction *action; | |
247 | const char *irq_chip_name = | |
248 | desc->chip->name ? : "unnamed_irq_chip"; | |
249 | ||
250 | local_irq_save(flags); | |
251 | raw_spin_lock(&desc->lock); | |
252 | for (action = desc->action; action; action = action->next) | |
253 | __trace_mark(0, irq_state, interrupt, call_data, | |
254 | "name %s action %s irq_id %u", | |
255 | irq_chip_name, action->name, irq); | |
256 | raw_spin_unlock(&desc->lock); | |
257 | local_irq_restore(flags); | |
258 | } | |
259 | } | |
260 | #else | |
/*
 * Stub used when CONFIG_GENERIC_HARDIRQS is not set: no interrupt
 * information is dumped.
 */
static inline void list_interrupts(struct ltt_probe_private_data *call_data)
{
}
264 | #endif | |
265 | ||
266 | static inline int | |
267 | ltt_enumerate_process_states(struct ltt_probe_private_data *call_data) | |
268 | { | |
269 | struct task_struct *t = &init_task; | |
270 | struct task_struct *p = t; | |
271 | enum lttng_process_status status; | |
272 | enum lttng_thread_type type; | |
273 | enum lttng_execution_mode mode; | |
274 | enum lttng_execution_submode submode; | |
275 | ||
276 | do { | |
277 | mode = LTTNG_MODE_UNKNOWN; | |
278 | submode = LTTNG_UNKNOWN; | |
279 | ||
280 | read_lock(&tasklist_lock); | |
281 | if (t != &init_task) { | |
282 | atomic_dec(&t->usage); | |
283 | t = next_thread(t); | |
284 | } | |
285 | if (t == p) { | |
286 | p = next_task(t); | |
287 | t = p; | |
288 | } | |
289 | atomic_inc(&t->usage); | |
290 | read_unlock(&tasklist_lock); | |
291 | ||
292 | task_lock(t); | |
293 | ||
294 | if (t->exit_state == EXIT_ZOMBIE) | |
295 | status = LTTNG_ZOMBIE; | |
296 | else if (t->exit_state == EXIT_DEAD) | |
297 | status = LTTNG_DEAD; | |
298 | else if (t->state == TASK_RUNNING) { | |
299 | /* Is this a forked child that has not run yet? */ | |
300 | if (list_empty(&t->rt.run_list)) | |
301 | status = LTTNG_WAIT_FORK; | |
302 | else | |
303 | /* | |
304 | * All tasks are considered as wait_cpu; | |
305 | * the viewer will sort out if the task was | |
306 | * really running at this time. | |
307 | */ | |
308 | status = LTTNG_WAIT_CPU; | |
309 | } else if (t->state & | |
310 | (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { | |
311 | /* Task is waiting for something to complete */ | |
312 | status = LTTNG_WAIT; | |
313 | } else | |
314 | status = LTTNG_UNNAMED; | |
315 | submode = LTTNG_NONE; | |
316 | ||
317 | /* | |
318 | * Verification of t->mm is to filter out kernel threads; | |
319 | * Viewer will further filter out if a user-space thread was | |
320 | * in syscall mode or not. | |
321 | */ | |
322 | if (t->mm) | |
323 | type = LTTNG_USER_THREAD; | |
324 | else | |
325 | type = LTTNG_KERNEL_THREAD; | |
326 | ||
327 | __trace_mark(0, task_state, process_state, call_data, | |
328 | "pid %d parent_pid %d name %s type %d mode %d " | |
329 | "submode %d status %d tgid %d", | |
330 | t->pid, t->parent->pid, t->comm, | |
331 | type, mode, submode, status, t->tgid); | |
332 | task_unlock(t); | |
333 | } while (t != &init_task); | |
334 | ||
335 | return 0; | |
336 | } | |
337 | ||
/*
 * Install @callback as the kprobes table dump hook invoked by
 * do_ltt_statedump(). The update is done under statedump_cb_mutex, so it
 * cannot overlap with an invocation of the previous hook.
 *
 * NOTE(review): @callback is stored as-is and later called without a NULL
 * check; callers presumably never pass NULL -- confirm.
 */
void ltt_statedump_register_kprobes_dump(void (*callback)(void *call_data))
{
	mutex_lock(&statedump_cb_mutex);
	ltt_dump_kprobes_table_cb = callback;
	mutex_unlock(&statedump_cb_mutex);
}
344 | EXPORT_SYMBOL_GPL(ltt_statedump_register_kprobes_dump); | |
345 | ||
/*
 * Remove the kprobes table dump hook by resetting it to empty_cb, under
 * statedump_cb_mutex.
 *
 * @callback is not examined: whatever hook is currently installed is
 * removed, whether or not it matches.
 */
void ltt_statedump_unregister_kprobes_dump(void (*callback)(void *call_data))
{
	mutex_lock(&statedump_cb_mutex);
	ltt_dump_kprobes_table_cb = empty_cb;
	mutex_unlock(&statedump_cb_mutex);
}
352 | EXPORT_SYMBOL_GPL(ltt_statedump_unregister_kprobes_dump); | |
353 | ||
/*
 * Per-CPU work item body queued by do_ltt_statedump(). It only decrements
 * kernel_threads_to_run; the work item that brings the counter to zero
 * wakes up the waiter on statedump_wq.
 */
void ltt_statedump_work_func(struct work_struct *work)
{
	if (atomic_dec_and_test(&kernel_threads_to_run))
		/* If we are the last thread, wake up do_ltt_statedump */
		wake_up(&statedump_wq);
}
360 | ||
361 | static int do_ltt_statedump(struct ltt_probe_private_data *call_data) | |
362 | { | |
363 | int cpu; | |
364 | struct module *cb_owner; | |
365 | ||
366 | printk(KERN_DEBUG "LTT state dump thread start\n"); | |
367 | ltt_enumerate_process_states(call_data); | |
368 | ltt_enumerate_file_descriptors(call_data); | |
369 | list_modules(call_data); | |
370 | ltt_enumerate_vm_maps(call_data); | |
371 | list_interrupts(call_data); | |
372 | ltt_enumerate_network_ip_interface(call_data); | |
373 | ltt_dump_swap_files(call_data); | |
374 | ltt_dump_sys_call_table(call_data); | |
375 | ltt_dump_softirq_vec(call_data); | |
376 | ltt_dump_idt_table(call_data); | |
377 | ||
378 | mutex_lock(&statedump_cb_mutex); | |
379 | ||
380 | cb_owner = __module_address((unsigned long)ltt_dump_kprobes_table_cb); | |
381 | __module_get(cb_owner); | |
382 | ltt_dump_kprobes_table_cb(call_data); | |
383 | module_put(cb_owner); | |
384 | ||
385 | mutex_unlock(&statedump_cb_mutex); | |
386 | ||
387 | /* | |
388 | * Fire off a work queue on each CPU. Their sole purpose in life | |
389 | * is to guarantee that each CPU has been in a state where is was in | |
390 | * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). | |
391 | */ | |
392 | get_online_cpus(); | |
393 | atomic_set(&kernel_threads_to_run, num_online_cpus()); | |
394 | for_each_online_cpu(cpu) { | |
395 | INIT_DELAYED_WORK(&cpu_work[cpu], ltt_statedump_work_func); | |
396 | schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); | |
397 | } | |
398 | /* Wait for all threads to run */ | |
399 | __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) != 0)); | |
400 | put_online_cpus(); | |
401 | /* Our work is done */ | |
402 | printk(KERN_DEBUG "LTT state dump end\n"); | |
403 | __trace_mark(0, global_state, statedump_end, | |
404 | call_data, MARK_NOARGS); | |
405 | return 0; | |
406 | } | |
407 | ||
408 | /* | |
409 | * Called with trace lock held. | |
410 | */ | |
411 | int ltt_statedump_start(struct ltt_trace *trace) | |
412 | { | |
413 | struct ltt_probe_private_data call_data; | |
414 | printk(KERN_DEBUG "LTT state dump begin\n"); | |
415 | ||
416 | call_data.trace = trace; | |
417 | call_data.serializer = NULL; | |
418 | return do_ltt_statedump(&call_data); | |
419 | } | |
420 | ||
421 | static int __init statedump_init(void) | |
422 | { | |
423 | int ret; | |
424 | printk(KERN_DEBUG "LTT : State dump init\n"); | |
425 | ret = ltt_module_register(LTT_FUNCTION_STATEDUMP, | |
426 | ltt_statedump_start, THIS_MODULE); | |
427 | return ret; | |
428 | } | |
429 | ||
/* Module exit: unregister the state dump function from the tracer. */
static void __exit statedump_exit(void)
{
	printk(KERN_DEBUG "LTT : State dump exit\n");
	ltt_module_unregister(LTT_FUNCTION_STATEDUMP);
}
435 | ||
436 | module_init(statedump_init) | |
437 | module_exit(statedump_exit) | |
438 | ||
439 | MODULE_LICENSE("GPL and additional rights"); | |
440 | MODULE_AUTHOR("Jean-Hugues Deschenes"); | |
441 | MODULE_DESCRIPTION("Linux Trace Toolkit Statedump"); |