Commit | Line | Data |
---|---|---|
c337ddc2 | 1 | /* |
886d51a3 MD |
2 | * lttng-statedump.c |
3 | * | |
c337ddc2 MD |
4 | * Linux Trace Toolkit Next Generation Kernel State Dump |
5 | * | |
6 | * Copyright 2005 Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca> | |
7 | * Copyright 2006-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | |
8 | * | |
886d51a3 MD |
9 | * This library is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; only | |
12 | * version 2.1 of the License. | |
13 | * | |
14 | * This library is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with this library; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | * | |
c337ddc2 MD |
23 | * Changes: |
24 | * Eric Clement: Add listing of network IP interface | |
25 | * 2006, 2007 Mathieu Desnoyers Fix kernel threads | |
26 | * Various updates | |
c337ddc2 MD |
27 | */ |
28 | ||
29 | #include <linux/init.h> | |
30 | #include <linux/module.h> | |
31 | #include <linux/netlink.h> | |
32 | #include <linux/inet.h> | |
33 | #include <linux/ip.h> | |
34 | #include <linux/kthread.h> | |
35 | #include <linux/proc_fs.h> | |
36 | #include <linux/file.h> | |
37 | #include <linux/interrupt.h> | |
38 | #include <linux/irqnr.h> | |
39 | #include <linux/cpu.h> | |
40 | #include <linux/netdevice.h> | |
41 | #include <linux/inetdevice.h> | |
42 | #include <linux/sched.h> | |
43 | #include <linux/mm.h> | |
44 | #include <linux/fdtable.h> | |
45 | #include <linux/swap.h> | |
46 | #include <linux/wait.h> | |
47 | #include <linux/mutex.h> | |
f0dbdefb | 48 | #include <linux/device.h> |
c337ddc2 MD |
49 | |
50 | #include "lttng-events.h" | |
13ab8b0a | 51 | #include "lttng-tracer.h" |
c337ddc2 | 52 | #include "wrapper/irqdesc.h" |
3a523f5b | 53 | #include "wrapper/spinlock.h" |
361c023a | 54 | #include "wrapper/fdtable.h" |
3247f8bd | 55 | #include "wrapper/nsproxy.h" |
29784493 | 56 | #include "wrapper/irq.h" |
dd8d5afb | 57 | #include "wrapper/tracepoint.h" |
f0dbdefb | 58 | #include "wrapper/genhd.h" |
c337ddc2 | 59 | |
29784493 | 60 | #ifdef CONFIG_LTTNG_HAS_LIST_IRQ |
c337ddc2 MD |
61 | #include <linux/irq.h> |
62 | #endif | |
63 | ||
/*
 * Define the tracepoints, but do not build the probes.
 *
 * CREATE_TRACE_POINTS, TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are set
 * right before the event header is included.
 * NOTE(review): presumably the header relies on these macros being defined
 * first -- keep this ordering unless confirmed otherwise.
 */
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module
#define TRACE_INCLUDE_FILE lttng-statedump
#include "instrumentation/events/lttng-module/lttng-statedump.h"

/* One tracepoint per kind of state dump event emitted by this file. */
DEFINE_TRACE(lttng_statedump_block_device);
DEFINE_TRACE(lttng_statedump_end);
DEFINE_TRACE(lttng_statedump_interrupt);
DEFINE_TRACE(lttng_statedump_file_descriptor);
DEFINE_TRACE(lttng_statedump_start);
DEFINE_TRACE(lttng_statedump_process_state);
DEFINE_TRACE(lttng_statedump_network_interface);
77 | ||
361c023a MD |
/*
 * Context handed to lttng_dump_one_fd() for each file descriptor visited
 * by lttng_iterate_fd().
 */
struct lttng_fd_ctx {
	char *page;			/* scratch buffer given to d_path(), PAGE_SIZE bytes are used */
	struct lttng_session *session;	/* session passed to the file descriptor tracepoint */
	struct task_struct *p;		/* task whose file descriptors are being dumped */
};
83 | ||
c337ddc2 MD |
/*
 * Protected by the trace lock.
 *
 * cpu_work:              per-CPU delayed work items, initialized and queued
 *                        on every online CPU by do_lttng_statedump().
 * statedump_wq:          wait queue do_lttng_statedump() sleeps on; woken by
 *                        lttng_statedump_work_func().
 * kernel_threads_to_run: number of queued work items that have not run yet.
 */
static struct delayed_work cpu_work[NR_CPUS];
static DECLARE_WAIT_QUEUE_HEAD(statedump_wq);
static atomic_t kernel_threads_to_run;
90 | ||
/*
 * Thread type recorded in process state events. In this file a task with
 * a non-NULL ->mm is reported as a user thread, any other task as a
 * kernel thread.
 */
enum lttng_thread_type {
	LTTNG_USER_THREAD = 0,
	LTTNG_KERNEL_THREAD = 1,
};
95 | ||
/*
 * Execution mode recorded in process state events. This file only ever
 * reports LTTNG_MODE_UNKNOWN.
 * NOTE(review): the other values are presumably interpreted by the trace
 * viewer -- confirm before renumbering.
 */
enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};
104 | ||
/*
 * Execution submode recorded in process state events. The process state
 * enumeration in this file always ends up reporting LTTNG_NONE.
 */
enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};
109 | ||
/*
 * Process status recorded in process state events. Values assigned by
 * lttng_enumerate_process_states():
 *   LTTNG_ZOMBIE    - exit_state is EXIT_ZOMBIE
 *   LTTNG_DEAD      - exit_state is EXIT_DEAD
 *   LTTNG_WAIT_FORK - TASK_RUNNING with an empty rt.run_list
 *   LTTNG_WAIT_CPU  - TASK_RUNNING otherwise
 *   LTTNG_WAIT      - TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
 *   LTTNG_UNNAMED   - anything else
 * LTTNG_EXIT and LTTNG_RUN are not assigned anywhere in this file.
 */
enum lttng_process_status {
	LTTNG_UNNAMED = 0,
	LTTNG_WAIT_FORK = 1,
	LTTNG_WAIT_CPU = 2,
	LTTNG_EXIT = 3,
	LTTNG_ZOMBIE = 4,
	LTTNG_WAIT = 5,
	LTTNG_RUN = 6,
	LTTNG_DEAD = 7,
};
120 | ||
f0dbdefb HD |
121 | static |
122 | int lttng_enumerate_block_devices(struct lttng_session *session) | |
123 | { | |
124 | struct class *ptr_block_class; | |
125 | struct device_type *ptr_disk_type; | |
126 | struct class_dev_iter iter; | |
127 | struct device *dev; | |
128 | ||
129 | ptr_block_class = wrapper_get_block_class(); | |
130 | if (!ptr_block_class) | |
131 | return -ENOSYS; | |
132 | ptr_disk_type = wrapper_get_disk_type(); | |
133 | if (!ptr_disk_type) { | |
134 | return -ENOSYS; | |
135 | } | |
136 | class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type); | |
137 | while ((dev = class_dev_iter_next(&iter))) { | |
138 | struct disk_part_iter piter; | |
139 | struct gendisk *disk = dev_to_disk(dev); | |
140 | struct hd_struct *part; | |
141 | ||
142 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | |
143 | while ((part = disk_part_iter_next(&piter))) { | |
144 | char name_buf[BDEVNAME_SIZE]; | |
145 | char *p; | |
146 | ||
147 | p = wrapper_disk_name(disk, part->partno, name_buf); | |
148 | if (!p) { | |
149 | disk_part_iter_exit(&piter); | |
150 | class_dev_iter_exit(&iter); | |
151 | return -ENOSYS; | |
152 | } | |
153 | trace_lttng_statedump_block_device(session, | |
154 | part_devt(part), name_buf); | |
155 | } | |
156 | disk_part_iter_exit(&piter); | |
157 | } | |
158 | class_dev_iter_exit(&iter); | |
159 | return 0; | |
160 | } | |
161 | ||
c337ddc2 | 162 | #ifdef CONFIG_INET |
f0dbdefb | 163 | |
c337ddc2 MD |
164 | static |
165 | void lttng_enumerate_device(struct lttng_session *session, | |
166 | struct net_device *dev) | |
167 | { | |
168 | struct in_device *in_dev; | |
169 | struct in_ifaddr *ifa; | |
170 | ||
171 | if (dev->flags & IFF_UP) { | |
172 | in_dev = in_dev_get(dev); | |
173 | if (in_dev) { | |
174 | for (ifa = in_dev->ifa_list; ifa != NULL; | |
175 | ifa = ifa->ifa_next) { | |
176 | trace_lttng_statedump_network_interface( | |
177 | session, dev, ifa); | |
178 | } | |
179 | in_dev_put(in_dev); | |
180 | } | |
181 | } else { | |
182 | trace_lttng_statedump_network_interface( | |
183 | session, dev, NULL); | |
184 | } | |
185 | } | |
186 | ||
187 | static | |
188 | int lttng_enumerate_network_ip_interface(struct lttng_session *session) | |
189 | { | |
190 | struct net_device *dev; | |
191 | ||
192 | read_lock(&dev_base_lock); | |
193 | for_each_netdev(&init_net, dev) | |
194 | lttng_enumerate_device(session, dev); | |
195 | read_unlock(&dev_base_lock); | |
196 | ||
197 | return 0; | |
198 | } | |
199 | #else /* CONFIG_INET */ | |
/*
 * Stub used when CONFIG_INET is not set: there is nothing to enumerate,
 * so report success.
 */
static inline
int lttng_enumerate_network_ip_interface(struct lttng_session *session)
{
	return 0;
}
205 | #endif /* CONFIG_INET */ | |
206 | ||
361c023a MD |
207 | static |
208 | int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd) | |
209 | { | |
210 | const struct lttng_fd_ctx *ctx = p; | |
211 | const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE); | |
212 | ||
213 | if (IS_ERR(s)) { | |
214 | struct dentry *dentry = file->f_path.dentry; | |
215 | ||
216 | /* Make sure we give at least some info */ | |
217 | spin_lock(&dentry->d_lock); | |
218 | trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, | |
219 | dentry->d_name.name); | |
220 | spin_unlock(&dentry->d_lock); | |
221 | goto end; | |
222 | } | |
223 | trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, s); | |
224 | end: | |
225 | return 0; | |
226 | } | |
c337ddc2 MD |
227 | |
228 | static | |
229 | void lttng_enumerate_task_fd(struct lttng_session *session, | |
230 | struct task_struct *p, char *tmp) | |
231 | { | |
361c023a | 232 | struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .p = p }; |
c337ddc2 MD |
233 | |
234 | task_lock(p); | |
361c023a | 235 | lttng_iterate_fd(p->files, 0, lttng_dump_one_fd, &ctx); |
c337ddc2 MD |
236 | task_unlock(p); |
237 | } | |
238 | ||
239 | static | |
240 | int lttng_enumerate_file_descriptors(struct lttng_session *session) | |
241 | { | |
242 | struct task_struct *p; | |
243 | char *tmp = (char *) __get_free_page(GFP_KERNEL); | |
244 | ||
245 | /* Enumerate active file descriptors */ | |
246 | rcu_read_lock(); | |
247 | for_each_process(p) | |
248 | lttng_enumerate_task_fd(session, p, tmp); | |
249 | rcu_read_unlock(); | |
250 | free_page((unsigned long) tmp); | |
251 | return 0; | |
252 | } | |
253 | ||
0658bdda MD |
#if 0
/*
 * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section
 * (scheduling in atomic). Normally, the tasklist lock protects this kind of
 * iteration, but it is not exported to modules.
 *
 * The code below is compiled out for that reason; the matching call in
 * do_lttng_statedump() is commented out as well.
 */

/* Emit one vm_map event per mapping of task @p (inode number 0 if not file-backed). */
static
void lttng_enumerate_task_vm_maps(struct lttng_session *session,
		struct task_struct *p)
{
	struct mm_struct *mm;
	struct vm_area_struct *map;
	unsigned long ino;

	/* get_task_mm does a task_lock... */
	mm = get_task_mm(p);
	if (!mm)
		return;

	map = mm->mmap;
	if (map) {
		down_read(&mm->mmap_sem);
		while (map) {
			if (map->vm_file)
				ino = map->vm_file->f_dentry->d_inode->i_ino;
			else
				ino = 0;
			trace_lttng_statedump_vm_map(session, p, map, ino);
			map = map->vm_next;
		}
		up_read(&mm->mmap_sem);
	}
	/* Release the reference taken by get_task_mm(). */
	mmput(mm);
}

/* Walk all processes under RCU and dump their memory mappings. Always returns 0. */
static
int lttng_enumerate_vm_maps(struct lttng_session *session)
{
	struct task_struct *p;

	rcu_read_lock();
	for_each_process(p)
		lttng_enumerate_task_vm_maps(session, p);
	rcu_read_unlock();
	return 0;
}
#endif
c337ddc2 | 301 | |
29784493 | 302 | #ifdef CONFIG_LTTNG_HAS_LIST_IRQ |
47faec4b JN |
303 | |
304 | #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)) | |
305 | #define irq_desc_get_chip(desc) get_irq_desc_chip(desc) | |
306 | #endif | |
307 | ||
c337ddc2 MD |
308 | static |
309 | void lttng_list_interrupts(struct lttng_session *session) | |
310 | { | |
311 | unsigned int irq; | |
312 | unsigned long flags = 0; | |
313 | struct irq_desc *desc; | |
314 | ||
315 | #define irq_to_desc wrapper_irq_to_desc | |
316 | /* needs irq_desc */ | |
317 | for_each_irq_desc(irq, desc) { | |
318 | struct irqaction *action; | |
319 | const char *irq_chip_name = | |
320 | irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip"; | |
321 | ||
322 | local_irq_save(flags); | |
3a523f5b | 323 | wrapper_desc_spin_lock(&desc->lock); |
c337ddc2 MD |
324 | for (action = desc->action; action; action = action->next) { |
325 | trace_lttng_statedump_interrupt(session, | |
326 | irq, irq_chip_name, action); | |
327 | } | |
3a523f5b | 328 | wrapper_desc_spin_unlock(&desc->lock); |
c337ddc2 MD |
329 | local_irq_restore(flags); |
330 | } | |
331 | #undef irq_to_desc | |
332 | } | |
333 | #else | |
/*
 * Stub used when CONFIG_LTTNG_HAS_LIST_IRQ is not defined: no interrupt
 * events are emitted.
 */
static inline
void lttng_list_interrupts(struct lttng_session *session)
{
}
338 | #endif | |
339 | ||
73e8ba37 JD |
340 | static |
341 | void lttng_statedump_process_ns(struct lttng_session *session, | |
342 | struct task_struct *p, | |
343 | enum lttng_thread_type type, | |
344 | enum lttng_execution_mode mode, | |
345 | enum lttng_execution_submode submode, | |
346 | enum lttng_process_status status) | |
347 | { | |
348 | struct nsproxy *proxy; | |
349 | struct pid_namespace *pid_ns; | |
350 | ||
351 | rcu_read_lock(); | |
352 | proxy = task_nsproxy(p); | |
353 | if (proxy) { | |
3247f8bd | 354 | pid_ns = lttng_get_proxy_pid_ns(proxy); |
73e8ba37 JD |
355 | do { |
356 | trace_lttng_statedump_process_state(session, | |
357 | p, type, mode, submode, status, pid_ns); | |
af73f727 | 358 | pid_ns = pid_ns->parent; |
73e8ba37 JD |
359 | } while (pid_ns); |
360 | } else { | |
361 | trace_lttng_statedump_process_state(session, | |
362 | p, type, mode, submode, status, NULL); | |
363 | } | |
364 | rcu_read_unlock(); | |
365 | } | |
366 | ||
c337ddc2 MD |
367 | static |
368 | int lttng_enumerate_process_states(struct lttng_session *session) | |
369 | { | |
370 | struct task_struct *g, *p; | |
371 | ||
372 | rcu_read_lock(); | |
373 | for_each_process(g) { | |
374 | p = g; | |
375 | do { | |
376 | enum lttng_execution_mode mode = | |
377 | LTTNG_MODE_UNKNOWN; | |
378 | enum lttng_execution_submode submode = | |
379 | LTTNG_UNKNOWN; | |
380 | enum lttng_process_status status; | |
381 | enum lttng_thread_type type; | |
382 | ||
383 | task_lock(p); | |
384 | if (p->exit_state == EXIT_ZOMBIE) | |
385 | status = LTTNG_ZOMBIE; | |
386 | else if (p->exit_state == EXIT_DEAD) | |
387 | status = LTTNG_DEAD; | |
388 | else if (p->state == TASK_RUNNING) { | |
389 | /* Is this a forked child that has not run yet? */ | |
390 | if (list_empty(&p->rt.run_list)) | |
391 | status = LTTNG_WAIT_FORK; | |
392 | else | |
393 | /* | |
394 | * All tasks are considered as wait_cpu; | |
395 | * the viewer will sort out if the task | |
396 | * was really running at this time. | |
397 | */ | |
398 | status = LTTNG_WAIT_CPU; | |
399 | } else if (p->state & | |
400 | (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { | |
401 | /* Task is waiting for something to complete */ | |
402 | status = LTTNG_WAIT; | |
403 | } else | |
404 | status = LTTNG_UNNAMED; | |
405 | submode = LTTNG_NONE; | |
406 | ||
407 | /* | |
408 | * Verification of t->mm is to filter out kernel | |
409 | * threads; Viewer will further filter out if a | |
410 | * user-space thread was in syscall mode or not. | |
411 | */ | |
412 | if (p->mm) | |
413 | type = LTTNG_USER_THREAD; | |
414 | else | |
415 | type = LTTNG_KERNEL_THREAD; | |
73e8ba37 | 416 | lttng_statedump_process_ns(session, |
c337ddc2 MD |
417 | p, type, mode, submode, status); |
418 | task_unlock(p); | |
419 | } while_each_thread(g, p); | |
420 | } | |
421 | rcu_read_unlock(); | |
422 | ||
423 | return 0; | |
424 | } | |
425 | ||
426 | static | |
427 | void lttng_statedump_work_func(struct work_struct *work) | |
428 | { | |
429 | if (atomic_dec_and_test(&kernel_threads_to_run)) | |
430 | /* If we are the last thread, wake up do_lttng_statedump */ | |
431 | wake_up(&statedump_wq); | |
432 | } | |
433 | ||
434 | static | |
435 | int do_lttng_statedump(struct lttng_session *session) | |
436 | { | |
437 | int cpu; | |
438 | ||
c337ddc2 MD |
439 | trace_lttng_statedump_start(session); |
440 | lttng_enumerate_process_states(session); | |
441 | lttng_enumerate_file_descriptors(session); | |
0658bdda | 442 | /* FIXME lttng_enumerate_vm_maps(session); */ |
c337ddc2 MD |
443 | lttng_list_interrupts(session); |
444 | lttng_enumerate_network_ip_interface(session); | |
f0dbdefb | 445 | lttng_enumerate_block_devices(session); |
c337ddc2 MD |
446 | |
447 | /* TODO lttng_dump_idt_table(session); */ | |
448 | /* TODO lttng_dump_softirq_vec(session); */ | |
449 | /* TODO lttng_list_modules(session); */ | |
450 | /* TODO lttng_dump_swap_files(session); */ | |
451 | ||
452 | /* | |
453 | * Fire off a work queue on each CPU. Their sole purpose in life | |
454 | * is to guarantee that each CPU has been in a state where is was in | |
455 | * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). | |
456 | */ | |
457 | get_online_cpus(); | |
458 | atomic_set(&kernel_threads_to_run, num_online_cpus()); | |
459 | for_each_online_cpu(cpu) { | |
460 | INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func); | |
461 | schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); | |
462 | } | |
463 | /* Wait for all threads to run */ | |
7a7128e0 | 464 | __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0)); |
c337ddc2 MD |
465 | put_online_cpus(); |
466 | /* Our work is done */ | |
c337ddc2 MD |
467 | trace_lttng_statedump_end(session); |
468 | return 0; | |
469 | } | |
470 | ||
/*
 * Exported entry point: run a state dump for @session and return the
 * result of do_lttng_statedump().
 *
 * Called with session mutex held.
 */
int lttng_statedump_start(struct lttng_session *session)
{
	return do_lttng_statedump(session);
}
EXPORT_SYMBOL_GPL(lttng_statedump_start);
479 | ||
dd8d5afb MD |
/*
 * Module init: attempt the tracepoint signature fixup and always report
 * success.
 */
static
int __init lttng_statedump_init(void)
{
	/*
	 * Allow module to load even if the fixup cannot be done. This
	 * will allow seamless transition when the underlying issue fix
	 * is merged into the Linux kernel, and when tracepoint.c
	 * "tracepoint_module_notify" is turned into a static function.
	 */
	(void) wrapper_lttng_fixup_sig(THIS_MODULE);
	return 0;
}

module_init(lttng_statedump_init);
494 | ||
461277e7 MD |
/* Module exit: intentionally empty, nothing is done on unload. */
static
void __exit lttng_statedump_exit(void)
{
}

module_exit(lttng_statedump_exit);
501 | ||
c337ddc2 MD |
502 | MODULE_LICENSE("GPL and additional rights"); |
503 | MODULE_AUTHOR("Jean-Hugues Deschenes"); | |
504 | MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Statedump"); | |
13ab8b0a MD |
505 | MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "." |
506 | __stringify(LTTNG_MODULES_MINOR_VERSION) "." | |
507 | __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION) | |
508 | LTTNG_MODULES_EXTRAVERSION); |