From 1c8284ebdbd119314b8f01e442e64cf5fd4b9fe6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 3 Sep 2010 08:08:18 -0400 Subject: [PATCH 1/1] LTTng modularization, import of lttng 0.226 Will match kernel tree "lttng 0.227". Signed-off-by: Mathieu Desnoyers --- Makefile | 42 ++ ltt-ascii.c | 586 ++++++++++++++ ltt-channels.c | 388 ++++++++++ ltt-core.c | 108 +++ ltt-event-header.c | 92 +++ ltt-filter.c | 55 ++ ltt-kprobes.c | 493 ++++++++++++ ltt-marker-control.c | 254 +++++++ ltt-relay-alloc.c | 732 ++++++++++++++++++ ltt-relay-lockless.c | 1366 +++++++++++++++++++++++++++++++++ ltt-relay-lockless.h | 549 ++++++++++++++ ltt-relay-splice.c | 159 ++++ ltt-relay-vfs.c | 244 ++++++ ltt-relay.h | 377 +++++++++ ltt-serialize.c | 969 ++++++++++++++++++++++++ ltt-statedump.c | 441 +++++++++++ ltt-trace-control.c | 1426 +++++++++++++++++++++++++++++++++++ ltt-tracer-core.h | 50 ++ ltt-tracer.c | 1293 +++++++++++++++++++++++++++++++ ltt-tracer.h | 663 ++++++++++++++++ ltt-type-serializer.c | 113 +++ ltt-type-serializer.h | 187 +++++ ltt-userspace-event.c | 122 +++ probes/Makefile | 47 ++ probes/block-trace.c | 309 ++++++++ probes/ext4-trace.c | 611 +++++++++++++++ probes/fs-trace.c | 158 ++++ probes/ipc-trace.c | 39 + probes/jbd2-trace.c | 208 +++++ probes/kernel-trace.c | 581 ++++++++++++++ probes/lockdep-trace.c | 60 ++ probes/mm-trace.c | 146 ++++ probes/net-extended-trace.c | 146 ++++ probes/net-trace.c | 406 ++++++++++ probes/pm-trace.c | 43 ++ probes/rcu-trace.c | 36 + probes/syscall-trace.c | 54 ++ probes/trap-trace.c | 56 ++ 38 files changed, 13609 insertions(+) create mode 100644 Makefile create mode 100644 ltt-ascii.c create mode 100644 ltt-channels.c create mode 100644 ltt-core.c create mode 100644 ltt-event-header.c create mode 100644 ltt-filter.c create mode 100644 ltt-kprobes.c create mode 100644 ltt-marker-control.c create mode 100644 ltt-relay-alloc.c create mode 100644 ltt-relay-lockless.c create mode 100644 ltt-relay-lockless.h create mode 100644 
ltt-relay-splice.c create mode 100644 ltt-relay-vfs.c create mode 100644 ltt-relay.h create mode 100644 ltt-serialize.c create mode 100644 ltt-statedump.c create mode 100644 ltt-trace-control.c create mode 100644 ltt-tracer-core.h create mode 100644 ltt-tracer.c create mode 100644 ltt-tracer.h create mode 100644 ltt-type-serializer.c create mode 100644 ltt-type-serializer.h create mode 100644 ltt-userspace-event.c create mode 100644 probes/Makefile create mode 100644 probes/block-trace.c create mode 100644 probes/ext4-trace.c create mode 100644 probes/fs-trace.c create mode 100644 probes/ipc-trace.c create mode 100644 probes/jbd2-trace.c create mode 100644 probes/kernel-trace.c create mode 100644 probes/lockdep-trace.c create mode 100644 probes/mm-trace.c create mode 100644 probes/net-extended-trace.c create mode 100644 probes/net-trace.c create mode 100644 probes/pm-trace.c create mode 100644 probes/rcu-trace.c create mode 100644 probes/syscall-trace.c create mode 100644 probes/trap-trace.c diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..b9f48ca9 --- /dev/null +++ b/Makefile @@ -0,0 +1,42 @@ +# +# Makefile for the LTT objects. 
+# + +ifneq ($(KERNELRELEASE),) +ifneq ($(CONFIG_MARKERS),) + +obj-m += ltt-core.o +obj-m += ltt-tracer.o +obj-m += ltt-marker-control.o + +obj-m += ltt-relay.o +ltt-relay-objs := ltt-relay-lockless.o ltt-relay-alloc.o ltt-relay-splice.o \ + ltt-relay-vfs.o ltt-event-header.o + +obj-m += ltt-serialize.o +obj-m += ltt-statedump.o +obj-m += ltt-type-serializer.o +obj-m += ltt-trace-control.o +obj-m += ltt-userspace-event.o +obj-m += ltt-filter.o +obj-m += ltt-kprobes.o +obj-m += probes/ +obj-m += ltt-ascii.o + +endif + +else + KERNELDIR ?= /lib/modules/$(shell uname -r)/build + PWD := $(shell pwd) + +default: + $(MAKE) -C $(KERNELDIR) M=$(PWD) modules + +modules_install: + $(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install + /sbin/depmod -a + +clean: + $(MAKE) -C $(KERNELDIR) M=$(PWD) clean + +endif diff --git a/ltt-ascii.c b/ltt-ascii.c new file mode 100644 index 00000000..975f94ad --- /dev/null +++ b/ltt-ascii.c @@ -0,0 +1,586 @@ +/* + * LTT ascii binary buffer to ascii converter. + * + * Copyright 2008 - 2009 Lai Jiangshan (laijs@cn.fujitsu.com) + * Copyright 2009 - Mathieu Desnoyers mathieu.desnoyers@polymtl.ca + * + * Dual LGPL v2.1/GPL v2 license. + */ + +/* + * TODO + * + * Move to new switch behavior: Wait for data for the duration of the + * timer interval + safety, if none is coming, consider that no activity occurred + * in the buffer. + * + * Fix case when having a text file open and destroying trace. + * + * - Automate periodical switch: + * + * The debugfs file "switch_timer" receives a timer period as parameter + * (e.g. echo 100 > switch_timer) to activate the timer per channel. This can + * also be accessed through the internal API _before the trace session starts_. + * This timer will ensure that we periodically have subbuffers to read, and + * therefore that the merge-sort does not wait endlessly for a subbuffer. + * + * - If a channel is switched and read without data, make sure it is still + * considered afterward (not removed from the queue). 
+ * + * - Create a ascii/tracename/ALL file to merge-sort all active channels. + * - Create a ascii/tracename/README file to contain the text output legend. + * - Remove leading zeroes from timestamps. + * - Enhance pretty-printing to make sure all types used for addesses output in + * the form 0xAB00000000 (not decimal). This is true for %p and 0x%...X. + * - Hotplug support + */ + + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer.h" +#include "ltt-relay.h" +#include "ltt-relay-lockless.h" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , a...) +#endif + +struct dentry *ltt_ascii_dir_dentry; +EXPORT_SYMBOL_GPL(ltt_ascii_dir_dentry); + +struct ltt_relay_iter; + +struct ltt_relay_cpu_iter { + /* cpu buffer information */ + struct ltt_chanbuf *buf; + struct ltt_relay_iter *iter; + int sb_ref; /* holding a reference to a subbuffer */ + long read_sb_offset; /* offset of the subbuffer read */ + + /* current event information */ + struct ltt_subbuffer_header *header; + long hdr_offset; /* event header offset */ + long payload_offset; /* event payload offset */ + u64 tsc; /* full 64-bits timestamp value */ + u32 data_size; + u16 chID; /* channel ID, const */ + u16 eID; +}; + +struct ltt_relay_iter { + struct ltt_relay_cpu_iter iter_cpu[NR_CPUS]; + struct ltt_chan *chan; + loff_t pos; + int cpu; + int nr_refs; +}; + +/* + * offset of 0 in subbuffer means "subbuf size" (filled subbuffer). 
+ */ +static int is_subbuffer_offset_end(struct ltt_relay_cpu_iter *citer, + long offset) +{ + struct ltt_chan *chan = container_of(citer->buf->a.chan, + struct ltt_chan, a); + long sub_offset = SUBBUF_OFFSET(offset - 1, chan) + 1; + + return (sub_offset <= citer->header->data_size); +} + +static u64 calculate_tsc(u64 pre_tsc, u64 read_tsc, unsigned int rflags) +{ + u64 new_tsc = read_tsc; + + if (rflags != LTT_RFLAG_ID_SIZE_TSC) { + BUG_ON(read_tsc >> LTT_TSC_BITS); + + new_tsc = (pre_tsc & ~LTT_TSC_MASK) + read_tsc; + if (read_tsc < (pre_tsc & LTT_TSC_MASK)) + new_tsc += 1UL << LTT_TSC_BITS; + } + + return new_tsc; +} + +/* + * calculate payload offset */ +static inline long calculate_payload_offset(long offset, u16 chID, u16 eID) +{ + const char *fmt; + + if (!ltt_get_alignment()) + return offset; + + fmt = marker_get_fmt_from_id(chID, eID); + BUG_ON(!fmt); + + return offset + ltt_fmt_largest_align(offset, fmt); +} + +static void update_new_event(struct ltt_relay_cpu_iter *citer, long hdr_offset) +{ + u64 read_tsc; + unsigned int rflags; + long tmp_offset; + + WARN_ON_ONCE(hdr_offset != citer->hdr_offset); + + tmp_offset = ltt_read_event_header(&citer->buf->a, hdr_offset, + &read_tsc, &citer->data_size, + &citer->eID, &rflags); + citer->payload_offset = calculate_payload_offset(tmp_offset, + citer->chID, + citer->eID); + + citer->tsc = calculate_tsc(citer->tsc, read_tsc, rflags); +} + +static void update_event_size(struct ltt_relay_cpu_iter *citer, long hdr_offset) +{ + char output[1]; + const char *fmt; + size_t data_size; + + if (citer->data_size != INT_MAX) + return; + + fmt = marker_get_fmt_from_id(citer->chID, citer->eID); + BUG_ON(!fmt); + ltt_serialize_printf(citer->buf, citer->payload_offset, + &data_size, output, 0, fmt); + citer->data_size = data_size; +} + +static void update_cpu_iter(struct ltt_relay_cpu_iter *citer, long hdr_offset) +{ + if (unlikely((!citer->sb_ref) + || is_subbuffer_offset_end(citer, hdr_offset))) { + citer->header = NULL; + 
return; + } + update_new_event(citer, hdr_offset); + update_event_size(citer, hdr_offset); +} + +/* + * returns 0 if we get a subbuffer reference. + * else, the buffer has not available data, try again later. + */ +static int subbuffer_start(struct ltt_relay_cpu_iter *citer, long *offset) +{ + int ret; + struct ltt_relay_iter *iter = citer->iter; + + ret = ltt_chanbuf_get_subbuf(citer->buf, offset); + if (!ret) { + citer->header = ltt_relay_read_offset_address(&citer->buf->a, + *offset); + citer->hdr_offset = (*offset) + ltt_sb_header_size(); + citer->tsc = citer->header->cycle_count_begin; + iter->nr_refs++; + citer->sb_ref = 1; + return 0; + } else { + if (ltt_chanbuf_is_finalized(citer->buf)) + return -ENODATA; + else + return -EAGAIN; + } +} + +static void subbuffer_stop(struct ltt_relay_cpu_iter *citer, + long offset) +{ + int ret; + struct ltt_relay_iter *iter = citer->iter; + + WARN_ON_ONCE(!citer->sb_ref); + ret = ltt_chanbuf_put_subbuf(citer->buf, offset); + WARN_ON_ONCE(ret); + citer->sb_ref = 0; + iter->nr_refs--; +} + +static void ltt_relay_advance_cpu_iter(struct ltt_relay_cpu_iter *citer) +{ + long old_offset = citer->payload_offset; + long new_offset = citer->payload_offset; + int ret; + + /* find that whether we read all data in this subbuffer */ + if (unlikely(is_subbuffer_offset_end(citer, + old_offset + citer->data_size))) { + DEBUGP(KERN_DEBUG "LTT ASCII stop cpu %d offset %lX\n", + citer->buf->a.cpu, citer->read_sb_offset); + subbuffer_stop(citer, citer->read_sb_offset); + for (;;) { + ret = subbuffer_start(citer, &citer->read_sb_offset); + DEBUGP(KERN_DEBUG + "LTT ASCII start cpu %d ret %d offset %lX\n", + citer->buf->a.cpu, ret, citer->read_sb_offset); + if (!ret || ret == -ENODATA) { + break; /* got data, or finalized */ + } else { /* -EAGAIN */ + if (signal_pending(current)) + break; + schedule_timeout_interruptible(1); + //TODO: check for no-data delay. take ref. 
break + } + } + } else { + new_offset += citer->data_size; + citer->hdr_offset = new_offset + ltt_align(new_offset, sizeof(struct ltt_event_header)); + DEBUGP(KERN_DEBUG + "LTT ASCII old_offset %lX new_offset %lX cpu %d\n", + old_offset, new_offset, citer->buf->a.cpu); + } + + update_cpu_iter(citer, citer->hdr_offset); +} + +static int cpu_iter_eof(struct ltt_relay_cpu_iter *citer) +{ + return !citer->sb_ref; +} + +static int ltt_relay_iter_eof(struct ltt_relay_iter *iter) +{ + return iter->nr_refs == 0; +} + +static void ltt_relay_advance_iter(struct ltt_relay_iter *iter) +{ + int i; + struct ltt_relay_cpu_iter *curr, *min = NULL; + iter->cpu = -1; + + /* + * find the event with the minimum tsc. + * TODO: use min-heep for 4096CPUS + */ + for_each_possible_cpu(i) { + curr = &iter->iter_cpu[i]; + + if (!curr->buf->a.allocated || !curr->header) + continue; + + if (cpu_iter_eof(curr)) + continue; + + if (!min || curr->tsc < min->tsc) { + min = curr; + iter->cpu = i; + } + } + + /* update cpu_iter for next ltt_relay_advance_iter() */ + if (min) + ltt_relay_advance_cpu_iter(min); +} + +static void *ascii_next(struct seq_file *m, void *v, loff_t *ppos) +{ + struct ltt_relay_iter *iter = m->private; + + WARN_ON_ONCE(!iter->nr_refs); + BUG_ON(v != iter); + + ltt_relay_advance_iter(iter); + return (ltt_relay_iter_eof(iter) || signal_pending(current)) + ? NULL : iter; +} + +static void *ascii_start(struct seq_file *m, loff_t *ppos) +{ + struct ltt_relay_iter *iter = m->private; + + ltt_relay_advance_iter(iter); + return (ltt_relay_iter_eof(iter) || signal_pending(current)) + ? 
NULL : iter; +} + +static void ascii_stop(struct seq_file *m, void *v) +{ +} + +static +int seq_serialize(struct seq_file *m, struct ltt_chanbuf *buf, + size_t buf_offset, const char *fmt, size_t *data_size) +{ + int len; + + if (m->count < m->size) { + len = ltt_serialize_printf(buf, buf_offset, data_size, + m->buf + m->count, + m->size - m->count, fmt); + if (m->count + len < m->size) { + m->count += len; + return 0; + } + } + + m->count = m->size; + return -1; +} + +static int ascii_show(struct seq_file *m, void *v) +{ + struct ltt_relay_iter *iter = v; + struct ltt_relay_cpu_iter *citer; + const char *name; + const char *fmt; + unsigned long long tsc; + size_t data_size; + + if (iter->cpu == -1) + return 0; + + citer = &iter->iter_cpu[iter->cpu]; + WARN_ON_ONCE(!citer->sb_ref); + /* + * Nothing to show, we are at the end of the last subbuffer currently + * having data. + */ + if (!citer->header) + return 0; + + tsc = citer->tsc; + name = marker_get_name_from_id(citer->chID, citer->eID); + fmt = marker_get_fmt_from_id(citer->chID, citer->eID); + + if (!name || !fmt) + return 0; + + seq_printf(m, "event:%16.16s: cpu:%2d time:%20.20llu ", + name, iter->cpu, tsc); + seq_serialize(m, citer->buf, citer->payload_offset, fmt, &data_size); + seq_puts(m, "\n"); + if (citer->data_size == INT_MAX) + citer->data_size = data_size; + + return 0; +} + +static struct seq_operations ascii_seq_ops = { + .start = ascii_start, + .next = ascii_next, + .stop = ascii_stop, + .show = ascii_show, +}; + +/* FIXME : cpu hotplug support */ +static int ltt_relay_iter_open_channel(struct ltt_relay_iter *iter, + struct ltt_chan *chan) +{ + int i, ret; + u16 chID = ltt_channels_get_index_from_name(chan->a.filename); + + /* we don't need lock relay_channels_mutex */ + for_each_possible_cpu(i) { + struct ltt_relay_cpu_iter *citer = &iter->iter_cpu[i]; + + citer->buf = per_cpu_ptr(chan->a.buf, i); + if (!citer->buf->a.allocated) + continue; + + citer->iter = iter; /* easy lazy parent info */ + 
citer->chID = chID; + + ret = ltt_chanbuf_open_read(citer->buf); + if (ret) { + /* Failed to open a percpu buffer, close everything. */ + citer->buf = NULL; + goto error; + } + + for (;;) { + ret = subbuffer_start(citer, + &citer->read_sb_offset); + DEBUGP(KERN_DEBUG + "LTT ASCII open start " + "cpu %d ret %d offset %lX\n", + citer->buf->a.cpu, ret, citer->read_sb_offset); + if (!ret || ret == -ENODATA) { + break; /* got data, or finalized */ + } else { /* -EAGAIN */ + if (signal_pending(current)) + break; + schedule_timeout_interruptible(1); + } + } + update_cpu_iter(citer, citer->hdr_offset); + } + if (!iter->nr_refs) { + ret = -ENODATA; + goto error; + } + + return 0; + +error: + for_each_possible_cpu(i) { + struct ltt_relay_cpu_iter *citer = &iter->iter_cpu[i]; + + if (!citer->buf) + break; + + if (citer->buf->a.allocated) + ltt_chanbuf_release_read(citer->buf); + } + return ret; +} + +/* FIXME : cpu hotplug support */ +static int ltt_relay_iter_release_channel(struct ltt_relay_iter *iter) +{ + int i; + + for_each_possible_cpu(i) { + struct ltt_relay_cpu_iter *citer = &iter->iter_cpu[i]; + + if (citer->sb_ref) { + WARN_ON_ONCE(!citer->buf->a.allocated); + DEBUGP(KERN_DEBUG + "LTT ASCII release stop cpu %d offset %lX\n", + citer->buf->a.cpu, citer->read_sb_offset); + subbuffer_stop(&iter->iter_cpu[i], + citer->read_sb_offset); + } + if (citer->buf->a.allocated) + ltt_chanbuf_release_read(citer->buf); + } + WARN_ON_ONCE(iter->nr_refs); + return 0; +} + +static int ltt_relay_ascii_open(struct inode *inode, struct file *file) +{ + int ret; + struct ltt_chan *chan = inode->i_private; + struct ltt_relay_iter *iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + iter->chan = chan; + ret = ltt_relay_iter_open_channel(iter, chan); + if (ret) + goto error_free_alloc; + + ret = seq_open(file, &ascii_seq_ops); + if (ret) + goto error_release_channel; + ((struct seq_file *)file->private_data)->private = iter; + return 0; + +error_release_channel: + 
ltt_relay_iter_release_channel(iter); +error_free_alloc: + kfree(iter); + return ret; +} + +static int ltt_relay_ascii_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct ltt_relay_iter *iter = seq->private; + + ltt_relay_iter_release_channel(iter); + kfree(iter); + return 0; +} + +static struct file_operations ltt_ascii_fops = +{ + .read = seq_read, + .open = ltt_relay_ascii_open, + .release = ltt_relay_ascii_release, + .llseek = no_llseek, + .owner = THIS_MODULE, +}; + +int ltt_ascii_create(struct ltt_chan *chan) +{ + struct dentry *dentry; + + dentry = debugfs_create_file(chan->a.filename, + S_IRUSR | S_IRGRP, + chan->a.trace->dentry.ascii_root, + chan, <t_ascii_fops); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (!dentry) + return -EEXIST; + + chan->a.ascii_dentry = dentry; + dentry->d_inode->i_private = chan; + return 0; +} +EXPORT_SYMBOL_GPL(ltt_ascii_create); + +void ltt_ascii_remove(struct ltt_chan *chan) +{ + struct dentry *dentry; + + dentry = dget(chan->a.ascii_dentry); + debugfs_remove(dentry); + /* TODO: wait / wakeup instead */ + /* + * Wait for every reference to the dentry to be gone, + * except us. + */ + while (atomic_read(&dentry->d_count) != 1) + msleep(100); + dput(dentry); +} +EXPORT_SYMBOL_GPL(ltt_ascii_remove); + +int ltt_ascii_create_dir(struct ltt_trace *new_trace) +{ + new_trace->dentry.ascii_root = debugfs_create_dir(new_trace->trace_name, + ltt_ascii_dir_dentry); + if (!new_trace->dentry.ascii_root) + return -EEXIST; + return 0; +} +EXPORT_SYMBOL_GPL(ltt_ascii_create_dir); + +void ltt_ascii_remove_dir(struct ltt_trace *trace) +{ + debugfs_remove(trace->dentry.ascii_root); +} +EXPORT_SYMBOL_GPL(ltt_ascii_remove_dir); + +static __init int ltt_ascii_init(void) +{ + ltt_ascii_dir_dentry = debugfs_create_dir(LTT_ASCII, get_ltt_root()); + put_ltt_root(); + + return ltt_ascii_dir_dentry ? 
0 : -EFAULT; +} + +static __exit void ltt_ascii_exit(void) +{ + debugfs_remove(ltt_ascii_dir_dentry); +} + +module_init(ltt_ascii_init); +module_exit(ltt_ascii_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Lai Jiangshan@FNST and Mathieu Desnoyers"); +MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Ascii Converter"); diff --git a/ltt-channels.c b/ltt-channels.c new file mode 100644 index 00000000..c1cee264 --- /dev/null +++ b/ltt-channels.c @@ -0,0 +1,388 @@ + /* + * ltt/ltt-channels.c + * + * (C) Copyright 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * LTTng channel management. + * + * Author: + * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include + +/* + * ltt_channel_mutex may be nested inside the LTT trace mutex. + * ltt_channel_mutex mutex may be nested inside markers mutex. + */ +static DEFINE_MUTEX(ltt_channel_mutex); +static LIST_HEAD(ltt_channels); +/* + * Index of next channel in array. Makes sure that as long as a trace channel is + * allocated, no array index will be re-used when a channel is freed and then + * another channel is allocated. This index is cleared and the array indexeds + * get reassigned when the index_kref goes back to 0, which indicates that no + * more trace channels are allocated. + */ +static unsigned int free_index; +/* index_kref is protected by both ltt_channel_mutex and lock_markers */ +static struct kref index_kref; /* Keeps track of allocated trace channels */ + +static struct ltt_channel_setting *lookup_channel(const char *name) +{ + struct ltt_channel_setting *iter; + + list_for_each_entry(iter, <t_channels, list) + if (strcmp(name, iter->name) == 0) + return iter; + return NULL; +} + +/* + * Must be called when channel refcount falls to 0 _and_ also when the last + * trace is freed. This function is responsible for compacting the channel and + * event IDs when no users are active. 
+ * + * Called with lock_markers() and channels mutex held. + */ +static void release_channel_setting(struct kref *kref) +{ + struct ltt_channel_setting *setting = container_of(kref, + struct ltt_channel_setting, kref); + struct ltt_channel_setting *iter; + + if (atomic_read(&index_kref.refcount) == 0 + && atomic_read(&setting->kref.refcount) == 0) { + list_del(&setting->list); + kfree(setting); + + free_index = 0; + list_for_each_entry(iter, <t_channels, list) { + iter->index = free_index++; + iter->free_event_id = 0; + } + } +} + +/* + * Perform channel index compaction when the last trace channel is freed. + * + * Called with lock_markers() and channels mutex held. + */ +static void release_trace_channel(struct kref *kref) +{ + struct ltt_channel_setting *iter, *n; + + list_for_each_entry_safe(iter, n, <t_channels, list) + release_channel_setting(&iter->kref); + if (atomic_read(&index_kref.refcount) == 0) + markers_compact_event_ids(); +} + +/* + * ltt_channel_trace_ref : Is there an existing trace session ? + * + * Must be called with lock_markers() held. + */ +int ltt_channels_trace_ref(void) +{ + return !!atomic_read(&index_kref.refcount); +} +EXPORT_SYMBOL_GPL(ltt_channels_trace_ref); + +/** + * ltt_channels_register - Register a trace channel. + * @name: channel name + * + * Uses refcounting. 
+ */ +int ltt_channels_register(const char *name) +{ + struct ltt_channel_setting *setting; + int ret = 0; + + mutex_lock(<t_channel_mutex); + setting = lookup_channel(name); + if (setting) { + if (atomic_read(&setting->kref.refcount) == 0) + goto init_kref; + else { + kref_get(&setting->kref); + goto end; + } + } + setting = kzalloc(sizeof(*setting), GFP_KERNEL); + if (!setting) { + ret = -ENOMEM; + goto end; + } + list_add(&setting->list, <t_channels); + strncpy(setting->name, name, PATH_MAX-1); + setting->index = free_index++; +init_kref: + kref_init(&setting->kref); +end: + mutex_unlock(<t_channel_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(ltt_channels_register); + +/** + * ltt_channels_unregister - Unregister a trace channel. + * @name: channel name + * @compacting: performing compaction + * + * Must be called with markers mutex held. + */ +int ltt_channels_unregister(const char *name, int compacting) +{ + struct ltt_channel_setting *setting; + int ret = 0; + + if (!compacting) + mutex_lock(<t_channel_mutex); + setting = lookup_channel(name); + if (!setting || atomic_read(&setting->kref.refcount) == 0) { + ret = -ENOENT; + goto end; + } + kref_put(&setting->kref, release_channel_setting); + if (!compacting && atomic_read(&index_kref.refcount) == 0) + markers_compact_event_ids(); +end: + if (!compacting) + mutex_unlock(<t_channel_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(ltt_channels_unregister); + +/** + * ltt_channels_set_default - Set channel default behavior. 
+ * @name: default channel name + * @sb_size: size of the subbuffers + * @n_sb: number of subbuffers + */ +int ltt_channels_set_default(const char *name, + unsigned int sb_size, + unsigned int n_sb) +{ + struct ltt_channel_setting *setting; + int ret = 0; + + mutex_lock(<t_channel_mutex); + setting = lookup_channel(name); + if (!setting || atomic_read(&setting->kref.refcount) == 0) { + ret = -ENOENT; + goto end; + } + setting->sb_size = sb_size; + setting->n_sb = n_sb; +end: + mutex_unlock(<t_channel_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(ltt_channels_set_default); + +/** + * ltt_channels_get_name_from_index - get channel name from channel index + * @index: channel index + * + * Allows to lookup the channel name given its index. Done to keep the name + * information outside of each trace channel instance. + */ +const char *ltt_channels_get_name_from_index(unsigned int index) +{ + struct ltt_channel_setting *iter; + + list_for_each_entry(iter, <t_channels, list) + if (iter->index == index && atomic_read(&iter->kref.refcount)) + return iter->name; + return NULL; +} +EXPORT_SYMBOL_GPL(ltt_channels_get_name_from_index); + +static struct ltt_channel_setting * +ltt_channels_get_setting_from_name(const char *name) +{ + struct ltt_channel_setting *iter; + + list_for_each_entry(iter, <t_channels, list) + if (!strcmp(iter->name, name) + && atomic_read(&iter->kref.refcount)) + return iter; + return NULL; +} + +/** + * ltt_channels_get_index_from_name - get channel index from channel name + * @name: channel name + * + * Allows to lookup the channel index given its name. Done to keep the name + * information outside of each trace channel instance. + * Returns -1 if not found. 
+ */ +int ltt_channels_get_index_from_name(const char *name) +{ + struct ltt_channel_setting *setting; + + setting = ltt_channels_get_setting_from_name(name); + if (setting) + return setting->index; + else + return -1; +} +EXPORT_SYMBOL_GPL(ltt_channels_get_index_from_name); + +/** + * ltt_channels_trace_alloc - Allocate channel structures for a trace + * @sb_size: subbuffer size. 0 uses default. + * @n_sb: number of subbuffers per per-cpu buffers. 0 uses default. + * @flags: Default channel flags + * + * Use the current channel list to allocate the channels for a trace. + * Called with trace lock held. Does not perform the trace buffer allocation, + * because we must let the user overwrite specific channel sizes. + */ +struct ltt_chan *ltt_channels_trace_alloc(unsigned int *nr_channels, + int overwrite, int active) +{ + struct ltt_chan *chan = NULL; + struct ltt_channel_setting *iter; + + lock_markers(); + mutex_lock(<t_channel_mutex); + if (!free_index) + goto end; + if (!atomic_read(&index_kref.refcount)) + kref_init(&index_kref); + else + kref_get(&index_kref); + *nr_channels = free_index; + chan = kzalloc(sizeof(struct ltt_chan) * free_index, GFP_KERNEL); + if (!chan) + goto end; + list_for_each_entry(iter, <t_channels, list) { + if (!atomic_read(&iter->kref.refcount)) + continue; + chan[iter->index].a.sb_size = iter->sb_size; + chan[iter->index].a.n_sb = iter->n_sb; + chan[iter->index].overwrite = overwrite; + chan[iter->index].active = active; + strncpy(chan[iter->index].a.filename, iter->name, NAME_MAX - 1); + chan[iter->index].switch_timer_interval = 0; + } +end: + mutex_unlock(<t_channel_mutex); + unlock_markers(); + return chan; +} +EXPORT_SYMBOL_GPL(ltt_channels_trace_alloc); + +/** + * ltt_channels_trace_free - Free one trace's channels + * @channels: channels to free + * + * Called with trace lock held. The actual channel buffers must be freed before + * this function is called. 
+ */ +void ltt_channels_trace_free(struct ltt_chan *channels, + unsigned int nr_channels) +{ + lock_markers(); + mutex_lock(<t_channel_mutex); + kfree(channels); + kref_put(&index_kref, release_trace_channel); + mutex_unlock(<t_channel_mutex); + unlock_markers(); + marker_update_probes(); +} +EXPORT_SYMBOL_GPL(ltt_channels_trace_free); + +/** + * ltt_channels_trace_set_timer - set switch timer + * @channel: channel + * @interval: interval of timer interrupt, in jiffies. 0 inhibits timer. + */ + +void ltt_channels_trace_set_timer(struct ltt_chan *chan, + unsigned long interval) +{ + chan->switch_timer_interval = interval; +} +EXPORT_SYMBOL_GPL(ltt_channels_trace_set_timer); + +/** + * _ltt_channels_get_event_id - get next event ID for a marker + * @channel: channel name + * @name: event name + * + * Returns a unique event ID (for this channel) or < 0 on error. + * Must be called with channels mutex held. + */ +int _ltt_channels_get_event_id(const char *channel, const char *name) +{ + struct ltt_channel_setting *setting; + int ret; + + setting = ltt_channels_get_setting_from_name(channel); + if (!setting) { + ret = -ENOENT; + goto end; + } + if (strcmp(channel, "metadata") == 0) { + if (strcmp(name, "core_marker_id") == 0) + ret = 0; + else if (strcmp(name, "core_marker_format") == 0) + ret = 1; + else + ret = -ENOENT; + goto end; + } + if (setting->free_event_id == EVENTS_PER_CHANNEL - 1) { + ret = -ENOSPC; + goto end; + } + ret = setting->free_event_id++; +end: + return ret; +} + +/** + * ltt_channels_get_event_id - get next event ID for a marker + * @channel: channel name + * @name: event name + * + * Returns a unique event ID (for this channel) or < 0 on error. 
+ */ +int ltt_channels_get_event_id(const char *channel, const char *name) +{ + int ret; + + mutex_lock(<t_channel_mutex); + ret = _ltt_channels_get_event_id(channel, name); + mutex_unlock(<t_channel_mutex); + return ret; +} + +/** + * ltt_channels_reset_event_ids - reset event IDs at compaction + * + * Called with lock marker and channel mutex held. + */ +void _ltt_channels_reset_event_ids(void) +{ + struct ltt_channel_setting *iter; + + list_for_each_entry(iter, <t_channels, list) + iter->free_event_id = 0; +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Channel Management"); diff --git a/ltt-core.c b/ltt-core.c new file mode 100644 index 00000000..13d517f5 --- /dev/null +++ b/ltt-core.c @@ -0,0 +1,108 @@ +/* + * LTT core in-kernel infrastructure. + * + * Copyright 2006 - Mathieu Desnoyers mathieu.desnoyers@polymtl.ca + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include + +#include "ltt-tracer-core.h" + +/* Traces structures */ +struct ltt_traces ltt_traces = { + .setup_head = LIST_HEAD_INIT(ltt_traces.setup_head), + .head = LIST_HEAD_INIT(ltt_traces.head), +}; +EXPORT_SYMBOL(ltt_traces); + +/* Traces list writer locking */ +static DEFINE_MUTEX(ltt_traces_mutex); + +/* root dentry mutex */ +static DEFINE_MUTEX(ltt_root_mutex); +/* dentry of ltt's root dir */ +static struct dentry *ltt_root_dentry; +static struct kref ltt_root_kref = { + .refcount = ATOMIC_INIT(0), +}; + +static void ltt_root_release(struct kref *ref) +{ + debugfs_remove(ltt_root_dentry); + ltt_root_dentry = NULL; +} + +void put_ltt_root(void) +{ + mutex_lock(<t_root_mutex); + if (ltt_root_dentry) + kref_put(<t_root_kref, ltt_root_release); + mutex_unlock(<t_root_mutex); +} +EXPORT_SYMBOL_GPL(put_ltt_root); + +struct dentry *get_ltt_root(void) +{ + mutex_lock(<t_root_mutex); + if (!ltt_root_dentry) { + ltt_root_dentry = debugfs_create_dir(LTT_ROOT, NULL); + if 
(!ltt_root_dentry) { + printk(KERN_ERR "LTT : create ltt root dir failed\n"); + goto out; + } + kref_init(<t_root_kref); + goto out; + } + kref_get(<t_root_kref); +out: + mutex_unlock(<t_root_mutex); + return ltt_root_dentry; +} +EXPORT_SYMBOL_GPL(get_ltt_root); + +/* + * ltt_lock_traces/ltt_unlock_traces also disables cpu hotplug. + */ +void ltt_lock_traces(void) +{ + mutex_lock(<t_traces_mutex); + get_online_cpus(); +} +EXPORT_SYMBOL_GPL(ltt_lock_traces); + +void ltt_unlock_traces(void) +{ + put_online_cpus(); + mutex_unlock(<t_traces_mutex); +} +EXPORT_SYMBOL_GPL(ltt_unlock_traces); + +DEFINE_PER_CPU(unsigned int, ltt_nesting); +EXPORT_PER_CPU_SYMBOL(ltt_nesting); + +int ltt_run_filter_default(void *trace, uint16_t eID) +{ + return 1; +} + +/* This function pointer is protected by a trace activation check */ +ltt_run_filter_functor ltt_run_filter = ltt_run_filter_default; +EXPORT_SYMBOL_GPL(ltt_run_filter); + +void ltt_filter_register(ltt_run_filter_functor func) +{ + ltt_run_filter = func; +} +EXPORT_SYMBOL_GPL(ltt_filter_register); + +void ltt_filter_unregister(void) +{ + ltt_run_filter = ltt_run_filter_default; +} +EXPORT_SYMBOL_GPL(ltt_filter_unregister); diff --git a/ltt-event-header.c b/ltt-event-header.c new file mode 100644 index 00000000..4f049d31 --- /dev/null +++ b/ltt-event-header.c @@ -0,0 +1,92 @@ +/* + * ltt/ltt-event-header.c + * + * (C) Copyright 2010 - Mathieu Desnoyers (mathieu.desnoyers@efficios.com) + * + * LTTng event header. + * + * Author: + * Mathieu Desnoyers (mathieu.desnoyers@efficios.com) + * + * Dual LGPL v2.1/GPL v2 license. 
+ */
+
+#include
+
+#include "ltt-tracer.h"
+#include "ltt-relay.h"
+/* Write an extended event header at buf_offset; returns the offset past it. */
+size_t ltt_write_event_header_slow(struct ltt_chanbuf_alloc *bufa,
+				   struct ltt_chan_alloc *chana,
+				   long buf_offset, u16 eID, u32 event_size,
+				   u64 tsc, unsigned int rflags)
+{
+	struct ltt_event_header header;
+	u16 small_size;
+	/* Unsigned ids: the shift must not overflow int (assumes LTT_TSC_BITS near 27 - TODO confirm). */
+	switch (rflags) {
+	case LTT_RFLAG_ID_SIZE_TSC:	/* id, size and full TSC follow */
+		header.id_time = 29U << LTT_TSC_BITS;
+		break;
+	case LTT_RFLAG_ID_SIZE:		/* id and size follow */
+		header.id_time = 30U << LTT_TSC_BITS;
+		break;
+	case LTT_RFLAG_ID:		/* only the id follows */
+		header.id_time = 31U << LTT_TSC_BITS;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		header.id_time = 0;
+	}
+
+	header.id_time |= (u32)tsc & LTT_TSC_MASK;
+	ltt_relay_write(bufa, chana, buf_offset, &header, sizeof(header));
+	buf_offset += sizeof(header);
+
+	switch (rflags) {
+	case LTT_RFLAG_ID_SIZE_TSC:
+		small_size = (u16)min_t(u32, event_size, LTT_MAX_SMALL_SIZE);
+		ltt_relay_write(bufa, chana, buf_offset,
+			&eID, sizeof(u16));
+		buf_offset += sizeof(u16);
+		ltt_relay_write(bufa, chana, buf_offset,
+			&small_size, sizeof(u16));
+		buf_offset += sizeof(u16);
+		if (small_size == LTT_MAX_SMALL_SIZE) {	/* saturated: full size follows */
+			ltt_relay_write(bufa, chana, buf_offset,
+				&event_size, sizeof(u32));
+			buf_offset += sizeof(u32);
+		}
+		buf_offset += ltt_align(buf_offset, sizeof(u64));
+		ltt_relay_write(bufa, chana, buf_offset,
+			&tsc, sizeof(u64));
+		buf_offset += sizeof(u64);
+		break;
+	case LTT_RFLAG_ID_SIZE:
+		small_size = (u16)min_t(u32, event_size, LTT_MAX_SMALL_SIZE);
+		ltt_relay_write(bufa, chana, buf_offset,
+			&eID, sizeof(u16));
+		buf_offset += sizeof(u16);
+		ltt_relay_write(bufa, chana, buf_offset,
+			&small_size, sizeof(u16));
+		buf_offset += sizeof(u16);
+		if (small_size == LTT_MAX_SMALL_SIZE) {	/* saturated: full size follows */
+			ltt_relay_write(bufa, chana, buf_offset,
+				&event_size, sizeof(u32));
+			buf_offset += sizeof(u32);
+		}
+		break;
+	case LTT_RFLAG_ID:
+		ltt_relay_write(bufa, chana, buf_offset,
+			&eID, sizeof(u16));
+		buf_offset += sizeof(u16);
+		break;
+	}
+
+	return buf_offset;
+}
+EXPORT_SYMBOL_GPL(ltt_write_event_header_slow);
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Event Header");
diff --git a/ltt-filter.c b/ltt-filter.c
new file mode 100644
index 00000000..ec113af6
--- /dev/null
+++ b/ltt-filter.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include
+#include
+#include
+#include
+
+#include "ltt-tracer.h"
+
+#define LTT_FILTER_DIR	"filter"
+
+/*
+ * Protects the ltt_filter_dir allocation.
+ */
+static DEFINE_MUTEX(ltt_filter_mutex);
+
+/* Cached dentry of the "filter" directory; NULL until first created. */
+static struct dentry *ltt_filter_dir;
+
+/*
+ * get_filter_root - return the "filter" debugfs directory, creating it lazily.
+ *
+ * On first call the directory is created under the dentry returned by
+ * get_ltt_root(). Returns the cached dentry, or NULL when the LTT root is
+ * not available or the directory could not be created.
+ */
+struct dentry *get_filter_root(void)
+{
+	struct dentry *ltt_root_dentry;
+
+	mutex_lock(&ltt_filter_mutex);
+	if (!ltt_filter_dir) {
+		ltt_root_dentry = get_ltt_root();
+		if (!ltt_root_dentry)
+			goto err_no_root;
+
+		ltt_filter_dir = debugfs_create_dir(LTT_FILTER_DIR,
+						    ltt_root_dentry);
+		if (!ltt_filter_dir)
+			printk(KERN_ERR
+			       "ltt_filter_init: failed to create dir %s\n",
+			       LTT_FILTER_DIR);
+	}
+err_no_root:
+	mutex_unlock(&ltt_filter_mutex);
+	return ltt_filter_dir;
+}
+EXPORT_SYMBOL_GPL(get_filter_root);
+
+/* Module teardown: remove the "filter" directory if it was ever created. */
+static void __exit ltt_filter_exit(void)
+{
+	debugfs_remove(ltt_filter_dir);
+}
+
+module_exit(ltt_filter_exit);
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Mathieu Desnoyers ");
+MODULE_DESCRIPTION("Linux Trace Toolkit Filter");
diff --git a/ltt-kprobes.c b/ltt-kprobes.c
new file mode 100644
index 00000000..7539381b
--- /dev/null
+++ b/ltt-kprobes.c
@@ -0,0 +1,493 @@
+/*
+ * (C) Copyright 2009 -
+ *         Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * LTTng kprobes integration module.
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ltt-type-serializer.h"
+#include "ltt-tracer.h"
+
+#define LTT_KPROBES_DIR		"kprobes"
+#define LTT_KPROBES_ENABLE	"enable"
+#define LTT_KPROBES_DISABLE	"disable"
+#define LTT_KPROBES_LIST	"list"
+
+/* Active LTTng kprobes hash table */
+static DEFINE_MUTEX(ltt_kprobes_mutex);
+
+#define LTT_KPROBE_HASH_BITS	6
+#define LTT_KPROBE_TABLE_SIZE	(1 << LTT_KPROBE_HASH_BITS)
+static struct hlist_head ltt_kprobe_table[LTT_KPROBE_TABLE_SIZE];
+
+/*
+ * One registered kprobe. @key is the user-supplied string ("symbol",
+ * "symbol offset" or "address") and is what the hash table is indexed by.
+ */
+struct kprobe_entry {
+	struct hlist_node hlist;
+	struct kprobe kp;
+	char key[0];
+};
+
+static struct dentry *ltt_kprobes_dir,
+		     *ltt_kprobes_enable_dentry,
+		     *ltt_kprobes_disable_dentry,
+		     *ltt_kprobes_list_dentry;
+
+/* Set under ltt_kprobes_mutex when the module is being unloaded. */
+static int module_exit;
+
+
+/*
+ * Emit a kprobe_state/kprobe_table marker describing entry @e.
+ *
+ * Best effort: nothing is emitted when the scratch page cannot be allocated
+ * or when the probed address cannot be resolved.
+ */
+static void trace_kprobe_table_entry(void *call_data, struct kprobe_entry *e)
+{
+	unsigned long addr;
+	char *namebuf = (char *)__get_free_page(GFP_KERNEL);
+
+	if (!namebuf)
+		return;
+
+	if (e->kp.addr) {
+		sprint_symbol(namebuf, (unsigned long)e->kp.addr);
+		addr = (unsigned long)e->kp.addr;
+	} else {
+		strncpy(namebuf, e->kp.symbol_name, PAGE_SIZE - 1);
+		/*
+		 * The page is not zeroed and strncpy() does not terminate a
+		 * truncated copy.
+		 */
+		namebuf[PAGE_SIZE - 1] = '\0';
+		/* TODO : add offset */
+		addr = kallsyms_lookup_name(namebuf);
+	}
+	if (addr)
+		__trace_mark(0, kprobe_state, kprobe_table, call_data,
+			     "ip 0x%lX symbol %s", addr, namebuf);
+	free_page((unsigned long)namebuf);
+}
+
+DEFINE_MARKER(kernel, kprobe, "ip %lX");
+
+/* kprobe pre-handler: record the probed address through the kprobe marker. */
+static int ltt_kprobe_handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+	struct marker *marker;
+	unsigned long data;
+
+	data = (unsigned long)p->addr;
+	marker = &GET_MARKER(kernel, kprobe);
+	ltt_specialized_trace(marker, marker->single.probe_private,
+			      &data, sizeof(data), sizeof(data));
+	return 0;
+}
+
+/*
+ * Register a kprobe described by @key ("symbol", "symbol offset" or a numeric
+ * address) and insert it in the hash table.
+ *
+ * Returns 0 on success, -ENOENT for an empty key, -EBUSY when @key is already
+ * registered, -ENOMEM on allocation failure, or the register_kprobe() error.
+ * Called with ltt_kprobes_mutex held by enable_op_write().
+ */
+static int ltt_register_kprobe(const char *key)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe_entry *iter;
+	struct kprobe_entry *e = NULL;
+	char *symbol_name = NULL;
+	unsigned long addr;
+	unsigned int offset = 0;
+	u32 hash;
+	size_t key_len = strlen(key) + 1;
+	int ret;
+
+	if (key_len == 1)
+		return -ENOENT;	/* only \0 */
+
+	if (sscanf(key, "%li", &addr) != 1)
+		addr = 0;
+
+	if (!addr) {
+		const char *symbol_end = NULL;
+		unsigned int symbol_len;	/* includes final \0 */
+
+		symbol_end = strchr(key, ' ');
+		if (symbol_end)
+			symbol_len = symbol_end - key + 1;
+		else
+			symbol_len = key_len;
+		symbol_name = kmalloc(symbol_len, GFP_KERNEL);
+		if (!symbol_name) {
+			ret = -ENOMEM;
+			goto error;
+		}
+		memcpy(symbol_name, key, symbol_len - 1);
+		symbol_name[symbol_len-1] = '\0';
+		if (symbol_end) {
+			symbol_end++;	/* start of offset */
+			if (sscanf(symbol_end, "%i", &offset) != 1)
+				offset = 0;
+		}
+	}
+
+	hash = jhash(key, key_len-1, 0);
+	head = &ltt_kprobe_table[hash & ((1 << LTT_KPROBE_HASH_BITS)-1)];
+	/*
+	 * Look up with a dedicated cursor: "e" must stay NULL here so that the
+	 * error path never frees an entry that is still live in the table.
+	 */
+	hlist_for_each_entry(iter, node, head, hlist) {
+		if (!strcmp(key, iter->key)) {
+			printk(KERN_NOTICE "Kprobe %s busy\n", key);
+			ret = -EBUSY;
+			goto error;
+		}
+	}
+	/*
+	 * Using kzalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kzalloc(sizeof(struct kprobe_entry) + key_len, GFP_KERNEL);
+	if (!e) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	memcpy(e->key, key, key_len);
+	hlist_add_head(&e->hlist, head);
+	e->kp.pre_handler = ltt_kprobe_handler_pre;
+	e->kp.symbol_name = symbol_name;
+	e->kp.offset = offset;
+	e->kp.addr = (void *)addr;
+	ret = register_kprobe(&e->kp);
+	if (ret < 0)
+		goto error_list_del;
+	trace_kprobe_table_entry(NULL, e);
+	return 0;
+
+error_list_del:
+	hlist_del(&e->hlist);
+error:
+	kfree(symbol_name);
+	kfree(e);
+	return ret;
+}
+
+/*
+ * Unregister the kprobe previously registered under @key.
+ * Returns 0 on success, -ENOENT when no such key is registered.
+ */
+static int ltt_unregister_kprobe(const char *key)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe_entry *e;
+	int found = 0;
+	size_t key_len = strlen(key) + 1;
+	u32 hash;
+
+	hash = jhash(key, key_len-1, 0);
+	head = &ltt_kprobe_table[hash & ((1 << LTT_KPROBE_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(key, e->key)) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return -ENOENT;
+	hlist_del(&e->hlist);
+	unregister_kprobe(&e->kp);
+	kfree(e->kp.symbol_name);
+	kfree(e);
+	return 0;
+}
+
+/* Unregister and free every entry of the hash table (module unload). */
+static void ltt_unregister_all_kprobes(void)
+{
+	struct kprobe_entry *e;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned int i;
+
+	for (i = 0; i < LTT_KPROBE_TABLE_SIZE; i++) {
+		head = &ltt_kprobe_table[i];
+		hlist_for_each_entry_safe(e, node, tmp, head, hlist) {
+			hlist_del(&e->hlist);
+			unregister_kprobe(&e->kp);
+			kfree(e->kp.symbol_name);
+			kfree(e);
+		}
+	}
+}
+
+/*
+ * Allows to specify either
+ * - symbol
+ * - symbol offset
+ * - address
+ *
+ * At most PAGE_SIZE - 1 bytes are considered and the input is cut at the
+ * first newline. Returns @count on success or a negative error code.
+ */
+static ssize_t enable_op_write(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int err, buf_size;
+	char *end;
+	char *buf = (char *)__get_free_page(GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
+	mutex_lock(&ltt_kprobes_mutex);
+	if (module_exit) {
+		err = -EPERM;
+		goto error;
+	}
+
+	buf_size = min_t(size_t, count, PAGE_SIZE - 1);
+	/* copy_from_user() returns the number of bytes NOT copied. */
+	if (copy_from_user(buf, user_buf, buf_size)) {
+		err = -EFAULT;
+		goto error;
+	}
+	buf[buf_size] = '\0';
+	end = strchr(buf, '\n');
+	if (end)
+		*end = '\0';
+	err = ltt_register_kprobe(buf);
+	if (err)
+		goto error;
+
+	mutex_unlock(&ltt_kprobes_mutex);
+	free_page((unsigned long)buf);
+	return count;
+error:
+	mutex_unlock(&ltt_kprobes_mutex);
+	free_page((unsigned long)buf);
+	return err;
+}
+
+static const struct file_operations ltt_kprobes_enable = {
+	.write = enable_op_write,
+};
+
+/*
+ * Write handler of the "disable" file: unregister the kprobe whose key is
+ * written. While the module is exiting the write is accepted and ignored.
+ * Returns @count on success or a negative error code.
+ */
+static ssize_t disable_op_write(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int err, buf_size;
+	char *end;
+	char *buf = (char *)__get_free_page(GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
+	mutex_lock(&ltt_kprobes_mutex);
+	if (module_exit)
+		goto end;
+
+	buf_size = min_t(size_t, count, PAGE_SIZE - 1);
+	/* copy_from_user() returns the number of bytes NOT copied. */
+	if (copy_from_user(buf, user_buf, buf_size)) {
+		err = -EFAULT;
+		goto error;
+	}
+	buf[buf_size] = '\0';
+	end = strchr(buf, '\n');
+	if (end)
+		*end = '\0';
+	err = ltt_unregister_kprobe(buf);
+	if (err)
+		goto error;
+end:
+	mutex_unlock(&ltt_kprobes_mutex);
+	free_page((unsigned long)buf);
+	return count;
+error:
+	mutex_unlock(&ltt_kprobes_mutex);
+	free_page((unsigned long)buf);
+	return err;
+}
+
+static const struct file_operations ltt_kprobes_disable = {
+	.write = disable_op_write,
+};
+
+/*
+ * This seqfile read is not perfectly safe, as a kprobe could be removed from
+ * the hash table between two reads. This will result in an incomplete output.
+ *
+ * The iteration cursor is kept in seq_file->private: NULL means "start from
+ * the first entry", (void *)-1UL means "end of table reached".
+ */
+static struct kprobe_entry *ltt_find_next_kprobe(struct kprobe_entry *prev)
+{
+	struct kprobe_entry *e;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	unsigned int i;
+	int found = 0;
+
+	if (prev == (void *)-1UL)
+		return NULL;
+
+	if (!prev)
+		found = 1;
+
+	/* Return the entry that follows @prev in table order. */
+	for (i = 0; i < LTT_KPROBE_TABLE_SIZE; i++) {
+		head = &ltt_kprobe_table[i];
+		hlist_for_each_entry(e, node, head, hlist) {
+			if (found)
+				return e;
+			if (e == prev)
+				found = 1;
+		}
+	}
+	return NULL;
+}
+
+/* seq_file ->next: step the cursor and advance the position index. */
+static void *lk_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	/*
+	 * seq_file expects ->next to advance *pos; lk_start() only tests it
+	 * against zero to decide whether to rewind the cursor.
+	 */
+	(*pos)++;
+	m->private = ltt_find_next_kprobe(m->private);
+	if (!m->private) {
+		m->private = (void *)-1UL;
+		return NULL;
+	}
+	return m->private;
+}
+
+/*
+ * seq_file ->start: take the kprobes mutex (released in lk_stop) and resume
+ * after the entry saved in m->private, rewinding when *pos is zero.
+ */
+static void *lk_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&ltt_kprobes_mutex);
+	if (!*pos)
+		m->private = NULL;
+	m->private = ltt_find_next_kprobe(m->private);
+	if (!m->private) {
+		m->private = (void *)-1UL;
+		return NULL;
+	}
+	return m->private;
+}
+
+/* seq_file ->stop: drop the mutex taken in lk_start(). */
+static void lk_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&ltt_kprobes_mutex);
+}
+
+/* seq_file ->show: print the key of the current entry, one per line. */
+static int lk_show(struct seq_file *m, void *p)
+{
+	struct kprobe_entry *e = m->private;
+	seq_printf(m, "%s\n", e->key);
+	return 0;
+}
+
+static const struct seq_operations ltt_kprobes_list_op = {
+	.start = lk_start,
+	.next = lk_next,
+	.stop = lk_stop,
+	.show = lk_show,
+};
+
+/* Open the "list" file and reset the iteration cursor. */
+static int ltt_kprobes_list_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &ltt_kprobes_list_op);
+	if (ret == 0)
+		((struct seq_file *)file->private_data)->private = NULL;
+	return ret;
+}
+
+/*
+ * Release the "list" file. The cursor is cleared first so that seq_release()
+ * never sees the borrowed table pointer in ->private.
+ */
+static int ltt_kprobes_list_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+
+	seq->private = NULL;
+	return seq_release(inode, file);
+}
+
+static const struct file_operations ltt_kprobes_list = {
+	.open = ltt_kprobes_list_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = ltt_kprobes_list_release,
+};
+
+/*
+ * kprobes table dump. Callback invoked by ltt-statedump.
ltt-statedump must
+ * take a reference to this module before calling this callback.
+ *
+ * NOTE(review): the table is walked without ltt_kprobes_mutex here; confirm
+ * that the statedump caller cannot race with the enable/disable files.
+ */
+void ltt_dump_kprobes_table(void *call_data)
+{
+	struct kprobe_entry *e;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	unsigned int i;
+
+	for (i = 0; i < LTT_KPROBE_TABLE_SIZE; i++) {
+		head = &ltt_kprobe_table[i];
+		hlist_for_each_entry(e, node, head, hlist)
+			trace_kprobe_table_entry(call_data, e);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_kprobes_table);
+
+/*
+ * Module init: create the kprobes/{enable,disable,list} debugfs files under
+ * the LTT root and register the statedump callback.
+ *
+ * Returns 0 on success, -ENOENT when the LTT root is missing, -ENOMEM when a
+ * debugfs entry cannot be created (entries created so far are removed).
+ */
+static int __init ltt_kprobes_init(void)
+{
+	struct dentry *ltt_root_dentry;
+	int ret = 0;
+
+	printk(KERN_INFO "LTT : ltt-kprobes init\n");
+	mutex_lock(&ltt_kprobes_mutex);
+
+	ltt_root_dentry = get_ltt_root();
+	if (!ltt_root_dentry) {
+		ret = -ENOENT;
+		goto err_no_root;
+	}
+
+	ltt_kprobes_dir = debugfs_create_dir(LTT_KPROBES_DIR, ltt_root_dentry);
+	/* Same failure test as for the files below. */
+	if (IS_ERR(ltt_kprobes_dir) || !ltt_kprobes_dir) {
+		printk(KERN_ERR
+		       "ltt_kprobes_init: failed to create dir %s\n",
+		       LTT_KPROBES_DIR);
+		ret = -ENOMEM;
+		goto err_no_dir;
+	}
+
+	ltt_kprobes_enable_dentry = debugfs_create_file(LTT_KPROBES_ENABLE,
+							S_IWUSR,
+							ltt_kprobes_dir, NULL,
+							&ltt_kprobes_enable);
+	if (IS_ERR(ltt_kprobes_enable_dentry) || !ltt_kprobes_enable_dentry) {
+		printk(KERN_ERR
+		       "ltt_kprobes_init: failed to create file %s\n",
+		       LTT_KPROBES_ENABLE);
+		ret = -ENOMEM;
+		goto err_no_enable;
+	}
+
+	ltt_kprobes_disable_dentry = debugfs_create_file(LTT_KPROBES_DISABLE,
+							 S_IWUSR,
+							 ltt_kprobes_dir, NULL,
+							 &ltt_kprobes_disable);
+	if (IS_ERR(ltt_kprobes_disable_dentry) || !ltt_kprobes_disable_dentry) {
+		printk(KERN_ERR
+		       "ltt_kprobes_init: failed to create file %s\n",
+		       LTT_KPROBES_DISABLE);
+		ret = -ENOMEM;
+		goto err_no_disable;
+	}
+
+	/* "list" only implements read/llseek, so it must be owner-readable. */
+	ltt_kprobes_list_dentry = debugfs_create_file(LTT_KPROBES_LIST,
+						      S_IRUSR, ltt_kprobes_dir,
+						      NULL, &ltt_kprobes_list);
+	if (IS_ERR(ltt_kprobes_list_dentry) || !ltt_kprobes_list_dentry) {
+		printk(KERN_ERR
+		       "ltt_kprobes_init: failed to create file %s\n",
+		       LTT_KPROBES_LIST);
+		ret = -ENOMEM;
+		goto err_no_list;
+	}
+	ltt_statedump_register_kprobes_dump(ltt_dump_kprobes_table);
+
+	mutex_unlock(&ltt_kprobes_mutex);
+	return ret;
+
+err_no_list:
+	debugfs_remove(ltt_kprobes_disable_dentry);
+err_no_disable:
+	debugfs_remove(ltt_kprobes_enable_dentry);
+err_no_enable:
+	debugfs_remove(ltt_kprobes_dir);
+err_no_dir:
+err_no_root:
+	mutex_unlock(&ltt_kprobes_mutex);
+	return ret;
+}
+module_init(ltt_kprobes_init);
+
+/*
+ * Module exit: flag the module as exiting so the write handlers stop
+ * registering probes, remove the debugfs entries and unregister all kprobes.
+ */
+static void __exit ltt_kprobes_exit(void)
+{
+	printk(KERN_INFO "LTT : ltt-kprobes exit\n");
+	mutex_lock(&ltt_kprobes_mutex);
+	module_exit = 1;
+	ltt_statedump_unregister_kprobes_dump(ltt_dump_kprobes_table);
+	debugfs_remove(ltt_kprobes_list_dentry);
+	debugfs_remove(ltt_kprobes_disable_dentry);
+	debugfs_remove(ltt_kprobes_enable_dentry);
+	debugfs_remove(ltt_kprobes_dir);
+	ltt_unregister_all_kprobes();
+	mutex_unlock(&ltt_kprobes_mutex);
+}
+module_exit(ltt_kprobes_exit);
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Linux Trace Toolkit Kprobes Support");
diff --git a/ltt-marker-control.c b/ltt-marker-control.c
new file mode 100644
index 00000000..61424308
--- /dev/null
+++ b/ltt-marker-control.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ltt-tracer.h"
+
+#define DEFAULT_CHANNEL "cpu"
+#define DEFAULT_PROBE "default"
+
+LIST_HEAD(probes_list);
+
+/*
+ * Mutex protecting the probe slab cache.
+ * Nests inside the traces mutex.
+ */
+DEFINE_MUTEX(probes_mutex);
+
+/* Probe used when a marker is connected without naming a probe. */
+struct ltt_available_probe default_probe = {
+	.name = "default",
+	.format = NULL,
+	.probe_func = ltt_vtrace,
+	.callbacks[0] = ltt_serialize_data,
+};
+
+static struct kmem_cache *markers_loaded_cachep;
+static LIST_HEAD(markers_loaded_list);
+/*
+ * List sorted by name strcmp order.
+ */
+static LIST_HEAD(probes_registered_list);
+
+/*
+ * Look up a registered probe by name; a NULL @pname selects DEFAULT_PROBE.
+ * The list is sorted, so the search stops at the first name that sorts after
+ * @pname. Returns the probe, or NULL when no probe has that name.
+ */
+static struct ltt_available_probe *get_probe_from_name(const char *pname)
+{
+	struct ltt_available_probe *probe;
+	int cmp;
+
+	if (!pname)
+		pname = DEFAULT_PROBE;
+	list_for_each_entry(probe, &probes_registered_list, node) {
+		cmp = strcmp(pname, probe->name);
+		if (cmp == 0)
+			return probe;
+		if (cmp < 0)
+			break;
+	}
+	return NULL;
+}
+
+/*
+ * Insert @pdata in the sorted list of registered probes.
+ * Returns 0 on success, -EBUSY when a probe of the same name already exists.
+ */
+int ltt_probe_register(struct ltt_available_probe *pdata)
+{
+	/* Insertion point: the list head unless a smaller name is found. */
+	struct list_head *insert_after = &probes_registered_list;
+	struct ltt_available_probe *cur;
+	int status = 0;
+	int cmp;
+
+	mutex_lock(&probes_mutex);
+	list_for_each_entry_reverse(cur, &probes_registered_list, node) {
+		cmp = strcmp(pdata->name, cur->name);
+		if (cmp == 0) {
+			status = -EBUSY;
+			goto unlock;
+		}
+		if (cmp > 0) {
+			/* We belong to the location right after cur. */
+			insert_after = &cur->node;
+			break;
+		}
+	}
+	list_add(&pdata->node, insert_after);
+unlock:
+	mutex_unlock(&probes_mutex);
+	return status;
+}
+EXPORT_SYMBOL_GPL(ltt_probe_register);
+
+/*
+ * Called when a probe does not want to be called anymore.
+ *
+ * Every active marker bound to @pdata is unregistered and freed before the
+ * probe leaves the registered list. On the first unregistration failure the
+ * error is returned and the probe stays registered.
+ */
+int ltt_probe_unregister(struct ltt_available_probe *pdata)
+{
+	struct ltt_active_marker *active, *next;
+	int status = 0;
+
+	mutex_lock(&probes_mutex);
+	list_for_each_entry_safe(active, next, &markers_loaded_list, node) {
+		if (active->probe != pdata)
+			continue;
+		status = marker_probe_unregister_private_data(
+						pdata->probe_func, active);
+		if (status)
+			goto unlock;
+		list_del(&active->node);
+		kmem_cache_free(markers_loaded_cachep, active);
+	}
+	list_del(&pdata->node);
+unlock:
+	mutex_unlock(&probes_mutex);
+	return status;
+}
+EXPORT_SYMBOL_GPL(ltt_probe_unregister);
+
+/*
+ * Connect marker "mname" to probe "pname".
+ * Only allow _only_ probe instance to be connected to a marker.
+ *
+ * Returns 0 on success, -ENOENT when the probe is unknown, -EEXIST when the
+ * marker is already connected to it, -ENOMEM on allocation failure, or the
+ * marker_probe_register() error.
+ */
+int ltt_marker_connect(const char *channel, const char *mname,
+		       const char *pname)
+
+{
+	int ret;
+	struct ltt_active_marker *pdata;
+	struct ltt_available_probe *probe;
+
+	ltt_lock_traces();
+	mutex_lock(&probes_mutex);
+	probe = get_probe_from_name(pname);
+	if (!probe) {
+		ret = -ENOENT;
+		goto end;
+	}
+	pdata = marker_get_private_data(channel, mname, probe->probe_func, 0);
+	if (pdata && !IS_ERR(pdata)) {
+		ret = -EEXIST;
+		goto end;
+	}
+	pdata = kmem_cache_zalloc(markers_loaded_cachep, GFP_KERNEL);
+	if (!pdata) {
+		ret = -ENOMEM;
+		goto end;
+	}
+	pdata->probe = probe;
+	/*
+	 * ID has priority over channel in case of conflict.
+	 */
+	ret = marker_probe_register(channel, mname, NULL,
+				    probe->probe_func, pdata);
+	if (ret)
+		kmem_cache_free(markers_loaded_cachep, pdata);
+	else
+		list_add(&pdata->node, &markers_loaded_list);
+end:
+	mutex_unlock(&probes_mutex);
+	ltt_unlock_traces();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ltt_marker_connect);
+
+/*
+ * Disconnect marker "mname", probe "pname".
+ *
+ * Returns 0 on success, -ENOENT when the probe is unknown, -EPERM when the
+ * marker was not connected through this module, or the error reported by
+ * the marker layer.
+ */
+int ltt_marker_disconnect(const char *channel, const char *mname,
+			  const char *pname)
+{
+	struct ltt_active_marker *pdata;
+	struct ltt_available_probe *probe;
+	int ret = 0;
+
+	mutex_lock(&probes_mutex);
+	probe = get_probe_from_name(pname);
+	if (!probe) {
+		ret = -ENOENT;
+		goto end;
+	}
+	pdata = marker_get_private_data(channel, mname, probe->probe_func, 0);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto end;
+	} else if (!pdata) {
+		/*
+		 * Not registered by us.
+		 */
+		ret = -EPERM;
+		goto end;
+	}
+	ret = marker_probe_unregister(channel, mname, probe->probe_func, pdata);
+	if (ret)
+		goto end;
+	else {
+		list_del(&pdata->node);
+		kmem_cache_free(markers_loaded_cachep, pdata);
+	}
+end:
+	mutex_unlock(&probes_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ltt_marker_disconnect);
+
+/* Unregister and free every marker still connected (module unload). */
+static void disconnect_all_markers(void)
+{
+	struct ltt_active_marker *pdata, *tmp;
+
+	list_for_each_entry_safe(pdata, tmp, &markers_loaded_list, node) {
+		marker_probe_unregister_private_data(pdata->probe->probe_func,
+			pdata);
+		list_del(&pdata->node);
+		kmem_cache_free(markers_loaded_cachep, pdata);
+	}
+}
+
+/*
+ * Module init: create the active-marker slab cache, register the default
+ * probe and connect the two metadata markers to it.
+ */
+static int __init marker_control_init(void)
+{
+	int ret;
+
+	markers_loaded_cachep = KMEM_CACHE(ltt_active_marker, 0);
+	/* Every connect below allocates from this cache. */
+	if (!markers_loaded_cachep)
+		return -ENOMEM;
+
+	ret = ltt_probe_register(&default_probe);
+	BUG_ON(ret);
+	ret = ltt_marker_connect("metadata", "core_marker_format",
+				 DEFAULT_PROBE);
+	BUG_ON(ret);
+	ret = ltt_marker_connect("metadata", "core_marker_id", DEFAULT_PROBE);
+	BUG_ON(ret);
+
+	return 0;
+}
+module_init(marker_control_init);
+
+/* Module exit: undo marker_control_init() and drop any remaining marker. */
+static void __exit marker_control_exit(void)
+{
+	int ret;
+
+	ret = ltt_marker_disconnect("metadata", "core_marker_format",
+				    DEFAULT_PROBE);
+	BUG_ON(ret);
+	ret = ltt_marker_disconnect("metadata", "core_marker_id",
+				    DEFAULT_PROBE);
+	BUG_ON(ret);
+	ret = ltt_probe_unregister(&default_probe);
+	BUG_ON(ret);
+	disconnect_all_markers();
+	kmem_cache_destroy(markers_loaded_cachep);
+	marker_synchronize_unregister();
+}
+module_exit(marker_control_exit);
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Linux Trace Toolkit Marker Control");
diff --git a/ltt-relay-alloc.c b/ltt-relay-alloc.c
new file mode 100644
index 00000000..a6697dec
--- /dev/null
+++ b/ltt-relay-alloc.c
@@ -0,0 +1,732 @@
+/*
+ * ltt-relay-alloc.c
+ *
+ * Copyright (C) 2008,2009 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ltt-relay.h"
+#include "ltt-tracer.h"
+#include "ltt-relay-lockless.h"	/* for cpu hotplug */
+
+/**
+ * ltt_chanbuf_allocate - allocate a channel buffer
+ * @buf: the buffer struct
+ * @size: total size of the buffer
+ * @n_sb: number of subbuffers
+ * @extra_reader_sb: need extra subbuffer for reader
+ *
+ * Allocates the data pages on the NUMA node of @buf->cpu, then the per
+ * sub-buffer page indexes, and distributes the pages among the sub-buffers
+ * (plus the reader sub-buffer when @extra_reader_sb is set).
+ *
+ * Returns 0 on success, -ENOMEM on failure with everything released.
+ */
+static
+int ltt_chanbuf_allocate(struct ltt_chanbuf_alloc *buf, size_t size,
+			 size_t n_sb, int extra_reader_sb)
+{
+	long i, j, n_pages, n_pages_per_sb, page_idx = 0;
+	struct page **pages;
+	void **virt;
+
+	n_pages = size >> PAGE_SHIFT;
+	n_pages_per_sb = n_pages >> get_count_order(n_sb);
+	if (extra_reader_sb)
+		n_pages += n_pages_per_sb;	/* Add pages for reader */
+
+	pages = kmalloc_node(max_t(size_t, sizeof(*pages) * n_pages,
+				   1 << INTERNODE_CACHE_SHIFT),
+			GFP_KERNEL, cpu_to_node(buf->cpu));
+	if (unlikely(!pages))
+		goto pages_error;
+
+	virt = kmalloc_node(ALIGN(sizeof(*virt) * n_pages,
+				  1 << INTERNODE_CACHE_SHIFT),
+			GFP_KERNEL, cpu_to_node(buf->cpu));
+	if (unlikely(!virt))
+		goto virt_error;
+
+	for (i = 0; i < n_pages; i++) {
+		pages[i] = alloc_pages_node(cpu_to_node(buf->cpu),
+			GFP_KERNEL | __GFP_ZERO, 0);
+		if (unlikely(!pages[i]))
+			goto depopulate;
+		virt[i] = page_address(pages[i]);
+	}
+	buf->nr_pages = n_pages;
+	buf->_pages = pages;
+	buf->_virt = virt;
+
+	/* Allocate write-side page index */
+	buf->buf_wsb = kzalloc_node(max_t(size_t,
+				sizeof(struct chanbuf_sb) * n_sb,
+				1 << INTERNODE_CACHE_SHIFT),
+				GFP_KERNEL, cpu_to_node(buf->cpu));
+	if (unlikely(!buf->buf_wsb))
+		goto depopulate;
+
+	for (i = 0; i < n_sb; i++) {
+		buf->buf_wsb[i].pages =
+			kzalloc_node(max_t(size_t,
+				sizeof(struct chanbuf_page) * n_pages_per_sb,
+				1 << INTERNODE_CACHE_SHIFT),
+				GFP_KERNEL, cpu_to_node(buf->cpu));
+		if (!buf->buf_wsb[i].pages)
+			goto free_buf_wsb;
+	}
+
+	if (extra_reader_sb) {
+		/* Allocate read-side page index */
+		buf->buf_rsb.pages =
+			kzalloc_node(max_t(size_t,
+				sizeof(struct chanbuf_page) * n_pages_per_sb,
+				1 << INTERNODE_CACHE_SHIFT),
+				GFP_KERNEL, cpu_to_node(buf->cpu));
+		if (unlikely(!buf->buf_rsb.pages))
+			goto free_buf_wsb;
+	} else {
+		buf->buf_rsb.pages = buf->buf_wsb[0].pages;
+	}
+
+	/* Assign pages to write-side page index */
+	for (i = 0; i < n_sb; i++) {
+		for (j = 0; j < n_pages_per_sb; j++) {
+			/* page_idx indexes arrays of n_pages elements. */
+			WARN_ON(page_idx >= n_pages);
+			buf->buf_wsb[i].pages[j].virt = virt[page_idx];
+			buf->buf_wsb[i].pages[j].page = pages[page_idx];
+			page_idx++;
+		}
+		RCHAN_SB_SET_NOREF(buf->buf_wsb[i].pages);
+	}
+
+	if (extra_reader_sb) {
+		for (j = 0; j < n_pages_per_sb; j++) {
+			WARN_ON(page_idx >= n_pages);
+			buf->buf_rsb.pages[j].virt = virt[page_idx];
+			buf->buf_rsb.pages[j].page = pages[page_idx];
+			page_idx++;
+		}
+		RCHAN_SB_SET_NOREF(buf->buf_rsb.pages);
+	}
+
+	/*
+	 * If kmalloc ever uses vmalloc underneath, make sure the buffer pages
+	 * will not fault.
+	 */
+	vmalloc_sync_all();
+	return 0;
+
+free_buf_wsb:
+	/* Unallocated slots are NULL (kzalloc), so kfree() is harmless. */
+	for (i = 0; i < n_sb; i++) {
+		RCHAN_SB_CLEAR_NOREF(buf->buf_wsb[i].pages);
+		kfree(buf->buf_wsb[i].pages);
+	}
+	kfree(buf->buf_wsb);
+	/*
+	 * All n_pages data pages exist at this point; "i" was reused as the
+	 * sub-buffer index above, so reset it before releasing the pages.
+	 */
+	i = n_pages;
+depopulate:
+	/*
+	 * Free all pages from [ i - 1 down to 0 ].
+	 * If i = 0, don't free anything.
+	 */
+	for (i--; i >= 0; i--)
+		__free_page(pages[i]);
+	kfree(virt);
+virt_error:
+	kfree(pages);
+pages_error:
+	return -ENOMEM;
+}
+
+/*
+ * ltt_chanbuf_alloc_create - allocate the pages of buffer @buf for @cpu.
+ *
+ * Returns 0 on success or the ltt_chanbuf_allocate() error.
+ */
+int ltt_chanbuf_alloc_create(struct ltt_chanbuf_alloc *buf,
+			     struct ltt_chan_alloc *chan, int cpu)
+{
+	/*
+	 * Bind the buffer to its channel and cpu first: the allocator reads
+	 * buf->cpu to pick the NUMA node of every allocation.
+	 */
+	buf->chan = chan;
+	buf->cpu = cpu;
+
+	return ltt_chanbuf_allocate(buf, chan->buf_size, chan->n_sb,
+				    chan->extra_reader_sb);
+}
+
+/*
+ * ltt_chanbuf_alloc_free - release the page indexes and data pages of @buf
+ * and mark it as not allocated.
+ */
+void ltt_chanbuf_alloc_free(struct ltt_chanbuf_alloc *buf)
+{
+	struct ltt_chan_alloc *chan = buf->chan;
+	struct page **pages;
+	long i;
+
+	/* Destroy index */
+	if (chan->extra_reader_sb) {
+		RCHAN_SB_CLEAR_NOREF(buf->buf_rsb.pages);
+		kfree(buf->buf_rsb.pages);
+	}
+	for (i = 0; i < chan->n_sb; i++) {
+		RCHAN_SB_CLEAR_NOREF(buf->buf_wsb[i].pages);
+		kfree(buf->buf_wsb[i].pages);
+	}
+	kfree(buf->buf_wsb);
+
+	/* Destroy pages */
+	pages = buf->_pages;
+	for (i = 0; i < buf->nr_pages; i++)
+		__free_page(pages[i]);
+	kfree(buf->_pages);
+	kfree(buf->_virt);
+	buf->allocated = 0;
+}
+
+/**
+ * ltt_relay_hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Creates the per-cpu buffer of every channel of every trace when a CPU is
+ * about to come up.
+ *
+ * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
+ */
+static
+int __cpuinit ltt_relay_hotcpu_callback(struct notifier_block *nb,
+					unsigned long action,
+					void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct ltt_trace *trace;
+	struct ltt_chan *chan;
+	struct ltt_chanbuf *buf;
+	int ret, i;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		/*
+		 * CPU hotplug lock protects trace lock from this callback.
+		 */
+		__list_for_each_entry_rcu(trace, &ltt_traces.head, list) {
+			for (i = 0; i < trace->nr_channels; i++) {
+				chan = &trace->channels[i];
+				buf = per_cpu_ptr(chan->a.buf, cpu);
+				ret = ltt_chanbuf_create(buf, &chan->a, cpu);
+				if (ret) {
+					printk(KERN_ERR
+					  "ltt_relay_hotcpu_callback: cpu %d "
+					  "buffer creation failed\n", cpu);
+					return NOTIFY_BAD;
+				}
+
+			}
+		}
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		/* No need to do a buffer switch here, because it will happen
+		 * when tracing is stopped, or will be done by switch timer CPU
+		 * DEAD callback. */
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/*
+ * Invoke @cb on the buffer of @cpu for every active channel of every trace.
+ *
+ * Must be called with either trace lock or rcu read lock sched held.
+ */
+void ltt_chan_for_each_channel(void (*cb) (struct ltt_chanbuf *buf), int cpu)
+{
+	struct ltt_trace *trace;
+	struct ltt_chan *chan;
+	struct ltt_chanbuf *buf;
+	int i;
+
+	__list_for_each_entry_rcu(trace, &ltt_traces.head, list) {
+		for (i = 0; i < trace->nr_channels; i++) {
+			chan = &trace->channels[i];
+			if (!chan->active)
+				continue;
+			buf = per_cpu_ptr(chan->a.buf, cpu);
+			cb(buf);
+		}
+	}
+}
+
+/**
+ * ltt_chan_alloc_init - initialize a relay channel and create its buffers
+ * @chan: channel
+ * @trace: trace
+ * @base_filename: base name of files to create
+ * @parent: dentry of parent directory, %NULL for root directory
+ * @sb_size: size of sub-buffers (>= PAGE_SIZE, power of 2)
+ * @n_sb: number of sub-buffers (power of 2)
+ * @extra_reader_sb: allocate an extra subbuffer for the reader
+ * @overwrite: channel is in overwrite mode
+ *
+ * Returns 0 if successful, -EPERM on invalid arguments, -ENOMEM when a
+ * buffer cannot be created.
+ *
+ * Creates per-cpu channel buffers using the sizes and attributes
+ * specified.  The created channel buffer files will be named
+ * base_filename_0...base_filename_N-1.  File permissions will
+ * be %S_IRUSR.
+ */
+int ltt_chan_alloc_init(struct ltt_chan_alloc *chan, struct ltt_trace *trace,
+			const char *base_filename,
+			struct dentry *parent, size_t sb_size,
+			size_t n_sb, int extra_reader_sb, int overwrite)
+{
+	unsigned int cpu;
+
+	/* A name and non-zero geometry are mandatory. */
+	if (!base_filename || !sb_size || !n_sb)
+		return -EPERM;
+
+	/* Check that the subbuffer size is larger than a page. */
+	WARN_ON_ONCE(sb_size < PAGE_SIZE);
+
+	/*
+	 * Make sure the number of subbuffers and subbuffer size are power of 2.
+	 */
+	WARN_ON_ONCE(hweight32(sb_size) != 1);
+	WARN_ON(hweight32(n_sb) != 1);
+
+	/* Channel geometry. */
+	chan->sb_size = sb_size;
+	chan->sb_size_order = get_count_order(sb_size);
+	chan->n_sb = n_sb;
+	chan->n_sb_order = get_count_order(n_sb);
+	chan->buf_size = n_sb * sb_size;
+	chan->extra_reader_sb = extra_reader_sb;
+
+	/* Ownership and naming. */
+	chan->trace = trace;
+	chan->parent = parent;
+	strlcpy(chan->filename, base_filename, NAME_MAX);
+	kref_init(&chan->kref);
+	kref_get(&trace->kref);
+
+	/* Allocating the child structure */
+	chan->buf = alloc_percpu(struct ltt_chanbuf);
+	if (!chan->buf)
+		goto free_chan;
+
+	for_each_online_cpu(cpu) {
+		if (ltt_chanbuf_create(per_cpu_ptr(chan->buf, cpu), chan, cpu))
+			goto free_bufs;
+	}
+
+	return 0;
+
+free_bufs:
+	/* Tear down whichever per-cpu buffers were created so far. */
+	for_each_possible_cpu(cpu) {
+		struct ltt_chanbuf *cbuf = per_cpu_ptr(chan->buf, cpu);
+
+		if (cbuf->a.allocated) {
+			ltt_chanbuf_remove_file(cbuf);
+			ltt_chanbuf_free(cbuf);
+		}
+	}
+	free_percpu(chan->buf);
+free_chan:
+	kref_put(&chan->kref, ltt_chan_free);
+	return -ENOMEM;
+}
+
+/**
+ * ltt_chan_alloc_remove_files - remove channel files.
+ * @chan: the channel
+ *
+ * Remove all channel files and wait for dentry use counts to become zero.
+ */
+void ltt_chan_alloc_remove_files(struct ltt_chan_alloc *chan)
+{
+	unsigned int i;
+	struct dentry *dentry;
+
+	for_each_possible_cpu(i) {
+		struct ltt_chanbuf *buf = per_cpu_ptr(chan->buf, i);
+
+		if (!buf->a.allocated)
+			continue;
+		dentry = dget(buf->a.dentry);
+		ltt_chanbuf_remove_file(buf);
+		/* TODO: wait / wakeup instead */
+		/*
+		 * Wait for every reference to the dentry to be gone,
+		 * except us. This polls every 100 ms.
+		 */
+		while (atomic_read(&dentry->d_count) != 1)
+			msleep(100);
+		dput(dentry);
+	}
+}
+
+/**
+ * ltt_chan_alloc_free - destroy the channel
+ * @chan: the channel
+ *
+ * Frees every allocated per-cpu buffer and the per-cpu array itself, then
+ * drops the reference held on the trace and wakes up waiters on the trace's
+ * kref wait queue.
+ */
+void ltt_chan_alloc_free(struct ltt_chan_alloc *chan)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		struct ltt_chanbuf *buf = per_cpu_ptr(chan->buf, i);
+
+		if (!buf->a.allocated)
+			continue;
+		ltt_chanbuf_free(buf);
+	}
+	free_percpu(chan->buf);
+	kref_put(&chan->trace->kref, ltt_release_trace);
+	wake_up_interruptible(&chan->trace->kref_wq);
+}
+
+/**
+ * _ltt_relay_write - write data to a ltt_relay buffer.
+ * @bufa : buffer
+ * @offset : offset within the buffer
+ * @src : source address
+ * @len : length to write
+ * @pagecpy : page size copied so far
+ *
+ * Copies @src into the write-side sub-buffer pages, one page-bounded piece
+ * per loop iteration. @pagecpy bytes are skipped on entry, so a caller that
+ * already copied the first piece passes its size here.
+ */
+void _ltt_relay_write(struct ltt_chanbuf_alloc *bufa, size_t offset,
+		      const void *src, size_t len, ssize_t pagecpy)
+{
+	struct ltt_chan_alloc *chana = bufa->chan;
+	size_t sbidx, index;
+	struct chanbuf_page *rpages;
+
+	do {
+		len -= pagecpy;
+		src += pagecpy;
+		offset += pagecpy;
+		sbidx = offset >> chana->sb_size_order;	/* sub-buffer index */
+		index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;	/* page within it */
+
+		/*
+		 * Underlying layer should never ask for writes across
+		 * subbuffers.
+		 */
+		WARN_ON(offset >= chana->buf_size);
+
+		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+		rpages = bufa->buf_wsb[sbidx].pages;
+		WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+		ltt_relay_do_copy(rpages[index].virt + (offset & ~PAGE_MASK),
+				  src, pagecpy);
+	} while (unlikely(len != pagecpy));
+}
+EXPORT_SYMBOL_GPL(_ltt_relay_write);
+
+/**
+ * _ltt_relay_strncpy_fixup - Fix an incomplete string in a ltt_relay buffer.
+ * @bufa : buffer
+ * @offset : offset within the buffer
+ * @len : length to write
+ * @copied: string actually copied
+ * @terminated: does string end with \0
+ *
+ * Fills string with "X" if incomplete.
+ *
+ * Two cases are handled: when @copied equals @len the last byte written is
+ * replaced by \0; otherwise the area from the string's \0 up to @len is
+ * filled with 'X' and the final byte is set to \0.
+ */
+void _ltt_relay_strncpy_fixup(struct ltt_chanbuf_alloc *bufa, size_t offset,
+			      size_t len, size_t copied, int terminated)
+{
+	struct ltt_chan_alloc *chana = bufa->chan;
+	size_t sbidx, index;
+	ssize_t pagecpy;
+	struct chanbuf_page *rpages;
+
+	if (copied == len) {
+		/*
+		 * Deal with non-terminated string.
+		 */
+		WARN_ON_ONCE(terminated);
+		offset += copied - 1;
+		sbidx = offset >> chana->sb_size_order;
+		index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+		/*
+		 * Underlying layer should never ask for writes across
+		 * subbuffers.
+		 */
+		WARN_ON(offset >= chana->buf_size);
+		rpages = bufa->buf_wsb[sbidx].pages;
+		WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+		ltt_relay_do_memset(rpages[index].virt + (offset & ~PAGE_MASK),
+				    '\0', 1);
+		return;
+	}
+
+	/*
+	 * Deal with incomplete string.
+	 * Overwrite string's \0 with X too.
+	 */
+	pagecpy = copied - 1;
+	do {
+		WARN_ON_ONCE(!terminated);
+		len -= pagecpy;
+		offset += pagecpy;
+		sbidx = offset >> chana->sb_size_order;
+		index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+
+		/*
+		 * Underlying layer should never ask for writes across
+		 * subbuffers.
+		 */
+		WARN_ON(offset >= chana->buf_size);
+
+		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+		rpages = bufa->buf_wsb[sbidx].pages;
+		WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+		ltt_relay_do_memset(rpages[index].virt + (offset & ~PAGE_MASK),
+				    'X', pagecpy);
+	} while (unlikely(len != pagecpy));
+	/*
+	 * Overwrite last 'X' with '\0'.
+	 */
+	offset += pagecpy - 1;
+	sbidx = offset >> chana->sb_size_order;
+	index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+	/*
+	 * Underlying layer should never ask for writes across subbuffers.
+	 */
+	WARN_ON(offset >= chana->buf_size);
+	rpages = bufa->buf_wsb[sbidx].pages;
+	WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+	ltt_relay_do_memset(rpages[index].virt + (offset & ~PAGE_MASK),
+			    '\0', 1);
+}
+EXPORT_SYMBOL_GPL(_ltt_relay_strncpy_fixup);
+
+/**
+ * _ltt_relay_strncpy - copy a string to a ltt_relay buffer.
+ * @bufa : buffer
+ * @offset : offset within the buffer
+ * @src : source address
+ * @len : length to write
+ * @pagecpy : page size copied so far
+ *
+ * Copies page-bounded pieces like _ltt_relay_write(). As soon as a piece is
+ * cut short, or the last piece ends without a terminator, the remainder is
+ * handed to _ltt_relay_strncpy_fixup() and the copy stops.
+ */
+void _ltt_relay_strncpy(struct ltt_chanbuf_alloc *bufa, size_t offset,
+			const void *src, size_t len, ssize_t pagecpy)
+{
+	struct ltt_chan_alloc *chana = bufa->chan;
+	size_t sbidx, index, copied;
+	struct chanbuf_page *rpages;
+	int terminated;
+
+	do {
+		len -= pagecpy;
+		src += pagecpy;
+		offset += pagecpy;
+		sbidx = offset >> chana->sb_size_order;
+		index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+
+		/*
+		 * Underlying layer should never ask for writes across
+		 * subbuffers.
+		 */
+		WARN_ON(offset >= chana->buf_size);
+
+		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+		rpages = bufa->buf_wsb[sbidx].pages;
+		WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+		copied = ltt_relay_do_strncpy(rpages[index].virt
+					      + (offset & ~PAGE_MASK),
+					      src, pagecpy, &terminated);
+		if (copied < pagecpy || ((len == pagecpy) && !terminated)) {
+			_ltt_relay_strncpy_fixup(bufa, offset, len, copied,
+						 terminated);
+			break;
+		}
+	} while (unlikely(len != pagecpy));
+}
+EXPORT_SYMBOL_GPL(_ltt_relay_strncpy);
+
+/**
+ * ltt_relay_read - read data from ltt_relay_buffer.
+ * @bufa : buffer
+ * @offset : offset within the buffer
+ * @dest : destination address
+ * @len : length to read
+ *
+ * Copies from the read-side sub-buffer pages, one page-bounded piece at a
+ * time. Returns @len, or 0 when @len is 0.
+ *
+ * Should be protected by get_subbuf/put_subbuf.
+ */
+int ltt_relay_read(struct ltt_chanbuf_alloc *bufa, size_t offset, void *dest,
+		   size_t len)
+{
+	struct ltt_chan_alloc *chana = bufa->chan;
+	size_t index;
+	ssize_t pagecpy, orig_len;
+	struct chanbuf_page *rpages;
+
+	orig_len = len;
+	offset &= chana->buf_size - 1;
+	index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+	if (unlikely(!len))
+		return 0;
+	for (;;) {
+		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+		rpages = bufa->buf_rsb.pages;
+		WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+		memcpy(dest, rpages[index].virt + (offset & ~PAGE_MASK),
+		       pagecpy);
+		len -= pagecpy;
+		if (likely(!len))
+			break;
+		dest += pagecpy;
+		offset += pagecpy;
+		index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+		/*
+		 * Underlying layer should never ask for reads across
+		 * subbuffers.
+		 */
+		WARN_ON(offset >= chana->buf_size);
+	}
+	return orig_len;
+}
+EXPORT_SYMBOL_GPL(ltt_relay_read);
+
+/**
+ * ltt_relay_read_cstr - read a C-style string from ltt_relay_buffer.
+ * @bufa : buffer
+ * @offset : offset within the buffer
+ * @dest : destination address
+ * @len : destination's length
+ *
+ * return string's length
+ * Should be protected by get_subbuf/put_subbuf.
+ */
+int ltt_relay_read_cstr(struct ltt_chanbuf_alloc *bufa, size_t offset,
+			void *dest, size_t len)
+{
+	struct ltt_chan_alloc *chana = bufa->chan;
+	size_t index;
+	ssize_t pagecpy, pagelen, strpagelen, orig_offset;
+	char *str;
+	struct chanbuf_page *rpages;
+
+	offset &= chana->buf_size - 1;
+	index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+	orig_offset = offset;
+	for (;;) {
+		rpages = bufa->buf_rsb.pages;
+		WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+		str = (char *)rpages[index].virt + (offset & ~PAGE_MASK);
+		/* Bytes left in this page, and string bytes found in them. */
+		pagelen = PAGE_SIZE - (offset & ~PAGE_MASK);
+		strpagelen = strnlen(str, pagelen);
+		if (len) {
+			pagecpy = min_t(size_t, len, strpagelen);
+			/* A NULL dest only measures the string. */
+			if (dest) {
+				memcpy(dest, str, pagecpy);
+				dest += pagecpy;
+			}
+			len -= pagecpy;
+		}
+		offset += strpagelen;
+		index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+		/* Terminator found inside this page: the string ends here. */
+		if (strpagelen < pagelen)
+			break;
+		/*
+		 * Underlying layer should never ask for reads across
+		 * subbuffers.
+		 */
+		WARN_ON(offset >= chana->buf_size);
+	}
+	/*
+	 * The terminator is only written when room is left in dest, so a
+	 * string of len bytes or more leaves dest unterminated.
+	 * NOTE(review): confirm callers size dest accordingly.
+	 */
+	if (dest && len)
+		((char *)dest)[0] = 0;
+	/* Length walked in the buffer, terminator not counted. */
+	return offset - orig_offset;
+}
+EXPORT_SYMBOL_GPL(ltt_relay_read_cstr);
+
+/**
+ * ltt_relay_read_get_page - Get a whole page to read from
+ * @bufa : buffer
+ * @offset : offset within the buffer
+ *
+ * Should be protected by get_subbuf/put_subbuf.
+ *
+ * Returns the struct page recorded in the buf_rsb page table for @offset.
+ */
+struct page *ltt_relay_read_get_page(struct ltt_chanbuf_alloc *bufa,
+				     size_t offset)
+{
+	size_t index;
+	struct chanbuf_page *rpages;
+	struct ltt_chan_alloc *chana = bufa->chan;
+
+	offset &= chana->buf_size - 1;
+	index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+	rpages = bufa->buf_rsb.pages;
+	WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+	return rpages[index].page;
+}
+EXPORT_SYMBOL_GPL(ltt_relay_read_get_page);
+
+/**
+ * ltt_relay_read_offset_address - get address of a location within the buffer
+ * @bufa : buffer
+ * @offset : offset within the buffer.
+ *
+ * Return the address where a given offset is located (for read).
+ * Should be used to get the current subbuffer header pointer. Given we know
+ * it's never on a page boundary, it's safe to write directly to this address,
+ * as long as the write is never bigger than a page size.
+ */
+void *ltt_relay_read_offset_address(struct ltt_chanbuf_alloc *bufa,
+				    size_t offset)
+{
+	struct ltt_chan_alloc *chana = bufa->chan;
+	struct chanbuf_page *read_pages;
+	size_t wrapped, page_idx;
+
+	/* Wrap into the buffer, then locate the page inside the subbuffer. */
+	wrapped = offset & (chana->buf_size - 1);
+	page_idx = (wrapped & (chana->sb_size - 1)) >> PAGE_SHIFT;
+
+	read_pages = bufa->buf_rsb.pages;
+	WARN_ON_ONCE(RCHAN_SB_IS_NOREF(read_pages));
+
+	/* Page base address plus the offset within that page. */
+	return read_pages[page_idx].virt + (wrapped & ~PAGE_MASK);
+}
+EXPORT_SYMBOL_GPL(ltt_relay_read_offset_address);
+
+/**
+ * ltt_relay_offset_address - get address of a location within the buffer
+ * @bufa : buffer
+ * @offset : offset within the buffer.
+ *
+ * Return the address where a given offset is located.
+ * Should be used to get the current subbuffer header pointer. Given we know
+ * it's never on a page boundary, it's safe to write directly to this address,
+ * as long as the write is never bigger than a page size.
+ */
+void *ltt_relay_offset_address(struct ltt_chanbuf_alloc *bufa, size_t offset)
+{
+	size_t sbidx, index;
+	struct chanbuf_page *rpages;
+	struct ltt_chan_alloc *chana = bufa->chan;
+
+	/* Wrap into the buffer, then pick the subbuffer and page within it. */
+	offset &= chana->buf_size - 1;
+	sbidx = offset >> chana->sb_size_order;
+	index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
+	/* Uses the buf_wsb table, indexed by subbuffer. */
+	rpages = bufa->buf_wsb[sbidx].pages;
+	WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
+	return rpages[index].virt + (offset & ~PAGE_MASK);
+}
+EXPORT_SYMBOL_GPL(ltt_relay_offset_address);
+
+/* Module init: register the hotcpu notifier (priority 5), then init relay. */
+static __init int ltt_relay_alloc_init(void)
+{
+	hotcpu_notifier(ltt_relay_hotcpu_callback, 5);
+	ltt_relay_init();
+	return 0;
+}
+
+/* Module exit: undo ltt_relay_init(). */
+static void __exit ltt_relay_alloc_exit(void)
+{
+	ltt_relay_exit();
+}
+
+module_init(ltt_relay_alloc_init);
+module_exit(ltt_relay_alloc_exit);
diff --git a/ltt-relay-lockless.c b/ltt-relay-lockless.c
new file mode 100644
index 00000000..0c4c83c2
--- /dev/null
+++ b/ltt-relay-lockless.c
@@ -0,0 +1,1366 @@
+/*
+ * ltt/ltt-relay-lockless.c
+ *
+ * (C) Copyright 2005-2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * LTTng lockless buffer space management (reader/writer).
+ *
+ * Author:
+ * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Inspired from LTT :
+ * Karim Yaghmour (karim@opersys.com)
+ * Tom Zanussi (zanussi@us.ibm.com)
+ * Bob Wisniewski (bob@watson.ibm.com)
+ * And from K42 :
+ * Bob Wisniewski (bob@watson.ibm.com)
+ *
+ * Changelog:
+ * 08/10/08, Cleanup.
+ * 19/10/05, Complete lockless mechanism.
+ * 27/05/05, Modular redesign and rewrite.
+ *
+ * Userspace reader semantic :
+ * while (poll fd != POLLHUP) {
+ * - ioctl RELAY_GET_SUBBUF_SIZE
+ * while (1) {
+ * - ioctl GET_SUBBUF
+ * - splice 1 subbuffer worth of data to a pipe
+ * - splice the data from pipe to disk/network
+ * - ioctl PUT_SUBBUF, check error value
+ * if err val < 0, previous subbuffer was corrupted.
+ * }
+ * }
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ltt-tracer.h"
+#include "ltt-relay.h"
+#include "ltt-relay-lockless.h"
+
+/* Debug printing is compiled out; flip the #if to route it to printk(). */
+#if 0
+#define printk_dbg(fmt, args...) printk(fmt, args)
+#else
+#define printk_dbg(fmt, args...)
+#endif
+
+/* Offsets and switch flags filled in by the reserve/switch slow paths. */
+struct ltt_reserve_switch_offsets {
+	long begin, end, old;
+	long begin_switch, end_switch_current, end_switch_old;
+	size_t before_hdr_pad, size;
+};
+
+static
+void ltt_force_switch(struct ltt_chanbuf *buf, enum force_switch_mode mode);
+
+static
+void ltt_relay_print_buffer_errors(struct ltt_chan *chan, unsigned int cpu);
+
+static const struct file_operations ltt_file_operations;
+
+/*
+ * Initialize the header located at the start of subbuffer subbuf_idx:
+ * record the begin timestamp, poison data_size until ltt_buffer_end() sets
+ * it, and write the trace header.
+ */
+static
+void ltt_buffer_begin(struct ltt_chanbuf *buf, u64 tsc, unsigned int subbuf_idx)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+	struct ltt_subbuffer_header *header =
+		(struct ltt_subbuffer_header *)
+			ltt_relay_offset_address(&buf->a,
+				subbuf_idx * chan->a.sb_size);
+
+	header->cycle_count_begin = tsc;
+	header->data_size = 0xFFFFFFFF; /* for debugging */
+	ltt_write_trace_header(chan->a.trace, header);
+}
+
+/*
+ * offset is assumed to never be 0 here : never deliver a completely empty
+ * subbuffer. The lost size is between 0 and subbuf_size-1.
+ */
+static
+void ltt_buffer_end(struct ltt_chanbuf *buf, u64 tsc, unsigned int offset,
+		    unsigned int subbuf_idx)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+	struct ltt_subbuffer_header *header =
+		(struct ltt_subbuffer_header *)
+			ltt_relay_offset_address(&buf->a,
+				subbuf_idx * chan->a.sb_size);
+	/* Bytes used in the subbuffer, computed from its last used byte. */
+	u32 data_size = SUBBUF_OFFSET(offset - 1, chan) + 1;
+
+	header->data_size = data_size;
+	/* Recorded subbuffer size is the data size rounded up to a page. */
+	header->sb_size = PAGE_ALIGN(data_size);
+	header->cycle_count_end = tsc;
+	/* Snapshot the per-buffer loss and corruption counters. */
+	header->events_lost = local_read(&buf->events_lost);
+	header->subbuf_corrupt = local_read(&buf->corrupted_subbuffers);
+}
+
+/*
+ * Report pending buffer errors, then release the commit counter arrays and
+ * the underlying buffer allocation.
+ *
+ * Must be called under trace lock or cpu hotplug protection.
+ */
+void ltt_chanbuf_free(struct ltt_chanbuf *buf)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+
+	ltt_relay_print_buffer_errors(chan, buf->a.cpu);
+#ifdef CONFIG_LTT_VMCORE
+	kfree(buf->commit_seq);
+#endif
+	kfree(buf->commit_count);
+
+	ltt_chanbuf_alloc_free(&buf->a);
+}
+
+/*
+ * Must be called under trace lock or cpu hotplug protection.
+ */
+int ltt_chanbuf_create(struct ltt_chanbuf *buf, struct ltt_chan_alloc *chana,
+		       int cpu)
+{
+	struct ltt_chan *chan = container_of(chana, struct ltt_chan, a);
+	struct ltt_trace *trace = chana->trace;
+	unsigned int j, n_sb;
+	int ret;
+
+	/* Test for cpu hotplug */
+	if (buf->a.allocated)
+		return 0;
+
+	ret = ltt_chanbuf_alloc_create(&buf->a, &chan->a, cpu);
+	if (ret)
+		return ret;
+
+	/* One commit counter set per subbuffer, on the cpu's memory node. */
+	buf->commit_count =
+		kzalloc_node(ALIGN(sizeof(*buf->commit_count) * chan->a.n_sb,
+				   1 << INTERNODE_CACHE_SHIFT),
+			GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf->commit_count) {
+		ret = -ENOMEM;
+		goto free_chanbuf;
+	}
+
+#ifdef CONFIG_LTT_VMCORE
+	buf->commit_seq =
+		kzalloc_node(ALIGN(sizeof(*buf->commit_seq) * chan->a.n_sb,
+				   1 << INTERNODE_CACHE_SHIFT),
+			GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf->commit_seq) {
+		/*
+		 * commit_count is released exactly once, at the free_commit
+		 * label; freeing it here as well would be a double free.
+		 */
+		ret = -ENOMEM;
+		goto free_commit;
+	}
+#endif
+
+	/* The first subbuffer starts right after its header. */
+	local_set(&buf->offset, ltt_sb_header_size());
+	atomic_long_set(&buf->consumed, 0);
+	atomic_long_set(&buf->active_readers, 0);
+	n_sb = chan->a.n_sb;
+	for (j = 0; j < n_sb; j++) {
+		local_set(&buf->commit_count[j].cc, 0);
+		local_set(&buf->commit_count[j].cc_sb, 0);
+		local_set(&buf->commit_count[j].events, 0);
+	}
+	init_waitqueue_head(&buf->write_wait);
+	init_waitqueue_head(&buf->read_wait);
+	spin_lock_init(&buf->full_lock);
+
+	RCHAN_SB_CLEAR_NOREF(buf->a.buf_wsb[0].pages);
+	ltt_buffer_begin(buf, trace->start_tsc, 0);
+	/* atomic_add made on local variable on data that belongs to
+	 * various CPUs : ok because tracing not started (for this cpu). */
+	local_add(ltt_sb_header_size(), &buf->commit_count[0].cc);
+
+	local_set(&buf->events_lost, 0);
+	local_set(&buf->corrupted_subbuffers, 0);
+	buf->finalized = 0;
+
+	ret = ltt_chanbuf_create_file(chan->a.filename, chan->a.parent,
+				      S_IRUSR, buf);
+	if (ret)
+		goto free_init;
+
+	/*
+	 * Ensure the buffer is ready before setting it to allocated.
+	 * Used for cpu hotplug vs async wakeup.
+	 */
+	smp_wmb();
+	buf->a.allocated = 1;
+
+	return 0;
+
+	/* Error handling */
+free_init:
+#ifdef CONFIG_LTT_VMCORE
+	kfree(buf->commit_seq);
+free_commit:
+#endif
+	kfree(buf->commit_count);
+free_chanbuf:
+	ltt_chanbuf_alloc_free(&buf->a);
+	return ret;
+}
+
+/* Remove the ascii output, then the channel's files. */
+void ltt_chan_remove_files(struct ltt_chan *chan)
+{
+	ltt_ascii_remove(chan);
+	ltt_chan_alloc_remove_files(&chan->a);
+}
+EXPORT_SYMBOL_GPL(ltt_chan_remove_files);
+
+
+/* kref release callback: free the channel embedding @kref. */
+void ltt_chan_free(struct kref *kref)
+{
+	struct ltt_chan *chan = container_of(kref, struct ltt_chan, a.kref);
+
+	ltt_chan_alloc_free(&chan->a);
+}
+EXPORT_SYMBOL_GPL(ltt_chan_free);
+
+/**
+ * ltt_chan_create - Create channel.
+ *
+ * Returns 0 on success, or the error returned by ltt_chan_alloc_init() or
+ * ltt_ascii_create(). The channel allocation is released when the ascii
+ * output cannot be created.
+ */
+int ltt_chan_create(const char *base_filename,
+		    struct ltt_chan *chan, struct dentry *parent,
+		    size_t sb_size, size_t n_sb,
+		    int overwrite, struct ltt_trace *trace)
+{
+	int ret;
+
+	chan->overwrite = overwrite;
+
+	ret = ltt_chan_alloc_init(&chan->a, trace, base_filename, parent,
+				  sb_size, n_sb, overwrite, overwrite);
+	if (ret)
+		goto error;
+
+	chan->commit_count_mask = (~0UL >> chan->a.n_sb_order);
+
+	ret = ltt_ascii_create(chan);
+	if (ret)
+		goto error_chan_alloc_free;
+
+	return ret;
+
+error_chan_alloc_free:
+	ltt_chan_alloc_free(&chan->a);
+error:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ltt_chan_create);
+
+/*
+ * Take a channel reference and register as the buffer's reader. At most one
+ * reader is admitted: returns -EBUSY (dropping the reference) when
+ * active_readers is already 1, 0 otherwise.
+ */
+int ltt_chanbuf_open_read(struct ltt_chanbuf *buf)
+{
+	kref_get(&buf->a.chan->kref);
+	if (!atomic_long_add_unless(&buf->active_readers, 1, 1)) {
+		kref_put(&buf->a.chan->kref, ltt_chan_free);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ltt_chanbuf_open_read);
+
+/* Undo ltt_chanbuf_open_read(): drop the reader count and the reference. */
+void ltt_chanbuf_release_read(struct ltt_chanbuf *buf)
+{
+	//ltt_relay_destroy_buffer(&buf->a.chan->a, buf->a.cpu);
+	WARN_ON(atomic_long_read(&buf->active_readers) != 1);
+	atomic_long_dec(&buf->active_readers);
+	kref_put(&buf->a.chan->kref, ltt_chan_free);
+}
+EXPORT_SYMBOL_GPL(ltt_chanbuf_release_read);
+
+/*
+ * Wake writers :
+ *
+ * This must be done after the trace is removed from the RCU list so that there
+ * 
are no stalled writers.
+ */
+static void ltt_relay_wake_writers(struct ltt_chanbuf *buf)
+{
+
+	if (waitqueue_active(&buf->write_wait))
+		wake_up_interruptible(&buf->write_wait);
+}
+
+/*
+ * This function should not be called from NMI interrupt context
+ */
+static void ltt_buf_unfull(struct ltt_chanbuf *buf)
+{
+	ltt_relay_wake_writers(buf);
+}
+
+/*
+ * Promote compiler barrier to a smp_mb().
+ * For the specific LTTng case, this IPI call should be removed if the
+ * architecture does not reorder writes. This should eventually be provided by
+ * a separate architecture-specific infrastructure.
+ */
+static void remote_mb(void *info)
+{
+	smp_mb();
+}
+
+/*
+ * Try to take the subbuffer at the current consumed offset for reading.
+ *
+ * Returns 0 and stores the consumed offset in *consumed on success, -EAGAIN
+ * when that subbuffer is not fully committed yet or is the one holding the
+ * write offset, or the error returned by update_read_sb_index().
+ */
+int ltt_chanbuf_get_subbuf(struct ltt_chanbuf *buf, unsigned long *consumed)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+	long consumed_old, consumed_idx, commit_count, write_offset;
+	int ret;
+
+	consumed_old = atomic_long_read(&buf->consumed);
+	consumed_idx = SUBBUF_INDEX(consumed_old, chan);
+	commit_count = local_read(&buf->commit_count[consumed_idx].cc_sb);
+	/*
+	 * Make sure we read the commit count before reading the buffer
+	 * data and the write offset. Correct consumed offset ordering
+	 * wrt commit count is ensured by the use of cmpxchg to update
+	 * the consumed offset.
+	 * smp_call_function_single can fail if the remote CPU is offline,
+	 * this is OK because then there is no wmb to execute there.
+	 * If our thread is executing on the same CPU as the one the buffer
+	 * belongs to, we don't have to synchronize it at all. If we are
+	 * migrated, the scheduler will take care of the memory barriers.
+	 * Normally, smp_call_function_single() should ensure program order when
+	 * executing the remote function, which implies that it surrounds the
+	 * function execution with :
+	 * smp_mb()
+	 * send IPI
+	 * csd_lock_wait
+	 * recv IPI
+	 * smp_mb()
+	 * exec. function
+	 * smp_mb()
+	 * csd unlock
+	 * smp_mb()
+	 *
+	 * However, smp_call_function_single() does not seem to clearly execute
+	 * such barriers. It depends on spinlock semantic to provide the barrier
+	 * before executing the IPI and, when busy-looping, csd_lock_wait only
+	 * executes smp_mb() when it has to wait for the other CPU.
+	 *
+	 * I don't trust this code. Therefore, let's add the smp_mb() sequence
+	 * required ourselves, even if duplicated. It has no performance impact
+	 * anyway.
+	 *
+	 * smp_mb() is needed because smp_rmb() and smp_wmb() only order read vs
+	 * read and write vs write. They do not ensure core synchronization. We
+	 * really have to ensure total order between the 3 barriers running on
+	 * the 2 CPUs.
+	 */
+#ifdef LTT_NO_IPI_BARRIER
+	/*
+	 * Local rmb to match the remote wmb to read the commit count before the
+	 * buffer data and the write offset.
+	 */
+	smp_rmb();
+#else
+	if (raw_smp_processor_id() != buf->a.cpu) {
+		smp_mb();	/* Total order with IPI handler smp_mb() */
+		smp_call_function_single(buf->a.cpu, remote_mb, NULL, 1);
+		smp_mb();	/* Total order with IPI handler smp_mb() */
+	}
+#endif
+	write_offset = local_read(&buf->offset);
+	/*
+	 * Check that the subbuffer we are trying to consume has been
+	 * already fully committed.
+	 */
+	if (((commit_count - chan->a.sb_size)
+	     & chan->commit_count_mask)
+	    - (BUFFER_TRUNC(consumed_old, chan)
+	       >> chan->a.n_sb_order)
+	    != 0) {
+		return -EAGAIN;
+	}
+	/*
+	 * Check that we are not about to read the same subbuffer in
+	 * which the writer head is.
+	 */
+	if ((SUBBUF_TRUNC(write_offset, chan)
+	     - SUBBUF_TRUNC(consumed_old, chan))
+	    == 0) {
+		return -EAGAIN;
+	}
+
+	ret = update_read_sb_index(&buf->a, &chan->a, consumed_idx);
+	if (ret)
+		return ret;
+
+	*consumed = consumed_old;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ltt_chanbuf_get_subbuf);
+
+/*
+ * Release the subbuffer obtained with ltt_chanbuf_get_subbuf() and try to
+ * move the consumed offset to the next subbuffer boundary. Writers are woken
+ * only when the consumed offset was actually advanced by this call.
+ *
+ * Always returns 0.
+ */
+int ltt_chanbuf_put_subbuf(struct ltt_chanbuf *buf, unsigned long consumed)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+	long consumed_new, consumed_old;
+
+	WARN_ON(atomic_long_read(&buf->active_readers) != 1);
+
+	consumed_old = consumed;
+	consumed_new = SUBBUF_ALIGN(consumed_old, chan);
+	WARN_ON_ONCE(RCHAN_SB_IS_NOREF(buf->a.buf_rsb.pages));
+	RCHAN_SB_SET_NOREF(buf->a.buf_rsb.pages);
+
+	spin_lock(&buf->full_lock);
+	if (atomic_long_cmpxchg(&buf->consumed, consumed_old, consumed_new)
+	    != consumed_old) {
+		/* We have been pushed by the writer. */
+		spin_unlock(&buf->full_lock);
+		/*
+		 * We exchanged the subbuffer pages. No corruption possible
+		 * even if the writer did push us. No more -EIO possible.
+		 */
+		return 0;
+	} else {
+		/* tell the client that buffer is now unfull */
+		ltt_buf_unfull(buf);
+		spin_unlock(&buf->full_lock);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ltt_chanbuf_put_subbuf);
+
+/* Timer handler: periodic switch of one buffer, then re-arm the timer. */
+static void switch_buffer(unsigned long data)
+{
+	struct ltt_chanbuf *buf = (struct ltt_chanbuf *)data;
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+
+	/*
+	 * Only flush buffers periodically if readers are active.
+	 */
+	if (atomic_long_read(&buf->active_readers))
+		ltt_force_switch(buf, FORCE_ACTIVE);
+
+	mod_timer_pinned(&buf->switch_timer,
+			 jiffies + chan->switch_timer_interval);
+}
+
+/* Arm the buffer's switch timer on its own cpu; no-op if interval is 0. */
+static void ltt_chanbuf_start_switch_timer(struct ltt_chanbuf *buf)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+
+	if (!chan->switch_timer_interval)
+		return;
+
+	init_timer_deferrable(&buf->switch_timer);
+	buf->switch_timer.function = switch_buffer;
+	buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
+	buf->switch_timer.data = (unsigned long)buf;
+	add_timer_on(&buf->switch_timer, buf->a.cpu);
+}
+
+/*
+ * called with ltt traces lock held.
+ */
+void ltt_chan_start_switch_timer(struct ltt_chan *chan)
+{
+	int cpu;
+
+	if (!chan->switch_timer_interval)
+		return;
+
+	for_each_online_cpu(cpu) {
+		struct ltt_chanbuf *buf;
+
+		buf = per_cpu_ptr(chan->a.buf, cpu);
+		ltt_chanbuf_start_switch_timer(buf);
+	}
+}
+
+/* Delete the buffer's switch timer, waiting for a running handler. */
+static void ltt_chanbuf_stop_switch_timer(struct ltt_chanbuf *buf)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+
+	if (!chan->switch_timer_interval)
+		return;
+
+	del_timer_sync(&buf->switch_timer);
+}
+
+/*
+ * called with ltt traces lock held.
+ */
+void ltt_chan_stop_switch_timer(struct ltt_chan *chan)
+{
+	int cpu;
+
+	if (!chan->switch_timer_interval)
+		return;
+
+	for_each_online_cpu(cpu) {
+		struct ltt_chanbuf *buf;
+
+		buf = per_cpu_ptr(chan->a.buf, cpu);
+		ltt_chanbuf_stop_switch_timer(buf);
+	}
+}
+
+/* Switch the buffer on idle entry, for channels using a switch timer. */
+static void ltt_chanbuf_idle_switch(struct ltt_chanbuf *buf)
+{
+	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
+
+	if (chan->switch_timer_interval)
+		ltt_force_switch(buf, FORCE_ACTIVE);
+}
+
+/*
+ * ltt_chanbuf_switch is called from a remote CPU to ensure that the buffers of
+ * a cpu which went down are flushed. Note that if we execute concurrently
+ * with trace allocation, a buffer might appear to be unallocated (because it
+ * detects that the target CPU is offline).
+ */
+static void ltt_chanbuf_switch(struct ltt_chanbuf *buf)
+{
+	if (buf->a.allocated)
+		ltt_force_switch(buf, FORCE_ACTIVE);
+}
+
+/**
+ * ltt_chanbuf_hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns %NOTIFY_OK for the hotplug actions handled here, %NOTIFY_DONE for
+ * any other action.
+ */
+static
+int ltt_chanbuf_hotcpu_callback(struct notifier_block *nb,
+				unsigned long action,
+				void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		/*
+		 * CPU hotplug lock protects trace lock from this callback.
+		 */
+		ltt_chan_for_each_channel(ltt_chanbuf_start_switch_timer, cpu);
+		return NOTIFY_OK;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		/*
+		 * Performs an IPI to delete the timer locally on the target
+		 * CPU. CPU hotplug lock protects trace lock from this
+		 * callback.
+		 */
+		ltt_chan_for_each_channel(ltt_chanbuf_stop_switch_timer, cpu);
+		return NOTIFY_OK;
+
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		/*
+		 * Performing a buffer switch on a remote CPU. Performed by
+		 * the CPU responsible for doing the hotunplug after the target
+		 * CPU stopped running completely. Ensures that all data
+		 * from that remote CPU is flushed. CPU hotplug lock protects
+		 * trace lock from this callback.
+		 */
+		ltt_chan_for_each_channel(ltt_chanbuf_switch, cpu);
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+/* Idle notifier: on IDLE_START, switch the current cpu's channel buffers. */
+static int pm_idle_entry_callback(struct notifier_block *self,
+				  unsigned long val, void *data)
+{
+	if (val == IDLE_START) {
+		rcu_read_lock_sched_notrace();
+		ltt_chan_for_each_channel(ltt_chanbuf_idle_switch,
+					  smp_processor_id());
+		rcu_read_unlock_sched_notrace();
+	}
+	return 0;
+}
+
+struct notifier_block pm_idle_entry_notifier = {
+	.notifier_call = pm_idle_entry_callback,
+	.priority = ~0U,	/* smallest prio, run after tracing events */
+};
+
+/* Log the event count of the subbuffer at cons_off, when it is non-zero. */
+static
+void ltt_relay_print_written(struct ltt_chan *chan, long cons_off,
+			     unsigned int cpu)
+{
+	struct ltt_chanbuf *buf = per_cpu_ptr(chan->a.buf, cpu);
+	long cons_idx, events_count;
+
+	cons_idx = SUBBUF_INDEX(cons_off, chan);
+	events_count = local_read(&buf->commit_count[cons_idx].events);
+
+	if (events_count)
+		printk(KERN_INFO
+		       "LTT: %lu events written in channel %s "
+		       "(cpu %u, index %lu)\n",
+		       events_count, chan->a.filename, cpu, cons_idx);
+}
+
+/*
+ * Report an unread subbuffer at cons_off, flagging it when its commit count
+ * shows it was not completely filled.
+ */
+static
+void ltt_relay_print_subbuffer_errors(struct ltt_chanbuf *buf,
+				      struct ltt_chan *chan, long cons_off,
+				      unsigned int cpu)
+{
+	long cons_idx, commit_count, commit_count_sb, write_offset;
+
+	cons_idx = SUBBUF_INDEX(cons_off, chan);
+	commit_count = local_read(&buf->commit_count[cons_idx].cc);
+	commit_count_sb = local_read(&buf->commit_count[cons_idx].cc_sb);
+	/*
+	 * No need to order commit_count and write_offset reads because we
+	 * execute after trace is stopped when there are no readers left.
+	 */
+	write_offset = local_read(&buf->offset);
+	printk(KERN_WARNING
+	       "LTT : unread channel %s offset is %ld "
+	       "and cons_off : %ld (cpu %u)\n",
+	       chan->a.filename, write_offset, cons_off, cpu);
+	/* Check each sub-buffer for non filled commit count */
+	if (((commit_count - chan->a.sb_size) & chan->commit_count_mask)
+	    - (BUFFER_TRUNC(cons_off, chan) >> chan->a.n_sb_order)
+	    != 0)
+		printk(KERN_ALERT
+		       "LTT : %s : subbuffer %lu has non filled "
+		       "commit count [cc, cc_sb] [%lu,%lu].\n",
+		       chan->a.filename, cons_idx, commit_count,
+		       commit_count_sb);
+	printk(KERN_ALERT "LTT : %s : commit count : %lu, subbuf size %lu\n",
+	       chan->a.filename, commit_count, chan->a.sb_size);
+}
+
+/*
+ * Print written-event counts for every subbuffer, then an error report for
+ * each subbuffer between the consumed offset and the write offset. The trace
+ * argument is not used here.
+ */
+static
+void ltt_relay_print_errors(struct ltt_chanbuf *buf, struct ltt_chan *chan,
+			    struct ltt_trace *trace, int cpu)
+{
+	long cons_off;
+
+	/*
+	 * Can be called in the error path of allocation when
+	 * trans_channel_data is not yet set.
+	 */
+	if (!chan)
+		return;
+	for (cons_off = 0; cons_off < chan->a.buf_size;
+	     cons_off = SUBBUF_ALIGN(cons_off, chan))
+		ltt_relay_print_written(chan, cons_off, cpu);
+	for (cons_off = atomic_long_read(&buf->consumed);
+	     (SUBBUF_TRUNC(local_read(&buf->offset), chan)
+	      - cons_off) > 0;
+	     cons_off = SUBBUF_ALIGN(cons_off, chan))
+		ltt_relay_print_subbuffer_errors(buf, chan, cons_off, cpu);
+}
+
+/* Report lost events and corrupted subbuffers for one cpu's buffer. */
+static
+void ltt_relay_print_buffer_errors(struct ltt_chan *chan, unsigned int cpu)
+{
+	struct ltt_trace *trace = chan->a.trace;
+	struct ltt_chanbuf *buf = per_cpu_ptr(chan->a.buf, cpu);
+
+	if (local_read(&buf->events_lost))
+		printk(KERN_ALERT
+		       "LTT : %s : %ld events lost "
+		       "in %s channel (cpu %u).\n",
+		       chan->a.filename, local_read(&buf->events_lost),
+		       chan->a.filename, cpu);
+	if (local_read(&buf->corrupted_subbuffers))
+		printk(KERN_ALERT
+		       "LTT : %s : %ld corrupted subbuffers "
+		       "in %s channel (cpu %u).\n",
+		       chan->a.filename,
+		       local_read(&buf->corrupted_subbuffers),
+		       chan->a.filename, cpu);
+
+	ltt_relay_print_errors(buf, chan, trace, cpu);
+}
+
+/* Remove the trace's ascii directory, then its debugfs root directory. */
+static void ltt_relay_remove_dirs(struct ltt_trace *trace)
+{
+	ltt_ascii_remove_dir(trace);
+	debugfs_remove(trace->dentry.trace_root);
+}
+
+/*
+ * Create the debugfs directory of a new trace under the LTT root.
+ *
+ * Returns 0 on success, -ENOENT when the LTT root is not available and
+ * -EEXIST when the trace directory cannot be created. Error codes are
+ * negative, like every other error return in this file. Failure to create
+ * the ascii output directory is only logged.
+ */
+static int ltt_relay_create_dirs(struct ltt_trace *new_trace)
+{
+	struct dentry *ltt_root_dentry;
+	int ret;
+
+	ltt_root_dentry = get_ltt_root();
+	if (!ltt_root_dentry)
+		return -ENOENT;
+
+	new_trace->dentry.trace_root = debugfs_create_dir(new_trace->trace_name,
+							  ltt_root_dentry);
+	put_ltt_root();
+	if (new_trace->dentry.trace_root == NULL) {
+		printk(KERN_ERR "LTT : Trace directory name %s already taken\n",
+		       new_trace->trace_name);
+		return -EEXIST;
+	}
+	/* Best effort: the trace stays usable without its ascii output. */
+	ret = ltt_ascii_create_dir(new_trace);
+	if (ret)
+		printk(KERN_WARNING "LTT : Unable to create ascii output file "
+				    "for trace %s\n", new_trace->trace_name);
+
+	return 0;
+}
+
+/*
+ * LTTng channel flush function.
+ *
+ * Must be called when no tracing is active in the channel, because of
+ * accesses across CPUs.
+ */
+static notrace void ltt_relay_buffer_flush(struct ltt_chanbuf *buf)
+{
+	buf->finalized = 1;
+	ltt_force_switch(buf, FORCE_FLUSH);
+}
+
+/* Wake the readers of every allocated buffer that has data to deliver. */
+static void ltt_relay_async_wakeup_chan(struct ltt_chan *chan)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		struct ltt_chanbuf *buf;
+
+		buf = per_cpu_ptr(chan->a.buf, i);
+		if (!buf->a.allocated)
+			continue;
+		/*
+		 * Ensure the buffer has been allocated before reading its
+		 * content. Sync cpu hotplug vs async wakeup.
+		 */
+		smp_rmb();
+		if (ltt_poll_deliver(buf, chan))
+			wake_up_interruptible(&buf->read_wait);
+	}
+}
+
+/* Flush one cpu's buffer and wake its writers, if it was allocated. */
+static void ltt_relay_finish_buffer(struct ltt_chan *chan, unsigned int cpu)
+{
+	struct ltt_chanbuf *buf = per_cpu_ptr(chan->a.buf, cpu);
+
+	if (buf->a.allocated) {
+		ltt_relay_buffer_flush(buf);
+		ltt_relay_wake_writers(buf);
+	}
+}
+
+
+/* Finish the buffers of every possible cpu of the channel. */
+static void ltt_relay_finish_channel(struct ltt_chan *chan)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		ltt_relay_finish_buffer(chan, i);
+}
+
+/*
+ * This is called with preemption disabled when user space has requested
+ * blocking mode.
If one of the active traces has free space below a
+ * specific threshold value, we reenable preemption and block.
+ *
+ * Returns 0 when no blocking was needed, 1 after having slept (the caller is
+ * expected to restart), or -ERESTARTSYS when a signal is pending after the
+ * sleep.
+ */
+static
+int ltt_relay_user_blocking(struct ltt_trace *trace, unsigned int chan_index,
+			    size_t data_size, struct user_dbg_data *dbg)
+{
+	struct ltt_chanbuf *buf;
+	struct ltt_chan *chan;
+	int cpu;
+	DECLARE_WAITQUEUE(wait, current);
+
+	chan = &trace->channels[chan_index];
+	cpu = smp_processor_id();
+	buf = per_cpu_ptr(chan->a.buf, cpu);
+
+	/*
+	 * Check if data is too big for the channel : do not
+	 * block for it.
+	 */
+	if (LTT_RESERVE_CRITICAL + data_size > chan->a.sb_size)
+		return 0;
+
+	/*
+	 * If free space too low, we block. We restart from the
+	 * beginning after we resume (cpu id may have changed
+	 * while preemption is active).
+	 */
+	spin_lock(&buf->full_lock);
+	if (!chan->overwrite) {
+		dbg->write = local_read(&buf->offset);
+		dbg->read = atomic_long_read(&buf->consumed);
+		dbg->avail_size = dbg->write + LTT_RESERVE_CRITICAL + data_size
+				  - SUBBUF_TRUNC(dbg->read, chan);
+		if (dbg->avail_size > chan->a.buf_size) {
+			/* Queue ourselves before dropping the lock. */
+			__set_current_state(TASK_INTERRUPTIBLE);
+			add_wait_queue(&buf->write_wait, &wait);
+			spin_unlock(&buf->full_lock);
+			preempt_enable();
+			schedule();
+			__set_current_state(TASK_RUNNING);
+			remove_wait_queue(&buf->write_wait, &wait);
+			/*
+			 * This return happens before preempt_disable(), so
+			 * preemption is still enabled on this path.
+			 * NOTE(review): confirm the caller accounts for it.
+			 */
+			if (signal_pending(current))
+				return -ERESTARTSYS;
+			preempt_disable();
+			return 1;
+		}
+	}
+	spin_unlock(&buf->full_lock);
+	return 0;
+}
+
+/*
+ * Log the state saved in dbg by ltt_relay_user_blocking(), then refresh dbg
+ * from the buffer and log the current state as well.
+ */
+static
+void ltt_relay_print_user_errors(struct ltt_trace *trace,
+				 unsigned int chan_index, size_t data_size,
+				 struct user_dbg_data *dbg, int cpu)
+{
+	struct ltt_chanbuf *buf;
+	struct ltt_chan *chan;
+
+	chan = &trace->channels[chan_index];
+	buf = per_cpu_ptr(chan->a.buf, cpu);
+
+	printk(KERN_ERR "Error in LTT usertrace : "
+			"buffer full : event lost in blocking "
+			"mode. Increase LTT_RESERVE_CRITICAL.\n");
+	printk(KERN_ERR "LTT nesting level is %u.\n",
+	       per_cpu(ltt_nesting, cpu));
+	printk(KERN_ERR "LTT available size %lu.\n",
+	       dbg->avail_size);
+	printk(KERN_ERR "available write : %lu, read : %lu\n",
+	       dbg->write, dbg->read);
+
+	dbg->write = local_read(&buf->offset);
+	dbg->read = atomic_long_read(&buf->consumed);
+
+	printk(KERN_ERR "LTT current size %lu.\n",
+	       dbg->write + LTT_RESERVE_CRITICAL + data_size
+	       - SUBBUF_TRUNC(dbg->read, chan));
+	printk(KERN_ERR "current write : %lu, read : %lu\n",
+	       dbg->write, dbg->read);
+}
+
+/*
+ * ltt_reserve_switch_old_subbuf: switch old subbuffer
+ *
+ * Concurrency safe because we are the last and only thread to alter this
+ * sub-buffer. As long as it is not delivered and read, no other thread can
+ * alter the offset, alter the reserve_count or call the
+ * client_buffer_end_callback on this sub-buffer.
+ *
+ * The only remaining threads could be the ones with pending commits. They will
+ * have to do the deliver themselves. Not concurrency safe in overwrite mode.
+ * We detect corrupted subbuffers with commit and reserve counts. We keep a
+ * corrupted sub-buffers count and push the readers across these sub-buffers.
+ *
+ * Not concurrency safe if a writer is stalled in a subbuffer and another writer
+ * switches in, finding out it's corrupted. The result will be that the old
+ * (uncommitted) subbuffer will be declared corrupted, and that the new subbuffer
+ * will be declared corrupted too because of the commit count adjustment.
+ *
+ * Note : offset_old should never be 0 here.
+ */ +static +void ltt_reserve_switch_old_subbuf(struct ltt_chanbuf *buf, + struct ltt_chan *chan, + struct ltt_reserve_switch_offsets *offsets, + u64 *tsc) +{ + long oldidx = SUBBUF_INDEX(offsets->old - 1, chan); + long commit_count, padding_size; + + /* Space left in the old sub-buffer past offsets->old, committed as padding. */ + padding_size = chan->a.sb_size + - (SUBBUF_OFFSET(offsets->old - 1, chan) + 1); + ltt_buffer_end(buf, *tsc, offsets->old, oldidx); + + /* + * Must write slot data before incrementing commit count. + * This compiler barrier is upgraded into a smp_wmb() by the IPI + * sent by get_subbuf() when it does its smp_rmb(). + */ + barrier(); + local_add(padding_size, &buf->commit_count[oldidx].cc); + commit_count = local_read(&buf->commit_count[oldidx].cc); + ltt_check_deliver(buf, chan, offsets->old - 1, commit_count, oldidx); + ltt_write_commit_counter(buf, chan, oldidx, offsets->old, commit_count, + padding_size); +} + +/* + * ltt_reserve_switch_new_subbuf: Populate new subbuffer. + * + * This code can be executed unordered : writers may already have written to the + * sub-buffer before this code gets executed, caution. The commit makes sure + * that this code is executed before the delivery of this sub-buffer. + */ +static +void ltt_reserve_switch_new_subbuf(struct ltt_chanbuf *buf, + struct ltt_chan *chan, + struct ltt_reserve_switch_offsets *offsets, + u64 *tsc) +{ + long beginidx = SUBBUF_INDEX(offsets->begin, chan); + long commit_count; + + ltt_buffer_begin(buf, *tsc, beginidx); + + /* + * Must write slot data before incrementing commit count. + * This compiler barrier is upgraded into a smp_wmb() by the IPI + * sent by get_subbuf() when it does its smp_rmb(). 
+ */ + barrier(); + local_add(ltt_sb_header_size(), &buf->commit_count[beginidx].cc); + commit_count = local_read(&buf->commit_count[beginidx].cc); + /* Check if the written buffer has to be delivered */ + ltt_check_deliver(buf, chan, offsets->begin, commit_count, beginidx); + ltt_write_commit_counter(buf, chan, beginidx, offsets->begin, + commit_count, ltt_sb_header_size()); +} + + +/* + * ltt_reserve_end_switch_current: finish switching current subbuffer + * + * Concurrency safe because we are the last and only thread to alter this + * sub-buffer. As long as it is not delivered and read, no other thread can + * alter the offset, alter the reserve_count or call the + * client_buffer_end_callback on this sub-buffer. + * + * The only remaining threads could be the ones with pending commits. They will + * have to do the delivery themselves. Not concurrency safe in overwrite mode. + * We detect corrupted subbuffers with commit and reserve counts. We keep a + * corrupted sub-buffers count and push the readers across these sub-buffers. + * + * Not concurrency safe if a writer is stalled in a subbuffer and another writer + * switches in, finding out it's corrupted. The result will be that the old + * (uncommitted) subbuffer will be declared corrupted, and that the new subbuffer + * will be declared corrupted too because of the commit count adjustment. + */ +static +void ltt_reserve_end_switch_current(struct ltt_chanbuf *buf, + struct ltt_chan *chan, + struct ltt_reserve_switch_offsets *offsets, + u64 *tsc) +{ + long endidx = SUBBUF_INDEX(offsets->end - 1, chan); + long commit_count, padding_size; + + /* Space left in the sub-buffer past offsets->end, committed as padding. */ + padding_size = chan->a.sb_size + - (SUBBUF_OFFSET(offsets->end - 1, chan) + 1); + + ltt_buffer_end(buf, *tsc, offsets->end, endidx); + + /* + * Must write slot data before incrementing commit count. + * This compiler barrier is upgraded into a smp_wmb() by the IPI + * sent by get_subbuf() when it does its smp_rmb(). 
+ */ + barrier(); + local_add(padding_size, &buf->commit_count[endidx].cc); + commit_count = local_read(&buf->commit_count[endidx].cc); + ltt_check_deliver(buf, chan, offsets->end - 1, commit_count, endidx); + ltt_write_commit_counter(buf, chan, endidx, offsets->end, commit_count, + padding_size); +} + +/* + * ltt_relay_try_switch_slow: compute the offsets of a forced sub-buffer switch. + * + * Returns : + * 0 if ok (offsets->old, offsets->begin and offsets->end are set). + * !0 if execution must be aborted (nothing to switch, or buffer full while + * tracing is active in non-overwrite mode). + */ +static +int ltt_relay_try_switch_slow(enum force_switch_mode mode, + struct ltt_chanbuf *buf, struct ltt_chan *chan, + struct ltt_reserve_switch_offsets *offsets, + u64 *tsc) +{ + long sb_index; + long reserve_commit_diff; + long off; + + offsets->begin = local_read(&buf->offset); + offsets->old = offsets->begin; + offsets->begin_switch = 0; + offsets->end_switch_old = 0; + + *tsc = trace_clock_read64(); + + off = SUBBUF_OFFSET(offsets->begin, chan); + if ((mode != FORCE_ACTIVE && off > 0) || off > ltt_sb_header_size()) { + offsets->begin = SUBBUF_ALIGN(offsets->begin, chan); + offsets->end_switch_old = 1; + } else { + /* We do not have to switch : buffer is empty. */ + return -1; + } + if (mode == FORCE_ACTIVE) + offsets->begin += ltt_sb_header_size(); + /* + * Always begin_switch in FORCE_ACTIVE mode. + * Test new buffer integrity. + */ + sb_index = SUBBUF_INDEX(offsets->begin, chan); + reserve_commit_diff = + (BUFFER_TRUNC(offsets->begin, chan) + >> chan->a.n_sb_order) + - (local_read(&buf->commit_count[sb_index].cc_sb) + & chan->commit_count_mask); + if (reserve_commit_diff == 0) { + /* Next buffer not corrupted. */ + if (mode == FORCE_ACTIVE + && !chan->overwrite + && offsets->begin - atomic_long_read(&buf->consumed) + >= chan->a.buf_size) { + /* + * We do not overwrite non-consumed buffers and we are + * full : ignore switch while tracing is active. + */ + return -1; + } + } else { + /* + * Next subbuffer corrupted. Force pushing reader even in normal + * mode. + */ + } + offsets->end = offsets->begin; + return 0; +} + +/* + * Force a sub-buffer switch for a per-cpu buffer. 
This operation is + * completely reentrant : can be called while tracing is active with + * absolutely no lock held. + * + * Note, however, that as a local_cmpxchg is used for some atomic + * operations, this function must be called from the CPU which owns the buffer + * for an ACTIVE flush. + */ +void ltt_force_switch_lockless_slow(struct ltt_chanbuf *buf, + enum force_switch_mode mode) +{ + struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a); + struct ltt_reserve_switch_offsets offsets; + u64 tsc; + + offsets.size = 0; + + /* + * Perform retryable operations : recompute the offsets until the + * cmpxchg on buf->offset succeeds. + */ + do { + if (ltt_relay_try_switch_slow(mode, buf, chan, &offsets, &tsc)) + return; + } while (local_cmpxchg(&buf->offset, offsets.old, offsets.end) + != offsets.old); + + /* + * Atomically update last_tsc. This update races against concurrent + * atomic updates, but the race will always cause supplementary full TSC + * events, never the opposite (missing a full TSC event when it would be + * needed). + */ + save_last_tsc(buf, tsc); + + /* + * Push the reader if necessary (only while tracing is active). + */ + if (mode == FORCE_ACTIVE) { + ltt_reserve_push_reader(buf, chan, offsets.end - 1); + ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.end - 1, + chan)); + } + + /* + * Switch old subbuffer if needed. + */ + if (offsets.end_switch_old) { + ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.old - 1, + chan)); + ltt_reserve_switch_old_subbuf(buf, chan, &offsets, &tsc); + } + + /* + * Populate new subbuffer (skipped for a FORCE_FLUSH switch). + */ + if (mode == FORCE_ACTIVE) + ltt_reserve_switch_new_subbuf(buf, chan, &offsets, &tsc); +} +EXPORT_SYMBOL_GPL(ltt_force_switch_lockless_slow); + +/* + * Returns : + * 0 if ok + * !0 if execution must be aborted (the event is counted in events_lost). 
+ */ +static +int ltt_relay_try_reserve_slow(struct ltt_chanbuf *buf, struct ltt_chan *chan, + struct ltt_reserve_switch_offsets *offsets, + size_t data_size, u64 *tsc, unsigned int *rflags, + int largest_align) +{ + long reserve_commit_diff; + + offsets->begin = local_read(&buf->offset); + offsets->old = offsets->begin; + offsets->begin_switch = 0; + offsets->end_switch_current = 0; + offsets->end_switch_old = 0; + + *tsc = trace_clock_read64(); + if (last_tsc_overflow(buf, *tsc)) + *rflags = LTT_RFLAG_ID_SIZE_TSC; + + if (unlikely(SUBBUF_OFFSET(offsets->begin, chan) == 0)) { + offsets->begin_switch = 1; /* For offsets->begin */ + } else { + offsets->size = ltt_get_header_size(chan, offsets->begin, + data_size, + &offsets->before_hdr_pad, + *rflags); + offsets->size += ltt_align(offsets->begin + offsets->size, + largest_align) + + data_size; + /* Event does not fit in what is left of the current sub-buffer. */ + if (unlikely((SUBBUF_OFFSET(offsets->begin, chan) + + offsets->size) > chan->a.sb_size)) { + offsets->end_switch_old = 1; /* For offsets->old */ + offsets->begin_switch = 1; /* For offsets->begin */ + } + } + if (unlikely(offsets->begin_switch)) { + long sb_index; + + /* + * We are typically not filling the previous buffer completely. + */ + if (likely(offsets->end_switch_old)) + offsets->begin = SUBBUF_ALIGN(offsets->begin, chan); + offsets->begin = offsets->begin + ltt_sb_header_size(); + /* + * Test new buffer integrity : its cc_sb commit count must match + * the reserve position. + */ + sb_index = SUBBUF_INDEX(offsets->begin, chan); + reserve_commit_diff = + (BUFFER_TRUNC(offsets->begin, chan) + >> chan->a.n_sb_order) + - (local_read(&buf->commit_count[sb_index].cc_sb) + & chan->commit_count_mask); + if (likely(reserve_commit_diff == 0)) { + /* Next buffer not corrupted. */ + if (unlikely(!chan->overwrite && + (SUBBUF_TRUNC(offsets->begin, chan) + - SUBBUF_TRUNC(atomic_long_read(&buf->consumed), + chan)) + >= chan->a.buf_size)) { + /* + * We do not overwrite non-consumed buffers + * and we are full : event is lost. 
+ */ + local_inc(&buf->events_lost); + return -1; + } else { + /* + * Next buffer not corrupted, we are either in + * overwrite mode or the buffer is not full. + * It's safe to write in this new subbuffer. + */ + } + } else { + /* + * Next subbuffer corrupted. Drop event in normal and + * overwrite mode. Caused by either a writer OOPS or + * too many nested writes over a reserve/commit pair. + */ + local_inc(&buf->events_lost); + return -1; + } + offsets->size = ltt_get_header_size(chan, offsets->begin, + data_size, + &offsets->before_hdr_pad, + *rflags); + offsets->size += ltt_align(offsets->begin + offsets->size, + largest_align) + + data_size; + if (unlikely((SUBBUF_OFFSET(offsets->begin, chan) + + offsets->size) > chan->a.sb_size)) { + /* + * Event too big for subbuffers, report error, don't + * complete the sub-buffer switch. + */ + local_inc(&buf->events_lost); + return -1; + } else { + /* + * We just made a successful buffer switch and the event + * fits in the new subbuffer. Let's write. + */ + } + } else { + /* + * Event fits in the current buffer and we are not on a switch + * boundary. It's safe to write. + */ + } + offsets->end = offsets->begin + offsets->size; + + if (unlikely((SUBBUF_OFFSET(offsets->end, chan)) == 0)) { + /* + * The offset_end will fall at the very beginning of the next + * subbuffer. + */ + offsets->end_switch_current = 1; /* For offsets->begin */ + } + return 0; +} + +/** + * ltt_reserve_slot_lockless_slow - Atomic slot reservation in a buffer. + * @chan: channel structure + * @trace: the trace structure to log to. + * @data_size: size of the variable length data to log. + * @largest_align: alignment passed to ltt_align() for the event data + * @cpu: cpuid + * @ret_buf: pointer to the per-cpu buffer of @chan for @cpu (out) + * @slot_size: pointer to total size of the slot (out) + * @buf_offset: pointer to reserved buffer offset (out) + * @tsc: pointer to the tsc at the slot reservation (out) + * @rflags: pointer to the reservation flags (in/out) + * + * Return : -ENOSPC if the slot cannot be reserved, else returns 0. + * It will take care of sub-buffer switching. 
+ */ +int ltt_reserve_slot_lockless_slow(struct ltt_chan *chan, + struct ltt_trace *trace, size_t data_size, + int largest_align, int cpu, + struct ltt_chanbuf **ret_buf, + size_t *slot_size, long *buf_offset, + u64 *tsc, unsigned int *rflags) +{ + struct ltt_chanbuf *buf = *ret_buf = per_cpu_ptr(chan->a.buf, cpu); + struct ltt_reserve_switch_offsets offsets; + + offsets.size = 0; + + /* + * Perform retryable operations : recompute the offsets until the + * cmpxchg on buf->offset succeeds. + */ + do { + if (unlikely(ltt_relay_try_reserve_slow(buf, chan, &offsets, + data_size, tsc, rflags, + largest_align))) + return -ENOSPC; + } while (unlikely(local_cmpxchg(&buf->offset, offsets.old, offsets.end) + != offsets.old)); + + /* + * Atomically update last_tsc. This update races against concurrent + * atomic updates, but the race will always cause supplementary full TSC + * events, never the opposite (missing a full TSC event when it would be + * needed). + */ + save_last_tsc(buf, *tsc); + + /* + * Push the reader if necessary. + */ + ltt_reserve_push_reader(buf, chan, offsets.end - 1); + + /* + * Clear noref flag for this subbuffer. + */ + ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.end - 1, chan)); + + /* + * Switch old subbuffer if needed. + */ + if (unlikely(offsets.end_switch_old)) { + ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.old - 1, + chan)); + ltt_reserve_switch_old_subbuf(buf, chan, &offsets, tsc); + } + + /* + * Populate new subbuffer. 
+ */ + if (unlikely(offsets.begin_switch)) + ltt_reserve_switch_new_subbuf(buf, chan, &offsets, tsc); + + /* The reserved slot ends exactly on a sub-buffer boundary. */ + if (unlikely(offsets.end_switch_current)) + ltt_reserve_end_switch_current(buf, chan, &offsets, tsc); + + *slot_size = offsets.size; + *buf_offset = offsets.begin + offsets.before_hdr_pad; + return 0; +} +EXPORT_SYMBOL_GPL(ltt_reserve_slot_lockless_slow); + +/* Operations exported by the "relay" transport. */ +static struct ltt_transport ltt_relay_transport = { + .name = "relay", + .owner = THIS_MODULE, + .ops = { + .create_dirs = ltt_relay_create_dirs, + .remove_dirs = ltt_relay_remove_dirs, + .create_channel = ltt_chan_create, + .finish_channel = ltt_relay_finish_channel, + .remove_channel = ltt_chan_free, + .remove_channel_files = ltt_chan_remove_files, + .wakeup_channel = ltt_relay_async_wakeup_chan, + .user_blocking = ltt_relay_user_blocking, + .user_errors = ltt_relay_print_user_errors, + .start_switch_timer = ltt_chan_start_switch_timer, + .stop_switch_timer = ltt_chan_stop_switch_timer, + }, +}; + +static struct notifier_block fn_ltt_chanbuf_hotcpu_callback = { + .notifier_call = ltt_chanbuf_hotcpu_callback, + .priority = 6, +}; + +/* + * Register the "relay" transport, the CPU hotplug notifier and the idle + * notifier. Always returns 0. + */ +int __init ltt_relay_init(void) +{ + printk(KERN_INFO "LTT : ltt-relay init\n"); + + ltt_transport_register(&ltt_relay_transport); + register_cpu_notifier(&fn_ltt_chanbuf_hotcpu_callback); + register_idle_notifier(&pm_idle_entry_notifier); + + return 0; +} + +/* + * Undo ltt_relay_init() : unregister the notifiers and the transport in the + * reverse order of their registration. + */ +void __exit ltt_relay_exit(void) +{ + printk(KERN_INFO "LTT : ltt-relay exit\n"); + + unregister_idle_notifier(&pm_idle_entry_notifier); + unregister_cpu_notifier(&fn_ltt_chanbuf_hotcpu_callback); + ltt_transport_unregister(&ltt_relay_transport); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Lockless Relay"); diff --git a/ltt-relay-lockless.h b/ltt-relay-lockless.h new file mode 100644 index 00000000..62fc5159 --- /dev/null +++ b/ltt-relay-lockless.h @@ -0,0 +1,549 @@ +#ifndef _LTT_LTT_RELAY_LOCKLESS_H +#define 
_LTT_LTT_RELAY_LOCKLESS_H + +/* + * ltt/ltt-relay-lockless.h + * + * (C) Copyright 2005-2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * LTTng lockless buffer space management (reader/writer). + * + * Author: + * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Inspired from LTT : + * Karim Yaghmour (karim@opersys.com) + * Tom Zanussi (zanussi@us.ibm.com) + * Bob Wisniewski (bob@watson.ibm.com) + * And from K42 : + * Bob Wisniewski (bob@watson.ibm.com) + * + * Changelog: + * 08/10/08, Cleanup. + * 19/10/05, Complete lockless mechanism. + * 27/05/05, Modular redesign and rewrite. + * + * Userspace reader semantics : + * while (poll fd != POLLHUP) { + * - ioctl RELAY_GET_SUBBUF_SIZE + * while (1) { + * - ioctl GET_SUBBUF + * - splice 1 subbuffer worth of data to a pipe + * - splice the data from pipe to disk/network + * - ioctl PUT_SUBBUF, check error value + * if err val < 0, previous subbuffer was corrupted. + * } + * } + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer.h" +#include "ltt-relay.h" + +#if 0 +#define printk_dbg(fmt, args...) printk(fmt, args) +#else +#define printk_dbg(fmt, args...) +#endif + +struct commit_counters { + local_t cc; /* Committed bytes, added at each commit */ + local_t cc_sb; /* Incremented _once_ at sb switch */ + local_t events; /* Event count */ +}; + +/* LTTng lockless logging buffer info */ +struct ltt_chanbuf { + struct ltt_chanbuf_alloc a; /* Parent. First field. */ + /* First 32 bytes cache-hot cacheline */ + local_t offset; /* Current write offset in the buffer */ + struct commit_counters *commit_count; + /* Commit count per sub-buffer */ + atomic_long_t consumed; /* + * Current consumed (read) offset in the buffer + * standard atomic access (shared) + */ + unsigned long last_tsc; /* + * Last timestamp written in the buffer. 
+ */ + /* End of first 32 bytes cacheline */ +#ifdef CONFIG_LTT_VMCORE + local_t *commit_seq; /* Consecutive commits */ +#endif /* CONFIG_LTT_VMCORE */ + atomic_long_t active_readers; /* + * Active readers count + * standard atomic access (shared) + */ + local_t events_lost; /* Events dropped at reserve time */ + local_t corrupted_subbuffers; + spinlock_t full_lock; /* + * buffer full condition spinlock, only + * for userspace tracing blocking mode + * synchronization with reader. + */ + wait_queue_head_t write_wait; /* + * Wait queue for blocking user space + * writers + */ + wait_queue_head_t read_wait; /* reader wait queue */ + unsigned int finalized; /* buffer has been finalized */ + struct timer_list switch_timer; /* timer for periodic switch */ +}; + +/* + * A switch is done during tracing (FORCE_ACTIVE) or as a final flush after + * tracing (FORCE_FLUSH, so it won't write in the new sub-buffer). + */ +enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH }; + +extern +int ltt_reserve_slot_lockless_slow(struct ltt_chan *chan, + struct ltt_trace *trace, size_t data_size, + int largest_align, int cpu, + struct ltt_chanbuf **ret_buf, + size_t *slot_size, long *buf_offset, + u64 *tsc, unsigned int *rflags); + +extern void ltt_force_switch_lockless_slow(struct ltt_chanbuf *buf, + enum force_switch_mode mode); + +/* + * Last TSC comparison functions. Check if the current TSC overflows + * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc + * atomically. 
+ */ + +#if (BITS_PER_LONG == 32) /* last_tsc keeps only the bits above LTT_TSC_BITS */ +static __inline__ void save_last_tsc(struct ltt_chanbuf *buf, u64 tsc) +{ + buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS); +} + +static __inline__ int last_tsc_overflow(struct ltt_chanbuf *buf, u64 tsc) +{ + unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS); + + if (unlikely((tsc_shifted - buf->last_tsc))) + return 1; + else + return 0; +} +#else /* BITS_PER_LONG != 32 : last_tsc keeps the tsc cast to unsigned long */ +static __inline__ void save_last_tsc(struct ltt_chanbuf *buf, u64 tsc) +{ + buf->last_tsc = (unsigned long)tsc; +} + +static __inline__ int last_tsc_overflow(struct ltt_chanbuf *buf, u64 tsc) +{ + if (unlikely((tsc - buf->last_tsc) >> LTT_TSC_BITS)) + return 1; + else + return 0; +} +#endif /* BITS_PER_LONG == 32 */ + +extern +int ltt_chanbuf_create(struct ltt_chanbuf *buf, struct ltt_chan_alloc *chana, + int cpu); +extern void ltt_chanbuf_free(struct ltt_chanbuf *buf); +extern int ltt_chan_create(const char *base_filename, struct ltt_chan *chan, + struct dentry *parent, size_t sb_size, size_t n_sb, + int overwrite, struct ltt_trace *trace); +extern void ltt_chan_free(struct kref *kref); +extern void ltt_chan_remove_files(struct ltt_chan *chan); + +/* Buffer access operations */ + +extern int ltt_chanbuf_open_read(struct ltt_chanbuf *buf); +extern void ltt_chanbuf_release_read(struct ltt_chanbuf *buf); +extern int ltt_chanbuf_get_subbuf(struct ltt_chanbuf *buf, + unsigned long *consumed); +extern int ltt_chanbuf_put_subbuf(struct ltt_chanbuf *buf, + unsigned long consumed); +extern void ltt_chan_start_switch_timer(struct ltt_chan *chan); +extern void ltt_chan_stop_switch_timer(struct ltt_chan *chan); + +extern int ltt_relay_init(void); +extern void ltt_relay_exit(void); + +static __inline__ +unsigned long ltt_chanbuf_get_offset(struct ltt_chanbuf *buf) +{ + return local_read(&buf->offset); +} + +static __inline__ +unsigned long ltt_chanbuf_get_consumed(struct ltt_chanbuf *buf) +{ + return atomic_long_read(&buf->consumed); +} + +static __inline__ +int ltt_chanbuf_is_finalized(struct 
ltt_chanbuf *buf) +{ + return buf->finalized; +} + +static __inline__ +void ltt_reserve_push_reader(struct ltt_chanbuf *buf, struct ltt_chan *chan, + long offset) +{ + long consumed_old, consumed_new; + + do { + consumed_old = atomic_long_read(&buf->consumed); + /* + * If buffer is in overwrite mode, push the reader consumed + * count if the write position has reached it and we are not + * at the first iteration (don't push the reader farther than + * the writer). This operation can be done concurrently by many + * writers in the same buffer, the writer being at the farthest + * write position sub-buffer index in the buffer being the one + * which will win this loop. + * If the buffer is not in overwrite mode, pushing the reader + * only happens if a sub-buffer is corrupted. + */ + if (unlikely((SUBBUF_TRUNC(offset, chan) + - SUBBUF_TRUNC(consumed_old, chan)) + >= chan->a.buf_size)) + consumed_new = SUBBUF_ALIGN(consumed_old, chan); + else + return; + } while (unlikely(atomic_long_cmpxchg(&buf->consumed, consumed_old, + consumed_new) != consumed_old)); +} + +#ifdef CONFIG_LTT_VMCORE +static __inline__ +void ltt_vmcore_check_deliver(struct ltt_chanbuf *buf, long commit_count, + long idx) +{ + local_set(&buf->commit_seq[idx], commit_count); +} +#else /* !CONFIG_LTT_VMCORE */ +static __inline__ +void ltt_vmcore_check_deliver(struct ltt_chanbuf *buf, long commit_count, + long idx) +{ +} +#endif /* CONFIG_LTT_VMCORE */ + +static __inline__ +void ltt_check_deliver(struct ltt_chanbuf *buf, struct ltt_chan *chan, + long offset, long commit_count, long idx) +{ + long old_commit_count = commit_count - chan->a.sb_size; + + /* Check if all commits have been done */ + if (unlikely((BUFFER_TRUNC(offset, chan) >> chan->a.n_sb_order) + - (old_commit_count & chan->commit_count_mask) == 0)) { + /* + * If we succeeded in updating the cc_sb, we are delivering + * the subbuffer. Deals with concurrent updates of the "cc" + * value without adding an add_return atomic operation to the + * fast path. 
+ */ + if (likely(local_cmpxchg(&buf->commit_count[idx].cc_sb, + old_commit_count, commit_count) + == old_commit_count)) { + /* + * Set noref flag for this subbuffer. + */ + ltt_set_noref_flag(&buf->a, idx); + ltt_vmcore_check_deliver(buf, commit_count, idx); + } + } +} + + +static __inline__ +int ltt_poll_deliver(struct ltt_chanbuf *buf, struct ltt_chan *chan) +{ + long consumed_old, consumed_idx, commit_count, write_offset; + + consumed_old = atomic_long_read(&buf->consumed); + consumed_idx = SUBBUF_INDEX(consumed_old, chan); + commit_count = local_read(&buf->commit_count[consumed_idx].cc_sb); + /* + * No memory barrier here, since we are only interested + * in a statistically correct polling result. The next poll will + * get the data if we are racing. The mb() that ensures correct + * memory order is in get_subbuf. + */ + write_offset = local_read(&buf->offset); + + /* + * Check that the subbuffer we are trying to consume has been + * already fully committed. + */ + + if (((commit_count - chan->a.sb_size) + & chan->commit_count_mask) + - (BUFFER_TRUNC(consumed_old, chan) + >> chan->a.n_sb_order) + != 0) + return 0; + + /* + * Check that we are not about to read the same subbuffer in + * which the writer head is. + */ + if ((SUBBUF_TRUNC(write_offset, chan) + - SUBBUF_TRUNC(consumed_old, chan)) + == 0) + return 0; + + return 1; + +} + +static __inline__ +u32 get_read_sb_size(struct ltt_chanbuf *buf) +{ + struct ltt_subbuffer_header *header = + (struct ltt_subbuffer_header *) + ltt_relay_read_offset_address(&buf->a, 0); + return header->sb_size; +} + +/* + * Returns 0 if the reserve is ok, or 1 if the slow path must be taken. 
+ */ +static __inline__ +int ltt_relay_try_reserve(struct ltt_chanbuf *buf, struct ltt_chan *chan, + size_t data_size, u64 *tsc, unsigned int *rflags, + int largest_align, long *o_begin, long *o_end, + long *o_old, size_t *before_hdr_pad, size_t *size) +{ + *o_begin = local_read(&buf->offset); + *o_old = *o_begin; + + *tsc = trace_clock_read64(); + +#ifdef CONFIG_LTT_VMCORE + prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, chan)]); + prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, chan)]); +#else /* !CONFIG_LTT_VMCORE */ + prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, chan)]); +#endif /* CONFIG_LTT_VMCORE */ + if (last_tsc_overflow(buf, *tsc)) + *rflags = LTT_RFLAG_ID_SIZE_TSC; + + if (unlikely(SUBBUF_OFFSET(*o_begin, chan) == 0)) + return 1; /* On a sub-buffer boundary : a switch is needed. */ + + *size = ltt_get_header_size(chan, *o_begin, data_size, before_hdr_pad, + *rflags); + *size += ltt_align(*o_begin + *size, largest_align) + data_size; + if (unlikely((SUBBUF_OFFSET(*o_begin, chan) + *size) > chan->a.sb_size)) + return 1; /* Event does not fit in the current sub-buffer. */ + + /* + * Event fits in the current buffer and we are not on a switch + * boundary. It's safe to write. + */ + *o_end = *o_begin + *size; + + if (unlikely((SUBBUF_OFFSET(*o_end, chan)) == 0)) + /* + * The offset_end will fall at the very beginning of the next + * subbuffer. + */ + return 1; + + return 0; +} + +static __inline__ +int ltt_reserve_slot(struct ltt_chan *chan, + struct ltt_trace *trace, size_t data_size, + int largest_align, int cpu, + struct ltt_chanbuf **ret_buf, + size_t *slot_size, long *buf_offset, u64 *tsc, + unsigned int *rflags) +{ + struct ltt_chanbuf *buf = *ret_buf = per_cpu_ptr(chan->a.buf, cpu); + long o_begin, o_end, o_old; + size_t before_hdr_pad; + + /* + * Drop the event when tracing is nested too deeply on this CPU. 
+ */ + if (unlikely(__get_cpu_var(ltt_nesting) > 4)) { + local_inc(&buf->events_lost); + return -EPERM; + } + + /* + * Fast path : a single reservation attempt. Any sub-buffer switch or + * lost cmpxchg race is handled by the slow path. + */ + if (unlikely(ltt_relay_try_reserve(buf, chan, data_size, tsc, rflags, + largest_align, &o_begin, &o_end, + &o_old, &before_hdr_pad, slot_size))) + goto slow_path; + + if (unlikely(local_cmpxchg(&buf->offset, o_old, o_end) != o_old)) + goto slow_path; + + /* + * Atomically update last_tsc. This update races against concurrent + * atomic updates, but the race will always cause supplementary full TSC + * events, never the opposite (missing a full TSC event when it would be + * needed). + */ + save_last_tsc(buf, *tsc); + + /* + * Push the reader if necessary. + */ + ltt_reserve_push_reader(buf, chan, o_end - 1); + + /* + * Clear noref flag for this subbuffer. + */ + ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(o_end - 1, chan)); + + *buf_offset = o_begin + before_hdr_pad; + return 0; +slow_path: + return ltt_reserve_slot_lockless_slow(chan, trace, data_size, + largest_align, cpu, ret_buf, + slot_size, buf_offset, tsc, + rflags); +} + +/* + * Force a sub-buffer switch for a per-cpu buffer. This operation is + * completely reentrant : can be called while tracing is active with + * absolutely no lock held. + * + * Note, however, that as a local_cmpxchg is used for some atomic + * operations, this function must be called from the CPU which owns the buffer + * for an ACTIVE flush. + */ +static __inline__ +void ltt_force_switch(struct ltt_chanbuf *buf, enum force_switch_mode mode) +{ + /* No "return <expr>;" here : the function returns void. */ + ltt_force_switch_lockless_slow(buf, mode); +} + +/* + * For flight recording. Must be called after relay_commit. + * This function increments the subbuffer's commit_seq counter each time the + * commit count reaches back the reserve offset (modulo subbuffer size). It is + * useful for crash dump. 
+ */ +#ifdef CONFIG_LTT_VMCORE +static __inline__ +void ltt_write_commit_counter(struct ltt_chanbuf *buf, struct ltt_chan *chan, + long idx, long buf_offset, long commit_count, + size_t data_size) +{ + long offset; + long commit_seq_old; + + offset = buf_offset + data_size; + + /* + * SUBBUF_OFFSET includes commit_count_mask. We can simply + * compare the offsets within the subbuffer without caring about + * buffer full/empty mismatch because offset is never zero here + * (subbuffer header and event headers have non-zero length). + */ + if (unlikely(SUBBUF_OFFSET(offset - commit_count, chan))) + return; + + commit_seq_old = local_read(&buf->commit_seq[idx]); + while (commit_seq_old < commit_count) /* only move commit_seq forward */ + commit_seq_old = local_cmpxchg(&buf->commit_seq[idx], + commit_seq_old, commit_count); +} +#else /* !CONFIG_LTT_VMCORE */ +static __inline__ +void ltt_write_commit_counter(struct ltt_chanbuf *buf, struct ltt_chan *chan, + long idx, long buf_offset, long commit_count, + size_t data_size) +{ +} +#endif /* CONFIG_LTT_VMCORE */ + +/* + * Atomic unordered slot commit. Increments the commit count in the + * specified sub-buffer, and delivers it if necessary. + * + * Parameters: + * + * @buf: buffer. + * @chan: channel. + * @buf_offset : offset following the event header. + * @data_size : size of the event data. + * @slot_size : size of the reserved slot. + */ +static __inline__ +void ltt_commit_slot(struct ltt_chanbuf *buf, struct ltt_chan *chan, + long buf_offset, size_t data_size, size_t slot_size) +{ + long offset_end = buf_offset; + long endidx = SUBBUF_INDEX(offset_end - 1, chan); + long commit_count; + +#ifdef LTT_NO_IPI_BARRIER + smp_wmb(); +#else /* !LTT_NO_IPI_BARRIER */ + /* + * Must write slot data before incrementing commit count. + * This compiler barrier is upgraded into a smp_mb() by the IPI + * sent by get_subbuf(). + */ + barrier(); +#endif /* LTT_NO_IPI_BARRIER */ + local_add(slot_size, &buf->commit_count[endidx].cc); + local_inc(&buf->commit_count[endidx].events); + /* + * Commit count read can race with concurrent out-of-order commit count updates. 
+ * This is only needed for ltt_check_deliver (for non-polling delivery + * only) and for ltt_write_commit_counter. The race can only cause the + * counter to be read with the same value more than once, which could + * cause : + * - Multiple deliveries for the same sub-buffer (which is handled + * gracefully by the reader code) if the value is for a full + * sub-buffer. It's important that we can never miss a sub-buffer + * delivery. Re-reading the value after the local_add ensures this. + * - Reading a commit_count with a higher value than what was actually + * added to it for the ltt_write_commit_counter call (again caused by + * a concurrent committer). It does not matter, because this function + * is interested in the fact that the commit count reaches back the + * reserve offset for a specific sub-buffer, which is completely + * independent of the order. + */ + commit_count = local_read(&buf->commit_count[endidx].cc); + + ltt_check_deliver(buf, chan, offset_end - 1, commit_count, endidx); + /* + * Update data_size for each commit. It's needed only for extracting + * ltt buffers from vmcore, after crash. + */ + ltt_write_commit_counter(buf, chan, endidx, buf_offset, + commit_count, data_size); +} + +#endif /* _LTT_LTT_RELAY_LOCKLESS_H */ diff --git a/ltt-relay-splice.c b/ltt-relay-splice.c new file mode 100644 index 00000000..e4694c17 --- /dev/null +++ b/ltt-relay-splice.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) + * Copyright (C) 2008-2009 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Re-using content from kernel/relay.c + * + * This file is released under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-relay.h" +#include "ltt-relay-lockless.h" + +/* Seeking is not supported : always returns -ESPIPE. */ +loff_t ltt_relay_no_llseek(struct file *file, loff_t offset, int origin) +{ + return -ESPIPE; +} + +/* Pipe buffer release callback : intentionally empty. */ +static void ltt_relay_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *pbuf) +{ +} + +static struct pipe_buf_operations ltt_relay_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = ltt_relay_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +/* Splice descriptor page release callback : intentionally empty. */ +static void ltt_relay_page_release(struct splice_pipe_desc *spd, unsigned int i) +{ +} + +/* + * subbuf_splice_actor - splice up to one subbuf's worth of data + * + * Returns the value of splice_to_pipe(), or 0 when no page was queued. + */ +static int subbuf_splice_actor(struct file *in, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags) +{ + struct ltt_chanbuf *buf = in->private_data; + struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a); + unsigned int poff, subbuf_pages, nr_pages; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .nr_pages = 0, + .partial = partial, + .flags = flags, + .ops = &ltt_relay_pipe_buf_ops, + .spd_release = ltt_relay_page_release, + }; + long consumed_old, roffset; + unsigned long bytes_avail; + + /* + * Check that a GET_SUBBUF ioctl has been done before. + */ + WARN_ON(atomic_long_read(&buf->active_readers) != 1); + consumed_old = atomic_long_read(&buf->consumed); + consumed_old += *ppos; + + /* + * Adjust read len, if longer than what is available. + * Max read size is 1 subbuffer due to get_subbuf/put_subbuf for + * protection. 
+ */ + bytes_avail = chan->a.sb_size; + WARN_ON(bytes_avail > chan->a.buf_size); + len = min_t(size_t, len, bytes_avail); + subbuf_pages = bytes_avail >> PAGE_SHIFT; + nr_pages = min_t(unsigned int, subbuf_pages, PIPE_DEF_BUFFERS); + roffset = consumed_old & PAGE_MASK; + poff = consumed_old & ~PAGE_MASK; + printk_dbg(KERN_DEBUG "SPLICE actor len %zu pos %zd write_pos %ld\n", + len, (ssize_t)*ppos, local_read(&buf->offset)); + + for (; spd.nr_pages < nr_pages; spd.nr_pages++) { + unsigned int this_len; + struct page *page; + + if (!len) + break; + printk_dbg(KERN_DEBUG "SPLICE actor loop len %zu roffset %ld\n", + len, roffset); + + /* + * NOTE(review): this_len is the remainder of the page and is not + * clamped to len, so "len -= this_len" below wraps when len is + * smaller. Presumably callers pass page-multiple lengths; confirm. + */ + this_len = PAGE_SIZE - poff; + page = ltt_relay_read_get_page(&buf->a, roffset); + spd.pages[spd.nr_pages] = page; + spd.partial[spd.nr_pages].offset = poff; + spd.partial[spd.nr_pages].len = this_len; + + poff = 0; + roffset += PAGE_SIZE; + len -= this_len; + } + + if (!spd.nr_pages) + return 0; + + return splice_to_pipe(pipe, &spd); +} + +/* + * ltt_relay_file_splice_read - splice buffer data to a pipe + * + * Calls subbuf_splice_actor() until something has been spliced. Returns the + * number of bytes spliced, else the actor's error value, or -EAGAIN when + * nothing was spliced and SPLICE_F_NONBLOCK is set. + */ +ssize_t ltt_relay_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + ssize_t spliced; + int ret; + + ret = 0; + spliced = 0; + + printk_dbg(KERN_DEBUG "SPLICE read len %zu pos %zd\n", len, + (ssize_t)*ppos); + while (len && !spliced) { + ret = subbuf_splice_actor(in, ppos, pipe, len, flags); + printk_dbg(KERN_DEBUG "SPLICE read loop ret %d\n", ret); + if (ret < 0) + break; + else if (!ret) { + if (flags & SPLICE_F_NONBLOCK) + ret = -EAGAIN; + break; + } + + *ppos += ret; + /* The actor may have spliced more than requested. */ + if (ret > len) + len = 0; + else + len -= ret; + spliced += ret; + } + + if (spliced) + return spliced; + + return ret; +} diff --git a/ltt-relay-vfs.c b/ltt-relay-vfs.c new file mode 100644 index 00000000..defbe2df --- /dev/null +++ b/ltt-relay-vfs.c @@ -0,0 +1,244 @@ +/* + * ltt/ltt-relay-vfs.c + * + * (C) Copyright 2009 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * LTTng VFS interface. 
+ * + * Author: + * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include + +#include "ltt-tracer.h" +#include "ltt-relay.h" +#include "ltt-relay-lockless.h" + +/** + * ltt_open - open file op for ltt files + * @inode: opened inode + * @file: opened file + * + * Open implementation. Makes sure only one open instance of a buffer is + * done at a given moment. + * + * Returns the error from ltt_chanbuf_open_read() when it fails; otherwise + * returns the result of nonseekable_open(). + */ +static int ltt_open(struct inode *inode, struct file *file) +{ + struct ltt_chanbuf *buf = inode->i_private; + int ret; + + ret = ltt_chanbuf_open_read(buf); + if (ret) + goto end; + + file->private_data = buf; + ret = nonseekable_open(inode, file); + /* + * The LTTng splice operation must believe that the file descriptor is + * seekable, so FMODE_PREAD is set after nonseekable_open(). This is a + * temporary fix to follow new checks added to splice.c. We should + * probably do the proper thing and implement a llseek function + * eventually, which involves modifying the lttng splice actors + * accordingly. TODO + */ + file->f_mode |= FMODE_PREAD; +end: + return ret; +} + +/** + * ltt_release - release file op for ltt files + * @inode: opened inode + * @file: opened file + * + * Release implementation. Calls ltt_chanbuf_release_read() on the buffer + * stored in the inode private data. Always returns 0. + */ +static int ltt_release(struct inode *inode, struct file *file) +{ + struct ltt_chanbuf *buf = inode->i_private; + + ltt_chanbuf_release_read(buf); + + return 0; +} + +/** + * ltt_poll - file op for ltt files + * @filp: the file + * @wait: poll table + * + * Poll implementation.
+ */ +static unsigned int ltt_poll(struct file *filp, poll_table *wait) +{ + unsigned int mask = 0; + struct inode *inode = filp->f_dentry->d_inode; + struct ltt_chanbuf *buf = inode->i_private; + struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a); + + if (filp->f_mode & FMODE_READ) { + poll_wait_set_exclusive(wait); + poll_wait(filp, &buf->read_wait, wait); + + WARN_ON(atomic_long_read(&buf->active_readers) != 1); + if (SUBBUF_TRUNC(ltt_chanbuf_get_offset(buf), chan) + - SUBBUF_TRUNC(ltt_chanbuf_get_consumed(buf), chan) + == 0) { + if (buf->finalized) + return POLLHUP; + else + return 0; + } else { + if (SUBBUF_TRUNC(ltt_chanbuf_get_offset(buf), chan) + - SUBBUF_TRUNC(ltt_chanbuf_get_consumed(buf), chan) + >= chan->a.buf_size) + return POLLPRI | POLLRDBAND; + else + return POLLIN | POLLRDNORM; + } + } + return mask; +} + +/** + * ltt_ioctl - control on the debugfs file + * + * @inode: the inode + * @filp: the file + * @cmd: the command + * @arg: command arg + * + * This ioctl implements three commands necessary for a minimal + * producer/consumer implementation : + * RELAY_GET_SB + * Get the next sub-buffer that can be read. It never blocks. + * RELAY_PUT_SB + * Release the currently read sub-buffer. Parameter is the last + * put subbuffer (returned by GET_SUBBUF). + * RELAY_GET_N_SB + * returns the number of sub-buffers in the per cpu channel. + * RELAY_GET_SB_SIZE + * returns the size of the current sub-buffer. + * RELAY_GET_MAX_SB_SIZE + * returns the maximum size for sub-buffers. 
+ */ +static +int ltt_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct ltt_chanbuf *buf = inode->i_private; + u32 __user *argp = (u32 __user *)arg; + + switch (cmd) { + case RELAY_GET_SB: + { + unsigned long consumed; + int ret; + + ret = ltt_chanbuf_get_subbuf(buf, &consumed); + if (ret) + return ret; + else + return put_user((u32)consumed, argp); + break; + } + case RELAY_PUT_SB: + { + u32 uconsumed_old; + int ret; + long consumed_old; + + ret = get_user(uconsumed_old, argp); + if (ret) + return ret; /* will return -EFAULT */ + + consumed_old = ltt_chanbuf_get_consumed(buf); + consumed_old = consumed_old & (~0xFFFFFFFFL); + consumed_old = consumed_old | uconsumed_old; + ret = ltt_chanbuf_put_subbuf(buf, consumed_old); + if (ret) + return ret; + break; + } + case RELAY_GET_N_SB: + return put_user((u32)buf->a.chan->n_sb, argp); + break; + case RELAY_GET_SB_SIZE: + return put_user(get_read_sb_size(buf), argp); + break; + case RELAY_GET_MAX_SB_SIZE: + return put_user((u32)buf->a.chan->sb_size, argp); + break; + default: + return -ENOIOCTLCMD; + } + return 0; +} + +#ifdef CONFIG_COMPAT +static +long ltt_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long ret = -ENOIOCTLCMD; + + lock_kernel(); + ret = ltt_ioctl(file->f_dentry->d_inode, file, cmd, arg); + unlock_kernel(); + + return ret; +} +#endif + +static const struct file_operations ltt_file_operations = { + .open = ltt_open, + .release = ltt_release, + .poll = ltt_poll, + .splice_read = ltt_relay_file_splice_read, + .ioctl = ltt_ioctl, + .llseek = ltt_relay_no_llseek, +#ifdef CONFIG_COMPAT + .compat_ioctl = ltt_compat_ioctl, +#endif +}; + +int ltt_chanbuf_create_file(const char *filename, struct dentry *parent, + int mode, struct ltt_chanbuf *buf) +{ + struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a); + char *tmpname; + int ret = 0; + + tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) { + ret = -ENOMEM; + 
goto end; + } + + snprintf(tmpname, NAME_MAX, "%s%s_%d", + chan->overwrite ? LTT_FLIGHT_PREFIX : "", + chan->a.filename, buf->a.cpu); + + buf->a.dentry = debugfs_create_file(tmpname, mode, parent, buf, + <t_file_operations); + if (!buf->a.dentry) { + ret = -ENOMEM; + goto free_name; + } +free_name: + kfree(tmpname); +end: + return ret; +} + +int ltt_chanbuf_remove_file(struct ltt_chanbuf *buf) +{ + debugfs_remove(buf->a.dentry); + + return 0; +} diff --git a/ltt-relay.h b/ltt-relay.h new file mode 100644 index 00000000..c79403bf --- /dev/null +++ b/ltt-relay.h @@ -0,0 +1,377 @@ + /* + * include/linux/ltt-relay.h + * + * Copyright (C) 2008,2009 - Mathieu Desnoyers + * + * Dual LGPL v2.1/GPL v2 license. + * + * Credits to Steven Rostedt for proposing to use an extra-subbuffer owned by + * the reader in flight recorder mode. + */ + +#ifndef _LINUX_LTT_RELAY_H +#define _LINUX_LTT_RELAY_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer-core.h" + +/* Use lowest pointer bit to show the sub-buffer has no reference. 
*/ +#define RCHAN_NOREF_FLAG 0x1UL + +#define RCHAN_SB_IS_NOREF(x) ((unsigned long)(x) & RCHAN_NOREF_FLAG) +#define RCHAN_SB_SET_NOREF(x) \ + (x = (struct chanbuf_page *)((unsigned long)(x) | RCHAN_NOREF_FLAG)) +#define RCHAN_SB_CLEAR_NOREF(x) \ + (x = (struct chanbuf_page *)((unsigned long)(x) & ~RCHAN_NOREF_FLAG)) + +struct ltt_trace; + +struct chanbuf_page { + void *virt; /* page virtual address (cached) */ + struct page *page; /* pointer to page structure */ +}; + +struct chanbuf_sb { + struct chanbuf_page *pages; /* Pointer to the chanbuf_page array of the subbuf; lowest bit may carry RCHAN_NOREF_FLAG */ +}; + +struct ltt_chanbuf_alloc { + struct chanbuf_sb *buf_wsb; /* Array of chanbuf_sb for writer, indexed by sub-buffer index */ + struct chanbuf_sb buf_rsb; /* chanbuf_sb for reader (see update_read_sb_index()) */ + void **_virt; /* Array of pointers to page addr */ + struct page **_pages; /* Array of pointers to pages */ + struct dentry *dentry; /* Associated file dentry */ + unsigned int nr_pages; /* Number pages in buffer */ + + struct ltt_chan_alloc *chan; /* Associated channel */ + unsigned int cpu; /* This buffer's cpu */ + unsigned int allocated:1; /* Bool: is buffer allocated ?
*/ +}; + +int ltt_chanbuf_alloc_create(struct ltt_chanbuf_alloc *buf, + struct ltt_chan_alloc *chan, int cpu); +void ltt_chanbuf_alloc_free(struct ltt_chanbuf_alloc *buf); +int ltt_chan_alloc_init(struct ltt_chan_alloc *chan, struct ltt_trace *trace, + const char *base_filename, + struct dentry *parent, size_t sb_size, + size_t n_sb, int extra_reader_sb, int overwrite); +void ltt_chan_alloc_free(struct ltt_chan_alloc *chan); +void ltt_chan_alloc_remove_files(struct ltt_chan_alloc *chan); +int ltt_chanbuf_create_file(const char *filename, struct dentry *parent, + int mode, struct ltt_chanbuf *buf); +int ltt_chanbuf_remove_file(struct ltt_chanbuf *buf); + +void ltt_chan_for_each_channel(void (*cb) (struct ltt_chanbuf *buf), int cpu); + +extern void _ltt_relay_write(struct ltt_chanbuf_alloc *bufa, + size_t offset, const void *src, size_t len, + ssize_t pagecpy); + +extern void _ltt_relay_strncpy(struct ltt_chanbuf_alloc *bufa, + size_t offset, const void *src, size_t len, + ssize_t pagecpy); + +extern void _ltt_relay_strncpy_fixup(struct ltt_chanbuf_alloc *bufa, + size_t offset, size_t len, size_t copied, + int terminated); + +extern int ltt_relay_read(struct ltt_chanbuf_alloc *bufa, + size_t offset, void *dest, size_t len); + +extern int ltt_relay_read_cstr(struct ltt_chanbuf_alloc *bufa, + size_t offset, void *dest, size_t len); + +extern struct page *ltt_relay_read_get_page(struct ltt_chanbuf_alloc *bufa, + size_t offset); + +/* + * Return the address where a given offset is located. + * Should be used to get the current subbuffer header pointer. Given we know + * it's never on a page boundary, it's safe to write directly to this address, + * as long as the write is never bigger than a page size.
+ */ +extern void *ltt_relay_offset_address(struct ltt_chanbuf_alloc *bufa, + size_t offset); +extern void *ltt_relay_read_offset_address(struct ltt_chanbuf_alloc *bufa, + size_t offset); + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +static __inline__ +void ltt_relay_do_copy(void *dest, const void *src, size_t len) +{ + switch (len) { + case 0: + break; + case 1: + *(u8 *)dest = *(const u8 *)src; + break; + case 2: + *(u16 *)dest = *(const u16 *)src; + break; + case 4: + *(u32 *)dest = *(const u32 *)src; + break; + case 8: + *(u64 *)dest = *(const u64 *)src; + break; + default: + /* + * What we really want here is an __inline__ memcpy, but we don't + * have constants, so gcc generally uses a function call. + * Lengths other than 0, 1, 2, 4 and 8 are copied byte by byte. + */ + for (; len > 0; len--) + *(u8 *)dest++ = *(const u8 *)src++; + } +} +#else +/* + * Returns whether the dest and src addresses are aligned on + * min(sizeof(void *), len). Call this with statically known len for efficiency. + * The result is 1 when ltt_align() yields 0 for both addresses, 0 otherwise. + */ +static __inline__ +int addr_aligned(const void *dest, const void *src, size_t len) +{ + if (ltt_align((size_t)dest, len)) + return 0; + if (ltt_align((size_t)src, len)) + return 0; + return 1; +} + +static __inline__ +void ltt_relay_do_copy(void *dest, const void *src, size_t len) +{ + switch (len) { + case 0: + break; + case 1: + *(u8 *)dest = *(const u8 *)src; + break; + case 2: + if (unlikely(!addr_aligned(dest, src, 2))) + goto memcpy_fallback; + *(u16 *)dest = *(const u16 *)src; + break; + case 4: + if (unlikely(!addr_aligned(dest, src, 4))) + goto memcpy_fallback; + *(u32 *)dest = *(const u32 *)src; + break; + case 8: + if (unlikely(!addr_aligned(dest, src, 8))) + goto memcpy_fallback; + *(u64 *)dest = *(const u64 *)src; + break; + default: + goto memcpy_fallback; + } + return; + +memcpy_fallback: + /* + * What we really want here is an inline memcpy, but we don't + * have constants, so gcc generally uses a function call.
+ * Reached for unaligned 2, 4 or 8 byte copies and for every other + * length: copy byte by byte. + */ + for (; len > 0; len--) + *(u8 *)dest++ = *(const u8 *)src++; +} +#endif + +/* + * ltt_relay_do_memset - write character into dest. + * @dest: destination + * @src: source character + * @len: length to write + */ +static __inline__ +void ltt_relay_do_memset(void *dest, char src, size_t len) +{ + /* + * What we really want here is an __inline__ memset, but we + * don't have constants, so gcc generally uses a function call. + */ + for (; len > 0; len--) + *(u8 *)dest++ = src; +} + + +/* + * ltt_relay_do_strncpy - copy a string up to a certain number of bytes + * @dest: destination + * @src: source + * @len: max. length to copy + * @terminated: output string ends with \0 (output) + * + * returns the number of bytes copied, counting the terminating \0 when one + * is copied. Does not finalize with \0 if len is reached. + */ +static __inline__ +size_t ltt_relay_do_strncpy(void *dest, const void *src, size_t len, + int *terminated) +{ + size_t orig_len = len; + + *terminated = 0; + /* + * What we really want here is an __inline__ strncpy, but we + * don't have constants, so gcc generally uses a function call.
+ */ + for (; len > 0; len--) { + *(u8 *)dest = ACCESS_ONCE(*(const u8 *)src); + /* Check with dest, because src may be modified concurrently */ + if (*(const u8 *)dest == '\0') { + len--; + *terminated = 1; + break; + } + dest++; + src++; + } + return orig_len - len; +} + +static __inline__ +int ltt_relay_write(struct ltt_chanbuf_alloc *bufa, + struct ltt_chan_alloc *chana, size_t offset, + const void *src, size_t len) +{ + size_t sbidx, index; + ssize_t pagecpy; + struct chanbuf_page *rpages; + + offset &= chana->buf_size - 1; + sbidx = offset >> chana->sb_size_order; + index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT; + pagecpy = min_t(size_t, len, (- offset) & ~PAGE_MASK); + rpages = bufa->buf_wsb[sbidx].pages; + WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages)); + ltt_relay_do_copy(rpages[index].virt + (offset & ~PAGE_MASK), + src, pagecpy); + + if (unlikely(len != pagecpy)) + _ltt_relay_write(bufa, offset, src, len, pagecpy); + return len; +} + +static __inline__ +int ltt_relay_strncpy(struct ltt_chanbuf_alloc *bufa, + struct ltt_chan_alloc *chana, size_t offset, + const void *src, size_t len) +{ + size_t sbidx, index; + ssize_t pagecpy, copied; + struct chanbuf_page *rpages; + int terminated; + + offset &= chana->buf_size - 1; + sbidx = offset >> chana->sb_size_order; + index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT; + pagecpy = min_t(size_t, len, (- offset) & ~PAGE_MASK); + rpages = bufa->buf_wsb[sbidx].pages; + WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages)); + copied = ltt_relay_do_strncpy(rpages[index].virt + + (offset & ~PAGE_MASK), + src, pagecpy, &terminated); + if (unlikely(copied < pagecpy || ((len == pagecpy) && !terminated))) + _ltt_relay_strncpy_fixup(bufa, offset, len, copied, + terminated); + else { + if (unlikely(len != pagecpy)) + _ltt_relay_strncpy(bufa, offset, src, len, pagecpy); + } + return len; +} + +/** + * ltt_clear_noref_flag - Clear the noref subbuffer flag, for writer.
+ */ +static __inline__ +void ltt_clear_noref_flag(struct ltt_chanbuf_alloc *bufa, long idx) +{ + struct chanbuf_page *sb_pages, *new_sb_pages; + + sb_pages = bufa->buf_wsb[idx].pages; + for (;;) { + if (!RCHAN_SB_IS_NOREF(sb_pages)) + return; /* Already writing to this buffer */ + new_sb_pages = sb_pages; + RCHAN_SB_CLEAR_NOREF(new_sb_pages); + new_sb_pages = cmpxchg(&bufa->buf_wsb[idx].pages, + sb_pages, new_sb_pages); + if (likely(new_sb_pages == sb_pages)) + break; + sb_pages = new_sb_pages; + } +} + +/** + * ltt_set_noref_flag - Set the noref subbuffer flag, for writer. + * @bufa: buffer whose writer sub-buffer table is updated + * @idx: index into buf_wsb + * + * Loops on cmpxchg() until the flag is set, or returns early when the flag + * is observed already set. + */ +static __inline__ +void ltt_set_noref_flag(struct ltt_chanbuf_alloc *bufa, long idx) +{ + struct chanbuf_page *sb_pages, *new_sb_pages; + + sb_pages = bufa->buf_wsb[idx].pages; + for (;;) { + if (RCHAN_SB_IS_NOREF(sb_pages)) + return; /* Already set */ + new_sb_pages = sb_pages; + RCHAN_SB_SET_NOREF(new_sb_pages); + new_sb_pages = cmpxchg(&bufa->buf_wsb[idx].pages, + sb_pages, new_sb_pages); + if (likely(new_sb_pages == sb_pages)) + break; + sb_pages = new_sb_pages; + } +} + +/** + * update_read_sb_index - Read-side subbuffer index update. + * @bufa: buffer to update + * @chana: channel; extra_reader_sb selects the exchange path + * @consumed_idx: index of the writer sub-buffer the reader wants + * + * With an extra reader sub-buffer, the reader pages are exchanged with the + * writer sub-buffer at @consumed_idx using cmpxchg(); -EAGAIN is returned + * when that sub-buffer is not flagged noref or when the exchange fails. + * Without it, the reader uses the writer pages directly. In both cases the + * noref flag is cleared from the reader copy of the pointer. Returns 0 on + * success. + */ +static __inline__ +int update_read_sb_index(struct ltt_chanbuf_alloc *bufa, + struct ltt_chan_alloc *chana, + long consumed_idx) +{ + struct chanbuf_page *old_wpage, *new_wpage; + + if (unlikely(chana->extra_reader_sb)) { + /* + * Exchange the target writer subbuffer with our own unused + * subbuffer.
+ */ + old_wpage = bufa->buf_wsb[consumed_idx].pages; + if (unlikely(!RCHAN_SB_IS_NOREF(old_wpage))) + return -EAGAIN; + WARN_ON_ONCE(!RCHAN_SB_IS_NOREF(bufa->buf_rsb.pages)); + new_wpage = cmpxchg(&bufa->buf_wsb[consumed_idx].pages, + old_wpage, + bufa->buf_rsb.pages); + if (unlikely(old_wpage != new_wpage)) + return -EAGAIN; + bufa->buf_rsb.pages = new_wpage; + RCHAN_SB_CLEAR_NOREF(bufa->buf_rsb.pages); + } else { + /* No page exchange, use the writer page directly */ + bufa->buf_rsb.pages = bufa->buf_wsb[consumed_idx].pages; + RCHAN_SB_CLEAR_NOREF(bufa->buf_rsb.pages); + } + return 0; +} + +ssize_t ltt_relay_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); +loff_t ltt_relay_no_llseek(struct file *file, loff_t offset, int origin); + +#endif /* _LINUX_LTT_RELAY_H */ diff --git a/ltt-serialize.c b/ltt-serialize.c new file mode 100644 index 00000000..305b3eb8 --- /dev/null +++ b/ltt-serialize.c @@ -0,0 +1,969 @@ +/* + * LTTng serializing code. + * + * Copyright Mathieu Desnoyers, March 2007. + * + * Dual LGPL v2.1/GPL v2 license. + * + * See this discussion about weirdness about passing va_list and then va_list to + * functions. (related to array argument passing). va_list seems to be + * implemented as an array on x86_64, but not on i386... This is why we pass a + * va_list * to ltt_vtrace. + */ + +#include +#include +#include +#include + +#include "ltt-tracer.h" +#include "ltt-relay-lockless.h" + +enum ltt_type { + LTT_TYPE_SIGNED_INT, + LTT_TYPE_UNSIGNED_INT, + LTT_TYPE_STRING, + LTT_TYPE_NONE, +}; + +#define LTT_ATTRIBUTE_NETWORK_BYTE_ORDER (1<<1) + +/* + * Stack used to keep track of string length at size calculation, passed to + * string copy to handle racy input string updates. + * Can be used by any context; this is ensured by putting the stack position + * back to its original position after using it.
+ */ +#define TRACER_STACK_LEN (PAGE_SIZE / sizeof(unsigned long)) +static DEFINE_PER_CPU(unsigned long [TRACER_STACK_LEN], + tracer_stack); +static DEFINE_PER_CPU(unsigned int, tracer_stack_pos); + +/* + * Inspired from vsnprintf + * + * The serialization format string supports the basic printf format strings. + * In addition, it defines new formats that can be used to serialize more + * complex/non portable data structures. + * + * Typical use: + * + * field_name %ctype + * field_name #tracetype %ctype + * field_name #tracetype %ctype1 %ctype2 ... + * + * A conversion is performed between format string types supported by GCC and + * the trace type requested. GCC type is used to perform type checking on format + * strings. Trace type is used to specify the exact binary representation + * in the trace. A mapping is done between one or more GCC types to one trace + * type. Sign extension, if required by the conversion, is performed following + * the trace type. + * + * If a gcc format is not declared with a trace format, the gcc format is + * also used as binary representation in the trace. + * + * Strings are supported with %s. + * A single tracetype (sequence) can take multiple c types as parameter. + * + * c types: + * + * see printf(3). + * + * Note: to write a uint32_t in a trace, the following expression is recommended + * si it can be portable: + * + * ("#4u%lu", (unsigned long)var) + * + * trace types: + * + * Serialization specific formats : + * + * Fixed size integers + * #1u writes uint8_t + * #2u writes uint16_t + * #4u writes uint32_t + * #8u writes uint64_t + * #1d writes int8_t + * #2d writes int16_t + * #4d writes int32_t + * #8d writes int64_t + * i.e.: + * #1u%lu #2u%lu #4d%lu #8d%lu #llu%hu #d%lu + * + * * Attributes: + * + * n: (for network byte order) + * #ntracetype%ctype + * is written in the trace in network byte order. 
+ * + * i.e.: #bn4u%lu, #n%lu, #b%u + * + * TODO (eventually) + * Variable length sequence + * #a #tracetype1 #tracetype2 %array_ptr %elem_size %num_elems + * In the trace: + * #a specifies that this is a sequence + * #tracetype1 is the type of elements in the sequence + * #tracetype2 is the type of the element count + * GCC input: + * array_ptr is a pointer to an array that contains members of size + * elem_size. + * num_elems is the number of elements in the array. + * i.e.: #a #lu #lu %p %lu %u + * + * Callback + * #k callback (taken from the probe data) + * The following % arguments are exepected by the callback + * + * i.e.: #a #lu #lu #k %p + * + * Note: No conversion is done from floats to integers, nor from integers to + * floats between c types and trace types. float conversion from double to float + * or from float to double is also not supported. + * + * REMOVE + * %*b expects sizeof(data), data + * where sizeof(data) is 1, 2, 4 or 8 + * + * Fixed length struct, union or array. + * FIXME: unable to extract those sizes statically. + * %*r expects sizeof(*ptr), ptr + * %*.*r expects sizeof(*ptr), __alignof__(*ptr), ptr + * struct and unions removed. + * Fixed length array: + * [%p]#a[len #tracetype] + * i.e.: [%p]#a[12 #lu] + * + * Variable length sequence + * %*.*:*v expects sizeof(*ptr), __alignof__(*ptr), elem_num, ptr + * where elem_num is the number of elements in the sequence + */ +static inline +const char *parse_trace_type(const char *fmt, char *trace_size, + enum ltt_type *trace_type, + unsigned long *attributes) +{ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + /* 't' added for ptrdiff_t */ + + /* parse attributes. 
*/ +repeat: + switch (*fmt) { + case 'n': + *attributes |= LTT_ATTRIBUTE_NETWORK_BYTE_ORDER; + ++fmt; + goto repeat; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z' || *fmt == 't' || + *fmt == 'S' || *fmt == '1' || *fmt == '2' || + *fmt == '4' || *fmt == 8) { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } + } + + switch (*fmt) { + case 'c': + *trace_type = LTT_TYPE_UNSIGNED_INT; + *trace_size = sizeof(unsigned char); + goto parse_end; + case 's': + *trace_type = LTT_TYPE_STRING; + goto parse_end; + case 'p': + *trace_type = LTT_TYPE_UNSIGNED_INT; + *trace_size = sizeof(void *); + goto parse_end; + case 'd': + case 'i': + *trace_type = LTT_TYPE_SIGNED_INT; + break; + case 'o': + case 'u': + case 'x': + case 'X': + *trace_type = LTT_TYPE_UNSIGNED_INT; + break; + default: + if (!*fmt) + --fmt; + goto parse_end; + } + switch (qualifier) { + case 'L': + *trace_size = sizeof(long long); + break; + case 'l': + *trace_size = sizeof(long); + break; + case 'Z': + case 'z': + *trace_size = sizeof(size_t); + break; + case 't': + *trace_size = sizeof(ptrdiff_t); + break; + case 'h': + *trace_size = sizeof(short); + break; + case '1': + *trace_size = sizeof(uint8_t); + break; + case '2': + *trace_size = sizeof(uint16_t); + break; + case '4': + *trace_size = sizeof(uint32_t); + break; + case '8': + *trace_size = sizeof(uint64_t); + break; + default: + *trace_size = sizeof(int); + } + +parse_end: + return fmt; +} + +/* + * Restrictions: + * Field width and precision are *not* supported. + * %n not supported. + */ +static inline +const char *parse_c_type(const char *fmt, char *c_size, enum ltt_type *c_type, + char *outfmt) +{ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. 
*/ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + /* 't' added for ptrdiff_t */ + + /* process flags : ignore standard print formats for now. */ +repeat: + switch (*fmt) { + case '-': + case '+': + case ' ': + case '#': + case '0': + ++fmt; + goto repeat; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z' || *fmt == 't' || + *fmt == 'S') { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } + } + + if (outfmt) { + if (qualifier != -1) + *outfmt++ = (char)qualifier; + *outfmt++ = *fmt; + *outfmt = 0; + } + + switch (*fmt) { + case 'c': + *c_type = LTT_TYPE_UNSIGNED_INT; + *c_size = sizeof(unsigned char); + goto parse_end; + case 's': + *c_type = LTT_TYPE_STRING; + goto parse_end; + case 'p': + *c_type = LTT_TYPE_UNSIGNED_INT; + *c_size = sizeof(void *); + goto parse_end; + case 'd': + case 'i': + *c_type = LTT_TYPE_SIGNED_INT; + break; + case 'o': + case 'u': + case 'x': + case 'X': + *c_type = LTT_TYPE_UNSIGNED_INT; + break; + default: + if (!*fmt) + --fmt; + goto parse_end; + } + switch (qualifier) { + case 'L': + *c_size = sizeof(long long); + break; + case 'l': + *c_size = sizeof(long); + break; + case 'Z': + case 'z': + *c_size = sizeof(size_t); + break; + case 't': + *c_size = sizeof(ptrdiff_t); + break; + case 'h': + *c_size = sizeof(short); + break; + default: + *c_size = sizeof(int); + } + +parse_end: + return fmt; +} + +static inline +size_t serialize_trace_data(struct ltt_chanbuf *buf, size_t buf_offset, + char trace_size, enum ltt_type trace_type, + char c_size, enum ltt_type c_type, + unsigned int *stack_pos_ctx, + int *largest_align, + va_list *args) +{ + union { + unsigned long v_ulong; + uint64_t v_uint64; + struct { + const char *s; + size_t len; + } v_string; + } tmp; + + /* + * Be careful about sign extension here. + * Sign extension is done with the destination (trace) type. 
+ */ + switch (trace_type) { + case LTT_TYPE_SIGNED_INT: + switch (c_size) { + case 1: + tmp.v_ulong = (long)(int8_t)va_arg(*args, int); + break; + case 2: + tmp.v_ulong = (long)(int16_t)va_arg(*args, int); + break; + case 4: + tmp.v_ulong = (long)(int32_t)va_arg(*args, int); + break; + case 8: + tmp.v_uint64 = va_arg(*args, int64_t); + break; + default: + BUG(); + } + break; + case LTT_TYPE_UNSIGNED_INT: + switch (c_size) { + case 1: + tmp.v_ulong = (unsigned long)(uint8_t)va_arg(*args, unsigned int); + break; + case 2: + tmp.v_ulong = (unsigned long)(uint16_t)va_arg(*args, unsigned int); + break; + case 4: + tmp.v_ulong = (unsigned long)(uint32_t)va_arg(*args, unsigned int); + break; + case 8: + tmp.v_uint64 = va_arg(*args, uint64_t); + break; + default: + BUG(); + } + break; + case LTT_TYPE_STRING: + tmp.v_string.s = va_arg(*args, const char *); + if ((unsigned long)tmp.v_string.s < PAGE_SIZE) + tmp.v_string.s = ""; + if (!buf) { + /* + * Reserve tracer stack entry. + */ + __get_cpu_var(tracer_stack_pos)++; + WARN_ON_ONCE(__get_cpu_var(tracer_stack_pos) + > TRACER_STACK_LEN); + barrier(); + __get_cpu_var(tracer_stack)[*stack_pos_ctx] = + strlen(tmp.v_string.s) + 1; + } + tmp.v_string.len = __get_cpu_var(tracer_stack) + [(*stack_pos_ctx)++]; + if (buf) + ltt_relay_strncpy(&buf->a, buf->a.chan, buf_offset, + tmp.v_string.s, tmp.v_string.len); + buf_offset += tmp.v_string.len; + goto copydone; + default: + BUG(); + } + + /* + * If trace_size is lower or equal to 4 bytes, there is no sign + * extension to do because we are already encoded in a long. Therefore, + * we can combine signed and unsigned ops. 4 bytes float also works + * with this, because we do a simple copy of 4 bytes into 4 bytes + * without manipulation (and we do not support conversion from integers + * to floats). + * It is also the case if c_size is 8 bytes, which is the largest + * possible integer. 
+ */ + if (ltt_get_alignment()) { + buf_offset += ltt_align(buf_offset, trace_size); + if (largest_align) + *largest_align = max_t(int, *largest_align, trace_size); + } + if (trace_size <= 4 || c_size == 8) { + if (buf) { + switch (trace_size) { + case 1: + if (c_size == 8) + ltt_relay_write(&buf->a, buf->a.chan, + buf_offset, + (uint8_t[]){ (uint8_t)tmp.v_uint64 }, + sizeof(uint8_t)); + else + ltt_relay_write(&buf->a, buf->a.chan, + buf_offset, + (uint8_t[]){ (uint8_t)tmp.v_ulong }, + sizeof(uint8_t)); + break; + case 2: + if (c_size == 8) + ltt_relay_write(&buf->a, buf->a.chan, + buf_offset, + (uint16_t[]){ (uint16_t)tmp.v_uint64 }, + sizeof(uint16_t)); + else + ltt_relay_write(&buf->a, buf->a.chan, + buf_offset, + (uint16_t[]){ (uint16_t)tmp.v_ulong }, + sizeof(uint16_t)); + break; + case 4: + if (c_size == 8) + ltt_relay_write(&buf->a, buf->a.chan, + buf_offset, + (uint32_t[]){ (uint32_t)tmp.v_uint64 }, + sizeof(uint32_t)); + else + ltt_relay_write(&buf->a, buf->a.chan, + buf_offset, + (uint32_t[]){ (uint32_t)tmp.v_ulong }, + sizeof(uint32_t)); + break; + case 8: + /* + * c_size cannot be other than 8 here because + * trace_size > 4. + */ + ltt_relay_write(&buf->a, buf->a.chan, buf_offset, + (uint64_t[]){ (uint64_t)tmp.v_uint64 }, + sizeof(uint64_t)); + break; + default: + BUG(); + } + } + buf_offset += trace_size; + goto copydone; + } else { + /* + * Perform sign extension. 
+ */ + if (buf) { + switch (trace_type) { + case LTT_TYPE_SIGNED_INT: + ltt_relay_write(&buf->a, buf->a.chan, buf_offset, + (int64_t[]){ (int64_t)tmp.v_ulong }, + sizeof(int64_t)); + break; + case LTT_TYPE_UNSIGNED_INT: + ltt_relay_write(&buf->a, buf->a.chan, buf_offset, + (uint64_t[]){ (uint64_t)tmp.v_ulong }, + sizeof(uint64_t)); + break; + default: + BUG(); + } + } + buf_offset += trace_size; + goto copydone; + } + +copydone: + return buf_offset; +} + +notrace size_t +ltt_serialize_data(struct ltt_chanbuf *buf, size_t buf_offset, + struct ltt_serialize_closure *closure, + void *serialize_private, unsigned int stack_pos_ctx, + int *largest_align, const char *fmt, va_list *args) +{ + char trace_size = 0, c_size = 0; /* + * 0 (unset), 1, 2, 4, 8 bytes. + */ + enum ltt_type trace_type = LTT_TYPE_NONE, c_type = LTT_TYPE_NONE; + unsigned long attributes = 0; + + for (; *fmt ; ++fmt) { + switch (*fmt) { + case '#': + /* tracetypes (#) */ + ++fmt; /* skip first '#' */ + if (*fmt == '#') /* Escaped ## */ + break; + attributes = 0; + fmt = parse_trace_type(fmt, &trace_size, &trace_type, + &attributes); + break; + case '%': + /* c types (%) */ + ++fmt; /* skip first '%' */ + if (*fmt == '%') /* Escaped %% */ + break; + fmt = parse_c_type(fmt, &c_size, &c_type, NULL); + /* + * Output c types if no trace types has been + * specified. 
+ */ + if (!trace_size) + trace_size = c_size; + if (trace_type == LTT_TYPE_NONE) + trace_type = c_type; + if (c_type == LTT_TYPE_STRING) + trace_type = LTT_TYPE_STRING; + /* perform trace write */ + buf_offset = serialize_trace_data(buf, buf_offset, + trace_size, + trace_type, c_size, + c_type, + &stack_pos_ctx, + largest_align, + args); + trace_size = 0; + c_size = 0; + trace_type = LTT_TYPE_NONE; + c_size = LTT_TYPE_NONE; + attributes = 0; + break; + /* default is to skip the text, doing nothing */ + } + } + return buf_offset; +} +EXPORT_SYMBOL_GPL(ltt_serialize_data); + +static inline +uint64_t unserialize_base_type(struct ltt_chanbuf *buf, + size_t *ppos, char trace_size, + enum ltt_type trace_type) +{ + uint64_t tmp; + + *ppos += ltt_align(*ppos, trace_size); + ltt_relay_read(&buf->a, *ppos, &tmp, trace_size); + *ppos += trace_size; + + switch (trace_type) { + case LTT_TYPE_SIGNED_INT: + switch (trace_size) { + case 1: + return (uint64_t)*(int8_t *)&tmp; + case 2: + return (uint64_t)*(int16_t *)&tmp; + case 4: + return (uint64_t)*(int32_t *)&tmp; + case 8: + return tmp; + } + break; + case LTT_TYPE_UNSIGNED_INT: + switch (trace_size) { + case 1: + return (uint64_t)*(uint8_t *)&tmp; + case 2: + return (uint64_t)*(uint16_t *)&tmp; + case 4: + return (uint64_t)*(uint32_t *)&tmp; + case 8: + return tmp; + } + break; + default: + break; + } + + BUG(); + return 0; +} + +static +int serialize_printf_data(struct ltt_chanbuf *buf, size_t *ppos, + char trace_size, enum ltt_type trace_type, + char c_size, enum ltt_type c_type, char *output, + ssize_t outlen, const char *outfmt) +{ + u64 value; + outlen = outlen < 0 ? 
0 : outlen; + + if (trace_type == LTT_TYPE_STRING) { + size_t len = ltt_relay_read_cstr(&buf->a, *ppos, output, + outlen); + *ppos += len + 1; + return len; + } + + value = unserialize_base_type(buf, ppos, trace_size, trace_type); + + if (c_size == 8) + return snprintf(output, outlen, outfmt, value); + else + return snprintf(output, outlen, outfmt, (unsigned int)value); +} + +/** + * ltt_serialize_printf - Format a string and place it in a buffer + * @buf: The ltt-relay buffer that store binary data + * @buf_offset: binary data's offset in @buf (should be masked to use as offset) + * @msg_size: return message's length + * @output: The buffer to place the result into + * @outlen: The size of the buffer, including the trailing '\0' + * @fmt: The format string to use + * + * The return value is the number of characters which would + * be generated for the given input, excluding the trailing + * '\0', as per ISO C99. If the return is greater than or equal to @outlen, + * the resulting string is truncated. + */ +size_t ltt_serialize_printf(struct ltt_chanbuf *buf, unsigned long buf_offset, + size_t *msg_size, char *output, size_t outlen, + const char *fmt) +{ + char trace_size = 0, c_size = 0; /* + * 0 (unset), 1, 2, 4, 8 bytes. 
+ */ + enum ltt_type trace_type = LTT_TYPE_NONE, c_type = LTT_TYPE_NONE; + unsigned long attributes = 0; + char outfmt[4] = "%"; + size_t outpos = 0; + size_t len; + size_t msgpos = buf_offset; + + for (; *fmt ; ++fmt) { + switch (*fmt) { + case '#': + /* tracetypes (#) */ + ++fmt; /* skip first '#' */ + if (*fmt == '#') { /* Escaped ## */ + if (outpos < outlen) + output[outpos] = '#'; + outpos++; + break; + } + attributes = 0; + fmt = parse_trace_type(fmt, &trace_size, &trace_type, + &attributes); + break; + case '%': + /* c types (%) */ + ++fmt; /* skip first '%' */ + if (*fmt == '%') { /* Escaped %% */ + if (outpos < outlen) + output[outpos] = '%'; + outpos++; + break; + } + fmt = parse_c_type(fmt, &c_size, &c_type, outfmt + 1); + /* + * Output c types if no trace types has been + * specified. + */ + if (!trace_size) + trace_size = c_size; + if (trace_type == LTT_TYPE_NONE) + trace_type = c_type; + if (c_type == LTT_TYPE_STRING) + trace_type = LTT_TYPE_STRING; + + /* perform trace printf */ + len = serialize_printf_data(buf, &msgpos, trace_size, + trace_type, c_size, c_type, + output + outpos, + outlen - outpos, outfmt); + outpos += len; + trace_size = 0; + c_size = 0; + trace_type = LTT_TYPE_NONE; + c_size = LTT_TYPE_NONE; + attributes = 0; + break; + default: + if (outpos < outlen) + output[outpos] = *fmt; + outpos++; + break; + } + } + if (msg_size) + *msg_size = (size_t)(msgpos - buf_offset); + /* + * Make sure we end output with terminating \0 when truncated. 
+ */ + if (outpos >= outlen + 1) + output[outlen] = '\0'; + return outpos; +} +EXPORT_SYMBOL_GPL(ltt_serialize_printf); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + +unsigned int ltt_fmt_largest_align(size_t align_drift, const char *fmt) +{ + char trace_size = 0, c_size = 0; + enum ltt_type trace_type = LTT_TYPE_NONE, c_type = LTT_TYPE_NONE; + unsigned long attributes = 0; + int largest_align = 1; + + for (; *fmt ; ++fmt) { + switch (*fmt) { + case '#': + /* tracetypes (#) */ + ++fmt; /* skip first '#' */ + if (*fmt == '#') /* Escaped ## */ + break; + attributes = 0; + fmt = parse_trace_type(fmt, &trace_size, &trace_type, + &attributes); + + largest_align = max_t(int, largest_align, trace_size); + if (largest_align >= ltt_get_alignment()) + goto exit; + break; + case '%': + /* c types (%) */ + ++fmt; /* skip first '%' */ + if (*fmt == '%') /* Escaped %% */ + break; + fmt = parse_c_type(fmt, &c_size, &c_type, NULL); + /* + * Output c types if no trace types has been + * specified. + */ + if (!trace_size) + trace_size = c_size; + if (trace_type == LTT_TYPE_NONE) + trace_type = c_type; + if (c_type == LTT_TYPE_STRING) + trace_type = LTT_TYPE_STRING; + + largest_align = max_t(int, largest_align, trace_size); + if (largest_align >= ltt_get_alignment()) + goto exit; + + trace_size = 0; + c_size = 0; + trace_type = LTT_TYPE_NONE; + c_size = LTT_TYPE_NONE; + break; + } + } + +exit: + largest_align = min_t(int, largest_align, ltt_get_alignment()); + return (largest_align - align_drift) & (largest_align - 1); +} +EXPORT_SYMBOL_GPL(ltt_fmt_largest_align); + +#endif + +/* + * Calculate data size + * Assume that the padding for alignment starts at a sizeof(void *) address. 
+ */ +static notrace +size_t ltt_get_data_size(struct ltt_serialize_closure *closure, + void *serialize_private, unsigned int stack_pos_ctx, + int *largest_align, const char *fmt, va_list *args) +{ + ltt_serialize_cb cb = closure->callbacks[0]; + closure->cb_idx = 0; + return (size_t)cb(NULL, 0, closure, serialize_private, stack_pos_ctx, + largest_align, fmt, args); +} + +static notrace +void ltt_write_event_data(struct ltt_chanbuf *buf, size_t buf_offset, + struct ltt_serialize_closure *closure, + void *serialize_private, unsigned int stack_pos_ctx, + int largest_align, const char *fmt, va_list *args) +{ + ltt_serialize_cb cb = closure->callbacks[0]; + closure->cb_idx = 0; + buf_offset += ltt_align(buf_offset, largest_align); + cb(buf, buf_offset, closure, serialize_private, stack_pos_ctx, NULL, + fmt, args); +} + + +notrace +void ltt_vtrace(const struct marker *mdata, void *probe_data, void *call_data, + const char *fmt, va_list *args) +{ + int largest_align, ret; + struct ltt_active_marker *pdata; + uint16_t eID; + size_t data_size, slot_size; + unsigned int chan_index; + struct ltt_chanbuf *buf; + struct ltt_chan *chan; + struct ltt_trace *trace, *dest_trace = NULL; + uint64_t tsc; + long buf_offset; + va_list args_copy; + struct ltt_serialize_closure closure; + struct ltt_probe_private_data *private_data = call_data; + void *serialize_private = NULL; + int cpu; + unsigned int rflags; + unsigned int stack_pos_ctx; + + /* + * This test is useful for quickly exiting static tracing when no trace + * is active. We expect to have an active trace when we get here. + */ + if (unlikely(ltt_traces.num_active_traces == 0)) + return; + + rcu_read_lock_sched_notrace(); + cpu = smp_processor_id(); + __get_cpu_var(ltt_nesting)++; + stack_pos_ctx = __get_cpu_var(tracer_stack_pos); + /* + * asm volatile and "memory" clobber prevent the compiler from moving + * instructions out of the ltt nesting count. 
This is required to ensure + * that probe side-effects which can cause recursion (e.g. unforeseen + * traps, divisions by 0, ...) are triggered within the incremented + * nesting count section. + */ + barrier(); + pdata = (struct ltt_active_marker *)probe_data; + eID = mdata->event_id; + chan_index = mdata->channel_id; + closure.callbacks = pdata->probe->callbacks; + + if (unlikely(private_data)) { + dest_trace = private_data->trace; + if (private_data->serializer) + closure.callbacks = &private_data->serializer; + serialize_private = private_data->serialize_private; + } + + va_copy(args_copy, *args); + /* + * Assumes event payload to start on largest_align alignment. + */ + largest_align = 1; /* must be non-zero for ltt_align */ + data_size = ltt_get_data_size(&closure, serialize_private, + stack_pos_ctx, &largest_align, + fmt, &args_copy); + largest_align = min_t(int, largest_align, sizeof(void *)); + va_end(args_copy); + + /* Iterate on each trace */ + list_for_each_entry_rcu(trace, <t_traces.head, list) { + /* + * Expect the filter to filter out events. If we get here, + * we went through tracepoint activation as a first step. + */ + if (unlikely(dest_trace && trace != dest_trace)) + continue; + if (unlikely(!trace->active)) + continue; + if (unlikely(!ltt_run_filter(trace, eID))) + continue; +#ifdef CONFIG_LTT_DEBUG_EVENT_SIZE + rflags = LTT_RFLAG_ID_SIZE; +#else + if (unlikely(eID >= LTT_FREE_EVENTS)) + rflags = LTT_RFLAG_ID; + else + rflags = 0; +#endif + /* + * Skip channels added after trace creation. 
+ */ + if (unlikely(chan_index >= trace->nr_channels)) + continue; + chan = &trace->channels[chan_index]; + if (!chan->active) + continue; + + /* reserve space : header and data */ + ret = ltt_reserve_slot(chan, trace, data_size, largest_align, + cpu, &buf, &slot_size, &buf_offset, + &tsc, &rflags); + if (unlikely(ret < 0)) + continue; /* buffer full */ + + va_copy(args_copy, *args); + /* Out-of-order write : header and data */ + buf_offset = ltt_write_event_header(&buf->a, &chan->a, + buf_offset, eID, data_size, + tsc, rflags); + ltt_write_event_data(buf, buf_offset, &closure, + serialize_private, stack_pos_ctx, + largest_align, fmt, &args_copy); + va_end(args_copy); + /* Out-of-order commit */ + ltt_commit_slot(buf, chan, buf_offset, data_size, slot_size); + } + /* + * asm volatile and "memory" clobber prevent the compiler from moving + * instructions out of the ltt nesting count. This is required to ensure + * that probe side-effects which can cause recursion (e.g. unforeseen + * traps, divisions by 0, ...) are triggered within the incremented + * nesting count section. + */ + barrier(); + __get_cpu_var(tracer_stack_pos) = stack_pos_ctx; + __get_cpu_var(ltt_nesting)--; + rcu_read_unlock_sched_notrace(); +} +EXPORT_SYMBOL_GPL(ltt_vtrace); + +notrace +void ltt_trace(const struct marker *mdata, void *probe_data, void *call_data, + const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + ltt_vtrace(mdata, probe_data, call_data, fmt, &args); + va_end(args); +} +EXPORT_SYMBOL_GPL(ltt_trace); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Serializer"); diff --git a/ltt-statedump.c b/ltt-statedump.c new file mode 100644 index 00000000..06ade69a --- /dev/null +++ b/ltt-statedump.c @@ -0,0 +1,441 @@ +/* + * Linux Trace Toolkit Kernel State Dump + * + * Copyright 2005 - + * Jean-Hugues Deschenes + * + * Changes: + * Eric Clement: Add listing of network IP interface + * 2006, 2007 Mathieu Desnoyers Fix kernel threads + * Various updates + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer.h" + +#ifdef CONFIG_GENERIC_HARDIRQS +#include +#endif + +#define NB_PROC_CHUNK 20 + +/* + * Protected by the trace lock. 
+ */ +static struct delayed_work cpu_work[NR_CPUS]; +static DECLARE_WAIT_QUEUE_HEAD(statedump_wq); +static atomic_t kernel_threads_to_run; + +static void empty_cb(void *call_data) +{ +} + +static DEFINE_MUTEX(statedump_cb_mutex); +static void (*ltt_dump_kprobes_table_cb)(void *call_data) = empty_cb; + +enum lttng_thread_type { + LTTNG_USER_THREAD = 0, + LTTNG_KERNEL_THREAD = 1, +}; + +enum lttng_execution_mode { + LTTNG_USER_MODE = 0, + LTTNG_SYSCALL = 1, + LTTNG_TRAP = 2, + LTTNG_IRQ = 3, + LTTNG_SOFTIRQ = 4, + LTTNG_MODE_UNKNOWN = 5, +}; + +enum lttng_execution_submode { + LTTNG_NONE = 0, + LTTNG_UNKNOWN = 1, +}; + +enum lttng_process_status { + LTTNG_UNNAMED = 0, + LTTNG_WAIT_FORK = 1, + LTTNG_WAIT_CPU = 2, + LTTNG_EXIT = 3, + LTTNG_ZOMBIE = 4, + LTTNG_WAIT = 5, + LTTNG_RUN = 6, + LTTNG_DEAD = 7, +}; + +#ifdef CONFIG_INET +static void ltt_enumerate_device(struct ltt_probe_private_data *call_data, + struct net_device *dev) +{ + struct in_device *in_dev; + struct in_ifaddr *ifa; + + if (dev->flags & IFF_UP) { + in_dev = in_dev_get(dev); + if (in_dev) { + for (ifa = in_dev->ifa_list; ifa != NULL; + ifa = ifa->ifa_next) + __trace_mark(0, netif_state, + network_ipv4_interface, + call_data, + "name %s address #n4u%lu up %d", + dev->name, + (unsigned long)ifa->ifa_address, + 0); + in_dev_put(in_dev); + } + } else + __trace_mark(0, netif_state, network_ip_interface, + call_data, "name %s address #n4u%lu up %d", + dev->name, 0UL, 0); +} + +static inline int +ltt_enumerate_network_ip_interface(struct ltt_probe_private_data *call_data) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + for_each_netdev(&init_net, dev) + ltt_enumerate_device(call_data, dev); + read_unlock(&dev_base_lock); + + return 0; +} +#else /* CONFIG_INET */ +static inline int +ltt_enumerate_network_ip_interface(struct ltt_probe_private_data *call_data) +{ + return 0; +} +#endif /* CONFIG_INET */ + + +static inline void +ltt_enumerate_task_fd(struct ltt_probe_private_data *call_data, + struct 
task_struct *t, char *tmp) +{ + struct fdtable *fdt; + struct file *filp; + unsigned int i; + const unsigned char *path; + + if (!t->files) + return; + + spin_lock(&t->files->file_lock); + fdt = files_fdtable(t->files); + for (i = 0; i < fdt->max_fds; i++) { + filp = fcheck_files(t->files, i); + if (!filp) + continue; + path = d_path(&filp->f_path, tmp, PAGE_SIZE); + /* Make sure we give at least some info */ + __trace_mark(0, fd_state, file_descriptor, call_data, + "filename %s pid %d fd %u", + (IS_ERR(path))?(filp->f_dentry->d_name.name):(path), + t->pid, i); + } + spin_unlock(&t->files->file_lock); +} + +static inline int +ltt_enumerate_file_descriptors(struct ltt_probe_private_data *call_data) +{ + struct task_struct *t = &init_task; + char *tmp = (char *)__get_free_page(GFP_KERNEL); + + /* Enumerate active file descriptors */ + do { + read_lock(&tasklist_lock); + if (t != &init_task) + atomic_dec(&t->usage); + t = next_task(t); + atomic_inc(&t->usage); + read_unlock(&tasklist_lock); + task_lock(t); + ltt_enumerate_task_fd(call_data, t, tmp); + task_unlock(t); + } while (t != &init_task); + free_page((unsigned long)tmp); + return 0; +} + +static inline void +ltt_enumerate_task_vm_maps(struct ltt_probe_private_data *call_data, + struct task_struct *t) +{ + struct mm_struct *mm; + struct vm_area_struct *map; + unsigned long ino; + + /* get_task_mm does a task_lock... 
*/ + mm = get_task_mm(t); + if (!mm) + return; + + map = mm->mmap; + if (map) { + down_read(&mm->mmap_sem); + while (map) { + if (map->vm_file) + ino = map->vm_file->f_dentry->d_inode->i_ino; + else + ino = 0; + __trace_mark(0, vm_state, vm_map, call_data, + "pid %d start %lu end %lu flags %lu " + "pgoff %lu inode %lu", + t->pid, map->vm_start, map->vm_end, + map->vm_flags, map->vm_pgoff << PAGE_SHIFT, + ino); + map = map->vm_next; + } + up_read(&mm->mmap_sem); + } + mmput(mm); +} + +static inline int +ltt_enumerate_vm_maps(struct ltt_probe_private_data *call_data) +{ + struct task_struct *t = &init_task; + + do { + read_lock(&tasklist_lock); + if (t != &init_task) + atomic_dec(&t->usage); + t = next_task(t); + atomic_inc(&t->usage); + read_unlock(&tasklist_lock); + ltt_enumerate_task_vm_maps(call_data, t); + } while (t != &init_task); + return 0; +} + +#ifdef CONFIG_GENERIC_HARDIRQS +static inline void list_interrupts(struct ltt_probe_private_data *call_data) +{ + unsigned int irq; + unsigned long flags = 0; + struct irq_desc *desc; + + /* needs irq_desc */ + for_each_irq_desc(irq, desc) { + struct irqaction *action; + const char *irq_chip_name = + desc->chip->name ? 
: "unnamed_irq_chip"; + + local_irq_save(flags); + raw_spin_lock(&desc->lock); + for (action = desc->action; action; action = action->next) + __trace_mark(0, irq_state, interrupt, call_data, + "name %s action %s irq_id %u", + irq_chip_name, action->name, irq); + raw_spin_unlock(&desc->lock); + local_irq_restore(flags); + } +} +#else +static inline void list_interrupts(struct ltt_probe_private_data *call_data) +{ +} +#endif + +static inline int +ltt_enumerate_process_states(struct ltt_probe_private_data *call_data) +{ + struct task_struct *t = &init_task; + struct task_struct *p = t; + enum lttng_process_status status; + enum lttng_thread_type type; + enum lttng_execution_mode mode; + enum lttng_execution_submode submode; + + do { + mode = LTTNG_MODE_UNKNOWN; + submode = LTTNG_UNKNOWN; + + read_lock(&tasklist_lock); + if (t != &init_task) { + atomic_dec(&t->usage); + t = next_thread(t); + } + if (t == p) { + p = next_task(t); + t = p; + } + atomic_inc(&t->usage); + read_unlock(&tasklist_lock); + + task_lock(t); + + if (t->exit_state == EXIT_ZOMBIE) + status = LTTNG_ZOMBIE; + else if (t->exit_state == EXIT_DEAD) + status = LTTNG_DEAD; + else if (t->state == TASK_RUNNING) { + /* Is this a forked child that has not run yet? */ + if (list_empty(&t->rt.run_list)) + status = LTTNG_WAIT_FORK; + else + /* + * All tasks are considered as wait_cpu; + * the viewer will sort out if the task was + * really running at this time. + */ + status = LTTNG_WAIT_CPU; + } else if (t->state & + (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { + /* Task is waiting for something to complete */ + status = LTTNG_WAIT; + } else + status = LTTNG_UNNAMED; + submode = LTTNG_NONE; + + /* + * Verification of t->mm is to filter out kernel threads; + * Viewer will further filter out if a user-space thread was + * in syscall mode or not. 
+ */ + if (t->mm) + type = LTTNG_USER_THREAD; + else + type = LTTNG_KERNEL_THREAD; + + __trace_mark(0, task_state, process_state, call_data, + "pid %d parent_pid %d name %s type %d mode %d " + "submode %d status %d tgid %d", + t->pid, t->parent->pid, t->comm, + type, mode, submode, status, t->tgid); + task_unlock(t); + } while (t != &init_task); + + return 0; +} + +void ltt_statedump_register_kprobes_dump(void (*callback)(void *call_data)) +{ + mutex_lock(&statedump_cb_mutex); + ltt_dump_kprobes_table_cb = callback; + mutex_unlock(&statedump_cb_mutex); +} +EXPORT_SYMBOL_GPL(ltt_statedump_register_kprobes_dump); + +void ltt_statedump_unregister_kprobes_dump(void (*callback)(void *call_data)) +{ + mutex_lock(&statedump_cb_mutex); + ltt_dump_kprobes_table_cb = empty_cb; + mutex_unlock(&statedump_cb_mutex); +} +EXPORT_SYMBOL_GPL(ltt_statedump_unregister_kprobes_dump); + +void ltt_statedump_work_func(struct work_struct *work) +{ + if (atomic_dec_and_test(&kernel_threads_to_run)) + /* If we are the last thread, wake up do_ltt_statedump */ + wake_up(&statedump_wq); +} + +static int do_ltt_statedump(struct ltt_probe_private_data *call_data) +{ + int cpu; + struct module *cb_owner; + + printk(KERN_DEBUG "LTT state dump thread start\n"); + ltt_enumerate_process_states(call_data); + ltt_enumerate_file_descriptors(call_data); + list_modules(call_data); + ltt_enumerate_vm_maps(call_data); + list_interrupts(call_data); + ltt_enumerate_network_ip_interface(call_data); + ltt_dump_swap_files(call_data); + ltt_dump_sys_call_table(call_data); + ltt_dump_softirq_vec(call_data); + ltt_dump_idt_table(call_data); + + mutex_lock(&statedump_cb_mutex); + + cb_owner = __module_address((unsigned long)ltt_dump_kprobes_table_cb); + __module_get(cb_owner); + ltt_dump_kprobes_table_cb(call_data); + module_put(cb_owner); + + mutex_unlock(&statedump_cb_mutex); + + /* + * Fire off a work queue on each CPU. 
Their sole purpose in life + * is to guarantee that each CPU has been in a state where is was in + * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). + */ + get_online_cpus(); + atomic_set(&kernel_threads_to_run, num_online_cpus()); + for_each_online_cpu(cpu) { + INIT_DELAYED_WORK(&cpu_work[cpu], ltt_statedump_work_func); + schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); + } + /* Wait for all threads to run */ + __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) != 0)); + put_online_cpus(); + /* Our work is done */ + printk(KERN_DEBUG "LTT state dump end\n"); + __trace_mark(0, global_state, statedump_end, + call_data, MARK_NOARGS); + return 0; +} + +/* + * Called with trace lock held. + */ +int ltt_statedump_start(struct ltt_trace *trace) +{ + struct ltt_probe_private_data call_data; + printk(KERN_DEBUG "LTT state dump begin\n"); + + call_data.trace = trace; + call_data.serializer = NULL; + return do_ltt_statedump(&call_data); +} + +static int __init statedump_init(void) +{ + int ret; + printk(KERN_DEBUG "LTT : State dump init\n"); + ret = ltt_module_register(LTT_FUNCTION_STATEDUMP, + ltt_statedump_start, THIS_MODULE); + return ret; +} + +static void __exit statedump_exit(void) +{ + printk(KERN_DEBUG "LTT : State dump exit\n"); + ltt_module_unregister(LTT_FUNCTION_STATEDUMP); +} + +module_init(statedump_init) +module_exit(statedump_exit) + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Jean-Hugues Deschenes"); +MODULE_DESCRIPTION("Linux Trace Toolkit Statedump"); diff --git a/ltt-trace-control.c b/ltt-trace-control.c new file mode 100644 index 00000000..9d6d239b --- /dev/null +++ b/ltt-trace-control.c @@ -0,0 +1,1426 @@ +/* + * LTT trace control module over debugfs. + * + * Copyright 2008 - Zhaolei + * + * Copyright 2009 - Gui Jianfeng + * Make mark-control work in debugfs + * + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +/* + * Todo: + * Impl read operations for control file to read attributes + * Create a README file in ltt control dir, for display help info + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer.h" + +#define LTT_CONTROL_DIR "control" +#define MARKERS_CONTROL_DIR "markers" +#define LTT_SETUP_TRACE_FILE "setup_trace" +#define LTT_DESTROY_TRACE_FILE "destroy_trace" + +#define LTT_WRITE_MAXLEN (128) + +struct dentry *ltt_control_dir, *ltt_setup_trace_file, *ltt_destroy_trace_file, + *markers_control_dir; + +/* + * the traces_lock nests inside control_lock. + * control_lock protects the consistency of directories presented in ltt + * directory. + */ +static DEFINE_MUTEX(control_lock); + +/* + * big note about locking for marker control files : + * If a marker control file is added/removed manually racing with module + * load/unload, there may be warning messages appearing, but those two + * operations should be able to execute concurrently without any lock + * synchronizing their operation one wrt another. + * Locking the marker mutex, module mutex and also keeping a mutex here + * from mkdir/rmdir _and_ from the notifier called from module load/unload makes + * life miserable and just asks for deadlocks. + */ + +/* + * lookup a file/dir in parent dir. + * only designed to work well for debugfs. 
+ * (although it maybe ok for other fs) + * + * return: + * file/dir's dentry on success + * NULL on failure + */ +static struct dentry *dir_lookup(struct dentry *parent, const char *name) +{ + struct qstr q; + struct dentry *d; + + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + + d = d_lookup(parent, &q); + if (d) + dput(d); + + return d; +} + + +static ssize_t alloc_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = 0; + int buf_size; + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *cmd = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", cmd) != 1) { + err = -EPERM; + goto err_get_cmd; + } + + if ((cmd[0] != 'Y' && cmd[0] != 'y' && cmd[0] != '1') || cmd[1]) { + err = -EPERM; + goto err_bad_cmd; + } + + err = ltt_trace_alloc(file->f_dentry->d_parent->d_name.name); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "alloc_write: ltt_trace_alloc failed: %d\n", + err); + goto err_alloc_trace; + } + + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return count; + +err_alloc_trace: +err_bad_cmd: +err_get_cmd: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return err; +} + +static const struct file_operations ltt_alloc_operations = { + .write = alloc_write, +}; + + +static ssize_t enabled_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = 0; + int buf_size; + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *cmd = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", cmd) != 1) { + err = -EPERM; + goto err_get_cmd; + } + + if (cmd[1]) 
{ + err = -EPERM; + goto err_bad_cmd; + } + + switch (cmd[0]) { + case 'Y': + case 'y': + case '1': + err = ltt_trace_start(file->f_dentry->d_parent->d_name.name); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR + "enabled_write: ltt_trace_start failed: %d\n", + err); + err = -EPERM; + goto err_start_trace; + } + break; + case 'N': + case 'n': + case '0': + err = ltt_trace_stop(file->f_dentry->d_parent->d_name.name); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR + "enabled_write: ltt_trace_stop failed: %d\n", + err); + err = -EPERM; + goto err_stop_trace; + } + break; + default: + err = -EPERM; + goto err_bad_cmd; + } + + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return count; + +err_stop_trace: +err_start_trace: +err_bad_cmd: +err_get_cmd: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return err; +} + +static const struct file_operations ltt_enabled_operations = { + .write = enabled_write, +}; + + +static ssize_t trans_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *trans_name = (char *)__get_free_page(GFP_KERNEL); + int err = 0; + int buf_size; + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", trans_name) != 1) { + err = -EPERM; + goto err_get_transname; + } + + err = ltt_trace_set_type(file->f_dentry->d_parent->d_name.name, + trans_name); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "trans_write: ltt_trace_set_type failed: %d\n", + err); + goto err_set_trans; + } + + free_page((unsigned long)buf); + free_page((unsigned long)trans_name); + return count; + +err_set_trans: +err_get_transname: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)trans_name); + return err; +} + +static const struct file_operations ltt_trans_operations = { + .write 
= trans_write, +}; + + +static ssize_t channel_subbuf_num_write(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + int err = 0; + int buf_size; + unsigned int num; + const char *channel_name; + const char *trace_name; + char *buf = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%u", &num) != 1) { + err = -EPERM; + goto err_get_number; + } + + channel_name = file->f_dentry->d_parent->d_name.name; + trace_name = file->f_dentry->d_parent->d_parent->d_parent->d_name.name; + + err = ltt_trace_set_channel_subbufcount(trace_name, channel_name, num); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "channel_subbuf_num_write: " + "ltt_trace_set_channel_subbufcount failed: %d\n", err); + goto err_set_subbufcount; + } + + free_page((unsigned long)buf); + return count; + +err_set_subbufcount: +err_get_number: +err_copy_from_user: + free_page((unsigned long)buf); + return err; +} + +static const struct file_operations ltt_channel_subbuf_num_operations = { + .write = channel_subbuf_num_write, +}; + + +static +ssize_t channel_subbuf_size_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = 0; + int buf_size; + unsigned int num; + const char *channel_name; + const char *trace_name; + char *buf = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%u", &num) != 1) { + err = -EPERM; + goto err_get_number; + } + + channel_name = file->f_dentry->d_parent->d_name.name; + trace_name = file->f_dentry->d_parent->d_parent->d_parent->d_name.name; + + err = ltt_trace_set_channel_subbufsize(trace_name, channel_name, num); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR 
"channel_subbuf_size_write: " + "ltt_trace_set_channel_subbufsize failed: %d\n", err); + goto err_set_subbufsize; + } + + free_page((unsigned long)buf); + return count; + +err_set_subbufsize: +err_get_number: +err_copy_from_user: + free_page((unsigned long)buf); + return err; +} + +static const struct file_operations ltt_channel_subbuf_size_operations = { + .write = channel_subbuf_size_write, +}; + +static +ssize_t channel_switch_timer_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = 0; + int buf_size; + unsigned long num; + const char *channel_name; + const char *trace_name; + char *buf = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%lu", &num) != 1) { + err = -EPERM; + goto err_get_number; + } + + channel_name = file->f_dentry->d_parent->d_name.name; + trace_name = file->f_dentry->d_parent->d_parent->d_parent->d_name.name; + + /* Convert from ms to jiffies */ + num = msecs_to_jiffies(num); + + err = ltt_trace_set_channel_switch_timer(trace_name, channel_name, num); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "channel_switch_timer_write: " + "ltt_trace_set_channel_switch_timer failed: %d\n", err); + goto err_set_switch_timer; + } + + free_page((unsigned long)buf); + return count; + +err_set_switch_timer: +err_get_number: +err_copy_from_user: + free_page((unsigned long)buf); + return err; +} + +static struct file_operations ltt_channel_switch_timer_operations = { + .write = channel_switch_timer_write, +}; + +static +ssize_t channel_overwrite_write(struct file *file, + const char __user *user_buf, size_t count, + loff_t *ppos) +{ + int err = 0; + int buf_size; + const char *channel_name; + const char *trace_name; + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *cmd = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, 
count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", cmd) != 1) { + err = -EPERM; + goto err_get_cmd; + } + + if (cmd[1]) { + err = -EPERM; + goto err_bad_cmd; + } + + channel_name = file->f_dentry->d_parent->d_name.name; + trace_name = file->f_dentry->d_parent->d_parent->d_parent->d_name.name; + + switch (cmd[0]) { + case 'Y': + case 'y': + case '1': + err = ltt_trace_set_channel_overwrite(trace_name, channel_name, + 1); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "channel_overwrite_write: " + "ltt_trace_set_channel_overwrite failed: %d\n", + err); + goto err_set_subbufsize; + } + break; + case 'N': + case 'n': + case '0': + err = ltt_trace_set_channel_overwrite(trace_name, channel_name, + 0); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "channel_overwrite_write: " + "ltt_trace_set_channel_overwrite failed: %d\n", + err); + goto err_set_subbufsize; + } + break; + default: + err = -EPERM; + goto err_bad_cmd; + } + + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return count; + +err_set_subbufsize: +err_bad_cmd: +err_get_cmd: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return err; +} + +static const struct file_operations ltt_channel_overwrite_operations = { + .write = channel_overwrite_write, +}; + + +static +ssize_t channel_enable_write(struct file *file, + const char __user *user_buf, size_t count, + loff_t *ppos) +{ + int err = 0; + int buf_size; + const char *channel_name; + const char *trace_name; + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *cmd = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", cmd) != 1) { + err = -EPERM; + goto err_get_cmd; + } + + if (cmd[1]) { + err = -EPERM; + goto err_bad_cmd; + } + + 
channel_name = file->f_dentry->d_parent->d_name.name; + trace_name = file->f_dentry->d_parent->d_parent->d_parent->d_name.name; + + switch (cmd[0]) { + case 'Y': + case 'y': + case '1': + err = ltt_trace_set_channel_enable(trace_name, channel_name, + 1); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "channel_enable_write: " + "ltt_trace_set_channel_enable failed: %d\n", + err); + goto err_set_subbufsize; + } + break; + case 'N': + case 'n': + case '0': + err = ltt_trace_set_channel_enable(trace_name, channel_name, + 0); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "channel_enable_write: " + "ltt_trace_set_channel_enable failed: %d\n", + err); + goto err_set_subbufsize; + } + break; + default: + err = -EPERM; + goto err_bad_cmd; + } + + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return count; + +err_set_subbufsize: +err_bad_cmd: +err_get_cmd: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)cmd); + return err; +} + +static const struct file_operations ltt_channel_enable_operations = { + .write = channel_enable_write, +}; + + +static int _create_trace_control_dir(const char *trace_name, + struct ltt_trace *trace) +{ + int err; + struct dentry *trace_root, *channel_root; + struct dentry *tmp_den; + int i; + + /* debugfs/control/trace_name */ + trace_root = debugfs_create_dir(trace_name, ltt_control_dir); + if (IS_ERR(trace_root) || !trace_root) { + printk(KERN_ERR "_create_trace_control_dir: " + "create control root dir of %s failed\n", trace_name); + err = -ENOMEM; + goto err_create_trace_root; + } + + /* debugfs/control/trace_name/alloc */ + tmp_den = debugfs_create_file("alloc", S_IWUSR, trace_root, NULL, + <t_alloc_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create file of alloc failed\n"); + err = -ENOMEM; + goto err_create_subdir; + } + + /* debugfs/control/trace_name/trans */ + tmp_den = debugfs_create_file("trans", S_IWUSR, trace_root, NULL, + 
&ltt_trans_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create file of trans failed\n"); + err = -ENOMEM; + goto err_create_subdir; + } + + /* debugfs/control/trace_name/enabled */ + tmp_den = debugfs_create_file("enabled", S_IWUSR, trace_root, NULL, + &ltt_enabled_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create file of enabled failed\n"); + err = -ENOMEM; + goto err_create_subdir; + } + + /* debugfs/control/trace_name/channel/ */ + channel_root = debugfs_create_dir("channel", trace_root); + if (IS_ERR(channel_root) || !channel_root) { + printk(KERN_ERR "_create_trace_control_dir: " + "create dir of channel failed\n"); + err = -ENOMEM; + goto err_create_subdir; + } + + /* + * Create dir and files in debugfs/ltt/control/trace_name/channel/ + * Following things(without <>) will be created: + * `-- <control> + * `-- <trace_name> + * `-- <channel> + * |-- <channel_name> + * | |-- enable + * | |-- overwrite + * | |-- subbuf_num + * | |-- subbuf_size + * | `-- switch_timer + * `-- ... 
+ */ + + for (i = 0; i < trace->nr_channels; i++) { + struct dentry *channel_den; + struct ltt_chan *chan; + + chan = &trace->channels[i]; + if (!chan->active) + continue; + channel_den = debugfs_create_dir(chan->a.filename, + channel_root); + if (IS_ERR(channel_den) || !channel_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create channel dir of %s failed\n", + chan->a.filename); + err = -ENOMEM; + goto err_create_subdir; + } + + tmp_den = debugfs_create_file("subbuf_num", S_IWUSR, + channel_den, NULL, + &ltt_channel_subbuf_num_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create subbuf_num in %s failed\n", + chan->a.filename); + err = -ENOMEM; + goto err_create_subdir; + } + + tmp_den = debugfs_create_file("subbuf_size", S_IWUSR, + channel_den, NULL, + &ltt_channel_subbuf_size_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create subbuf_size in %s failed\n", + chan->a.filename); + err = -ENOMEM; + goto err_create_subdir; + } + + tmp_den = debugfs_create_file("enable", S_IWUSR, channel_den, + NULL, + &ltt_channel_enable_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create enable in %s failed\n", + chan->a.filename); + err = -ENOMEM; + goto err_create_subdir; + } + + tmp_den = debugfs_create_file("overwrite", S_IWUSR, channel_den, + NULL, + &ltt_channel_overwrite_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create overwrite in %s failed\n", + chan->a.filename); + err = -ENOMEM; + goto err_create_subdir; + } + + tmp_den = debugfs_create_file("switch_timer", S_IWUSR, + channel_den, NULL, + &ltt_channel_switch_timer_operations); + if (IS_ERR(tmp_den) || !tmp_den) { + printk(KERN_ERR "_create_trace_control_dir: " + "create switch_timer in %s failed\n", + chan->a.filename); + err = -ENOMEM; + goto err_create_subdir; + } + } + + return 0; + 
+err_create_subdir: + debugfs_remove_recursive(trace_root); +err_create_trace_root: + return err; +} + +static +ssize_t setup_trace_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = 0; + int buf_size; + struct ltt_trace *trace; + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *trace_name = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", trace_name) != 1) { + err = -EPERM; + goto err_get_tracename; + } + + mutex_lock(&control_lock); + ltt_lock_traces(); + + err = _ltt_trace_setup(trace_name); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR + "setup_trace_write: ltt_trace_setup failed: %d\n", err); + goto err_setup_trace; + } + trace = _ltt_trace_find_setup(trace_name); + BUG_ON(!trace); + err = _create_trace_control_dir(trace_name, trace); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR "setup_trace_write: " + "_create_trace_control_dir failed: %d\n", err); + goto err_create_trace_control_dir; + } + + ltt_unlock_traces(); + mutex_unlock(&control_lock); + + free_page((unsigned long)buf); + free_page((unsigned long)trace_name); + return count; + +err_create_trace_control_dir: + ltt_trace_destroy(trace_name); +err_setup_trace: + ltt_unlock_traces(); + mutex_unlock(&control_lock); +err_get_tracename: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)trace_name); + return err; +} + +static const struct file_operations ltt_setup_trace_operations = { + .write = setup_trace_write, +}; + +static +ssize_t destroy_trace_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct dentry *trace_den; + int buf_size; + int err = 0; + char *buf = (char *)__get_free_page(GFP_KERNEL); + char *trace_name = (char *)__get_free_page(GFP_KERNEL); + + buf_size = min_t(size_t, count, PAGE_SIZE - 1); + 
err = copy_from_user(buf, user_buf, buf_size); + if (err) + goto err_copy_from_user; + buf[buf_size] = 0; + + if (sscanf(buf, "%s", trace_name) != 1) { + err = -EPERM; + goto err_get_tracename; + } + + mutex_lock(&control_lock); + + err = ltt_trace_destroy(trace_name); + if (IS_ERR_VALUE(err)) { + printk(KERN_ERR + "destroy_trace_write: ltt_trace_destroy failed: %d\n", + err); + err = -EPERM; + goto err_destroy_trace; + } + + trace_den = dir_lookup(ltt_control_dir, trace_name); + if (!trace_den) { + printk(KERN_ERR + "destroy_trace_write: lookup for %s's dentry failed\n", + trace_name); + err = -ENOENT; + goto err_get_dentry; + } + + debugfs_remove_recursive(trace_den); + + mutex_unlock(&control_lock); + + free_page((unsigned long)buf); + free_page((unsigned long)trace_name); + return count; + +err_get_dentry: +err_destroy_trace: + mutex_unlock(&control_lock); +err_get_tracename: +err_copy_from_user: + free_page((unsigned long)buf); + free_page((unsigned long)trace_name); + return err; +} + +static const struct file_operations ltt_destroy_trace_operations = { + .write = destroy_trace_write, +}; + +static void init_marker_dir(struct dentry *dentry, + const struct inode_operations *opt) +{ + dentry->d_inode->i_op = opt; +} + +static +ssize_t marker_enable_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf; + const char *channel, *marker; + int len, enabled, present; + + marker = filp->f_dentry->d_parent->d_name.name; + channel = filp->f_dentry->d_parent->d_parent->d_name.name; + + len = 0; + buf = (char *)__get_free_page(GFP_KERNEL); + + /* + * Note: we cannot take the marker lock to make these two checks + * atomic, because the marker mutex nests inside the module mutex, taken + * inside the marker present check. 
+ */ + enabled = is_marker_enabled(channel, marker); + present = is_marker_present(channel, marker); + + if (enabled && present) + len = snprintf(buf, PAGE_SIZE, "%d\n", 1); + else if (enabled && !present) + len = snprintf(buf, PAGE_SIZE, "%d\n", 2); + else + len = snprintf(buf, PAGE_SIZE, "%d\n", 0); + + + if (len >= PAGE_SIZE) { + len = PAGE_SIZE - 1; /* output was truncated */ + buf[PAGE_SIZE - 1] = '\0'; /* last valid byte of the page */ + } + len = simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + free_page((unsigned long)buf); + + return len; +} + +static +ssize_t marker_enable_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf = (char *)__get_free_page(GFP_KERNEL); + int buf_size; + ssize_t ret = 0; + const char *channel, *marker; + + marker = filp->f_dentry->d_parent->d_name.name; + channel = filp->f_dentry->d_parent->d_parent->d_name.name; + + buf_size = min_t(size_t, cnt, PAGE_SIZE - 1); + ret = copy_from_user(buf, ubuf, buf_size) ? -EFAULT : 0; + if (ret) /* partial copy: report -EFAULT, not a byte count */ + goto end; + + buf[buf_size] = 0; + + switch (buf[0]) { + case 'Y': + case 'y': + case '1': + ret = ltt_marker_connect(channel, marker, "default"); + if (ret) + goto end; + break; + case 'N': + case 'n': + case '0': + ret = ltt_marker_disconnect(channel, marker, "default"); + if (ret) + goto end; + break; + default: + ret = -EPERM; + goto end; + } + ret = cnt; +end: + free_page((unsigned long)buf); + return ret; +} + +static const struct file_operations enable_fops = { + .read = marker_enable_read, + .write = marker_enable_write, +}; + +/* + * In practice, the output size should never be larger than one page. If it + * ever happens, the output will simply be truncated. 
+ */ +static +ssize_t marker_info_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf; + const char *channel, *marker; + int len; + struct marker_iter iter; + + marker = filp->f_dentry->d_parent->d_name.name; + channel = filp->f_dentry->d_parent->d_parent->d_name.name; + + len = 0; + buf = (char *)__get_free_page(GFP_KERNEL); + + if (is_marker_enabled(channel, marker) && + !is_marker_present(channel, marker)) { + len += snprintf(buf + len, PAGE_SIZE - len, + "Marker Pre-enabled\n"); + goto out; + } + + marker_iter_reset(&iter); + marker_iter_start(&iter); + for (; iter.marker != NULL; marker_iter_next(&iter)) { + if (!strcmp(iter.marker->channel, channel) && + !strcmp(iter.marker->name, marker)) + len += snprintf(buf + len, PAGE_SIZE - len, + "Location: %s\n" + "format: \"%s\"\nstate: %d\n" + "event_id: %hu\n" + "call: 0x%p\n" + "probe %s : 0x%p\n\n", +#ifdef CONFIG_MODULES + iter.module ? iter.module->name : +#endif + "Core Kernel", + iter.marker->format, + _imv_read(iter.marker->state), + iter.marker->event_id, + iter.marker->call, + iter.marker->ptype ? + "multi" : "single", iter.marker->ptype ? 
+ (void *)iter.marker->multi : + (void *)iter.marker->single.func); + if (len >= PAGE_SIZE) + break; + } + marker_iter_stop(&iter); + +out: + if (len >= PAGE_SIZE) { + len = PAGE_SIZE - 1; /* output was truncated */ + buf[PAGE_SIZE - 1] = '\0'; /* last valid byte of the page */ + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + free_page((unsigned long)buf); + + return len; +} + +static const struct file_operations info_fops = { + .read = marker_info_read, +}; + +static int marker_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct dentry *marker_d, *enable_d, *info_d, *channel_d; + int ret; + + ret = 0; + channel_d = (struct dentry *)dir->i_private; + mutex_unlock(&dir->i_mutex); + + marker_d = debugfs_create_dir(dentry->d_name.name, + channel_d); + if (IS_ERR(marker_d)) { + ret = PTR_ERR(marker_d); + goto out; + } + + enable_d = debugfs_create_file("enable", 0644, marker_d, + NULL, &enable_fops); + if (IS_ERR(enable_d) || !enable_d) { + printk(KERN_ERR + "%s: create file of %s failed\n", + __func__, "enable"); + ret = -ENOMEM; + goto remove_marker_dir; + } + + info_d = debugfs_create_file("info", 0644, marker_d, + NULL, &info_fops); + if (IS_ERR(info_d) || !info_d) { + printk(KERN_ERR + "%s: create file of %s failed\n", + __func__, "info"); + ret = -ENOMEM; + goto remove_enable_dir; + } + + goto out; + +remove_enable_dir: + debugfs_remove(enable_d); +remove_marker_dir: + debugfs_remove(marker_d); +out: + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + return ret; +} + +static int marker_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct dentry *marker_d, *channel_d; + const char *channel, *name; + int ret, enabled, present; + + ret = 0; + + channel_d = (struct dentry *)dir->i_private; + channel = channel_d->d_name.name; + + marker_d = dir_lookup(channel_d, dentry->d_name.name); + + if (!marker_d) { + ret = -ENOENT; + goto out; + } + + name = marker_d->d_name.name; + + enabled = is_marker_enabled(channel, name); + present = is_marker_present(channel, name); + + if (present || 
(!present && enabled)) { + ret = -EPERM; + goto out; + } + + mutex_unlock(&dir->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); + debugfs_remove_recursive(marker_d); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + mutex_lock(&dentry->d_inode->i_mutex); +out: + return ret; +} + +const struct inode_operations channel_dir_opt = { + .lookup = simple_lookup, + .mkdir = marker_mkdir, + .rmdir = marker_rmdir, +}; + +static int channel_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct dentry *channel_d; + int ret; + + ret = 0; + mutex_unlock(&dir->i_mutex); + + channel_d = debugfs_create_dir(dentry->d_name.name, + markers_control_dir); + if (IS_ERR(channel_d)) { + ret = PTR_ERR(channel_d); + goto out; + } + + channel_d->d_inode->i_private = (void *)channel_d; + init_marker_dir(channel_d, &channel_dir_opt); +out: + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + return ret; +} + +static int channel_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct dentry *channel_d; + int ret; + + ret = 0; + + channel_d = dir_lookup(markers_control_dir, dentry->d_name.name); + if (!channel_d) { + ret = -ENOENT; + goto out; + } + + if (list_empty(&channel_d->d_subdirs)) { + mutex_unlock(&dir->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); + debugfs_remove(channel_d); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + mutex_lock(&dentry->d_inode->i_mutex); + } else + ret = -EPERM; + +out: + return ret; +} + +const struct inode_operations root_dir_opt = { + .lookup = simple_lookup, + .mkdir = channel_mkdir, + .rmdir = channel_rmdir +}; + +static int build_marker_file(struct marker *marker) +{ + struct dentry *channel_d, *marker_d, *enable_d, *info_d; + int err; + + channel_d = dir_lookup(markers_control_dir, marker->channel); + if (!channel_d) { + channel_d = debugfs_create_dir(marker->channel, + markers_control_dir); + if (IS_ERR(channel_d) || !channel_d) { + printk(KERN_ERR + "%s: build channel dir of %s failed\n", + __func__, 
marker->channel); + err = -ENOMEM; + goto err_build_fail; + } + channel_d->d_inode->i_private = (void *)channel_d; + init_marker_dir(channel_d, &channel_dir_opt); + } + + marker_d = dir_lookup(channel_d, marker->name); + if (!marker_d) { + marker_d = debugfs_create_dir(marker->name, channel_d); + if (IS_ERR(marker_d) || !marker_d) { + printk(KERN_ERR + "%s: marker dir of %s failed\n", + __func__, marker->name); + err = -ENOMEM; + goto err_build_fail; + } + } + + enable_d = dir_lookup(marker_d, "enable"); + if (!enable_d) { + enable_d = debugfs_create_file("enable", 0644, marker_d, + NULL, &enable_fops); + if (IS_ERR(enable_d) || !enable_d) { + printk(KERN_ERR + "%s: create file of %s failed\n", + __func__, "enable"); + err = -ENOMEM; + goto err_build_fail; + } + } + + info_d = dir_lookup(marker_d, "info"); + if (!info_d) { + info_d = debugfs_create_file("info", 0444, marker_d, + NULL, &info_fops); + if (IS_ERR(info_d) || !info_d) { + printk(KERN_ERR + "%s: create file of %s failed\n", + __func__, "info"); + err = -ENOMEM; + goto err_build_fail; + } + } + + return 0; + +err_build_fail: + return err; +} + +static int build_marker_control_files(void) +{ + struct marker_iter iter; + int err; + + err = 0; + if (!markers_control_dir) + return -EEXIST; + + marker_iter_reset(&iter); + marker_iter_start(&iter); + for (; iter.marker != NULL; marker_iter_next(&iter)) { + err = build_marker_file(iter.marker); + if (err) + goto out; + } +out: + marker_iter_stop(&iter); /* always pair with marker_iter_start() */ + + return err; +} + +#ifdef CONFIG_MODULES +static int remove_marker_control_dir(struct module *mod, struct marker *marker) +{ + struct dentry *channel_d, *marker_d; + const char *channel, *name; + int count; + struct marker_iter iter; + + count = 0; + + channel_d = dir_lookup(markers_control_dir, marker->channel); + if (!channel_d) + return -ENOENT; + channel = channel_d->d_name.name; + + marker_d = dir_lookup(channel_d, marker->name); + if (!marker_d) + return -ENOENT; + name = marker_d->d_name.name; + + 
marker_iter_reset(&iter); + marker_iter_start(&iter); + for (; iter.marker != NULL; marker_iter_next(&iter)) { + if (!strcmp(iter.marker->channel, channel) && + !strcmp(iter.marker->name, name) && mod != iter.module) + count++; + } + + if (count > 0) + goto end; + + debugfs_remove_recursive(marker_d); + if (list_empty(&channel_d->d_subdirs)) + debugfs_remove(channel_d); + +end: + marker_iter_stop(&iter); + return 0; +} + +static void cleanup_control_dir(struct module *mod, struct marker *begin, + struct marker *end) +{ + struct marker *iter; + + if (!markers_control_dir) + return; + + for (iter = begin; iter < end; iter++) + remove_marker_control_dir(mod, iter); + + return; +} + +static void build_control_dir(struct module *mod, struct marker *begin, + struct marker *end) +{ + struct marker *iter; + int err; + + err = 0; + if (!markers_control_dir) + return; + + for (iter = begin; iter < end; iter++) { + err = build_marker_file(iter); + if (err) + goto err_build_fail; + } + + return; +err_build_fail: + cleanup_control_dir(mod, begin, end); +} + +static int module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + build_control_dir(mod, mod->markers, + mod->markers + mod->num_markers); + break; + case MODULE_STATE_GOING: + cleanup_control_dir(mod, mod->markers, + mod->markers + mod->num_markers); + break; + } + return NOTIFY_DONE; +} +#else +static inline int module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif + +static struct notifier_block module_nb = { + .notifier_call = module_notify, +}; + +static int __init ltt_trace_control_init(void) +{ + int err = 0; + struct dentry *ltt_root_dentry; + + ltt_root_dentry = get_ltt_root(); + if (!ltt_root_dentry) { + err = -ENOENT; + goto err_no_root; + } + + ltt_control_dir = debugfs_create_dir(LTT_CONTROL_DIR, ltt_root_dentry); + if (IS_ERR(ltt_control_dir) || !ltt_control_dir) 
{ + printk(KERN_ERR + "ltt_channel_control_init: create dir of %s failed\n", + LTT_CONTROL_DIR); + err = -ENOMEM; + goto err_create_control_dir; + } + + ltt_setup_trace_file = debugfs_create_file(LTT_SETUP_TRACE_FILE, + S_IWUSR, ltt_root_dentry, + NULL, + &ltt_setup_trace_operations); + if (IS_ERR(ltt_setup_trace_file) || !ltt_setup_trace_file) { + printk(KERN_ERR + "ltt_channel_control_init: create file of %s failed\n", + LTT_SETUP_TRACE_FILE); + err = -ENOMEM; + goto err_create_setup_trace_file; + } + + ltt_destroy_trace_file = debugfs_create_file(LTT_DESTROY_TRACE_FILE, + S_IWUSR, ltt_root_dentry, + NULL, + &ltt_destroy_trace_operations); + if (IS_ERR(ltt_destroy_trace_file) || !ltt_destroy_trace_file) { + printk(KERN_ERR + "ltt_channel_control_init: create file of %s failed\n", + LTT_DESTROY_TRACE_FILE); + err = -ENOMEM; + goto err_create_destroy_trace_file; + } + + markers_control_dir = debugfs_create_dir(MARKERS_CONTROL_DIR, + ltt_root_dentry); + if (IS_ERR(markers_control_dir) || !markers_control_dir) { + printk(KERN_ERR + "ltt_channel_control_init: create dir of %s failed\n", + MARKERS_CONTROL_DIR); + err = -ENOMEM; + goto err_create_marker_control_dir; + } + + init_marker_dir(markers_control_dir, &root_dir_opt); + err = build_marker_control_files(); /* propagate the failure */ + if (err) + goto err_build_fail; + err = register_module_notifier(&module_nb); + if (!err) + return 0; + +err_build_fail: + debugfs_remove_recursive(markers_control_dir); + markers_control_dir = NULL; +err_create_marker_control_dir: + debugfs_remove(ltt_destroy_trace_file); +err_create_destroy_trace_file: + debugfs_remove(ltt_setup_trace_file); +err_create_setup_trace_file: + debugfs_remove(ltt_control_dir); +err_create_control_dir: +err_no_root: + return err; +} + +static void __exit ltt_trace_control_exit(void) +{ + struct dentry *trace_dir; + + /* destroy all traces */ + list_for_each_entry(trace_dir, &ltt_control_dir->d_subdirs, + d_u.d_child) { + ltt_trace_stop(trace_dir->d_name.name); + ltt_trace_destroy(trace_dir->d_name.name); + 
} + + /* clean dirs in debugfs */ + debugfs_remove(ltt_setup_trace_file); + debugfs_remove(ltt_destroy_trace_file); + debugfs_remove_recursive(ltt_control_dir); + debugfs_remove_recursive(markers_control_dir); + unregister_module_notifier(&module_nb); + put_ltt_root(); +} + +module_init(ltt_trace_control_init); +module_exit(ltt_trace_control_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Zhao Lei "); +MODULE_DESCRIPTION("Linux Trace Toolkit Trace Controller"); diff --git a/ltt-tracer-core.h b/ltt-tracer-core.h new file mode 100644 index 00000000..1ac8c5b3 --- /dev/null +++ b/ltt-tracer-core.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005,2006 Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * This contains the core definitions for the Linux Trace Toolkit. + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#ifndef LTT_TRACER_CORE_H +#define LTT_TRACER_CORE_H + +#include +#include +#include + +/* ltt's root dir in debugfs */ +#define LTT_ROOT "ltt" + +/* + * All modifications of ltt_traces must be done by ltt-tracer.c, while holding + * the semaphore. Only reading of this information can be done elsewhere, with + * the RCU mechanism : the preemption must be disabled while reading the + * list. 
+ */ +struct ltt_traces { + struct list_head setup_head; /* Pre-allocated traces list */ + struct list_head head; /* Allocated Traces list */ + unsigned int num_active_traces; /* Number of active traces */ +} ____cacheline_aligned; + +extern struct ltt_traces ltt_traces; + +/* + * get dentry of ltt's root dir + */ +struct dentry *get_ltt_root(void); + +void put_ltt_root(void); + +/* Keep track of trap nesting inside LTT */ +DECLARE_PER_CPU(unsigned int, ltt_nesting); + +typedef int (*ltt_run_filter_functor)(void *trace, uint16_t eID); + +extern ltt_run_filter_functor ltt_run_filter; + +extern void ltt_filter_register(ltt_run_filter_functor func); +extern void ltt_filter_unregister(void); + +#endif /* LTT_TRACER_CORE_H */ diff --git a/ltt-tracer.c b/ltt-tracer.c new file mode 100644 index 00000000..8eae966e --- /dev/null +++ b/ltt-tracer.c @@ -0,0 +1,1293 @@ +/* + * ltt/ltt-tracer.c + * + * (C) Copyright 2005-2008 - + * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Tracing management internal kernel API. Trace buffer allocation/free, tracing + * start/stop. + * + * Author: + * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Inspired from LTT : + * Karim Yaghmour (karim@opersys.com) + * Tom Zanussi (zanussi@us.ibm.com) + * Bob Wisniewski (bob@watson.ibm.com) + * And from K42 : + * Bob Wisniewski (bob@watson.ibm.com) + * + * Changelog: + * 22/09/06, Move to the marker/probes mechanism. + * 19/10/05, Complete lockless mechanism. + * 27/05/05, Modular redesign and rewrite. + * + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer.h" + +static void synchronize_trace(void) +{ + synchronize_sched(); +#ifdef CONFIG_PREEMPT_RT + synchronize_rcu(); +#endif +} + +static void async_wakeup(unsigned long data); + +static DEFINE_TIMER(ltt_async_wakeup_timer, async_wakeup, 0, 0); + +/* Default callbacks for modules */ +notrace +int ltt_filter_control_default(enum ltt_filter_control_msg msg, + struct ltt_trace *trace) +{ + return 0; +} + +int ltt_statedump_default(struct ltt_trace *trace) +{ + return 0; +} + +/* Callbacks for registered modules */ + +int (*ltt_filter_control_functor) + (enum ltt_filter_control_msg msg, struct ltt_trace *trace) = + ltt_filter_control_default; +struct module *ltt_filter_control_owner; + +/* These function pointers are protected by a trace activation check */ +struct module *ltt_run_filter_owner; +int (*ltt_statedump_functor)(struct ltt_trace *trace) = ltt_statedump_default; +struct module *ltt_statedump_owner; + +struct chan_info_struct { + const char *name; + unsigned int def_sb_size; + unsigned int def_n_sb; +} chan_infos[] = { + [LTT_CHANNEL_METADATA] = { + LTT_METADATA_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_FD_STATE] = { + LTT_FD_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_GLOBAL_STATE] = { + LTT_GLOBAL_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_IRQ_STATE] = { + LTT_IRQ_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_MODULE_STATE] = { + LTT_MODULE_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_NETIF_STATE] = { + LTT_NETIF_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_SOFTIRQ_STATE] = { + 
LTT_SOFTIRQ_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_SWAP_STATE] = { + LTT_SWAP_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_SYSCALL_STATE] = { + LTT_SYSCALL_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_TASK_STATE] = { + LTT_TASK_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_VM_STATE] = { + LTT_VM_STATE_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_MED, + LTT_DEFAULT_N_SUBBUFS_MED, + }, + [LTT_CHANNEL_FS] = { + LTT_FS_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_MED, + LTT_DEFAULT_N_SUBBUFS_MED, + }, + [LTT_CHANNEL_INPUT] = { + LTT_INPUT_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_IPC] = { + LTT_IPC_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_LOW, + LTT_DEFAULT_N_SUBBUFS_LOW, + }, + [LTT_CHANNEL_KERNEL] = { + LTT_KERNEL_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_HIGH, + LTT_DEFAULT_N_SUBBUFS_HIGH, + }, + [LTT_CHANNEL_MM] = { + LTT_MM_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_MED, + LTT_DEFAULT_N_SUBBUFS_MED, + }, + [LTT_CHANNEL_RCU] = { + LTT_RCU_CHANNEL, + LTT_DEFAULT_SUBBUF_SIZE_MED, + LTT_DEFAULT_N_SUBBUFS_MED, + }, + [LTT_CHANNEL_DEFAULT] = { + NULL, + LTT_DEFAULT_SUBBUF_SIZE_MED, + LTT_DEFAULT_N_SUBBUFS_MED, + }, +}; + +static enum ltt_channels get_channel_type_from_name(const char *name) +{ + int i; + + if (!name) + return LTT_CHANNEL_DEFAULT; + + for (i = 0; i < ARRAY_SIZE(chan_infos); i++) + if (chan_infos[i].name && !strcmp(name, chan_infos[i].name)) + return (enum ltt_channels)i; + + return LTT_CHANNEL_DEFAULT; +} + +/** + * ltt_module_register - LTT module registration + * @name: module type + * @function: callback to register + * @owner: module which owns the callback + * + * The module calling this registration function must ensure that no + * trap-inducing code will be executed by "function". E.g. 
vmalloc_sync_all() + * must be called between a vmalloc and the moment the memory is made visible to + * "function". This registration acts as a vmalloc_sync_all. Therefore, only if + * the module allocates virtual memory after its registration must it + * synchronize the TLBs. + */ +int ltt_module_register(enum ltt_module_function name, void *function, + struct module *owner) +{ + int ret = 0; + + /* + * Make sure no page fault can be triggered by the module about to be + * registered. We deal with this here so we don't have to call + * vmalloc_sync_all() in each module's init. + */ + vmalloc_sync_all(); + + switch (name) { + case LTT_FUNCTION_RUN_FILTER: + if (ltt_run_filter_owner != NULL) { + ret = -EEXIST; + goto end; + } + ltt_filter_register((ltt_run_filter_functor)function); + ltt_run_filter_owner = owner; + break; + case LTT_FUNCTION_FILTER_CONTROL: + if (ltt_filter_control_owner != NULL) { + ret = -EEXIST; + goto end; + } + ltt_filter_control_functor = + (int (*)(enum ltt_filter_control_msg, + struct ltt_trace *))function; + ltt_filter_control_owner = owner; + break; + case LTT_FUNCTION_STATEDUMP: + if (ltt_statedump_owner != NULL) { + ret = -EEXIST; + goto end; + } + ltt_statedump_functor = + (int (*)(struct ltt_trace *))function; + ltt_statedump_owner = owner; + break; + } + +end: + + return ret; +} +EXPORT_SYMBOL_GPL(ltt_module_register); + +/** + * ltt_module_unregister - LTT module unregistration + * @name: module type + */ +void ltt_module_unregister(enum ltt_module_function name) +{ + switch (name) { + case LTT_FUNCTION_RUN_FILTER: + ltt_filter_unregister(); + ltt_run_filter_owner = NULL; + /* Wait for preempt sections to finish */ + synchronize_trace(); + break; + case LTT_FUNCTION_FILTER_CONTROL: + ltt_filter_control_functor = ltt_filter_control_default; + ltt_filter_control_owner = NULL; + break; + case LTT_FUNCTION_STATEDUMP: + ltt_statedump_functor = ltt_statedump_default; + ltt_statedump_owner = NULL; + break; + } + +} 
+EXPORT_SYMBOL_GPL(ltt_module_unregister); + +static LIST_HEAD(ltt_transport_list); + +/** + * ltt_transport_register - LTT transport registration + * @transport: transport structure + * + * Registers a transport which can be used as output to extract the data out of + * LTTng. The module calling this registration function must ensure that no + * trap-inducing code will be executed by the transport functions. E.g. + * vmalloc_sync_all() must be called between a vmalloc and the moment the memory + * is made visible to the transport function. This registration acts as a + * vmalloc_sync_all. Therefore, only if the module allocates virtual memory + * after its registration must it synchronize the TLBs. + */ +void ltt_transport_register(struct ltt_transport *transport) +{ + /* + * Make sure no page fault can be triggered by the module about to be + * registered. We deal with this here so we don't have to call + * vmalloc_sync_all() in each module's init. + */ + vmalloc_sync_all(); + + ltt_lock_traces(); + list_add_tail(&transport->node, &ltt_transport_list); + ltt_unlock_traces(); +} +EXPORT_SYMBOL_GPL(ltt_transport_register); + +/** + * ltt_transport_unregister - LTT transport unregistration + * @transport: transport structure + */ +void ltt_transport_unregister(struct ltt_transport *transport) +{ + ltt_lock_traces(); + list_del(&transport->node); + ltt_unlock_traces(); +} +EXPORT_SYMBOL_GPL(ltt_transport_unregister); + +static inline +int is_channel_overwrite(enum ltt_channels chan, enum trace_mode mode) +{ + switch (mode) { + case LTT_TRACE_NORMAL: + return 0; + case LTT_TRACE_FLIGHT: + switch (chan) { + case LTT_CHANNEL_METADATA: + return 0; + default: + return 1; + } + case LTT_TRACE_HYBRID: + switch (chan) { + case LTT_CHANNEL_KERNEL: + case LTT_CHANNEL_FS: + case LTT_CHANNEL_MM: + case LTT_CHANNEL_RCU: + case LTT_CHANNEL_IPC: + case LTT_CHANNEL_INPUT: + return 1; + default: + return 0; + } + default: + return 0; + } +} + +static void trace_async_wakeup(struct 
ltt_trace *trace) +{ + int i; + struct ltt_chan *chan; + + /* Must check each channel for pending read wakeup */ + for (i = 0; i < trace->nr_channels; i++) { + chan = &trace->channels[i]; + if (chan->active) + trace->ops->wakeup_channel(chan); + } +} + +/* Timer to send async wakeups to the readers */ +static void async_wakeup(unsigned long data) +{ + struct ltt_trace *trace; + + /* + * PREEMPT_RT does not allow spinlocks to be taken within preempt + * disable sections (spinlock taken in wake_up). However, mainline won't + * allow mutex to be taken in interrupt context. Ugly. + * Take a standard RCU read lock for RT kernels, which implies that we + * also have to synchronize_rcu() upon updates. + */ +#ifndef CONFIG_PREEMPT_RT + rcu_read_lock_sched(); +#else + rcu_read_lock(); +#endif + list_for_each_entry_rcu(trace, &ltt_traces.head, list) { + trace_async_wakeup(trace); + } +#ifndef CONFIG_PREEMPT_RT + rcu_read_unlock_sched(); +#else + rcu_read_unlock(); +#endif + + mod_timer(&ltt_async_wakeup_timer, jiffies + LTT_PERCPU_TIMER_INTERVAL); +} + +/** + * _ltt_trace_find - find a trace by given name. + * @trace_name: trace name + * + * Returns a pointer to the trace structure, NULL if not found. + */ +static struct ltt_trace *_ltt_trace_find(const char *trace_name) +{ + struct ltt_trace *trace; + + list_for_each_entry(trace, &ltt_traces.head, list) + if (!strncmp(trace->trace_name, trace_name, NAME_MAX)) + return trace; + + return NULL; +} + +/* _ltt_trace_find_setup : + * find a trace in setup list by given name. + * + * Returns a pointer to the trace structure, NULL if not found. 
+ */ +struct ltt_trace *_ltt_trace_find_setup(const char *trace_name) +{ + struct ltt_trace *trace; + + list_for_each_entry(trace, &ltt_traces.setup_head, list) + if (!strncmp(trace->trace_name, trace_name, NAME_MAX)) + return trace; + + return NULL; +} +EXPORT_SYMBOL_GPL(_ltt_trace_find_setup); + +/** + * ltt_release_trace - Release a LTT trace + * @kref : reference count on the trace + */ +void ltt_release_trace(struct kref *kref) +{ + struct ltt_trace *trace = container_of(kref, struct ltt_trace, kref); + + trace->ops->remove_dirs(trace); + module_put(trace->transport->owner); + ltt_channels_trace_free(trace->channels, trace->nr_channels); + kfree(trace); +} +EXPORT_SYMBOL_GPL(ltt_release_trace); + +static inline void prepare_chan_size_num(unsigned int *subbuf_size, + unsigned int *n_subbufs) +{ + /* Make sure the subbuffer size is at least one page */ + *subbuf_size = max_t(unsigned int, *subbuf_size, PAGE_SIZE); + + /* round to next power of 2 */ + *subbuf_size = 1 << get_count_order(*subbuf_size); + *n_subbufs = 1 << get_count_order(*n_subbufs); + + /* Subbuf size and number must both be power of two */ + WARN_ON(hweight32(*subbuf_size) != 1); + WARN_ON(hweight32(*n_subbufs) != 1); +} + +int _ltt_trace_setup(const char *trace_name) +{ + int err = 0; + struct ltt_trace *new_trace = NULL; + int metadata_index; + unsigned int chan; + enum ltt_channels chantype; + + if (_ltt_trace_find_setup(trace_name)) { + printk(KERN_ERR "LTT : Trace name %s already used.\n", + trace_name); + err = -EEXIST; + goto traces_error; + } + + if (_ltt_trace_find(trace_name)) { + printk(KERN_ERR "LTT : Trace name %s already used.\n", + trace_name); + err = -EEXIST; + goto traces_error; + } + + new_trace = kzalloc(sizeof(struct ltt_trace), GFP_KERNEL); + if (!new_trace) { + printk(KERN_ERR + "LTT : Unable to allocate memory for trace %s\n", + trace_name); + err = -ENOMEM; + goto traces_error; + } + strncpy(new_trace->trace_name, trace_name, NAME_MAX); + new_trace->channels = 
ltt_channels_trace_alloc(&new_trace->nr_channels, + 0, 1); + if (!new_trace->channels) { + printk(KERN_ERR + "LTT : Unable to allocate memory for chaninfo %s\n", + trace_name); + err = -ENOMEM; + goto trace_free; + } + + /* + * Force metadata channel to active, no overwrite. + */ + metadata_index = ltt_channels_get_index_from_name("metadata"); + WARN_ON(metadata_index < 0); + new_trace->channels[metadata_index].overwrite = 0; + new_trace->channels[metadata_index].active = 1; + + /* + * Set hardcoded tracer defaults for some channels + */ + for (chan = 0; chan < new_trace->nr_channels; chan++) { + if (!(new_trace->channels[chan].active)) + continue; + + chantype = get_channel_type_from_name( + ltt_channels_get_name_from_index(chan)); + new_trace->channels[chan].a.sb_size = + chan_infos[chantype].def_sb_size; + new_trace->channels[chan].a.n_sb = + chan_infos[chantype].def_n_sb; + } + + list_add(&new_trace->list, <t_traces.setup_head); + return 0; + +trace_free: + kfree(new_trace); +traces_error: + return err; +} +EXPORT_SYMBOL_GPL(_ltt_trace_setup); + + +int ltt_trace_setup(const char *trace_name) +{ + int ret; + ltt_lock_traces(); + ret = _ltt_trace_setup(trace_name); + ltt_unlock_traces(); + return ret; +} +EXPORT_SYMBOL_GPL(ltt_trace_setup); + +/* must be called from within a traces lock. 
*/ +static void _ltt_trace_free(struct ltt_trace *trace) +{ + list_del(&trace->list); + /* Release the channel array too, as ltt_release_trace() does. */ + ltt_channels_trace_free(trace->channels, trace->nr_channels); + kfree(trace); +} + +/* + * Select the transport named trace_type for a trace still in the setup list. + * Returns 0, -ENOENT if the trace is not found, -EINVAL if no registered + * transport has that name. + */ +int ltt_trace_set_type(const char *trace_name, const char *trace_type) +{ + int err = 0; + struct ltt_trace *trace; + struct ltt_transport *tran_iter, *transport = NULL; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + list_for_each_entry(tran_iter, &ltt_transport_list, node) { + if (!strcmp(tran_iter->name, trace_type)) { + transport = tran_iter; + break; + } + } + if (!transport) { + printk(KERN_ERR "LTT : Transport %s is not present.\n", + trace_type); + err = -EINVAL; + goto traces_error; + } + + trace->transport = transport; + +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_set_type); + +/* + * Record the requested subbuffer size of one channel of a trace in the setup + * list. Returns 0, or -ENOENT if the trace or the channel is not found. + */ +int ltt_trace_set_channel_subbufsize(const char *trace_name, + const char *channel_name, + unsigned int size) +{ + int err = 0; + struct ltt_trace *trace; + int index; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + index = ltt_channels_get_index_from_name(channel_name); + if (index < 0) { + printk(KERN_ERR "LTT : Channel %s not found\n", channel_name); + err = -ENOENT; + goto traces_error; + } + trace->channels[index].a.sb_size = size; + +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_set_channel_subbufsize); + +int ltt_trace_set_channel_subbufcount(const char *trace_name, + const char *channel_name, + unsigned int cnt) +{ + int err = 0; + struct ltt_trace *trace; + int index; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + index = 
ltt_channels_get_index_from_name(channel_name); + if (index < 0) { + printk(KERN_ERR "LTT : Channel %s not found\n", channel_name); + err = -ENOENT; + goto traces_error; + } + trace->channels[index].a.n_sb = cnt; + +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_set_channel_subbufcount); + +int ltt_trace_set_channel_switch_timer(const char *trace_name, + const char *channel_name, + unsigned long interval) +{ + int err = 0; + struct ltt_trace *trace; + int index; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + index = ltt_channels_get_index_from_name(channel_name); + if (index < 0) { + printk(KERN_ERR "LTT : Channel %s not found\n", channel_name); + err = -ENOENT; + goto traces_error; + } + ltt_channels_trace_set_timer(&trace->channels[index], interval); + +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_set_channel_switch_timer); + +int ltt_trace_set_channel_enable(const char *trace_name, + const char *channel_name, unsigned int enable) +{ + int err = 0; + struct ltt_trace *trace; + int index; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + /* + * Datas in metadata channel(marker info) is necessary to be able to + * read the trace, we always enable this channel. 
+ */ + if (!enable && !strcmp(channel_name, "metadata")) { + printk(KERN_ERR "LTT : Trying to disable metadata channel\n"); + err = -EINVAL; + goto traces_error; + } + + index = ltt_channels_get_index_from_name(channel_name); + if (index < 0) { + printk(KERN_ERR "LTT : Channel %s not found\n", channel_name); + err = -ENOENT; + goto traces_error; + } + + trace->channels[index].active = enable; + +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_set_channel_enable); + +int ltt_trace_set_channel_overwrite(const char *trace_name, + const char *channel_name, + unsigned int overwrite) +{ + int err = 0; + struct ltt_trace *trace; + int index; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + /* + * Always put the metadata channel in non-overwrite mode : + * This is a very low traffic channel and it can't afford to have its + * data overwritten : this data (marker info) is necessary to be + * able to read the trace. 
+ */ + if (overwrite && !strcmp(channel_name, "metadata")) { + printk(KERN_ERR "LTT : Trying to set metadata channel to " + "overwrite mode\n"); + err = -EINVAL; + goto traces_error; + } + + index = ltt_channels_get_index_from_name(channel_name); + if (index < 0) { + printk(KERN_ERR "LTT : Channel %s not found\n", channel_name); + err = -ENOENT; + goto traces_error; + } + + trace->channels[index].overwrite = overwrite; + +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_set_channel_overwrite); + +int ltt_trace_alloc(const char *trace_name) +{ + int err = 0; + struct ltt_trace *trace; + int sb_size, n_sb; + unsigned long flags; + int chan; + const char *channel_name; + + ltt_lock_traces(); + + trace = _ltt_trace_find_setup(trace_name); + if (!trace) { + printk(KERN_ERR "LTT : Trace not found %s\n", trace_name); + err = -ENOENT; + goto traces_error; + } + + kref_init(&trace->kref); + init_waitqueue_head(&trace->kref_wq); + trace->active = 0; + get_trace_clock(); + trace->freq_scale = trace_clock_freq_scale(); + + if (!trace->transport) { + printk(KERN_ERR "LTT : Transport is not set.\n"); + err = -EINVAL; + goto transport_error; + } + if (!try_module_get(trace->transport->owner)) { + printk(KERN_ERR "LTT : Can't lock transport module.\n"); + err = -ENODEV; + goto transport_error; + } + trace->ops = &trace->transport->ops; + + err = trace->ops->create_dirs(trace); + if (err) { + printk(KERN_ERR "LTT : Can't create dir for trace %s.\n", + trace_name); + goto dirs_error; + } + + local_irq_save(flags); + trace->start_freq = trace_clock_frequency(); + trace->start_tsc = trace_clock_read64(); + do_gettimeofday(&trace->start_time); + local_irq_restore(flags); + + for (chan = 0; chan < trace->nr_channels; chan++) { + if (!(trace->channels[chan].active)) + continue; + + channel_name = ltt_channels_get_name_from_index(chan); + WARN_ON(!channel_name); + /* + * note: sb_size and n_sb will be overwritten with updated + * values by channel 
creation. + */ + sb_size = trace->channels[chan].a.sb_size; + n_sb = trace->channels[chan].a.n_sb; + prepare_chan_size_num(&sb_size, &n_sb); + err = trace->ops->create_channel(channel_name, + &trace->channels[chan], + trace->dentry.trace_root, + sb_size, n_sb, + trace->channels[chan].overwrite, trace); + if (err != 0) { + printk(KERN_ERR "LTT : Can't create channel %s.\n", + channel_name); + goto create_channel_error; + } + } + + /* Move the trace from the setup list to the allocated traces list. */ + list_del(&trace->list); + if (list_empty(&ltt_traces.head)) { + mod_timer(&ltt_async_wakeup_timer, + jiffies + LTT_PERCPU_TIMER_INTERVAL); + set_kernel_trace_flag_all_tasks(); + } + list_add_rcu(&trace->list, &ltt_traces.head); + synchronize_trace(); + + ltt_unlock_traces(); + + return 0; + +create_channel_error: + /* Undo only the channels created before the failing one. */ + for (chan--; chan >= 0; chan--) { + if (trace->channels[chan].active) { + struct ltt_chan *chanp = &trace->channels[chan]; + trace->ops->remove_channel_files(chanp); + kref_put(&chanp->a.kref, trace->ops->remove_channel); + } + } + trace->ops->remove_dirs(trace); + +dirs_error: + module_put(trace->transport->owner); +transport_error: + put_trace_clock(); +traces_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_alloc); + +/* + * Works as a wrapper for the current version of ltt_control.ko. + * We will make a new ltt_control based on debugfs, and control each channel's + * buffer. + * The mode and subbuffer size/count arguments are currently not used. + */ +static +int ltt_trace_create(const char *trace_name, const char *trace_type, + enum trace_mode mode, + unsigned int subbuf_size_low, unsigned int n_subbufs_low, + unsigned int subbuf_size_med, unsigned int n_subbufs_med, + unsigned int subbuf_size_high, unsigned int n_subbufs_high) +{ + int err = 0; + + err = ltt_trace_setup(trace_name); + if (IS_ERR_VALUE(err)) + return err; + + err = ltt_trace_set_type(trace_name, trace_type); + if (IS_ERR_VALUE(err)) + return err; + + err = ltt_trace_alloc(trace_name); + if (IS_ERR_VALUE(err)) + return err; + + return err; +} + +/* Must be called while sure that trace is in the list. 
*/ +static int _ltt_trace_destroy(struct ltt_trace *trace) +{ + int err = -EPERM; + + if (trace == NULL) { + err = -ENOENT; + goto traces_error; + } + if (trace->active) { + printk(KERN_ERR + "LTT : Can't destroy trace %s : tracer is active\n", + trace->trace_name); + err = -EBUSY; + goto active_error; + } + /* Everything went fine */ + list_del_rcu(&trace->list); + synchronize_trace(); + if (list_empty(<t_traces.head)) { + clear_kernel_trace_flag_all_tasks(); + /* + * We stop the asynchronous delivery of reader wakeup, but + * we must make one last check for reader wakeups pending + * later in __ltt_trace_destroy. + */ + del_timer_sync(<t_async_wakeup_timer); + } + return 0; + + /* error handling */ +active_error: +traces_error: + return err; +} + +/* Sleepable part of the destroy */ +static void __ltt_trace_destroy(struct ltt_trace *trace) +{ + int i; + struct ltt_chan *chan; + + for (i = 0; i < trace->nr_channels; i++) { + chan = &trace->channels[i]; + if (chan->active) + trace->ops->finish_channel(chan); + } + + flush_scheduled_work(); + + /* + * The currently destroyed trace is not in the trace list anymore, + * so it's safe to call the async wakeup ourself. It will deliver + * the last subbuffers. + */ + trace_async_wakeup(trace); + + for (i = 0; i < trace->nr_channels; i++) { + chan = &trace->channels[i]; + if (chan->active) { + trace->ops->remove_channel_files(chan); + kref_put(&chan->a.kref, + trace->ops->remove_channel); + } + } + + /* + * Wait for lttd readers to release the files, therefore making sure + * the last subbuffers have been read. + */ + if (atomic_read(&trace->kref.refcount) > 1) { + int ret = 0; + /* + * Unlock traces and CPU hotplug while we wait for lttd to + * release the files. 
+ */ + ltt_unlock_traces(); + __wait_event_interruptible(trace->kref_wq, + (atomic_read(&trace->kref.refcount) == 1), ret); + ltt_lock_traces(); + } + + kref_put(&trace->kref, ltt_release_trace); +} + +int ltt_trace_destroy(const char *trace_name) +{ + int err = 0; + struct ltt_trace *trace; + + ltt_lock_traces(); + + trace = _ltt_trace_find(trace_name); + if (trace) { + err = _ltt_trace_destroy(trace); + if (err) + goto error; + + __ltt_trace_destroy(trace); + ltt_unlock_traces(); + put_trace_clock(); + + return 0; + } + + trace = _ltt_trace_find_setup(trace_name); + if (trace) { + _ltt_trace_free(trace); + ltt_unlock_traces(); + return 0; + } + + err = -ENOENT; + + /* Error handling */ +error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_destroy); + +/* + * called with trace lock held. + */ +static +void ltt_channels_trace_start_timer(struct ltt_chan *channels, + unsigned int nr_channels) +{ + int i; + + for (i = 0; i < nr_channels; i++) { + struct ltt_chan *chan = &channels[i]; + chan->a.trace->ops->start_switch_timer(chan); + } +} + +/* + * called with trace lock held. + */ +static +void ltt_channels_trace_stop_timer(struct ltt_chan *channels, + unsigned int nr_channels) +{ + int i; + + for (i = 0; i < nr_channels; i++) { + struct ltt_chan *chan = &channels[i]; + chan->a.trace->ops->stop_switch_timer(chan); + } +} + +/* must be called from within a traces lock. 
*/ +static int _ltt_trace_start(struct ltt_trace *trace) +{ + int err = 0; + + if (trace == NULL) { + err = -ENOENT; + goto traces_error; + } + if (trace->active) + printk(KERN_INFO "LTT : Tracing already active for trace %s\n", + trace->trace_name); + if (!try_module_get(ltt_run_filter_owner)) { + err = -ENODEV; + printk(KERN_ERR "LTT : Can't lock filter module.\n"); + goto get_ltt_run_filter_error; + } + ltt_channels_trace_start_timer(trace->channels, trace->nr_channels); + trace->active = 1; + /* Read by trace points without protection : be careful */ + ltt_traces.num_active_traces++; + return err; + + /* error handling */ +get_ltt_run_filter_error: +traces_error: + return err; +} + +int ltt_trace_start(const char *trace_name) +{ + int err = 0; + struct ltt_trace *trace; + + ltt_lock_traces(); + + trace = _ltt_trace_find(trace_name); + err = _ltt_trace_start(trace); + if (err) + goto no_trace; + + ltt_unlock_traces(); + + /* + * Call the kernel state dump. + * Events will be mixed with real kernel events, it's ok. + * Notice that there is no protection on the trace : that's exactly + * why we iterate on the list and check for trace equality instead of + * directly using this trace handle inside the logging function. 
+ */ + + ltt_dump_marker_state(trace); + + if (!try_module_get(ltt_statedump_owner)) { + err = -ENODEV; + printk(KERN_ERR + "LTT : Can't lock state dump module.\n"); + } else { + ltt_statedump_functor(trace); + module_put(ltt_statedump_owner); + } + + return err; + + /* Error handling */ +no_trace: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_start); + +/* must be called from within traces lock */ +static int _ltt_trace_stop(struct ltt_trace *trace) +{ + int err = -EPERM; + + if (trace == NULL) { + err = -ENOENT; + goto traces_error; + } + if (!trace->active) + printk(KERN_INFO "LTT : Tracing not active for trace %s\n", + trace->trace_name); + if (trace->active) { + ltt_channels_trace_stop_timer(trace->channels, + trace->nr_channels); + trace->active = 0; + ltt_traces.num_active_traces--; + synchronize_trace(); /* Wait for each tracing to be finished */ + } + module_put(ltt_run_filter_owner); + /* Everything went fine */ + return 0; + + /* Error handling */ +traces_error: + return err; +} + +int ltt_trace_stop(const char *trace_name) +{ + int err = 0; + struct ltt_trace *trace; + + ltt_lock_traces(); + trace = _ltt_trace_find(trace_name); + err = _ltt_trace_stop(trace); + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_trace_stop); + +/** + * ltt_control - Trace control in-kernel API + * @msg: Action to perform + * @trace_name: Trace on which the action must be done + * @trace_type: Type of trace (normal, flight, hybrid) + * @args: Arguments specific to the action + */ +int ltt_control(enum ltt_control_msg msg, const char *trace_name, + const char *trace_type, union ltt_control_args args) +{ + int err = -EPERM; + + printk(KERN_ALERT "ltt_control : trace %s\n", trace_name); + switch (msg) { + case LTT_CONTROL_START: + printk(KERN_DEBUG "Start tracing %s\n", trace_name); + err = ltt_trace_start(trace_name); + break; + case LTT_CONTROL_STOP: + printk(KERN_DEBUG "Stop tracing %s\n", trace_name); + err = ltt_trace_stop(trace_name); + 
break; + case LTT_CONTROL_CREATE_TRACE: + printk(KERN_DEBUG "Creating trace %s\n", trace_name); + err = ltt_trace_create(trace_name, trace_type, + args.new_trace.mode, + args.new_trace.subbuf_size_low, + args.new_trace.n_subbufs_low, + args.new_trace.subbuf_size_med, + args.new_trace.n_subbufs_med, + args.new_trace.subbuf_size_high, + args.new_trace.n_subbufs_high); + break; + case LTT_CONTROL_DESTROY_TRACE: + printk(KERN_DEBUG "Destroying trace %s\n", trace_name); + err = ltt_trace_destroy(trace_name); + break; + } + return err; +} +EXPORT_SYMBOL_GPL(ltt_control); + +/** + * ltt_filter_control - Trace filter control in-kernel API + * @msg: Action to perform on the filter + * @trace_name: Trace on which the action must be done + */ +int ltt_filter_control(enum ltt_filter_control_msg msg, const char *trace_name) +{ + int err; + struct ltt_trace *trace; + + printk(KERN_DEBUG "ltt_filter_control : trace %s\n", trace_name); + ltt_lock_traces(); + trace = _ltt_trace_find(trace_name); + if (trace == NULL) { + printk(KERN_ALERT + "Trace does not exist. 
Cannot proxy control request\n"); + err = -ENOENT; + goto trace_error; + } + if (!try_module_get(ltt_filter_control_owner)) { + err = -ENODEV; + goto get_module_error; + } + switch (msg) { + case LTT_FILTER_DEFAULT_ACCEPT: + printk(KERN_DEBUG + "Proxy filter default accept %s\n", trace_name); + err = (*ltt_filter_control_functor)(msg, trace); + break; + case LTT_FILTER_DEFAULT_REJECT: + printk(KERN_DEBUG + "Proxy filter default reject %s\n", trace_name); + err = (*ltt_filter_control_functor)(msg, trace); + break; + default: + err = -EPERM; + } + module_put(ltt_filter_control_owner); + +get_module_error: +trace_error: + ltt_unlock_traces(); + return err; +} +EXPORT_SYMBOL_GPL(ltt_filter_control); + +int __init ltt_init(void) +{ + /* Make sure no page fault can be triggered by this module */ + vmalloc_sync_all(); + init_timer_deferrable(&ltt_async_wakeup_timer); + return 0; +} + +module_init(ltt_init) + +/* Module unload: stop and destroy every allocated trace, then free setup-list traces. */ +static void __exit ltt_exit(void) +{ + struct ltt_trace *trace; + struct list_head *pos, *n; + + ltt_lock_traces(); + /* Stop each trace, currently being read by RCU read-side */ + list_for_each_entry_rcu(trace, &ltt_traces.head, list) + _ltt_trace_stop(trace); + /* Wait for quiescent state. Readers have preemption disabled. */ + synchronize_trace(); + /* Safe iteration is now permitted. It does not have to be RCU-safe + * because no readers are left. 
 */ + list_for_each_safe(pos, n, &ltt_traces.head) { + trace = container_of(pos, struct ltt_trace, list); + /* _ltt_trace_destroy does a synchronize_trace() */ + _ltt_trace_destroy(trace); + __ltt_trace_destroy(trace); + } + /* free traces in pre-alloc status */ + list_for_each_safe(pos, n, &ltt_traces.setup_head) { + trace = container_of(pos, struct ltt_trace, list); + _ltt_trace_free(trace); + } + + ltt_unlock_traces(); +} + +module_exit(ltt_exit) + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Tracer Kernel API"); diff --git a/ltt-tracer.h b/ltt-tracer.h new file mode 100644 index 00000000..9564c3f9 --- /dev/null +++ b/ltt-tracer.h @@ -0,0 +1,663 @@ +/* + * Copyright (C) 2005,2006,2008 Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * This contains the definitions for the Linux Trace Toolkit tracer. + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#ifndef _LTT_TRACER_H +#define _LTT_TRACER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-tracer-core.h" +#include "ltt-relay.h" + +/* Number of bytes to log with a read/write event */ +#define LTT_LOG_RW_SIZE 32L + +/* Interval (in jiffies) at which the LTT per-CPU timer fires */ +#define LTT_PERCPU_TIMER_INTERVAL 1 + +#ifndef LTT_ARCH_TYPE +#define LTT_ARCH_TYPE LTT_ARCH_TYPE_UNDEFINED +#endif + +#ifndef LTT_ARCH_VARIANT +#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE +#endif + +struct ltt_active_marker; + +/* Maximum number of callbacks per marker */ +#define LTT_NR_CALLBACKS 10 + +struct ltt_serialize_closure { + ltt_serialize_cb *callbacks; + long cb_args[LTT_NR_CALLBACKS]; + unsigned int cb_idx; +}; + +size_t ltt_serialize_data(struct ltt_chanbuf *buf, size_t buf_offset, + struct ltt_serialize_closure *closure, + void *serialize_private, unsigned int stack_pos_ctx, + int *largest_align, const char *fmt, va_list *args); 
+ +struct ltt_available_probe { + const char *name; /* probe name */ + const char *format; + marker_probe_func *probe_func; + ltt_serialize_cb callbacks[LTT_NR_CALLBACKS]; + struct list_head node; /* registered probes list */ +}; + +enum ltt_channels { + LTT_CHANNEL_METADATA, + LTT_CHANNEL_FD_STATE, + LTT_CHANNEL_GLOBAL_STATE, + LTT_CHANNEL_IRQ_STATE, + LTT_CHANNEL_MODULE_STATE, + LTT_CHANNEL_NETIF_STATE, + LTT_CHANNEL_SOFTIRQ_STATE, + LTT_CHANNEL_SWAP_STATE, + LTT_CHANNEL_SYSCALL_STATE, + LTT_CHANNEL_TASK_STATE, + LTT_CHANNEL_VM_STATE, + LTT_CHANNEL_FS, + LTT_CHANNEL_INPUT, + LTT_CHANNEL_IPC, + LTT_CHANNEL_KERNEL, + LTT_CHANNEL_MM, + LTT_CHANNEL_RCU, + LTT_CHANNEL_DEFAULT, +}; + +struct ltt_active_marker { + struct list_head node; /* active markers list */ + const char *channel; + const char *name; + const char *format; + struct ltt_available_probe *probe; +}; + +extern void ltt_vtrace(const struct marker *mdata, void *probe_data, + void *call_data, const char *fmt, va_list *args); +extern void ltt_trace(const struct marker *mdata, void *probe_data, + void *call_data, const char *fmt, ...); + +size_t ltt_serialize_printf(struct ltt_chanbuf *buf, unsigned long buf_offset, + size_t *msg_size, char *output, size_t outlen, + const char *fmt); + +/* + * Unique ID assigned to each registered probe. + */ +enum marker_id { + MARKER_ID_SET_MARKER_ID = 0, /* Static IDs available (range 0-7) */ + MARKER_ID_SET_MARKER_FORMAT, + MARKER_ID_COMPACT, /* Compact IDs (range: 8-127) */ + MARKER_ID_DYNAMIC, /* Dynamic IDs (range: 128-65535) */ +}; + +/* static ids 0-1 reserved for internal use. 
*/ +#define MARKER_CORE_IDS 2 +static __inline__ enum marker_id marker_id_type(uint16_t id) +{ + if (id < MARKER_CORE_IDS) + return (enum marker_id)id; + else + return MARKER_ID_DYNAMIC; +} + +struct user_dbg_data { + unsigned long avail_size; + unsigned long write; + unsigned long read; +}; + +struct ltt_trace_ops { + /* First 32 bytes cache-hot cacheline */ + void (*wakeup_channel) (struct ltt_chan *chan); + int (*user_blocking) (struct ltt_trace *trace, unsigned int index, + size_t data_size, struct user_dbg_data *dbg); + /* End of first 32 bytes cacheline */ + int (*create_dirs) (struct ltt_trace *new_trace); + void (*remove_dirs) (struct ltt_trace *new_trace); + int (*create_channel) (const char *channel_name, struct ltt_chan *chan, + struct dentry *parent, size_t sb_size, + size_t n_sb, int overwrite, + struct ltt_trace *trace); + void (*finish_channel) (struct ltt_chan *chan); + void (*remove_channel) (struct kref *kref); + void (*remove_channel_files) (struct ltt_chan *chan); + void (*user_errors) (struct ltt_trace *trace, unsigned int index, + size_t data_size, struct user_dbg_data *dbg, + int cpu); + void (*start_switch_timer) (struct ltt_chan *chan); + void (*stop_switch_timer) (struct ltt_chan *chan); +#ifdef CONFIG_HOTPLUG_CPU + int (*handle_cpuhp) (struct notifier_block *nb, unsigned long action, + void *hcpu, struct ltt_trace *trace); +#endif +}; + +struct ltt_transport { + char *name; + struct module *owner; + struct list_head node; + struct ltt_trace_ops ops; +}; + +enum trace_mode { LTT_TRACE_NORMAL, LTT_TRACE_FLIGHT, LTT_TRACE_HYBRID }; + +#define CHANNEL_FLAG_ENABLE (1U<<0) +#define CHANNEL_FLAG_OVERWRITE (1U<<1) + +/* Per-trace information - each trace/flight recorder represented by one */ +struct ltt_trace { + /* First 32 bytes cache-hot cacheline */ + struct list_head list; + struct ltt_chan *channels; + unsigned int nr_channels; + int active; + /* Second 32 bytes cache-hot cacheline */ + struct ltt_trace_ops *ops; + u32 freq_scale; + u64 
start_freq; + u64 start_tsc; + unsigned long long start_monotonic; + struct timeval start_time; + struct ltt_channel_setting *settings; + struct { + struct dentry *trace_root; + struct dentry *ascii_root; + } dentry; + struct kref kref; /* Each channel has a kref of the trace struct */ + struct ltt_transport *transport; + struct kref ltt_transport_kref; + wait_queue_head_t kref_wq; /* Place for ltt_trace_destroy to sleep */ + char trace_name[NAME_MAX]; +} ____cacheline_aligned; + +/* Hardcoded event headers + * + * event header for a trace with active heartbeat : 27 bits timestamps + * + * headers are 32-bits aligned. In order to insure such alignment, a dynamic per + * trace alignment value must be done. + * + * Remember that the C compiler does align each member on the boundary + * equivalent to their own size. + * + * As relay subbuffers are aligned on pages, we are sure that they are 4 and 8 + * bytes aligned, so the buffer header and trace header are aligned. + * + * Event headers are aligned depending on the trace alignment option. + * + * Note using C structure bitfields for cross-endianness and portability + * concerns. + */ + +#define LTT_RESERVED_EVENTS 3 +#define LTT_EVENT_BITS 5 +#define LTT_FREE_EVENTS ((1 << LTT_EVENT_BITS) - LTT_RESERVED_EVENTS) +#define LTT_TSC_BITS 27 +#define LTT_TSC_MASK ((1 << LTT_TSC_BITS) - 1) + +struct ltt_event_header { + u32 id_time; /* 5 bits event id (MSB); 27 bits time (LSB) */ +}; + +/* Reservation flags */ +#define LTT_RFLAG_ID (1 << 0) +#define LTT_RFLAG_ID_SIZE (1 << 1) +#define LTT_RFLAG_ID_SIZE_TSC (1 << 2) + +#define LTT_MAX_SMALL_SIZE 0xFFFFU + +/* + * We use asm/timex.h : cpu_khz/HZ variable in here : we might have to deal + * specifically with CPU frequency scaling someday, so using an interpolation + * between the start and end of buffer values is not flexible enough. 
Using an + * immediate frequency value makes it possible to directly calculate the times for parts + * of a buffer that would be before a frequency change. + * + * Keep the natural field alignment for _each field_ within this structure if + * you ever add/remove a field from this header. Packed attribute is not used + * because gcc generates poor code on at least powerpc and mips. Don't ever + * let gcc add padding between the structure elements. + */ +struct ltt_subbuffer_header { + uint64_t cycle_count_begin; /* Cycle count at subbuffer start */ + uint64_t cycle_count_end; /* Cycle count at subbuffer end */ + uint32_t magic_number; /* + * Trace magic number. + * contains endianness information. + */ + uint8_t major_version; + uint8_t minor_version; + uint8_t arch_size; /* Architecture pointer size */ + uint8_t alignment; /* LTT data alignment */ + uint64_t start_time_sec; /* NTP-corrected start time */ + uint64_t start_time_usec; + uint64_t start_freq; /* + * Frequency at trace start, + * used all along the trace. + */ + uint32_t freq_scale; /* Frequency scaling (divisor) */ + uint32_t data_size; /* Size of data in subbuffer */ + uint32_t sb_size; /* Subbuffer size (including padding) */ + uint32_t events_lost; /* + * Events lost in this subbuffer since + * the beginning of the trace. + * (may overflow) + */ + uint32_t subbuf_corrupt; /* + * Corrupted (lost) subbuffers since + * the beginning of the trace. + * (may overflow) + */ + uint8_t header_end[0]; /* End of header */ +}; + +/** + * ltt_sb_header_size - called on buffer-switch to a new sub-buffer + * + * Return header size without padding after the structure. Don't use packed + * structure because gcc generates inefficient code on some architectures + * (powerpc, mips..) + */ +static __inline__ size_t ltt_sb_header_size(void) +{ + return offsetof(struct ltt_subbuffer_header, header_end); +} + +/* + * ltt_get_header_size + * + * Calculate alignment offset to 32-bits. This is the alignment offset of the + * event header. 
+ * + * Important note : + * The event header must be 32-bits. The total offset calculated here : + * + * Alignment of header struct on 32 bits (min arch size, header size) + * + sizeof(header struct) (32-bits) + * + (opt) u16 (ext. event id) + * + (opt) u16 (event_size) + * (if event_size == LTT_MAX_SMALL_SIZE, has ext. event size) + * + (opt) u32 (ext. event size) + * + (opt) u64 full TSC (aligned on min(64-bits, arch size)) + * + * The payload must itself determine its own alignment from the biggest type it + * contains. + * */ +static __inline__ +unsigned char ltt_get_header_size(struct ltt_chan *chan, size_t offset, + size_t data_size, size_t *before_hdr_pad, + unsigned int rflags) +{ + size_t orig_offset = offset; + size_t padding; + + BUILD_BUG_ON(sizeof(struct ltt_event_header) != sizeof(u32)); + + padding = ltt_align(offset, sizeof(struct ltt_event_header)); + offset += padding; + offset += sizeof(struct ltt_event_header); + + if (unlikely(rflags)) { + switch (rflags) { + case LTT_RFLAG_ID_SIZE_TSC: + offset += sizeof(u16) + sizeof(u16); + if (data_size >= LTT_MAX_SMALL_SIZE) + offset += sizeof(u32); + offset += ltt_align(offset, sizeof(u64)); + offset += sizeof(u64); + break; + case LTT_RFLAG_ID_SIZE: + offset += sizeof(u16) + sizeof(u16); + if (data_size >= LTT_MAX_SMALL_SIZE) + offset += sizeof(u32); + break; + case LTT_RFLAG_ID: + offset += sizeof(u16); + break; + } + } + + *before_hdr_pad = padding; + return offset - orig_offset; +} + +extern +size_t ltt_write_event_header_slow(struct ltt_chanbuf_alloc *bufa, + struct ltt_chan_alloc *chana, + long buf_offset, u16 eID, u32 event_size, + u64 tsc, unsigned int rflags); + +/* + * ltt_write_event_header + * + * Writes the event header to the offset (already aligned on 32-bits). + * + * @buf : buffer to write to. + * @chan : pointer to the channel structure.. + * @buf_offset : buffer offset to write to (aligned on 32 bits). + * @eID : event ID + * @event_size : size of the event, excluding the event header. 
+ * @tsc : time stamp counter. + * @rflags : reservation flags. + * + * returns : offset where the event data must be written. + */ +static __inline__ +size_t ltt_write_event_header(struct ltt_chanbuf_alloc *bufa, + struct ltt_chan_alloc *chana, + long buf_offset, u16 eID, u32 event_size, u64 tsc, + unsigned int rflags) +{ + struct ltt_event_header header; + + if (unlikely(rflags)) + goto slow_path; + + header.id_time = (u32)eID << LTT_TSC_BITS; /* u32: do not shift a promoted int into its sign bit */ + header.id_time |= (u32)tsc & LTT_TSC_MASK; + ltt_relay_write(bufa, chana, buf_offset, &header, sizeof(header)); + buf_offset += sizeof(header); + + return buf_offset; + +slow_path: + return ltt_write_event_header_slow(bufa, chana, buf_offset, + eID, event_size, tsc, rflags); +} + +/* + * ltt_read_event_header + * buf_offset must be aligned on 32 bits + */ +static __inline__ +size_t ltt_read_event_header(struct ltt_chanbuf_alloc *bufa, long buf_offset, + u64 *tsc, u32 *event_size, u16 *eID, + unsigned int *rflags) +{ + struct ltt_event_header header; + u16 small_size; + + ltt_relay_read(bufa, buf_offset, &header, sizeof(header)); + buf_offset += sizeof(header); + + *event_size = INT_MAX; + *eID = header.id_time >> LTT_TSC_BITS; + *tsc = header.id_time & LTT_TSC_MASK; + + switch (*eID) { + case 29: + *rflags = LTT_RFLAG_ID_SIZE_TSC; + ltt_relay_read(bufa, buf_offset, eID, sizeof(u16)); + buf_offset += sizeof(u16); + ltt_relay_read(bufa, buf_offset, &small_size, sizeof(u16)); + buf_offset += sizeof(u16); + if (small_size == LTT_MAX_SMALL_SIZE) { + ltt_relay_read(bufa, buf_offset, event_size, + sizeof(u32)); + buf_offset += sizeof(u32); + } else + *event_size = small_size; + buf_offset += ltt_align(buf_offset, sizeof(u64)); + ltt_relay_read(bufa, buf_offset, tsc, sizeof(u64)); + buf_offset += sizeof(u64); + break; + case 30: + *rflags = LTT_RFLAG_ID_SIZE; + ltt_relay_read(bufa, buf_offset, eID, sizeof(u16)); + buf_offset += sizeof(u16); + ltt_relay_read(bufa, buf_offset, &small_size, sizeof(u16)); + buf_offset += sizeof(u16); + if 
(small_size == LTT_MAX_SMALL_SIZE) { + ltt_relay_read(bufa, buf_offset, event_size, + sizeof(u32)); + buf_offset += sizeof(u32); + } else + *event_size = small_size; + break; + case 31: + *rflags = LTT_RFLAG_ID; + ltt_relay_read(bufa, buf_offset, eID, sizeof(u16)); + buf_offset += sizeof(u16); + break; + default: + *rflags = 0; + break; + } + + return buf_offset; +} + +/* Lockless LTTng */ + +/* Buffer offset macros */ + +/* + * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of + * the offset, which leaves only the buffer number. + */ +#define BUFFER_TRUNC(offset, chan) \ + ((offset) & (~((chan)->a.buf_size - 1))) +#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->a.buf_size - 1)) +#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->a.sb_size - 1)) +#define SUBBUF_ALIGN(offset, chan) \ + (((offset) + (chan)->a.sb_size) & (~((chan)->a.sb_size - 1))) +#define SUBBUF_TRUNC(offset, chan) \ + ((offset) & (~((chan)->a.sb_size - 1))) +#define SUBBUF_INDEX(offset, chan) \ + (BUFFER_OFFSET((offset), chan) >> (chan)->a.sb_size_order) + +/* + * Control channels : + * control/metadata + * control/interrupts + * control/... 
+ * + * cpu channel : + * cpu + */ +#define LTT_RELAY_ROOT "ltt" +#define LTT_RELAY_LOCKED_ROOT "ltt-locked" + +#define LTT_METADATA_CHANNEL "metadata_state" +#define LTT_FD_STATE_CHANNEL "fd_state" +#define LTT_GLOBAL_STATE_CHANNEL "global_state" +#define LTT_IRQ_STATE_CHANNEL "irq_state" +#define LTT_MODULE_STATE_CHANNEL "module_state" +#define LTT_NETIF_STATE_CHANNEL "netif_state" +#define LTT_SOFTIRQ_STATE_CHANNEL "softirq_state" +#define LTT_SWAP_STATE_CHANNEL "swap_state" +#define LTT_SYSCALL_STATE_CHANNEL "syscall_state" +#define LTT_TASK_STATE_CHANNEL "task_state" +#define LTT_VM_STATE_CHANNEL "vm_state" +#define LTT_FS_CHANNEL "fs" +#define LTT_INPUT_CHANNEL "input" +#define LTT_IPC_CHANNEL "ipc" +#define LTT_KERNEL_CHANNEL "kernel" +#define LTT_MM_CHANNEL "mm" +#define LTT_RCU_CHANNEL "rcu" + +#define LTT_FLIGHT_PREFIX "flight-" + +#define LTT_ASCII "ascii" + +/* Tracer properties */ +#define LTT_DEFAULT_SUBBUF_SIZE_LOW 65536 +#define LTT_DEFAULT_N_SUBBUFS_LOW 2 +#define LTT_DEFAULT_SUBBUF_SIZE_MED 262144 +#define LTT_DEFAULT_N_SUBBUFS_MED 2 +#define LTT_DEFAULT_SUBBUF_SIZE_HIGH 1048576 +#define LTT_DEFAULT_N_SUBBUFS_HIGH 2 +#define LTT_TRACER_MAGIC_NUMBER 0x00D6B7ED +#define LTT_TRACER_VERSION_MAJOR 2 +#define LTT_TRACER_VERSION_MINOR 6 + +/** + * ltt_write_trace_header - Write trace header + * @trace: Trace information + * @header: Memory address where the information must be written to + */ +static __inline__ +void ltt_write_trace_header(struct ltt_trace *trace, + struct ltt_subbuffer_header *header) +{ + header->magic_number = LTT_TRACER_MAGIC_NUMBER; + header->major_version = LTT_TRACER_VERSION_MAJOR; + header->minor_version = LTT_TRACER_VERSION_MINOR; + header->arch_size = sizeof(void *); + header->alignment = ltt_get_alignment(); + header->start_time_sec = trace->start_time.tv_sec; + header->start_time_usec = trace->start_time.tv_usec; + header->start_freq = trace->start_freq; + header->freq_scale = trace->freq_scale; +} + +/* + * Size reserved for 
high priority events (interrupts, NMI, BH) at the end of a + * nearly full buffer. User space won't use this last amount of space when in + * blocking mode. This space also includes the event header that would be + * written by this user space event. + */ +#define LTT_RESERVE_CRITICAL 4096 + +/* Register and unregister function pointers */ + +enum ltt_module_function { + LTT_FUNCTION_RUN_FILTER, + LTT_FUNCTION_FILTER_CONTROL, + LTT_FUNCTION_STATEDUMP +}; + +extern int ltt_module_register(enum ltt_module_function name, void *function, + struct module *owner); +extern void ltt_module_unregister(enum ltt_module_function name); + +void ltt_transport_register(struct ltt_transport *transport); +void ltt_transport_unregister(struct ltt_transport *transport); + +/* Exported control function */ + +enum ltt_control_msg { + LTT_CONTROL_START, + LTT_CONTROL_STOP, + LTT_CONTROL_CREATE_TRACE, + LTT_CONTROL_DESTROY_TRACE +}; + +union ltt_control_args { + struct { + enum trace_mode mode; + unsigned int subbuf_size_low; + unsigned int n_subbufs_low; + unsigned int subbuf_size_med; + unsigned int n_subbufs_med; + unsigned int subbuf_size_high; + unsigned int n_subbufs_high; + } new_trace; +}; + +int _ltt_trace_setup(const char *trace_name); +int ltt_trace_setup(const char *trace_name); +struct ltt_trace *_ltt_trace_find_setup(const char *trace_name); +int ltt_trace_set_type(const char *trace_name, const char *trace_type); +int ltt_trace_set_channel_subbufsize(const char *trace_name, + const char *channel_name, + unsigned int size); +int ltt_trace_set_channel_subbufcount(const char *trace_name, + const char *channel_name, + unsigned int cnt); +int ltt_trace_set_channel_switch_timer(const char *trace_name, + const char *channel_name, + unsigned long interval); +int ltt_trace_set_channel_enable(const char *trace_name, + const char *channel_name, + unsigned int enable); +int ltt_trace_set_channel_overwrite(const char *trace_name, + const char *channel_name, + unsigned int overwrite); 
+int ltt_trace_alloc(const char *trace_name); +int ltt_trace_destroy(const char *trace_name); +int ltt_trace_start(const char *trace_name); +int ltt_trace_stop(const char *trace_name); + +extern int ltt_control(enum ltt_control_msg msg, const char *trace_name, + const char *trace_type, union ltt_control_args args); + +enum ltt_filter_control_msg { + LTT_FILTER_DEFAULT_ACCEPT, + LTT_FILTER_DEFAULT_REJECT +}; + +extern int ltt_filter_control(enum ltt_filter_control_msg msg, + const char *trace_name); + +extern struct dentry *get_filter_root(void); + +void ltt_core_register(int (*function)(u8, void *)); + +void ltt_core_unregister(void); + +void ltt_release_trace(struct kref *kref); +void ltt_release_transport(struct kref *kref); + +extern int ltt_probe_register(struct ltt_available_probe *pdata); +extern int ltt_probe_unregister(struct ltt_available_probe *pdata); +extern int ltt_marker_connect(const char *channel, const char *mname, + const char *pname); +extern int ltt_marker_disconnect(const char *channel, const char *mname, + const char *pname); +extern void ltt_dump_marker_state(struct ltt_trace *trace); + +void ltt_lock_traces(void); +void ltt_unlock_traces(void); + +extern int ltt_ascii_create_dir(struct ltt_trace *new_trace); +extern void ltt_ascii_remove_dir(struct ltt_trace *trace); +extern int ltt_ascii_create(struct ltt_chan *chan); +extern void ltt_ascii_remove(struct ltt_chan *chan); + +extern +void ltt_statedump_register_kprobes_dump(void (*callback)(void *call_data)); +extern +void ltt_statedump_unregister_kprobes_dump(void (*callback)(void *call_data)); + +extern void ltt_dump_softirq_vec(void *call_data); + +extern void ltt_dump_sys_call_table(void *call_data); +extern void ltt_dump_idt_table(void *call_data); + +/* Relay IOCTL */ + +/* Get the next sub-buffer that can be read. */ +#define RELAY_GET_SB _IOR(0xF5, 0x00, __u32) +/* Release the oldest reserved (by "get") sub-buffer. 
*/ +#define RELAY_PUT_SB _IOW(0xF5, 0x01, __u32) +/* returns the number of sub-buffers in the per cpu channel. */ +#define RELAY_GET_N_SB _IOR(0xF5, 0x02, __u32) +/* returns the size of the current sub-buffer. */ +#define RELAY_GET_SB_SIZE _IOR(0xF5, 0x03, __u32) +/* returns the maximum size for sub-buffers. */ +#define RELAY_GET_MAX_SB_SIZE _IOR(0xF5, 0x04, __u32) + +#endif /* _LTT_TRACER_H */ diff --git a/ltt-type-serializer.c b/ltt-type-serializer.c new file mode 100644 index 00000000..cb92aeed --- /dev/null +++ b/ltt-type-serializer.c @@ -0,0 +1,113 @@ +/** + * ltt-type-serializer.c + * + * LTTng specialized type serializer. + * + * Copyright Mathieu Desnoyers, 2008. + * + * Dual LGPL v2.1/GPL v2 license. + */ +#include + +#include "ltt-type-serializer.h" +#include "ltt-relay-lockless.h" + +notrace +void _ltt_specialized_trace(const struct marker *mdata, void *probe_data, + void *serialize_private, unsigned int data_size, + unsigned int largest_align) +{ + int ret; + uint16_t eID; + size_t slot_size; + unsigned int chan_index; + struct ltt_chanbuf *buf; + struct ltt_chan *chan; + struct ltt_trace *trace; + uint64_t tsc; + long buf_offset; + int cpu; + unsigned int rflags; + + /* + * If we get here, it's probably because we have useful work to do. + */ + if (unlikely(ltt_traces.num_active_traces == 0)) + return; + + rcu_read_lock_sched_notrace(); + cpu = smp_processor_id(); + __get_cpu_var(ltt_nesting)++; + /* + * asm volatile and "memory" clobber prevent the compiler from moving + * instructions out of the ltt nesting count. This is required to ensure + * that probe side-effects which can cause recursion (e.g. unforeseen + * traps, divisions by 0, ...) are triggered within the incremented + * nesting count section. + */ + barrier(); + eID = mdata->event_id; + chan_index = mdata->channel_id; + + /* + * Iterate on each trace, typically small number of active traces, + * list iteration with prefetch is usually slower. 
+ */ + __list_for_each_entry_rcu(trace, &ltt_traces.head, list) { + if (unlikely(!trace->active)) + continue; + if (unlikely(!ltt_run_filter(trace, eID))) + continue; +#ifdef CONFIG_LTT_DEBUG_EVENT_SIZE + rflags = LTT_RFLAG_ID_SIZE; +#else + if (unlikely(eID >= LTT_FREE_EVENTS)) + rflags = LTT_RFLAG_ID; + else + rflags = 0; +#endif + /* + * Skip channels added after trace creation. + */ + if (unlikely(chan_index >= trace->nr_channels)) + continue; + chan = &trace->channels[chan_index]; + if (!chan->active) + continue; + + /* reserve space : header and data */ + ret = ltt_reserve_slot(chan, trace, data_size, largest_align, + cpu, &buf, &slot_size, &buf_offset, &tsc, + &rflags); + if (unlikely(ret < 0)) + continue; /* buffer full */ + + /* Out-of-order write : header and data */ + buf_offset = ltt_write_event_header(&buf->a, &chan->a, + buf_offset, eID, data_size, + tsc, rflags); + if (data_size) { + buf_offset += ltt_align(buf_offset, largest_align); + ltt_relay_write(&buf->a, &chan->a, buf_offset, + serialize_private, data_size); + buf_offset += data_size; + } + /* Out-of-order commit */ + ltt_commit_slot(buf, chan, buf_offset, data_size, slot_size); + } + /* + * asm volatile and "memory" clobber prevent the compiler from moving + * instructions out of the ltt nesting count. This is required to ensure + * that probe side-effects which can cause recursion (e.g. unforeseen + * traps, divisions by 0, ...) are triggered within the incremented + * nesting count section. 
+ */ + barrier(); + __get_cpu_var(ltt_nesting)--; + rcu_read_unlock_sched_notrace(); +} +EXPORT_SYMBOL_GPL(_ltt_specialized_trace); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("LTT type serializer"); diff --git a/ltt-type-serializer.h b/ltt-type-serializer.h new file mode 100644 index 00000000..3e5cd3c3 --- /dev/null +++ b/ltt-type-serializer.h @@ -0,0 +1,187 @@ +#ifndef _LTT_TYPE_SERIALIZER_H +#define _LTT_TYPE_SERIALIZER_H + +#include <linux/if.h> /* For IFNAMSIZ */ + +#include "ltt-tracer.h" + +/* + * largest_align must be non-zero, equal to the minimum between the largest type + * and sizeof(void *). + */ +extern void _ltt_specialized_trace(const struct marker *mdata, void *probe_data, + void *serialize_private, unsigned int data_size, + unsigned int largest_align); + +/* + * Statically check that 0 < largest_align <= sizeof(void *) to make sure it is + * dumb-proof. It will make sure 0 is changed into 1 and unsigned long long is + * changed into sizeof(void *) on 32-bit architectures. + */ +static inline void ltt_specialized_trace(const struct marker *mdata, + void *probe_data, + void *serialize_private, unsigned int data_size, + unsigned int largest_align) +{ + largest_align = min_t(unsigned int, largest_align, sizeof(void *)); + largest_align = max_t(unsigned int, largest_align, 1); + _ltt_specialized_trace(mdata, probe_data, serialize_private, data_size, + largest_align); +} + +/* + * Type serializer definitions. + */ + +/* + * Return size of structure without end-of-structure padding. 
+ */ +#define serialize_sizeof(type) offsetof(typeof(type), end_field) + +struct serialize_long_int { + unsigned long f1; + unsigned int f2; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_int_int_long { + unsigned int f1; + unsigned int f2; + unsigned long f3; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_int_int_short { + unsigned int f1; + unsigned int f2; + unsigned short f3; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_long_long { + unsigned long f1; + unsigned long f2; + unsigned long f3; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_long_int { + unsigned long f1; + unsigned long f2; + unsigned int f3; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_long_short_char { + unsigned long f1; + unsigned long f2; + unsigned short f3; + unsigned char f4; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_long_short { + unsigned long f1; + unsigned long f2; + unsigned short f3; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_short_char { + unsigned long f1; + unsigned short f2; + unsigned char f3; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_short { + unsigned long f1; + unsigned short f2; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_char { + unsigned long f1; + unsigned char f2; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_ifname { + unsigned long f1; + unsigned char f2[IFNAMSIZ]; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_sizet_int { + size_t f1; + unsigned int f2; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_long_sizet_int { + unsigned long f1; + unsigned long f2; + size_t f3; + unsigned int f4; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_long_long_sizet_int_int { + unsigned long f1; + unsigned long f2; + size_t f3; + unsigned int f4; + unsigned int f5; + unsigned char end_field[0]; +} 
LTT_ALIGN; + +struct serialize_l4421224411111 { + unsigned long f1; + uint32_t f2; + uint32_t f3; + uint16_t f4; + uint8_t f5; + uint16_t f6; + uint16_t f7; + uint32_t f8; + uint32_t f9; + uint8_t f10; + uint8_t f11; + uint8_t f12; + uint8_t f13; + uint8_t f14; + unsigned char end_field[0]; +} LTT_ALIGN; + +struct serialize_l214421224411111 { + unsigned long f1; + uint16_t f2; + uint8_t f3; + uint32_t f4; + uint32_t f5; + uint16_t f6; + uint8_t f7; + uint16_t f8; + uint16_t f9; + uint32_t f10; + uint32_t f11; + uint8_t f12; + uint8_t f13; + uint8_t f14; + uint8_t f15; + uint8_t f16; + uint8_t end_field[0]; +} LTT_ALIGN; + +struct serialize_l4412228 { + unsigned long f1; + uint32_t f2; + uint32_t f3; + uint8_t f4; + uint16_t f5; + uint16_t f6; + uint16_t f7; + uint64_t f8; + unsigned char end_field[0]; +} LTT_ALIGN; +#endif /* _LTT_TYPE_SERIALIZER_H */ diff --git a/ltt-userspace-event.c b/ltt-userspace-event.c new file mode 100644 index 00000000..c716d724 --- /dev/null +++ b/ltt-userspace-event.c @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2008 Mathieu Desnoyers + * + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ltt-type-serializer.h" + +#define LTT_WRITE_EVENT_FILE "write_event" + +DEFINE_MARKER(userspace, event, "string %s"); +static struct dentry *ltt_event_file; + +/** + * write_event - write a userspace string into the trace system + * @file: file pointer + * @user_buf: user string + * @count: length to copy, including the final NULL + * @ppos: unused + * + * Copy a string into a trace event, in channel "userspace", event "event". + * Copies until either \n or \0 is reached. + * On success, returns the number of bytes copied from the source, including the + * \n or \0 character (if there was one in the count range). It cannot return + * more than count. + * Inspired from tracing_mark_write implementation from Steven Rostedt and + * Ingo Molnar. 
+ */ +static +ssize_t write_event(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct marker *marker; + char *buf, *end; + long copycount; + ssize_t ret; + + buf = kmalloc(count + 1, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto string_out; + } + copycount = strncpy_from_user(buf, user_buf, count); + if (copycount < 0) { + ret = -EFAULT; + goto string_err; + } + /* Cut from the first nil or newline. */ + buf[copycount] = '\0'; + end = strchr(buf, '\n'); + if (end) { + *end = '\0'; + copycount = end - buf; + } + /* Add final \0 to copycount */ + copycount++; + marker = &GET_MARKER(userspace, event); + ltt_specialized_trace(marker, marker->single.probe_private, buf, + copycount, sizeof(char)); + /* If there is no \0 nor \n in count, do not return a larger value */ + ret = min_t(size_t, copycount, count); +string_err: + kfree(buf); +string_out: + return ret; +} + +static const struct file_operations ltt_userspace_operations = { + .write = write_event, +}; + +static int __init ltt_userspace_init(void) +{ + struct dentry *ltt_root_dentry; + int err = 0; + + ltt_root_dentry = get_ltt_root(); + if (!ltt_root_dentry) { + err = -ENOENT; + goto err_no_root; + } + + ltt_event_file = debugfs_create_file(LTT_WRITE_EVENT_FILE, + S_IWUGO, + ltt_root_dentry, + NULL, + &ltt_userspace_operations); + if (IS_ERR(ltt_event_file) || !ltt_event_file) { + printk(KERN_ERR + "ltt_userspace_init: failed to create file %s\n", + LTT_WRITE_EVENT_FILE); + err = -EPERM; + goto err_no_file; + } + + return err; +err_no_file: + put_ltt_root(); +err_no_root: + return err; +} + +static void __exit ltt_userspace_exit(void) +{ + debugfs_remove(ltt_event_file); + put_ltt_root(); +} + +module_init(ltt_userspace_init); +module_exit(ltt_userspace_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers "); +MODULE_DESCRIPTION("Linux Trace Toolkit Userspace Event"); diff --git a/probes/Makefile b/probes/Makefile new file mode 100644 
index 00000000..d8f1c403 --- /dev/null +++ b/probes/Makefile @@ -0,0 +1,47 @@ +# LTTng tracing probes + +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_kernel-trace.o = -pg +CFLAGS_REMOVE_mm-trace.o = -pg +CFLAGS_REMOVE_fs-trace.o = -pg +CFLAGS_REMOVE_ipc-trace.o = -pg +CFLAGS_REMOVE_lockdep-trace.o = -pg +CFLAGS_REMOVE_rcu-trace.o = -pg +CFLAGS_REMOVE_syscall-trace.o = -pg +CFLAGS_REMOVE_trap-trace.o = -pg +CFLAGS_REMOVE_pm-trace.o = -pg +endif + +obj-m += kernel-trace.o mm-trace.o fs-trace.o ipc-trace.o lockdep-trace.o \ + rcu-trace.o syscall-trace.o trap-trace.o pm-trace.o + +ifeq ($(CONFIG_NET),y) +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_net-trace.o = -pg +CFLAGS_REMOVE_net-extended-trace.o = -pg +endif +obj-m += net-trace.o net-extended-trace.o +endif + +ifdef CONFIG_JBD2 +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_jbd2-trace.o = -pg +endif +obj-m += jbd2-trace.o +endif + +#ifdef CONFIG_EXT4_FS +#ifdef CONFIG_FTRACE +#CFLAGS_REMOVE_ext4-trace.o = -pg +#endif +#obj-$(CONFIG_LTT_TRACEPROBES) += ext4-trace.o +#endif + +ifdef CONFIG_BLOCK +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_block-trace.o = -pg +endif +obj-m += block-trace.o +endif + + diff --git a/probes/block-trace.c b/probes/block-trace.c new file mode 100644 index 00000000..51ae2cdd --- /dev/null +++ b/probes/block-trace.c @@ -0,0 +1,309 @@ +/* + * ltt/probes/block-trace.c + * + * block layer tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include + +#include + +/* + * Add rq cmd as a sequence. Needs new type. 
(size + binary blob) + */ + +void probe_block_rq_abort(void *data, struct request_queue *q, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + trace_mark_tp(block, rq_abort_pc, block_rq_abort, + probe_block_rq_abort, + "data_len %u rw %d errors %d", + blk_rq_bytes(rq), rw, rq->errors); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both rq_abort_pc and rq_abort_fs + * markers to have the rq_abort_fs marker enabled. + */ + trace_mark(block, rq_abort_fs, + "hard_sector %llu " + "rw %d errors %d", (unsigned long long)blk_rq_pos(rq), + rw, rq->errors); + } +} + +void probe_block_rq_insert(void *data, struct request_queue *q, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + trace_mark_tp(block, rq_insert_pc, block_rq_insert, + probe_block_rq_insert, + "data_len %u rw %d errors %d", + blk_rq_bytes(rq), rw, rq->errors); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both rq_insert_pc and + * rq_insert_fs markers to have the rq_insert_fs marker enabled. 
+ */ + trace_mark(block, rq_insert_fs, + "hard_sector %llu " + "rw %d errors %d", (unsigned long long)blk_rq_pos(rq), + rw, rq->errors); + } +} + +void probe_block_rq_issue(void *data, struct request_queue *q, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + trace_mark_tp(block, rq_issue_pc, block_rq_issue, + probe_block_rq_issue, + "data_len %u rw %d errors %d", + blk_rq_bytes(rq), rw, rq->errors); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both rq_issue_pc and rq_issue_fs + * markers to have the rq_issue_fs marker enabled. + */ + trace_mark(block, rq_issue_fs, + "hard_sector %llu " + "rw %d errors %d", (unsigned long long)blk_rq_pos(rq), + rw, rq->errors); + } +} + +void probe_block_rq_requeue(void *data, struct request_queue *q, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + trace_mark_tp(block, rq_requeue_pc, block_rq_requeue, + probe_block_rq_requeue, + "data_len %u rw %d errors %d", + blk_rq_bytes(rq), rw, rq->errors); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both rq_requeue_pc and + * rq_requeue_fs markers to have the rq_requeue_fs marker + * enabled. 
+ */ + trace_mark(block, rq_requeue_fs, + "hard_sector %llu " + "rw %d errors %d", (unsigned long long)blk_rq_pos(rq), + rw, rq->errors); + } +} + +void probe_block_rq_complete(void *data, struct request_queue *q, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + trace_mark_tp(block, rq_complete_pc, block_rq_complete, + probe_block_rq_complete, + "data_len %u rw %d errors %d", + blk_rq_bytes(rq), rw, rq->errors); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both rq_complete_pc and + * rq_complete_fs markers to have the rq_complete_fs marker + * enabled. + */ + trace_mark(block, rq_complete_fs, + "hard_sector %llu " + "rw %d errors %d", (unsigned long long)blk_rq_pos(rq), + rw, rq->errors); + } +} + +void probe_block_bio_bounce(void *data, struct request_queue *q, struct bio *bio) +{ + trace_mark_tp(block, bio_bounce, block_bio_bounce, + probe_block_bio_bounce, + "sector %llu size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); +} + +void probe_block_bio_complete(void *data, struct request_queue *q, struct bio *bio) +{ + trace_mark_tp(block, bio_complete, block_bio_complete, + probe_block_bio_complete, + "sector %llu size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); +} + +void probe_block_bio_backmerge(void *data, struct request_queue *q, struct bio *bio) +{ + trace_mark_tp(block, bio_backmerge, block_bio_backmerge, + 
probe_block_bio_backmerge, + "sector %llu size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); +} + +void probe_block_bio_frontmerge(void *data, struct request_queue *q, struct bio *bio) +{ + trace_mark_tp(block, bio_frontmerge, block_bio_frontmerge, + probe_block_bio_frontmerge, + "sector %llu size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); +} + +void probe_block_bio_queue(void *data, struct request_queue *q, struct bio *bio) +{ + trace_mark_tp(block, bio_queue, block_bio_queue, + probe_block_bio_queue, + "sector %llu size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); +} + +void probe_block_getrq(void *data, struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) { + trace_mark_tp(block, getrq_bio, block_getrq, + probe_block_getrq, + "sector %llu size %u " + "rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both getrq_bio and getrq markers + * to have the getrq marker enabled. 
+ */ + trace_mark(block, getrq, "rw %d", rw); + } +} + +void probe_block_sleeprq(void *data, struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) { + trace_mark_tp(block, sleeprq_bio, block_sleeprq, + probe_block_sleeprq, + "sector %llu size %u " + "rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE)); + } else { + /* + * FIXME Using a simple trace_mark for the second event + * possibility because tracepoints do not support multiple + * connections to the same probe yet. They should have some + * refcounting. Need to enable both sleeprq_bio and sleeprq + * markers to have the sleeprq marker enabled. + */ + trace_mark(block, sleeprq, "rw %d", rw); + } +} + +void probe_block_plug(void *data, struct request_queue *q) +{ + trace_mark_tp(block, plug, block_plug, probe_block_plug, + MARK_NOARGS); +} + +void probe_block_unplug_io(void *data, struct request_queue *q) +{ + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + + trace_mark_tp(block, unplug_io, block_unplug_io, probe_block_unplug_io, + "pdu %u", pdu); +} + +void probe_block_unplug_timer(void *data, struct request_queue *q) +{ + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + + trace_mark_tp(block, unplug_timer, block_unplug_timer, + probe_block_unplug_timer, + "pdu %u", pdu); +} + +void probe_block_split(void *data, struct request_queue *q, struct bio *bio, + unsigned int pdu) +{ + trace_mark_tp(block, split, block_split, + probe_block_split, + "sector %llu size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d pdu %u", + (unsigned long long)bio->bi_sector, bio->bi_size, + bio->bi_rw, !bio_flagged(bio, BIO_UPTODATE), pdu); +} + +void probe_block_remap(void *data, struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from) +{ + 
trace_mark_tp(block, remap, block_remap, + probe_block_remap, + "device_from %lu sector_from %llu device_to %lu " + "size %u rw(FAILFAST_DRIVER,FAILFAST_TRANSPORT," + "FAILFAST_DEV,DISCARD,META,SYNC,BARRIER,AHEAD,RW) %lX " + "not_uptodate #1u%d", + (unsigned long)bio->bi_bdev->bd_dev, + (unsigned long long)from, + (unsigned long)dev, + bio->bi_size, bio->bi_rw, + !bio_flagged(bio, BIO_UPTODATE)); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Block Tracepoint Probes"); diff --git a/probes/ext4-trace.c b/probes/ext4-trace.c new file mode 100644 index 00000000..83683e70 --- /dev/null +++ b/probes/ext4-trace.c @@ -0,0 +1,611 @@ +/* + * ltt/probes/ext4-trace.c + * + * ext4 tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include +#include + +#include "../ltt-tracer.h" +#include "../../fs/ext4/mballoc.h" + +static struct dentry *ext4_filter_dentry, *ext4_filter_dev_dentry, + *ext4_filter_inode_dentry; +static DEFINE_MUTEX(ext4_filter_mutex); +/* Make sure we don't race between module exit and file write */ +static int module_exits; + +struct rcu_dev_filter { + struct rcu_head rcu; + char devname[NAME_MAX]; +}; + +static struct rcu_dev_filter *dev_filter; +/* ~0UL inode_filter enables all inodes */ +static unsigned long inode_filter = ~0UL; + +/* + * Probes are executed in rcu_sched read-side critical section. + */ + +static int do_dev_filter(const char *dev) +{ + struct rcu_dev_filter *ldev_filter = rcu_dereference(dev_filter); + + if (unlikely(ldev_filter)) + if (unlikely(strcmp(ldev_filter->devname, dev))) + return 0; + return 1; +} + +static int do_inode_filter(unsigned long ino) +{ + if (unlikely(inode_filter != ~0UL)) + if (unlikely(inode_filter != ino)) + return 0; + return 1; +} + +/* + * Logical AND between dev and inode filter. 
+ */ +static int do_filter(const char *dev, unsigned long ino) +{ + if (unlikely(!do_dev_filter(dev))) + return 0; + if (unlikely(!do_inode_filter(ino))) + return 0; + return 1; +} + + +void probe_ext4_free_inode(void *data, struct inode *inode) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, free_inode, ext4_free_inode, + probe_ext4_free_inode, + "dev %s ino %lu mode %d uid %lu gid %lu blocks %llu", + inode->i_sb->s_id, inode->i_ino, inode->i_mode, + (unsigned long) inode->i_uid, (unsigned long) inode->i_gid, + (unsigned long long) inode->i_blocks); +} + +void probe_ext4_request_inode(void *data, struct inode *dir, int mode) +{ + if (unlikely(!do_filter(dir->i_sb->s_id, dir->i_ino))) + return; + trace_mark_tp(ext4, request_inode, ext4_request_inode, + probe_ext4_request_inode, + "dev %s dir %lu mode %d", + dir->i_sb->s_id, dir->i_ino, mode); +} + +void probe_ext4_allocate_inode(void *data, struct inode *inode, struct inode *dir, int mode) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino) + && !do_filter(dir->i_sb->s_id, dir->i_ino))) + return; + trace_mark_tp(ext4, allocate_inode, ext4_allocate_inode, + probe_ext4_allocate_inode, + "dev %s ino %lu dir %lu mode %d", + dir->i_sb->s_id, inode->i_ino, dir->i_ino, mode); +} + +void probe_ext4_write_begin(void *data, struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, write_begin, ext4_write_begin, + probe_ext4_write_begin, + "dev %s ino %lu pos %llu len %u flags %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, flags); +} + +void probe_ext4_ordered_write_end(void *data, struct inode *inode, loff_t pos, + unsigned int len, unsigned int copied) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, ordered_write_end, ext4_ordered_write_end, + probe_ext4_ordered_write_end, + "dev %s 
ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); +} +/* Same record layout as the ordered write_end probe, under the writeback_write_end marker. */ +void probe_ext4_writeback_write_end(void *data, struct inode *inode, loff_t pos, + unsigned int len, unsigned int copied) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, writeback_write_end, ext4_writeback_write_end, + probe_ext4_writeback_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); +} +/* Same record layout as the ordered write_end probe, under the journalled_write_end marker. */ +void probe_ext4_journalled_write_end(void *data, struct inode *inode, loff_t pos, + unsigned int len, unsigned int copied) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, journalled_write_end, ext4_journalled_write_end, + probe_ext4_journalled_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); +} + +/* + * note : wbc_flags will have to be decoded by userspace. + * #1x uses a single byte in the trace. Limits to 8 bits. + * Bit layout: nonblocking is bit 3, for_kupdate bit 2, for_reclaim bit 1, + * range_cyclic bit 0. + */ +void probe_ext4_da_writepages(void *data, struct inode *inode, + struct writeback_control *wbc) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, da_writepages, ext4_da_writepages, + probe_ext4_da_writepages, + "dev %s ino %lu nr_to_write %ld " + "pages_skipped %ld range_start %llu range_end %llu " + "wbc_flags(nonblocking,for_kupdate," + "for_reclaim,range_cyclic) #1x%u", + inode->i_sb->s_id, inode->i_ino, wbc->nr_to_write, + wbc->pages_skipped, + (unsigned long long) wbc->range_start, + (unsigned long long) wbc->range_end, + (wbc->nonblocking << 3) + | (wbc->for_kupdate << 2) + | (wbc->for_reclaim << 1) + | wbc->range_cyclic); +} + +/* + * note : wbc_flags will have to be decoded by userspace. + * #1x uses a single byte in the trace. Limits to 8 bits. 
+ * Bit layout: encountered_congestion is bit 2, more_io bit 1, + * no_nrwrite_index_update bit 0. + */ +void probe_ext4_da_writepages_result(void *data, struct inode *inode, + struct writeback_control *wbc, + int ret, int pages_written) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, da_writepages_result, ext4_da_writepages_result, + probe_ext4_da_writepages_result, + "dev %s ino %lu ret %d pages_written %d " + "pages_skipped %ld " + "wbc_flags(encountered_congestion," + "more_io,no_nrwrite_index_update) #1x%u", + inode->i_sb->s_id, inode->i_ino, ret, pages_written, + wbc->pages_skipped, + (wbc->encountered_congestion << 2) + | (wbc->more_io << 1) + | wbc->no_nrwrite_index_update); +} +/* Delayed-allocation write_begin: same fields as probe_ext4_write_begin. */ +void probe_ext4_da_write_begin(void *data, struct inode *inode, loff_t pos, + unsigned int len, unsigned int flags) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, da_write_begin, ext4_da_write_begin, + probe_ext4_da_write_begin, + "dev %s ino %lu pos %llu len %u flags %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, flags); +} + +void probe_ext4_da_write_end(void *data, struct inode *inode, loff_t pos, + unsigned int len, unsigned int copied) +{ + if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino))) + return; + trace_mark_tp(ext4, da_write_end, ext4_da_write_end, + probe_ext4_da_write_end, + "dev %s ino %lu pos %llu len %u copied %u", + inode->i_sb->s_id, inode->i_ino, + (unsigned long long) pos, len, copied); +} +/* Only a super_block is available here, so just the device filter is applied. */ +void probe_ext4_discard_blocks(void *data, struct super_block *sb, unsigned long long blk, + unsigned long long count) +{ + if (unlikely(!do_dev_filter(sb->s_id))) + return; + trace_mark_tp(ext4, discard_blocks, ext4_discard_blocks, + probe_ext4_discard_blocks, + "dev %s blk %llu count %llu", + sb->s_id, blk, count); +} + +void probe_ext4_mb_new_inode_pa(void *data, struct ext4_allocation_context *ac, + struct ext4_prealloc_space *pa) +{ + if (unlikely(!do_filter(ac->ac_sb->s_id, ac->ac_inode->i_ino))) + return; + trace_mark_tp(ext4, 
mb_new_inode_pa, ext4_mb_new_inode_pa,
+		probe_ext4_mb_new_inode_pa,
+		"dev %s ino %lu pstart %llu len %u lstart %u",
+		ac->ac_sb->s_id, ac->ac_inode->i_ino, pa->pa_pstart,
+		pa->pa_len, pa->pa_lstart);
+}
+
+/* Group preallocation: no inode is reported, so only the device filter applies. */
+void probe_ext4_mb_new_group_pa(void *data, struct ext4_allocation_context *ac,
+				struct ext4_prealloc_space *pa)
+{
+	if (unlikely(!do_dev_filter(ac->ac_sb->s_id)))
+		return;
+	trace_mark_tp(ext4, mb_new_group_pa, ext4_mb_new_group_pa,
+		probe_ext4_mb_new_group_pa,
+		"dev %s pstart %llu len %u lstart %u",
+		ac->ac_sb->s_id, pa->pa_pstart,
+		pa->pa_len, pa->pa_lstart);
+}
+
+/*
+ * Filtered on the allocation context's inode; the inode number written to
+ * the trace is taken from the preallocation space (pa->pa_inode).
+ */
+void probe_ext4_mb_release_inode_pa(void *data, struct ext4_allocation_context *ac,
+				struct ext4_prealloc_space *pa,
+				unsigned long long block,
+				unsigned int count)
+{
+	if (unlikely(!do_filter(ac->ac_sb->s_id, ac->ac_inode->i_ino)))
+		return;
+	trace_mark_tp(ext4, mb_release_inode_pa, ext4_mb_release_inode_pa,
+		probe_ext4_mb_release_inode_pa,
+		"dev %s ino %lu block %llu count %u",
+		ac->ac_sb->s_id, pa->pa_inode->i_ino, block, count);
+}
+
+void probe_ext4_mb_release_group_pa(void *data, struct ext4_allocation_context *ac,
+				struct ext4_prealloc_space *pa)
+{
+	if (unlikely(!do_dev_filter(ac->ac_sb->s_id)))
+		return;
+	trace_mark_tp(ext4, mb_release_group_pa, ext4_mb_release_group_pa,
+		probe_ext4_mb_release_group_pa,
+		"dev %s pstart %llu len %d",
+		ac->ac_sb->s_id, pa->pa_pstart, pa->pa_len);
+}
+
+void probe_ext4_discard_preallocations(void *data, struct inode *inode)
+{
+	if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino)))
+		return;
+	trace_mark_tp(ext4, discard_preallocations,
+		ext4_discard_preallocations,
+		probe_ext4_discard_preallocations,
+		"dev %s ino %lu",
+		inode->i_sb->s_id, inode->i_ino);
+}
+
+void probe_ext4_mb_discard_preallocations(void *data, struct super_block *sb, int needed)
+{
+	if (unlikely(!do_dev_filter(sb->s_id)))
+		return;
+	trace_mark_tp(ext4, mb_discard_preallocations,
+		ext4_mb_discard_preallocations,
+		probe_ext4_mb_discard_preallocations,
+		"dev %s needed %d",
+		sb->s_id, needed);
+}
+
+/*
+ * Trace a block allocation request.
+ *
+ * The device name is only reachable through ar->inode->i_sb, so a request
+ * without an inode cannot be filtered or described. The previous code
+ * dereferenced ar->inode in exactly that case; such events are now dropped.
+ */
+void probe_ext4_request_blocks(void *data, struct ext4_allocation_request *ar)
+{
+	struct inode *inode = ar->inode;
+
+	if (unlikely(!inode))
+		return;
+	if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino)))
+		return;
+	trace_mark_tp(ext4, request_blocks, ext4_request_blocks,
+		probe_ext4_request_blocks,
+		"dev %s flags %u len %u ino %lu "
+		"lblk %llu goal %llu lleft %llu lright %llu "
+		"pleft %llu pright %llu",
+		inode->i_sb->s_id, ar->flags, ar->len,
+		inode->i_ino,
+		(unsigned long long) ar->logical,
+		(unsigned long long) ar->goal,
+		(unsigned long long) ar->lleft,
+		(unsigned long long) ar->lright,
+		(unsigned long long) ar->pleft,
+		(unsigned long long) ar->pright);
+}
+
+/*
+ * Trace the result of a block allocation. As in probe_ext4_request_blocks,
+ * a request without an inode is dropped instead of dereferencing NULL.
+ */
+void probe_ext4_allocate_blocks(void *data, struct ext4_allocation_request *ar,
+			unsigned long long block)
+{
+	if (unlikely(!ar->inode))
+		return;
+	if (unlikely(!do_filter(ar->inode->i_sb->s_id, ar->inode->i_ino)))
+		return;
+	trace_mark_tp(ext4, allocate_blocks, ext4_allocate_blocks,
+		probe_ext4_allocate_blocks,
+		"dev %s block %llu flags %u len %u ino %lu "
+		"logical %llu goal %llu lleft %llu lright %llu "
+		"pleft %llu pright %llu",
+		ar->inode->i_sb->s_id, (unsigned long long) block,
+		ar->flags, ar->len, ar->inode ? 
ar->inode->i_ino : 0,
+		(unsigned long long) ar->logical,
+		(unsigned long long) ar->goal,
+		(unsigned long long) ar->lleft,
+		(unsigned long long) ar->lright,
+		(unsigned long long) ar->pleft,
+		(unsigned long long) ar->pright);
+}
+
+void probe_ext4_free_blocks(void *data, struct inode *inode, __u64 block,
+			unsigned long count, int metadata)
+{
+	if (unlikely(!do_filter(inode->i_sb->s_id, inode->i_ino)))
+		return;
+	trace_mark_tp(ext4, free_blocks, ext4_free_blocks,
+		probe_ext4_free_blocks,
+		"dev %s block %llu count %lu metadata %d ino %lu",
+		inode->i_sb->s_id, (unsigned long long)block,
+		count, metadata, inode->i_ino);
+}
+
+/*
+ * The device filter must match; the inode filter passes when it matches
+ * either the file's inode or its parent directory's inode.
+ */
+void probe_ext4_sync_file(void *data, struct file *file, struct dentry *dentry,
+			int datasync)
+{
+	if (unlikely(!do_dev_filter(dentry->d_inode->i_sb->s_id)))
+		return;
+	if (unlikely(!do_inode_filter(dentry->d_inode->i_ino)
+			&& !do_inode_filter(dentry->d_parent->d_inode->i_ino)))
+		return;
+	trace_mark_tp(ext4, sync_file, ext4_sync_file,
+		probe_ext4_sync_file,
+		"dev %s datasync %d ino %ld parent %ld",
+		dentry->d_inode->i_sb->s_id, datasync, dentry->d_inode->i_ino,
+		dentry->d_parent->d_inode->i_ino);
+}
+
+void probe_ext4_sync_fs(void *data, struct super_block *sb, int wait)
+{
+	if (unlikely(!do_dev_filter(sb->s_id)))
+		return;
+	trace_mark_tp(ext4, sync_fs, ext4_sync_fs,
+		probe_ext4_sync_fs,
+		"dev %s wait %d",
+		sb->s_id, wait);
+}
+
+/* RCU callback: free a device filter once no probe can still be reading it. */
+static void free_dev_filter(struct rcu_head *head)
+{
+	kfree(container_of(head, struct rcu_dev_filter, rcu));
+}
+
+/*
+ * debugfs write handler for the "dev" filter file.
+ *
+ * Reads one whitespace-delimited token. "*" clears the filter; any other
+ * token becomes the device name probes must match. The new filter is
+ * published with rcu_assign_pointer() and the old one is freed through
+ * call_rcu_sched().
+ *
+ * Returns count on success, -EPERM when the module is exiting or no token
+ * was found, -EFAULT when the user buffer cannot be read, -ENOMEM when the
+ * new filter cannot be allocated.
+ */
+static ssize_t dev_filter_op_write(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int err = 0;
+	char buf[NAME_MAX];
+	int buf_size;
+	char name[NAME_MAX];
+	struct rcu_dev_filter *new, *old;
+
+	mutex_lock(&ext4_filter_mutex);
+	if (module_exits) {
+		err = -EPERM;
+		goto error;
+	}
+	buf_size = min(count, sizeof(buf) - 1);
+	/* copy_from_user() returns the number of bytes NOT copied, not an errno */
+	if (copy_from_user(buf, user_buf, buf_size)) {
+		err = -EFAULT;
+		goto error;
+	}
+	buf[buf_size] = 0;
+
+	if (sscanf(buf, "%s", name) != 1) {
+		err = -EPERM;
+		goto error;
+	}
+
+	old = dev_filter;
+
+	/* Empty string or * means all active */
+	if (name[0] == '\0' || (name[0] == '*' && name[1] == '\0')) {
+		new = NULL;
+	} else {
+		new = kmalloc(sizeof(*new), GFP_KERNEL);
+		/* keep the current filter in place if allocation fails */
+		if (!new) {
+			err = -ENOMEM;
+			goto error;
+		}
+		strcpy(new->devname, name);
+	}
+
+	rcu_assign_pointer(dev_filter, new);
+	if (old)
+		call_rcu_sched(&old->rcu, free_dev_filter);
+
+	mutex_unlock(&ext4_filter_mutex);
+	return count;
+
+error:
+	mutex_unlock(&ext4_filter_mutex);
+	return err;
+}
+
+/* debugfs read handler for "dev": reports "*" when no filter is set, else the device name. */
+static ssize_t dev_filter_op_read(struct file *filp, char __user *buffer,
+	size_t count, loff_t *ppos)
+{
+	ssize_t bcount;
+	const char *devname;
+
+	mutex_lock(&ext4_filter_mutex);
+	if (!dev_filter)
+		devname = "*";
+	else
+		devname = dev_filter->devname;
+	bcount = simple_read_from_buffer(buffer, count, ppos,
+			devname, strlen(devname));
+	mutex_unlock(&ext4_filter_mutex);
+	return bcount;
+}
+
+static struct file_operations ext4_dev_file_operations = {
+	.write = dev_filter_op_write,
+	.read = dev_filter_op_read,
+};
+
+/*
+ * debugfs write handler for the "inode" filter file.
+ *
+ * "*" re-enables all inodes (inode_filter = ~0UL); otherwise the input is
+ * parsed as an unsigned decimal inode number.
+ *
+ * Returns count on success, -EPERM when the module is exiting or the input
+ * does not parse, -EFAULT when the user buffer cannot be read.
+ */
+static ssize_t inode_filter_op_write(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int err = 0;
+	char buf[NAME_MAX];
+	int buf_size;
+	char name[NAME_MAX];
+	unsigned long inode_num;
+
+	mutex_lock(&ext4_filter_mutex);
+	if (module_exits) {
+		err = -EPERM;
+		goto error;
+	}
+	buf_size = min(count, sizeof(buf) - 1);
+	/* copy_from_user() returns the number of bytes NOT copied, not an errno */
+	if (copy_from_user(buf, user_buf, buf_size)) {
+		err = -EFAULT;
+		goto error;
+	}
+	buf[buf_size] = 0;
+
+	if (sscanf(buf, "%s", name) != 1) {
+		err = -EPERM;
+		goto error;
+	}
+
+	/* Empty string or * means all active */
+	if (name[0] == '\0' || (name[0] == '*' && name[1] == '\0')) {
+		inode_filter = ~0UL;
+	} else {
+		if (sscanf(buf, "%lu", &inode_num) != 1) {
+			err = -EPERM;
+			goto error;
+		}
+		inode_filter = inode_num;
+	}
+
+	mutex_unlock(&ext4_filter_mutex);
+	return count;
+
+error:
+	mutex_unlock(&ext4_filter_mutex);
+	return err;
+}
+
+static ssize_t inode_filter_op_read(struct file *filp, char __user 
*buffer,
+	size_t count, loff_t *ppos)
+{
+	ssize_t bcount;
+	char inode_str[NAME_MAX];
+
+	/* Reports "*" when all inodes are enabled, else the decimal inode number. */
+	mutex_lock(&ext4_filter_mutex);
+	if (inode_filter == ~0UL)
+		strcpy(inode_str, "*");
+	else {
+		bcount = snprintf(inode_str, sizeof(inode_str), "%lu",
+				inode_filter);
+		/*
+		 * snprintf() returns the length the full output would have
+		 * had, so any value >= the buffer size means truncation.
+		 */
+		if (bcount >= (ssize_t)sizeof(inode_str))
+			bcount = -ENOSPC;
+		if (bcount < 0)
+			goto end;
+	}
+	bcount = simple_read_from_buffer(buffer, count, ppos,
+			inode_str, strlen(inode_str));
+end:
+	mutex_unlock(&ext4_filter_mutex);
+	return bcount;
+}
+
+static struct file_operations ext4_inode_file_operations = {
+	.write = inode_filter_op_write,
+	.read = inode_filter_op_read,
+};
+
+/*
+ * Tear down the device filter: flag the module as exiting so later writes
+ * are refused, unpublish the filter and queue it for freeing.
+ *
+ * free_dev_filter() lives in this module's text, so every callback queued
+ * with call_rcu_sched() (here or by earlier writes) must have run before
+ * the module can be unloaded; rcu_barrier_sched() waits for them.
+ */
+static void release_filter_dev(void)
+{
+	struct rcu_dev_filter *old;
+
+	mutex_lock(&ext4_filter_mutex);
+	module_exits = 1;
+	old = dev_filter;
+	rcu_assign_pointer(dev_filter, NULL);
+	if (old)
+		call_rcu_sched(&old->rcu, free_dev_filter);
+	mutex_unlock(&ext4_filter_mutex);
+	rcu_barrier_sched();
+}
+
+/*
+ * Create the debugfs "ext4" filter directory with its "dev" and "inode"
+ * control files. Both files implement read and write, so they are created
+ * owner-readable as well as owner-writable.
+ */
+static int __init filter_init(void)
+{
+	struct dentry *filter_root_dentry;
+	int err = 0;
+
+	filter_root_dentry = get_filter_root();
+	if (!filter_root_dentry) {
+		err = -ENOENT;
+		goto end;
+	}
+
+	ext4_filter_dentry = debugfs_create_dir("ext4", filter_root_dentry);
+
+	if (IS_ERR(ext4_filter_dentry) || !ext4_filter_dentry) {
+		printk(KERN_ERR "Failed to create ext4 filter file\n");
+		err = -ENOMEM;
+		goto end;
+	}
+
+	ext4_filter_dev_dentry = debugfs_create_file("dev", S_IRUSR | S_IWUSR,
+			ext4_filter_dentry, NULL, &ext4_dev_file_operations);
+	if (IS_ERR(ext4_filter_dev_dentry) || !ext4_filter_dev_dentry) {
+		printk(KERN_ERR "Failed to create ext4 dev filter file\n");
+		err = -ENOMEM;
+		goto release_filter_dentry;
+	}
+
+	ext4_filter_inode_dentry = debugfs_create_file("inode", S_IRUSR | S_IWUSR,
+			ext4_filter_dentry, NULL, &ext4_inode_file_operations);
+	if (IS_ERR(ext4_filter_inode_dentry) || !ext4_filter_inode_dentry) {
+		printk(KERN_ERR "Failed to create ext4 inode filter file\n");
+		err = -ENOMEM;
+		goto release_filter_dev_dentry;
+	}
+
+	goto end;
+
+release_filter_dev_dentry: + debugfs_remove(ext4_filter_dev_dentry); +release_filter_dentry: + debugfs_remove(ext4_filter_dentry); + release_filter_dev(); +end: + return err; +} +/* Module exit: remove the two control files, then the directory, then drop the device filter. */ +static void __exit filter_exit(void) +{ + debugfs_remove(ext4_filter_dev_dentry); + debugfs_remove(ext4_filter_inode_dentry); + debugfs_remove(ext4_filter_dentry); + release_filter_dev(); +} + +module_init(filter_init); +module_exit(filter_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("ext4 Tracepoint Probes"); diff --git a/probes/fs-trace.c b/probes/fs-trace.c new file mode 100644 index 00000000..bca28275 --- /dev/null +++ b/probes/fs-trace.c @@ -0,0 +1,158 @@ +/* + * ltt/probes/fs-trace.c + * + * FS tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include + +#include "../ltt-type-serializer.h" +/* Only the buffer_head pointer value is recorded. */ +void probe_fs_buffer_wait_start(void *_data, struct buffer_head *bh) +{ + trace_mark_tp(fs, buffer_wait_start, fs_buffer_wait_start, + probe_fs_buffer_wait_start, "bh %p", bh); +} + +void probe_fs_buffer_wait_end(void *_data, struct buffer_head *bh) +{ + trace_mark_tp(fs, buffer_wait_end, fs_buffer_wait_end, + probe_fs_buffer_wait_end, "bh %p", bh); +} + +void probe_fs_exec(void *_data, char *filename) +{ + trace_mark_tp(fs, exec, fs_exec, probe_fs_exec, "filename %s", + filename); +} + +void probe_fs_ioctl(void *_data, unsigned int fd, unsigned int cmd, unsigned long arg) +{ + trace_mark_tp(fs, ioctl, fs_ioctl, probe_fs_ioctl, + "fd %u cmd %u arg %lu", fd, cmd, arg); +} + +void probe_fs_open(void *_data, int fd, char *filename) +{ + trace_mark_tp(fs, open, fs_open, probe_fs_open, + "fd %d filename %s", fd, filename); +} + +void probe_fs_close(void *_data, unsigned int fd) +{ + trace_mark_tp(fs, close, fs_close, probe_fs_close, "fd %u", fd); +} + +void probe_fs_lseek(void *_data, unsigned int fd, long offset, unsigned int origin) +{ + 
trace_mark_tp(fs, lseek, fs_lseek, probe_fs_lseek, + "fd %u offset %ld origin %u", fd, offset, origin); +} +/* loff_t offset is cast to long long to match the %lld conversion. */ +void probe_fs_llseek(void *_data, unsigned int fd, loff_t offset, unsigned int origin) +{ + trace_mark_tp(fs, llseek, fs_llseek, probe_fs_llseek, + "fd %u offset %lld origin %u", fd, + (long long)offset, origin); +} +/* fs_read specialized probe: packs count and fd into a fixed struct; buf and ret are not recorded. */ +void probe_fs_read(void *_data, unsigned int fd, char __user *buf, size_t count, + ssize_t ret); + +DEFINE_MARKER_TP(fs, read, fs_read, probe_fs_read, + "count %zu fd %u"); + +notrace void probe_fs_read(void *_data, unsigned int fd, char __user *buf, size_t count, + ssize_t ret) +{ + struct marker *marker; + struct serialize_sizet_int data; + + data.f1 = count; + data.f2 = fd; + + marker = &GET_MARKER(fs, read); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(size_t)); +} +/* fs_write specialized probe: same payload layout as probe_fs_read. */ +void probe_fs_write(void *_data, unsigned int fd, char __user *buf, size_t count, + ssize_t ret); + +DEFINE_MARKER_TP(fs, write, fs_write, probe_fs_write, + "count %zu fd %u"); + +notrace void probe_fs_write(void *_data, unsigned int fd, char __user *buf, size_t count, + ssize_t ret) +{ + struct marker *marker; + struct serialize_sizet_int data; + + data.f1 = count; + data.f2 = fd; + + marker = &GET_MARKER(fs, write); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(size_t)); +} +/* buf and ret are accepted but not written to the trace. */ +void probe_fs_pread64(void *_data, unsigned int fd, char __user *buf, size_t count, + loff_t pos, ssize_t ret) +{ + trace_mark_tp(fs, pread64, fs_pread64, probe_fs_pread64, + "fd %u count %zu pos %llu", + fd, count, (unsigned long long)pos); +} + +void probe_fs_pwrite64(void *_data, unsigned int fd, const char __user *buf, + size_t count, loff_t pos, ssize_t ret) +{ + trace_mark_tp(fs, pwrite64, fs_pwrite64, probe_fs_pwrite64, + "fd %u count %zu pos %llu", + fd, count, (unsigned long long)pos); +} + +void probe_fs_readv(void *_data, unsigned long fd, const struct iovec __user *vec, + 
unsigned long vlen, ssize_t ret) +{ + trace_mark_tp(fs, readv, fs_readv, probe_fs_readv, + "fd %lu vlen %lu", fd, vlen); +} +/* Only fd and vlen are recorded; vec and ret are ignored. */ +void probe_fs_writev(void *_data, unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, ssize_t ret) +{ + trace_mark_tp(fs, writev, fs_writev, probe_fs_writev, + "fd %lu vlen %lu", fd, vlen); +} +/* A NULL end_time is recorded as -1 for both the seconds and nanoseconds fields. */ +void probe_fs_select(void *_data, int fd, struct timespec *end_time) +{ + struct timespec tmptime; + + if (end_time) { + tmptime = *end_time; + } else { + tmptime.tv_sec = -1L; + tmptime.tv_nsec = -1L; + } + + trace_mark_tp(fs, select, fs_select, probe_fs_select, + "fd %d end_time_sec %ld end_time_nsec %ld", fd, + tmptime.tv_sec, tmptime.tv_nsec); +} +/* The marker is named pollfd while the tracepoint is fs_poll. */ +void probe_fs_poll(void *_data, int fd) +{ + trace_mark_tp(fs, pollfd, fs_poll, probe_fs_poll, + "fd %d", fd); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("FS Tracepoint Probes"); diff --git a/probes/ipc-trace.c b/probes/ipc-trace.c new file mode 100644 index 00000000..3a095252 --- /dev/null +++ b/probes/ipc-trace.c @@ -0,0 +1,39 @@ +/* + * ltt/probes/ipc-trace.c + * + * IPC tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include +/* The msg, sem and shm create probes all record the same two fields: id and flags. */ +void probe_ipc_msg_create(void *data, long id, int flags) +{ + trace_mark_tp(ipc, msg_create, ipc_msg_create, probe_ipc_msg_create, + "id %ld flags %d", id, flags); +} + +void probe_ipc_sem_create(void *data, long id, int flags) +{ + trace_mark_tp(ipc, sem_create, ipc_sem_create, probe_ipc_sem_create, + "id %ld flags %d", id, flags); +} + +void probe_ipc_shm_create(void *data, long id, int flags) +{ + trace_mark_tp(ipc, shm_create, ipc_shm_create, probe_ipc_shm_create, + "id %ld flags %d", id, flags); +} +/* NOTE(review): first is unsigned int but is formatted with %d; confirm before changing the marker format. */ +void probe_ipc_call(void *data, unsigned int call, unsigned int first) +{ + trace_mark_tp(ipc, call, ipc_call, probe_ipc_call, + "call %u first %d", call, first); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("IPC Tracepoint Probes"); diff --git a/probes/jbd2-trace.c b/probes/jbd2-trace.c new file mode 100644 index 00000000..3da32cd4 --- /dev/null +++ b/probes/jbd2-trace.c @@ -0,0 +1,208 @@ +/* + * ltt/probes/jbd2-trace.c + * + * JBD2 tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include + +#include "../ltt-tracer.h" + +static struct dentry *jbd2_filter_dentry, *jbd2_filter_dev_dentry; +static DEFINE_MUTEX(jbd2_filter_mutex); +/* Make sure we don't race between module exit and file write */ +static int module_exits; +/* Device-name filter; the embedded rcu_head lets an old filter be freed from an RCU callback. */ +struct rcu_dev_filter { + struct rcu_head rcu; + char devname[NAME_MAX]; +}; + +static struct rcu_dev_filter *dev_filter; + +/* + * Probes are executed in rcu_sched read-side critical section. 
+ * Returns 1 when no device filter is installed or dev equals its devname,
+ * 0 otherwise.
+ */
+static int do_filter(const char *dev)
+{
+	struct rcu_dev_filter *ldev_filter = rcu_dereference(dev_filter);
+
+	if (unlikely(ldev_filter))
+		if (unlikely(strcmp(ldev_filter->devname, dev)))
+			return 0;
+	return 1;
+}
+
+void probe_jbd2_checkpoint(void *data, journal_t *journal, int result)
+{
+	if (unlikely(!do_filter(journal->j_devname)))
+		return;
+	trace_mark_tp(jbd2, checkpoint, jbd2_checkpoint,
+		probe_jbd2_checkpoint, "dev %s need_checkpoint %d",
+		journal->j_devname, result);
+}
+
+void probe_jbd2_start_commit(void *data, journal_t *journal,
+			transaction_t *commit_transaction)
+{
+	if (unlikely(!do_filter(journal->j_devname)))
+		return;
+	trace_mark_tp(jbd2, start_commit, jbd2_start_commit,
+		probe_jbd2_start_commit, "dev %s transaction %d",
+		journal->j_devname, commit_transaction->t_tid);
+}
+
+/* The "head" field of this event is filled from journal->j_tail_sequence. */
+void probe_jbd2_end_commit(void *data, journal_t *journal,
+			transaction_t *commit_transaction)
+{
+	if (unlikely(!do_filter(journal->j_devname)))
+		return;
+	trace_mark_tp(jbd2, end_commit, jbd2_end_commit,
+		probe_jbd2_end_commit, "dev %s transaction %d head %d",
+		journal->j_devname, commit_transaction->t_tid,
+		journal->j_tail_sequence);
+}
+
+/* RCU callback: free a device filter once no probe can still be reading it. */
+static void free_dev_filter(struct rcu_head *head)
+{
+	kfree(container_of(head, struct rcu_dev_filter, rcu));
+}
+
+/*
+ * debugfs write handler for the jbd2 "dev" filter file.
+ *
+ * Reads one whitespace-delimited token. "*" clears the filter; any other
+ * token becomes the device name probes must match. The new filter is
+ * published with rcu_assign_pointer() and the old one is freed through
+ * call_rcu_sched().
+ *
+ * Returns count on success, -EPERM when the module is exiting or no token
+ * was found, -EFAULT when the user buffer cannot be read, -ENOMEM when the
+ * new filter cannot be allocated.
+ */
+static ssize_t filter_op_write(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int err = 0;
+	char buf[NAME_MAX];
+	int buf_size;
+	char name[NAME_MAX];
+	struct rcu_dev_filter *new, *old;
+
+	mutex_lock(&jbd2_filter_mutex);
+	if (module_exits) {
+		err = -EPERM;
+		goto error;
+	}
+	buf_size = min(count, sizeof(buf) - 1);
+	/* copy_from_user() returns the number of bytes NOT copied, not an errno */
+	if (copy_from_user(buf, user_buf, buf_size)) {
+		err = -EFAULT;
+		goto error;
+	}
+	buf[buf_size] = 0;
+
+	if (sscanf(buf, "%s", name) != 1) {
+		err = -EPERM;
+		goto error;
+	}
+
+	old = dev_filter;
+
+	/* Empty string or * means all active */
+	if (name[0] == '\0' || (name[0] == '*' && name[1] == '\0')) {
+		new = NULL;
+	} else {
+		new = kmalloc(sizeof(*new), GFP_KERNEL);
+		/* keep the current filter in place if allocation fails */
+		if (!new) {
+			err = -ENOMEM;
+			goto error;
+		}
+		strcpy(new->devname, name);
+	}
+
+	rcu_assign_pointer(dev_filter, new);
+	if (old)
+		call_rcu_sched(&old->rcu, free_dev_filter);
+
+	mutex_unlock(&jbd2_filter_mutex);
+	return count;
+
+error:
+	mutex_unlock(&jbd2_filter_mutex);
+	return err;
+}
+
+/* debugfs read handler: reports "*" when no filter is set, else the device name. */
+static ssize_t filter_op_read(struct file *filp, char __user *buffer,
+	size_t count, loff_t *ppos)
+{
+	ssize_t bcount;
+	const char *devname;
+
+	mutex_lock(&jbd2_filter_mutex);
+	if (!dev_filter)
+		devname = "*";
+	else
+		devname = dev_filter->devname;
+	bcount = simple_read_from_buffer(buffer, count, ppos,
+			devname, strlen(devname));
+	mutex_unlock(&jbd2_filter_mutex);
+	return bcount;
+}
+
+static struct file_operations jbd2_file_operations = {
+	.write = filter_op_write,
+	.read = filter_op_read,
+};
+
+/*
+ * Tear down the device filter: flag the module as exiting so later writes
+ * are refused, unpublish the filter and queue it for freeing.
+ *
+ * free_dev_filter() lives in this module's text, so every callback queued
+ * with call_rcu_sched() (here or by earlier writes) must have run before
+ * the module can be unloaded; rcu_barrier_sched() waits for them.
+ */
+static void release_filter_dev(void)
+{
+	struct rcu_dev_filter *old;
+
+	mutex_lock(&jbd2_filter_mutex);
+	module_exits = 1;
+	old = dev_filter;
+	rcu_assign_pointer(dev_filter, NULL);
+	if (old)
+		call_rcu_sched(&old->rcu, free_dev_filter);
+	mutex_unlock(&jbd2_filter_mutex);
+	rcu_barrier_sched();
+}
+
+/*
+ * Create the debugfs "jbd2" filter directory and its "dev" control file.
+ * The file implements read and write, so it is created owner-readable as
+ * well as owner-writable.
+ */
+static int __init filter_init(void)
+{
+	struct dentry *filter_root_dentry;
+	int err = 0;
+
+	filter_root_dentry = get_filter_root();
+	if (!filter_root_dentry) {
+		err = -ENOENT;
+		goto end;
+	}
+
+	jbd2_filter_dentry = debugfs_create_dir("jbd2", filter_root_dentry);
+
+	if (IS_ERR(jbd2_filter_dentry) || !jbd2_filter_dentry) {
+		printk(KERN_ERR "Failed to create jbd2 filter file\n");
+		err = -ENOMEM;
+		goto end;
+	}
+
+	jbd2_filter_dev_dentry = debugfs_create_file("dev", S_IRUSR | S_IWUSR,
+			jbd2_filter_dentry, NULL, &jbd2_file_operations);
+	/* check the file just created, not the directory checked above */
+	if (IS_ERR(jbd2_filter_dev_dentry) || !jbd2_filter_dev_dentry) {
+		printk(KERN_ERR "Failed to create jbd2 filter file\n");
+		err = -ENOMEM;
+		goto release_filter_dentry;
+	}
+
+	goto end;
+
+release_filter_dentry:
+	debugfs_remove(jbd2_filter_dentry);
+	release_filter_dev();
+end:
+	return err;
+}
+
+static void __exit filter_exit(void)
+{
+
debugfs_remove(jbd2_filter_dev_dentry); + debugfs_remove(jbd2_filter_dentry); + release_filter_dev(); +} + +module_init(filter_init); +module_exit(filter_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("JBD2 Tracepoint Probes"); diff --git a/probes/kernel-trace.c b/probes/kernel-trace.c new file mode 100644 index 00000000..cabe60e1 --- /dev/null +++ b/probes/kernel-trace.c @@ -0,0 +1,581 @@ +/* + * ltt/probes/kernel-trace.c + * + * kernel tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../ltt-tracer.h" +#include "../ltt-type-serializer.h" + +/* + * This should probably be added to s390. + */ +#ifdef CONFIG_S390 +static struct pt_regs *get_irq_regs(void) +{ + return task_pt_regs(current); +} +#endif + +/* + * FIXME : + * currently, the specialized tracepoint probes cannot call into other marker + * probes, such as ftrace enable/disable. Given we want them to be as fast as + * possible, it might not be so bad to lose this flexibility. But that means + * such probes would have to connect to tracepoints on their own. + */ + +/* kernel_irq_entry specialized tracepoint probe */ + +void probe_irq_entry(void *_data, unsigned int id, struct pt_regs *regs, + struct irqaction *action); + +DEFINE_MARKER_TP(kernel, irq_entry, irq_entry, probe_irq_entry, + "ip %lu handler %p irq_id #2u%u kernel_mode #1u%u"); + +notrace void probe_irq_entry(void *_data, unsigned int id, struct pt_regs *regs, + struct irqaction *action) +{ + struct marker *marker; + struct serialize_long_long_short_char data; + /* use get_irq_regs() when the caller passes no regs; with no regs at all, record ip 0 and kernel_mode 1 */ + if (unlikely(!regs)) + regs = get_irq_regs(); + if (likely(regs)) { + data.f1 = instruction_pointer(regs); + data.f4 = !user_mode(regs); + } else { + data.f1 = 0UL; + data.f4 = 1; + } + data.f2 = (unsigned long) (action ? 
action->handler : NULL); + data.f3 = id; + + marker = &GET_MARKER(kernel, irq_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} +/* kernel_irq_next_handler specialized tracepoint probe; id is accepted but not recorded */ +void probe_irq_next_handler(void *_data, unsigned int id, struct irqaction *action, + irqreturn_t prev_ret); + +DEFINE_MARKER_TP(kernel, irq_next_handler, irq_next_handler, + probe_irq_next_handler, + "handler %p prev_ret #1u%u"); + +notrace void probe_irq_next_handler(void *_data, unsigned int id, struct irqaction *action, + irqreturn_t prev_ret) +{ + struct marker *marker; + struct serialize_long_char data; + + data.f1 = (unsigned long) (action ? action->handler : NULL); + data.f2 = prev_ret; + + marker = &GET_MARKER(kernel, irq_next_handler); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* kernel_irq_exit specialized tracepoint probe */ + +void probe_irq_exit(void *_data, irqreturn_t retval); + +DEFINE_MARKER_TP(kernel, irq_exit, irq_exit, probe_irq_exit, + "handled #1u%u"); + +notrace void probe_irq_exit(void *_data, irqreturn_t retval) +{ + struct marker *marker; + unsigned char data; + + data = IRQ_RETVAL(retval); + + marker = &GET_MARKER(kernel, irq_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_softirq_entry specialized tracepoint probe */ + +void probe_softirq_entry(void *_data, struct softirq_action *h, + struct softirq_action *softirq_vec); + +DEFINE_MARKER_TP(kernel, softirq_entry, softirq_entry, + probe_softirq_entry, "softirq_id #1u%lu"); + +notrace void probe_softirq_entry(void *_data, struct softirq_action *h, + struct softirq_action *softirq_vec) +{ + struct marker *marker; + unsigned char data; + /* index of h within softirq_vec, stored in a single byte */ + data = ((unsigned long)h - (unsigned long)softirq_vec) / sizeof(*h); + + marker = &GET_MARKER(kernel, softirq_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), 
sizeof(data)); +} + +/* kernel_softirq_exit specialized tracepoint probe */ + +void probe_softirq_exit(void *_data, struct softirq_action *h, + struct softirq_action *softirq_vec); + +DEFINE_MARKER_TP(kernel, softirq_exit, softirq_exit, + probe_softirq_exit, "softirq_id #1u%lu"); + +notrace void probe_softirq_exit(void *_data, struct softirq_action *h, + struct softirq_action *softirq_vec) +{ + struct marker *marker; + unsigned char data; + /* index of h within softirq_vec, stored in a single byte */ + data = ((unsigned long)h - (unsigned long)softirq_vec) / sizeof(*h); + + marker = &GET_MARKER(kernel, softirq_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_softirq_raise specialized tracepoint probe */ + +void probe_softirq_raise(void *_data, unsigned int nr); + +DEFINE_MARKER_TP(kernel, softirq_raise, softirq_raise, + probe_softirq_raise, "softirq_id #1u%u"); + +notrace void probe_softirq_raise(void *_data, unsigned int nr) +{ + struct marker *marker; + unsigned char data; + /* nr is narrowed to one byte, matching the #1u marker field */ + data = nr; + + marker = &GET_MARKER(kernel, softirq_raise); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* Standard probes */ +void probe_irq_tasklet_low_entry(void *_data, struct tasklet_struct *t) +{ + trace_mark_tp(kernel, tasklet_low_entry, irq_tasklet_low_entry, + probe_irq_tasklet_low_entry, "func %p data %lu", + t->func, t->data); +} + +void probe_irq_tasklet_low_exit(void *_data, struct tasklet_struct *t) +{ + trace_mark_tp(kernel, tasklet_low_exit, irq_tasklet_low_exit, + probe_irq_tasklet_low_exit, "func %p data %lu", + t->func, t->data); +} + +void probe_irq_tasklet_high_entry(void *_data, struct tasklet_struct *t) +{ + trace_mark_tp(kernel, tasklet_high_entry, irq_tasklet_high_entry, + probe_irq_tasklet_high_entry, "func %p data %lu", + t->func, t->data); +} + +void probe_irq_tasklet_high_exit(void *_data, struct tasklet_struct *t) +{ + trace_mark_tp(kernel, tasklet_high_exit, irq_tasklet_high_exit, + 
probe_irq_tasklet_high_exit, "func %p data %lu", + t->func, t->data); +} + +void probe_sched_kthread_stop(void *_data, struct task_struct *t) +{ + trace_mark_tp(kernel, kthread_stop, sched_kthread_stop, + probe_sched_kthread_stop, "pid %d", t->pid); +} + +void probe_sched_kthread_stop_ret(void *_data, int ret) +{ + trace_mark_tp(kernel, kthread_stop_ret, sched_kthread_stop_ret, + probe_sched_kthread_stop_ret, "ret %d", ret); +} +/* The marker format declares state as a 2-byte signed field (#2d). */ +void probe_sched_wait_task(void *_data, struct task_struct *p) +{ + trace_mark_tp(kernel, sched_wait_task, sched_wait_task, + probe_sched_wait_task, "pid %d state #2d%ld", + p->pid, p->state); +} + +/* kernel_sched_try_wakeup specialized tracepoint probe */ + +void probe_sched_wakeup(void *_data, struct task_struct *p, int success); + +DEFINE_MARKER_TP(kernel, sched_try_wakeup, sched_wakeup, + probe_sched_wakeup, "pid %d cpu_id %u state #2d%ld"); + +notrace void probe_sched_wakeup(void *_data, struct task_struct *p, int success) +{ + struct marker *marker; + struct serialize_int_int_short data; + /* success is accepted but not recorded */ + data.f1 = p->pid; + data.f2 = task_cpu(p); + data.f3 = p->state; + + marker = &GET_MARKER(kernel, sched_try_wakeup); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(int)); +} +/* Non-specialized probe for newly created tasks; success is accepted but not recorded. */ +void probe_sched_wakeup_new(void *_data, struct task_struct *p, int success) +{ + trace_mark_tp(kernel, sched_wakeup_new_task, sched_wakeup_new, + probe_sched_wakeup_new, "pid %d state #2d%ld cpu_id %u", + p->pid, p->state, task_cpu(p)); +} + +/* kernel_sched_schedule specialized tracepoint probe */ + +void probe_sched_switch(void *_data, struct task_struct *prev, + struct task_struct *next); + +DEFINE_MARKER_TP(kernel, sched_schedule, sched_switch, probe_sched_switch, + "prev_pid %d next_pid %d prev_state #2d%ld"); + +notrace void probe_sched_switch(void *_data, struct task_struct *prev, + struct task_struct *next) +{ + struct marker *marker; + struct serialize_int_int_short data; + + data.f1 = prev->pid; + data.f2 
= next->pid; + data.f3 = prev->state; + + marker = &GET_MARKER(kernel, sched_schedule); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(int)); +} + +void probe_sched_migrate_task(void *_data, struct task_struct *p, int dest_cpu) +{ + trace_mark_tp(kernel, sched_migrate_task, sched_migrate_task, + probe_sched_migrate_task, "pid %d state #2d%ld dest_cpu %d", + p->pid, p->state, dest_cpu); +} + +void probe_sched_signal_send(void *_data, int sig, struct siginfo *info, struct task_struct *t) +{ + trace_mark_tp(kernel, send_signal, signal_generate, + probe_sched_signal_send, "pid %d signal %d", t->pid, sig); +} + +void probe_sched_process_free(void *_data, struct task_struct *p) +{ + trace_mark_tp(kernel, process_free, sched_process_free, + probe_sched_process_free, "pid %d", p->pid); +} + +void probe_sched_process_exit(void *_data, struct task_struct *p) +{ + trace_mark_tp(kernel, process_exit, sched_process_exit, + probe_sched_process_exit, "pid %d", p->pid); +} + +void probe_sched_process_wait(void *_data, struct pid *pid) +{ + trace_mark_tp(kernel, process_wait, sched_process_wait, + probe_sched_process_wait, "pid %d", pid_nr(pid)); +} + +void probe_sched_process_fork(void *_data, struct task_struct *parent, + struct task_struct *child) +{ + trace_mark_tp(kernel, process_fork, sched_process_fork, + probe_sched_process_fork, + "parent_pid %d child_pid %d child_tgid %d", + parent->pid, child->pid, child->tgid); +} + +void probe_sched_kthread_create(void *_data, void *fn, int pid) +{ + trace_mark_tp(kernel, kthread_create, sched_kthread_create, + probe_sched_kthread_create, + "fn %p pid %d", fn, pid); +} + +void probe_timer_itimer_expired(void *_data, struct signal_struct *sig) +{ + trace_mark_tp(kernel, timer_itimer_expired, timer_itimer_expired, + probe_timer_itimer_expired, "pid %d", + pid_nr(sig->leader_pid)); +} + +void probe_timer_itimer_set(void *_data, int which, struct itimerval *value) +{ + 
trace_mark_tp(kernel, timer_itimer_set, + timer_itimer_set, probe_timer_itimer_set, + "which %d interval_sec %ld interval_usec %ld " + "value_sec %ld value_usec %ld", + which, + value->it_interval.tv_sec, + value->it_interval.tv_usec, + value->it_value.tv_sec, + value->it_value.tv_usec); +} + +/* kernel_timer_set specialized tracepoint probe */ + +void probe_timer_set(void *_data, struct timer_list *timer); + +DEFINE_MARKER_TP(kernel, timer_set, timer_set, probe_timer_set, + "expires %lu function %p data %lu"); + +notrace void probe_timer_set(void *_data, struct timer_list *timer) +{ + struct marker *marker; + struct serialize_long_long_long data; + + data.f1 = timer->expires; + data.f2 = (unsigned long)timer->function; + data.f3 = timer->data; + + marker = &GET_MARKER(kernel, timer_set); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +void probe_timer_update_time(void *_data, struct timespec *_xtime, + struct timespec *_wall_to_monotonic) +{ + trace_mark_tp(kernel, timer_update_time, timer_update_time, + probe_timer_update_time, + "jiffies #8u%llu xtime_sec %ld xtime_nsec %ld " + "walltomonotonic_sec %ld walltomonotonic_nsec %ld", + (unsigned long long)jiffies_64, _xtime->tv_sec, _xtime->tv_nsec, + _wall_to_monotonic->tv_sec, _wall_to_monotonic->tv_nsec); +} + +void probe_timer_timeout(void *_data, struct task_struct *p) +{ + trace_mark_tp(kernel, timer_timeout, timer_timeout, + probe_timer_timeout, "pid %d", p->pid); +} + +void probe_kernel_printk(void *_data, unsigned long retaddr) +{ + trace_mark_tp(kernel, printk, kernel_printk, + probe_kernel_printk, "ip 0x%lX", retaddr); +} + +void probe_kernel_vprintk(void *_data, unsigned long retaddr, char *buf, int len) +{ + if (len > 0) { + unsigned int loglevel; + int mark_len; + char *mark_buf; + char saved_char; + + if (buf[0] == '<' && buf[1] >= '0' && + buf[1] <= '7' && buf[2] == '>') { + loglevel = buf[1] - '0'; + mark_buf = &buf[3]; + mark_len = 
len - 3; + } else { + loglevel = default_message_loglevel; + mark_buf = buf; + mark_len = len; + } + if (mark_buf[mark_len - 1] == '\n') + mark_len--; + saved_char = mark_buf[mark_len]; + mark_buf[mark_len] = '\0'; + trace_mark_tp(kernel, vprintk, kernel_vprintk, + probe_kernel_vprintk, + "loglevel #1u%u string %s ip 0x%lX", + loglevel, mark_buf, retaddr); + mark_buf[mark_len] = saved_char; + } +} + +#ifdef CONFIG_MODULES +void probe_kernel_module_free(void *_data, struct module *mod) +{ + trace_mark_tp(kernel, module_free, kernel_module_free, + probe_kernel_module_free, "name %s", mod->name); +} + +void probe_kernel_module_load(void *_data, struct module *mod) +{ + trace_mark_tp(kernel, module_load, kernel_module_load, + probe_kernel_module_load, "name %s", mod->name); +} +#endif + +void probe_kernel_panic(void *_data, const char *fmt, va_list args) +{ + char info[64]; + vsnprintf(info, sizeof(info), fmt, args); + trace_mark_tp(kernel, panic, kernel_panic, probe_kernel_panic, + "info %s", info); +} + +void probe_kernel_kernel_kexec(void *_data, struct kimage *image) +{ + trace_mark_tp(kernel, kernel_kexec, kernel_kernel_kexec, + probe_kernel_kernel_kexec, "image %p", image); +} + +void probe_kernel_crash_kexec(void *_data, struct kimage *image, struct pt_regs *regs) +{ + trace_mark_tp(kernel, crash_kexec, kernel_crash_kexec, + probe_kernel_crash_kexec, "image %p ip %p", image, + regs ? 
(void *)instruction_pointer(regs) : NULL); +} + +/* kernel_page_fault_entry specialized tracepoint probe */ + +void probe_kernel_page_fault_entry(void *_data, struct pt_regs *regs, int trapnr, + struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access); + +DEFINE_MARKER_TP(kernel, page_fault_entry, page_fault_entry, + probe_kernel_page_fault_entry, + "ip #p%lu address #p%lu trap_id #2u%u write_access #1u%u"); + +notrace void probe_kernel_page_fault_entry(void *_data, struct pt_regs *regs, int trapnr, + struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) +{ + struct marker *marker; + struct serialize_long_long_short_char data; + + if (likely(regs)) + data.f1 = instruction_pointer(regs); + else + data.f1 = 0UL; + data.f2 = address; + data.f3 = (unsigned short)trapnr; + data.f4 = (unsigned char)!!write_access; + + marker = &GET_MARKER(kernel, page_fault_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* kernel_page_fault_exit specialized tracepoint probe */ + +void probe_kernel_page_fault_exit(void *_data, int res); + +DEFINE_MARKER_TP(kernel, page_fault_exit, page_fault_exit, + probe_kernel_page_fault_exit, + "res %d"); + +notrace void probe_kernel_page_fault_exit(void *_data, int res) +{ + struct marker *marker; + + marker = &GET_MARKER(kernel, page_fault_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + &res, sizeof(res), sizeof(res)); +} + +/* kernel_page_fault_nosem_entry specialized tracepoint probe */ + +void probe_kernel_page_fault_nosem_entry(void *_data, struct pt_regs *regs, + int trapnr, unsigned long address); + +DEFINE_MARKER_TP(kernel, page_fault_nosem_entry, page_fault_nosem_entry, + probe_kernel_page_fault_nosem_entry, + "ip #p%lu address #p%lu trap_id #2u%u"); + +notrace void probe_kernel_page_fault_nosem_entry(void *_data, struct pt_regs *regs, + int trapnr, unsigned long 
address) +{ + struct marker *marker; + struct serialize_long_long_short data; + + if (likely(regs)) + data.f1 = instruction_pointer(regs); + else + data.f1 = 0UL; + data.f2 = address; + data.f3 = (unsigned short)trapnr; + + marker = &GET_MARKER(kernel, page_fault_nosem_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* kernel_page_fault_nosem_exit specialized tracepoint probe */ + +void probe_kernel_page_fault_nosem_exit(void *_data, int res); + +DEFINE_MARKER_TP(kernel, page_fault_nosem_exit, page_fault_nosem_exit, + probe_kernel_page_fault_nosem_exit, + MARK_NOARGS); + +notrace void probe_kernel_page_fault_nosem_exit(void *_data, int res) +{ + struct marker *marker; + + marker = &GET_MARKER(kernel, page_fault_nosem_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + NULL, 0, 0); +} + +/* kernel_page_fault_get_user_entry specialized tracepoint probe */ + +void probe_kernel_page_fault_get_user_entry(void *_data, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, int write_access); + +DEFINE_MARKER_TP(kernel, page_fault_get_user_entry, page_fault_get_user_entry, + probe_kernel_page_fault_get_user_entry, + "address #p%lu write_access #1u%u"); + +notrace void probe_kernel_page_fault_get_user_entry(void *_data, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, int write_access) +{ + struct marker *marker; + struct serialize_long_char data; + + data.f1 = address; + data.f2 = (unsigned char)!!write_access; + + marker = &GET_MARKER(kernel, page_fault_get_user_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* kernel_page_fault_get_user_exit specialized tracepoint probe */ + +void probe_kernel_page_fault_get_user_exit(void *_data, int res); + +DEFINE_MARKER_TP(kernel, page_fault_get_user_exit, page_fault_get_user_exit, + 
probe_kernel_page_fault_get_user_exit, + "res %d"); + +notrace void probe_kernel_page_fault_get_user_exit(void *_data, int res) +{ + struct marker *marker; + + marker = &GET_MARKER(kernel, page_fault_get_user_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + &res, sizeof(res), sizeof(res)); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("kernel Tracepoint Probes"); diff --git a/probes/lockdep-trace.c b/probes/lockdep-trace.c new file mode 100644 index 00000000..a9a77344 --- /dev/null +++ b/probes/lockdep-trace.c @@ -0,0 +1,60 @@ +/* + * ltt/probes/lockdep-trace.c + * + * lockdep tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include + +void probe_lockdep_hardirqs_on(void *data, unsigned long retaddr) +{ + trace_mark_tp(lockdep, hardirqs_on, lockdep_hardirqs_on, + probe_lockdep_hardirqs_on, "retaddr 0x%lX", retaddr); +} + +void probe_lockdep_hardirqs_off(void *data, unsigned long retaddr) +{ + trace_mark_tp(lockdep, hardirqs_off, lockdep_hardirqs_off, + probe_lockdep_hardirqs_off, "retaddr 0x%lX", retaddr); +} + +void probe_lockdep_softirqs_on(void *data, unsigned long retaddr) +{ + trace_mark_tp(lockdep, softirqs_on, lockdep_softirqs_on, + probe_lockdep_softirqs_on, "retaddr 0x%lX", retaddr); +} + +void probe_lockdep_softirqs_off(void *data, unsigned long retaddr) +{ + trace_mark_tp(lockdep, softirqs_off, lockdep_softirqs_off, + probe_lockdep_softirqs_off, "retaddr 0x%lX", retaddr); +} + +void probe_lockdep_lock_acquire(void *data, unsigned long retaddr, + unsigned int subclass, struct lockdep_map *lock, int trylock, + int read, int hardirqs_off) +{ + trace_mark_tp(lockdep, lock_acquire, lockdep_lock_acquire, + probe_lockdep_lock_acquire, + "retaddr 0x%lX subclass %u lock %p trylock %d read %d " + "hardirqs_off %d", + retaddr, subclass, lock, trylock, read, hardirqs_off); +} + +void 
probe_lockdep_lock_release(void *data, unsigned long retaddr, + struct lockdep_map *lock, int nested) +{ + trace_mark_tp(lockdep, lock_release, lockdep_lock_release, + probe_lockdep_lock_release, + "retaddr 0x%lX lock %p nested %d", + retaddr, lock, nested); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("lockdep Tracepoint Probes"); diff --git a/probes/mm-trace.c b/probes/mm-trace.c new file mode 100644 index 00000000..935e366c --- /dev/null +++ b/probes/mm-trace.c @@ -0,0 +1,146 @@ +/* + * ltt/probes/mm-trace.c + * + * MM tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../ltt-type-serializer.h" + +void probe_wait_on_page_start(void *_data, struct page *page, int bit_nr) +{ + trace_mark_tp(mm, wait_on_page_start, wait_on_page_start, + probe_wait_on_page_start, "pfn %lu bit_nr %d", + page_to_pfn(page), bit_nr); +} + +void probe_wait_on_page_end(void *_data, struct page *page, int bit_nr) +{ + trace_mark_tp(mm, wait_on_page_end, wait_on_page_end, + probe_wait_on_page_end, "pfn %lu bit_nr %d", + page_to_pfn(page), bit_nr); +} + +void probe_hugetlb_page_free(void *_data, struct page *page) +{ + trace_mark_tp(mm, huge_page_free, hugetlb_page_free, + probe_hugetlb_page_free, "pfn %lu", page_to_pfn(page)); +} + +void probe_hugetlb_page_alloc(void *_data, struct page *page) +{ + if (page) + trace_mark_tp(mm, huge_page_alloc, hugetlb_page_alloc, + probe_hugetlb_page_alloc, "pfn %lu", page_to_pfn(page)); +} + +/* mm_page_free specialized tracepoint probe */ + +void probe_page_free(void *_data, struct page *page, unsigned int order); + +DEFINE_MARKER_TP(mm, page_free, page_free, probe_page_free, + "pfn %lu order %u"); + +notrace void probe_page_free(void *_data, struct page *page, unsigned int order) +{ + struct marker *marker; + struct 
serialize_long_int data; + + data.f1 = page_to_pfn(page); + data.f2 = order; + + marker = &GET_MARKER(mm, page_free); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* mm_page_alloc specialized tracepoint probe */ + +void probe_page_alloc(void *_data, struct page *page, unsigned int order); + +DEFINE_MARKER_TP(mm, page_alloc, page_alloc, probe_page_alloc, + "pfn %lu order %u"); + +notrace void probe_page_alloc(void *_data, struct page *page, unsigned int order) +{ + struct marker *marker; + struct serialize_long_int data; + + if (unlikely(!page)) + return; + + data.f1 = page_to_pfn(page); + data.f2 = order; + + marker = &GET_MARKER(mm, page_alloc); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +#ifdef CONFIG_SWAP +void probe_swap_in(void *_data, struct page *page, swp_entry_t entry) +{ + trace_mark_tp(mm, swap_in, swap_in, probe_swap_in, + "pfn %lu filp %p offset %lu", + page_to_pfn(page), + get_swap_info_struct(swp_type(entry))->swap_file, + swp_offset(entry)); +} + +void probe_swap_out(void *_data, struct page *page) +{ + trace_mark_tp(mm, swap_out, swap_out, probe_swap_out, + "pfn %lu filp %p offset %lu", + page_to_pfn(page), + get_swap_info_struct(swp_type( + page_swp_entry(page)))->swap_file, + swp_offset(page_swp_entry(page))); +} + +void probe_swap_file_close(void *_data, struct file *file) +{ + trace_mark_tp(mm, swap_file_close, swap_file_close, + probe_swap_file_close, "filp %p", file); +} + +void probe_swap_file_open(void *_data, struct file *file, char *filename) +{ + trace_mark_tp(mm, swap_file_open, swap_file_open, + probe_swap_file_open, "filp %p filename %s", + file, filename); +} +#endif + +void probe_add_to_page_cache(void *_data, struct address_space *mapping, pgoff_t offset) +{ + trace_mark_tp(mm, add_to_page_cache, add_to_page_cache, + probe_add_to_page_cache, + "inode %lu sdev %u", + mapping->host->i_ino, 
mapping->host->i_sb->s_dev); +} + +void probe_remove_from_page_cache(void *_data, struct address_space *mapping) +{ + trace_mark_tp(mm, remove_from_page_cache, remove_from_page_cache, + probe_remove_from_page_cache, + "inode %lu sdev %u", + mapping->host->i_ino, mapping->host->i_sb->s_dev); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("MM Tracepoint Probes"); diff --git a/probes/net-extended-trace.c b/probes/net-extended-trace.c new file mode 100644 index 00000000..15fc8109 --- /dev/null +++ b/probes/net-extended-trace.c @@ -0,0 +1,146 @@ +/* + * ltt/probes/net-extended-trace.c + * + * Net tracepoint extended probes. + * + * These probes record many header fields from TCP and UDP messages. Here are + * the consequences of this: + * 1) it allows analyzing network traffic to provide some pcap-like + * functionality within LTTng + * 2) it allows offline synchronization of a group of concurrent traces + * recorded on different nodes + * 3) it increases tracing overhead + * + * You can leave out these probes or not activate them if you are not + * especially interested in the details of network traffic and do not wish to + * synchronize distributed traces. + * + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../ltt-type-serializer.h" + +void probe_net_dev_xmit_extended(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, dev_xmit_extended, net_dev_xmit, + probe_net_dev_xmit_extended, "skb 0x%lX network_protocol #n2u%hu " + "transport_protocol #1u%u saddr #n4u%lu daddr #n4u%lu " + "tot_len #n2u%hu ihl #1u%u source #n2u%hu dest #n2u%hu seq #n4u%lu " + "ack_seq #n4u%lu doff #1u%u ack #1u%u rst #1u%u syn #1u%u fin #1u%u"); + +notrace void probe_net_dev_xmit_extended(void *_data, struct sk_buff *skb) +{ + struct marker *marker; + struct serialize_l214421224411111 data; + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + /* + * Zero the record first: f3..f7 are only filled in for IPv4 and + * f8..f16 only for TCP, but the whole struct is always handed to + * ltt_specialized_trace(), so unset fields would otherwise carry + * uninitialized stack contents into the trace. + */ + memset(&data, 0, sizeof(data)); + + data.f1 = (unsigned long)skb; + data.f2 = skb->protocol; + + if (ntohs(skb->protocol) == ETH_P_IP) { + data.f3 = iph->protocol; + data.f4 = iph->saddr; + data.f5 = iph->daddr; + data.f6 = iph->tot_len; + data.f7 = iph->ihl; + + if (data.f3 == IPPROTO_TCP) { + data.f8 = th->source; + data.f9 = th->dest; + data.f10 = th->seq; + data.f11 = th->ack_seq; + data.f12 = th->doff; + data.f13 = th->ack; + data.f14 = th->rst; + data.f15 = th->syn; + data.f16 = th->fin; + } + } + + marker = &GET_MARKER(net, dev_xmit_extended); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +void probe_tcpv4_rcv_extended(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, tcpv4_rcv_extended, net_tcpv4_rcv, + probe_tcpv4_rcv_extended, "skb 0x%lX saddr #n4u%lu daddr #n4u%lu " + "tot_len #n2u%hu ihl #1u%u source #n2u%hu dest #n2u%hu seq #n4u%lu " + "ack_seq #n4u%lu doff #1u%u ack #1u%u rst #1u%u syn #1u%u fin #1u%u"); + +notrace void probe_tcpv4_rcv_extended(void *_data, struct sk_buff *skb) +{ + struct marker *marker; + struct serialize_l4421224411111 data; + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + data.f1 = (unsigned long)skb; + data.f2 = iph->saddr; + 
data.f3 = iph->daddr; + data.f4 = iph->tot_len; + data.f5 = iph->ihl; + data.f6 = th->source; + data.f7 = th->dest; + data.f8 = th->seq; + data.f9 = th->ack_seq; + data.f10 = th->doff; + data.f11 = th->ack; + data.f12 = th->rst; + data.f13 = th->syn; + data.f14 = th->fin; + + marker = &GET_MARKER(net, tcpv4_rcv_extended); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +void probe_udpv4_rcv_extended(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, udpv4_rcv_extended, net_udpv4_rcv, + probe_udpv4_rcv_extended, "skb 0x%lX saddr #n4u%lu daddr #n4u%lu " + "unicast #1u%u ulen #n2u%hu source #n2u%hu dest #n2u%hu " + "data_start #8u%lx"); + +notrace void probe_udpv4_rcv_extended(void *_data, struct sk_buff *skb) +{ + struct marker *marker; + struct serialize_l4412228 data; + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + struct udphdr *uh = udp_hdr(skb); + + data.f1 = (unsigned long)skb; + data.f2 = iph->saddr; + data.f3 = iph->daddr; + data.f4 = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST) ? 
0 : 1; + data.f5 = uh->len; + data.f6 = uh->source; + data.f7 = uh->dest; + /* UDP header has not been pulled from skb->data, copy the first 8 + * bytes of UDP data if they are not in a fragment. memcpy() is used + * because skb->data + sizeof(*uh) may not be 8-byte aligned. */ + data.f8 = 0; + if (skb_headlen(skb) >= sizeof(struct udphdr) + 8) + memcpy(&data.f8, skb->data + sizeof(*uh), sizeof(data.f8)); + else if (skb_headlen(skb) >= sizeof(struct udphdr)) + memcpy(&data.f8, skb->data + sizeof(struct udphdr), + skb_headlen(skb) - sizeof(struct udphdr)); + + marker = &GET_MARKER(net, udpv4_rcv_extended); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(unsigned long long)); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Benjamin Poirier"); +MODULE_DESCRIPTION("Net Tracepoint Extended Probes"); diff --git a/probes/net-trace.c b/probes/net-trace.c new file mode 100644 index 00000000..3124125d --- /dev/null +++ b/probes/net-trace.c @@ -0,0 +1,406 @@ +/* + * ltt/probes/net-trace.c + * + * Net tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../ltt-type-serializer.h" + +void probe_net_dev_xmit(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, dev_xmit, net_dev_xmit, probe_net_dev_xmit, + "skb %p protocol #n2u%hu"); + +notrace void probe_net_dev_xmit(void *_data, struct sk_buff *skb) +{ + struct marker *marker; + struct serialize_long_short data; + + data.f1 = (unsigned long)skb; + data.f2 = skb->protocol; + + marker = &GET_MARKER(net, dev_xmit); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +void probe_net_dev_receive(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, dev_receive, net_dev_receive, probe_net_dev_receive, + "skb %p protocol #n2u%hu"); + +notrace void probe_net_dev_receive(void *_data, struct sk_buff *skb) +{ + struct marker *marker; + struct serialize_long_short data; + + data.f1 = (unsigned long)skb; + data.f2 = skb->protocol; + + marker = &GET_MARKER(net, dev_receive); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +void probe_ipv4_addr_add(void *_data, struct in_ifaddr *ifa) +{ + trace_mark_tp(netif_state, insert_ifa_ipv4, ipv4_addr_add, + probe_ipv4_addr_add, "label %s address #4u%u", + ifa->ifa_label, (unsigned int)ifa->ifa_address); +} + +void probe_ipv4_addr_del(void *_data, struct in_ifaddr *ifa) +{ + trace_mark_tp(netif_state, del_ifa_ipv4, ipv4_addr_del, + probe_ipv4_addr_del, "label %s address #4u%u", + ifa->ifa_label, (unsigned int)ifa->ifa_address); +} + +void probe_ipv6_addr_add(void *_data, struct inet6_ifaddr *ifa) +{ + __u8 *addr = ifa->addr.s6_addr; + + trace_mark_tp(netif_state, insert_ifa_ipv6, ipv6_addr_add, + probe_ipv6_addr_add, + "label %s " + "a15 #1x%c a14 #1x%c a13 #1x%c a12 #1x%c " + "a11 #1x%c a10 #1x%c a9 #1x%c a8 #1x%c " + "a7 #1x%c a6 #1x%c a5 #1x%c a4 #1x%c " + "a3 #1x%c a2 #1x%c a1 #1x%c a0 #1x%c", + 
ifa->idev->dev->name, + addr[15], addr[14], addr[13], addr[12], + addr[11], addr[10], addr[9], addr[8], + addr[7], addr[6], addr[5], addr[4], + addr[3], addr[2], addr[1], addr[0]); +} + +void probe_ipv6_addr_del(void *_data, struct inet6_ifaddr *ifa) +{ + __u8 *addr = ifa->addr.s6_addr; + + /* Logged as del_ifa_ipv6 (not insert_ifa_ipv6) so that address + * removals are distinguishable from insertions, as for IPv4. */ + trace_mark_tp(netif_state, del_ifa_ipv6, ipv6_addr_del, + probe_ipv6_addr_del, + "label %s " + "a15 #1x%c a14 #1x%c a13 #1x%c a12 #1x%c " + "a11 #1x%c a10 #1x%c a9 #1x%c a8 #1x%c " + "a7 #1x%c a6 #1x%c a5 #1x%c a4 #1x%c " + "a3 #1x%c a2 #1x%c a1 #1x%c a0 #1x%c", + ifa->idev->dev->name, + addr[15], addr[14], addr[13], addr[12], + addr[11], addr[10], addr[9], addr[8], + addr[7], addr[6], addr[5], addr[4], + addr[3], addr[2], addr[1], addr[0]); +} + +void probe_socket_create(void *_data, int family, int type, int protocol, + struct socket *sock, int ret) +{ + trace_mark_tp(net, socket_create, socket_create, probe_socket_create, + "family %d type %d protocol %d sock %p ret %d", + family, type, protocol, sock, ret); +} + +void probe_socket_bind(void *_data, int fd, struct sockaddr __user *umyaddr, int addrlen, + int ret) +{ + trace_mark_tp(net, socket_bind, socket_bind, probe_socket_bind, + "fd %d umyaddr %p addrlen %d ret %d", + fd, umyaddr, addrlen, ret); +} + +void probe_socket_connect(void *_data, int fd, struct sockaddr __user *uservaddr, + int addrlen, int ret) +{ + trace_mark_tp(net, socket_connect, socket_connect, probe_socket_connect, + "fd %d uservaddr %p addrlen %d ret %d", + fd, uservaddr, addrlen, ret); +} + +void probe_socket_listen(void *_data, int fd, int backlog, int ret) +{ + trace_mark_tp(net, socket_listen, socket_listen, probe_socket_listen, + "fd %d backlog %d ret %d", + fd, backlog, ret); +} + +void probe_socket_accept(void *_data, int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags, int ret) +{ + trace_mark_tp(net, socket_accept, socket_accept, probe_socket_accept, + "fd %d upeer_sockaddr %p upeer_addrlen %p flags %d ret 
%d", + fd, upeer_sockaddr, upeer_addrlen, flags, ret); +} + +void probe_socket_getsockname(void *_data, int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len, int ret) +{ + trace_mark_tp(net, socket_getsockname, socket_getsockname, + probe_socket_getsockname, + "fd %d usockaddr %p usockaddr_len %p ret %d", + fd, usockaddr, usockaddr_len, ret); +} + +void probe_socket_getpeername(void *_data, int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len, int ret) +{ + trace_mark_tp(net, socket_getpeername, socket_getpeername, + probe_socket_getpeername, + "fd %d usockaddr %p usockaddr_len %p ret %d", + fd, usockaddr, usockaddr_len, ret); +} + +void probe_socket_socketpair(void *_data, int family, int type, int protocol, + int __user *usockvec, int ret) +{ + trace_mark_tp(net, socket_socketpair, socket_socketpair, + probe_socket_socketpair, + "family %d type %d protocol %d usockvec %p ret %d", + family, type, protocol, usockvec, ret); +} + +void probe_socket_sendmsg(void *_data, struct socket *sock, struct msghdr *msg, size_t size, + int ret); + +DEFINE_MARKER_TP(net, socket_sendmsg, net_socket_sendmsg, + probe_socket_sendmsg, + "sock %p msg %p size %zu ret %d"); + +notrace void probe_socket_sendmsg(void *_data, struct socket *sock, struct msghdr *msg, + size_t size, int ret) +{ + struct marker *marker; + struct serialize_long_long_sizet_int data; + + data.f1 = (unsigned long)sock; + data.f2 = (unsigned long)msg; + data.f3 = size; + data.f4 = ret; + + marker = &GET_MARKER(net, socket_sendmsg); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(size_t)); +} + +void probe_socket_recvmsg(void *_data, struct socket *sock, struct msghdr *msg, size_t size, + int flags, int ret); + +DEFINE_MARKER_TP(net, socket_recvmsg, net_socket_recvmsg, + probe_socket_recvmsg, + "sock %p msg %p size %zu flags %d ret %d"); + +notrace void probe_socket_recvmsg(void *_data, struct socket *sock, struct msghdr *msg, 
+ size_t size, int flags, int ret) +{ + struct marker *marker; + struct serialize_long_long_sizet_int_int data; + + data.f1 = (unsigned long)sock; + data.f2 = (unsigned long)msg; + data.f3 = size; + data.f4 = flags; + data.f5 = ret; + + marker = &GET_MARKER(net, socket_recvmsg); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(size_t)); +} + +void probe_socket_setsockopt(void *_data, int fd, int level, int optname, + char __user *optval, int optlen, int ret) +{ + trace_mark_tp(net, socket_setsockopt, socket_setsockopt, + probe_socket_setsockopt, + "fd %d level %d optname %d optval %p optlen %d ret %d", + fd, level, optname, optval, optlen, ret); +} + +void probe_socket_getsockopt(void *_data, int fd, int level, int optname, + char __user *optval, int __user *optlen, int ret) +{ + trace_mark_tp(net, socket_getsockopt, socket_getsockopt, + probe_socket_getsockopt, + "fd %d level %d optname %d optval %p optlen %p ret %d", + fd, level, optname, optval, optlen, ret); +} + +void probe_socket_shutdown(void *_data, int fd, int how, int ret) +{ + trace_mark_tp(net, socket_shutdown, socket_shutdown, + probe_socket_shutdown, + "fd %d how %d ret %d", + fd, how, ret); +} + +void probe_socket_call(void *_data, int call, unsigned long a0) +{ + trace_mark_tp(net, socket_call, socket_call, probe_socket_call, + "call %d a0 %lu", call, a0); +} + +void probe_tcpv4_rcv(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, tcpv4_rcv, net_tcpv4_rcv, probe_tcpv4_rcv, + "skb %p"); + +notrace void probe_tcpv4_rcv(void *_data, struct sk_buff *skb) +{ + struct marker *marker; + + marker = &GET_MARKER(net, tcpv4_rcv); + ltt_specialized_trace(marker, marker->single.probe_private, + &skb, sizeof(skb), sizeof(skb)); +} + +void probe_udpv4_rcv(void *_data, struct sk_buff *skb); + +DEFINE_MARKER_TP(net, udpv4_rcv, net_udpv4_rcv, probe_udpv4_rcv, + "skb %p"); + +notrace void probe_udpv4_rcv(void *_data, struct sk_buff *skb) +{ + struct 
marker *marker; + + marker = &GET_MARKER(net, udpv4_rcv); + ltt_specialized_trace(marker, marker->single.probe_private, + &skb, sizeof(skb), sizeof(skb)); +} + +#ifdef CONFIG_NETPOLL +void probe_net_napi_schedule(void *_data, struct napi_struct *n); + +DEFINE_MARKER_TP(net, napi_schedule, net_napi_schedule, + probe_net_napi_schedule, + "napi_struct %p name %s"); + +notrace void probe_net_napi_schedule(void *_data, struct napi_struct *n) +{ + struct marker *marker; + struct serialize_long_ifname data; + size_t data_len = 0; + + data.f1 = (unsigned long)n; + data_len += sizeof(data.f1); + /* No need to align for strings */ + strcpy(data.f2, n->dev ? n->dev->name : ""); + data_len += strlen(data.f2) + 1; + + marker = &GET_MARKER(net, napi_schedule); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, data_len, sizeof(long)); +} + +void probe_net_napi_poll(void *_data, struct napi_struct *n); + +DEFINE_MARKER_TP(net, napi_poll, net_napi_poll, + probe_net_napi_poll, + "napi_struct %p name %s"); + +notrace void probe_net_napi_poll(void *_data, struct napi_struct *n) +{ + struct marker *marker; + struct serialize_long_ifname data; + size_t data_len = 0; + + data.f1 = (unsigned long)n; + data_len += sizeof(data.f1); + /* No need to align for strings */ + strcpy(data.f2, n->dev ? n->dev->name : ""); + data_len += strlen(data.f2) + 1; + + marker = &GET_MARKER(net, napi_poll); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, data_len, sizeof(long)); +} + +void probe_net_napi_complete(void *_data, struct napi_struct *n); + +DEFINE_MARKER_TP(net, napi_complete, net_napi_complete, + probe_net_napi_complete, + "napi_struct %p name %s"); + +notrace void probe_net_napi_complete(void *_data, struct napi_struct *n) +{ + struct marker *marker; + struct serialize_long_ifname data; + size_t data_len = 0; + + data.f1 = (unsigned long)n; + data_len += sizeof(data.f1); + /* No need to align for strings */ + strcpy(data.f2, n->dev ? 
n->dev->name : ""); + data_len += strlen(data.f2) + 1; + + marker = &GET_MARKER(net, napi_complete); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, data_len, sizeof(long)); +} +#else /* !CONFIG_NETPOLL */ +void probe_net_napi_schedule(void *_data, struct napi_struct *n); + +DEFINE_MARKER_TP(net, napi_schedule, net_napi_schedule, + probe_net_napi_schedule, + "napi_struct %p"); + +notrace void probe_net_napi_schedule(void *_data, struct napi_struct *n) +{ + struct marker *marker; + unsigned long data; + + data = (unsigned long)n; + + marker = &GET_MARKER(net, napi_schedule); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +void probe_net_napi_poll(void *_data, struct napi_struct *n); + +DEFINE_MARKER_TP(net, napi_poll, net_napi_poll, + probe_net_napi_poll, + "napi_struct %p"); + +notrace void probe_net_napi_poll(void *_data, struct napi_struct *n) +{ + struct marker *marker; + unsigned long data; + + data = (unsigned long)n; + + marker = &GET_MARKER(net, napi_poll); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +void probe_net_napi_complete(void *_data, struct napi_struct *n); + +DEFINE_MARKER_TP(net, napi_complete, net_napi_complete, + probe_net_napi_complete, + "napi_struct %p"); + +notrace void probe_net_napi_complete(void *_data, struct napi_struct *n) +{ + struct marker *marker; + unsigned long data; + + data = (unsigned long)n; + + marker = &GET_MARKER(net, napi_complete); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} +#endif + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Net Tracepoint Probes"); diff --git a/probes/pm-trace.c b/probes/pm-trace.c new file mode 100644 index 00000000..7abe8e37 --- /dev/null +++ b/probes/pm-trace.c @@ -0,0 +1,43 @@ +/* + * ltt/probes/pm-trace.c + * + * Power Management tracepoint 
probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. + */ + +#include +#include +/* Probe that calls trace_mark_tp() with (pm, idle_entry, pm_idle_entry) and its own address, logging the current irqs_disabled() value under the "irqstate" field. _data is not read. */ +void probe_pm_idle_entry(void *_data) +{ + trace_mark_tp(pm, idle_entry, pm_idle_entry, + probe_pm_idle_entry, "irqstate #1%d", + irqs_disabled()); +} +/* Same shape as probe_pm_idle_entry, for (pm, idle_exit, pm_idle_exit). _data is not read. */ +void probe_pm_idle_exit(void *_data) +{ + trace_mark_tp(pm, idle_exit, pm_idle_exit, + probe_pm_idle_exit, "irqstate #1%d", + irqs_disabled()); +} +/* Same shape as probe_pm_idle_entry, for (pm, suspend_entry, pm_suspend_entry). _data is not read. */ +void probe_pm_suspend_entry(void *_data) +{ + trace_mark_tp(pm, suspend_entry, pm_suspend_entry, + probe_pm_suspend_entry, "irqstate #1%d", + irqs_disabled()); +} +/* Same shape as probe_pm_idle_entry, for (pm, suspend_exit, pm_suspend_exit). _data is not read. */ +void probe_pm_suspend_exit(void *_data) +{ + trace_mark_tp(pm, suspend_exit, pm_suspend_exit, + probe_pm_suspend_exit, "irqstate #1%d", + irqs_disabled()); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Power Management Tracepoint Probes"); diff --git a/probes/rcu-trace.c b/probes/rcu-trace.c new file mode 100644 index 00000000..cc164546 --- /dev/null +++ b/probes/rcu-trace.c @@ -0,0 +1,36 @@ +/* + * ltt/probes/rcu-trace.c + * + * RCU tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include +/* Everything up to the matching #endif is compiled only when CONFIG_TREE_RCU is defined. probe_rcu_tree_callback passes (rcu, tree_callback, rcu_tree_callback) and its own address to trace_mark_tp() and logs head->func as "func %p"; head is dereferenced without a NULL check and data is not read. */ +#ifdef CONFIG_TREE_RCU +void probe_rcu_tree_callback(void *data, struct rcu_head *head) +{ + trace_mark_tp(rcu, tree_callback, rcu_tree_callback, + probe_rcu_tree_callback, "func %p", head->func); +} +/* Like probe_rcu_tree_callback, for (rcu, tree_call_rcu, rcu_tree_call_rcu); additionally logs ip as "ip 0x%lX". data is not read. */ +void probe_rcu_tree_call_rcu(void *data, struct rcu_head *head, unsigned long ip) +{ + trace_mark_tp(rcu, tree_call_rcu, rcu_tree_call_rcu, + probe_rcu_tree_call_rcu, "func %p ip 0x%lX", head->func, ip); +} +/* Same payload as probe_rcu_tree_call_rcu (head->func and ip), for (rcu, tree_call_rcu_bh, rcu_tree_call_rcu_bh). data is not read. */ +void probe_rcu_tree_call_rcu_bh(void *data, struct rcu_head *head, unsigned long ip) +{ + trace_mark_tp(rcu, tree_call_rcu_bh, rcu_tree_call_rcu_bh, + probe_rcu_tree_call_rcu_bh, "func %p ip 0x%lX", + head->func, ip); +} +#endif + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("RCU Tracepoint Probes"); diff --git a/probes/syscall-trace.c b/probes/syscall-trace.c new file mode 100644 index 00000000..9ae419fc --- /dev/null +++ b/probes/syscall-trace.c @@ -0,0 +1,54 @@ +/* + * ltt/probes/syscall-trace.c + * + * System call tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include + +#include "../ltt-type-serializer.h" + + +/* kernel_syscall_entry specialized tracepoint probe: records instruction_pointer(regs) and the syscall id through ltt_specialized_trace(). _data is not read. NOTE(review): regs is passed to instruction_pointer() without a NULL check - confirm callers never pass NULL. */ +/* Forward declaration so that DEFINE_MARKER_TP below can name the probe before its definition. */ +void probe_syscall_entry(void *_data, struct pt_regs *regs, long id); + +DEFINE_MARKER_TP(kernel, syscall_entry, syscall_entry, + probe_syscall_entry, "ip #p%ld syscall_id #2u%u"); + +notrace void probe_syscall_entry(void *_data, struct pt_regs *regs, long id) +{ + struct marker *marker; + struct serialize_long_short data; +/* f1 carries the instruction pointer; f2 carries id narrowed to unsigned short (the format string declares that field as "#2u%u"). */ + data.f1 = instruction_pointer(regs); + data.f2 = (unsigned short)id; +/* Fetch the (kernel, syscall_entry) marker and emit data. The size comes from serialize_sizeof(data); sizeof(long) is presumably the largest field alignment - TODO confirm against ltt_specialized_trace(). */ + marker = &GET_MARKER(kernel, syscall_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* kernel_syscall_exit specialized tracepoint probe: emits the raw long ret (format "ret %ld"), passing sizeof(ret) for both trailing arguments of ltt_specialized_trace(). _data is not read. */ + +void probe_syscall_exit(void *_data, long ret); + +DEFINE_MARKER_TP(kernel, syscall_exit, syscall_exit, + probe_syscall_exit, "ret %ld"); + +notrace void probe_syscall_exit(void *_data, long ret) +{ + struct marker *marker; + + marker = &GET_MARKER(kernel, syscall_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + &ret, sizeof(ret), sizeof(ret)); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("syscall Tracepoint Probes"); diff --git a/probes/trap-trace.c b/probes/trap-trace.c new file mode 100644 index 00000000..397254cd --- /dev/null +++ b/probes/trap-trace.c @@ -0,0 +1,56 @@ +/* + * ltt/probes/trap-trace.c + * + * Trap tracepoint probes. + * + * (C) Copyright 2009 - Mathieu Desnoyers + * Dual LGPL v2.1/GPL v2 license. 
+ */ + +#include +#include + +#include "../ltt-type-serializer.h" + +/* kernel_trap_entry specialized tracepoint probe: records instruction_pointer(regs), or 0 when regs is NULL, together with the trap id through ltt_specialized_trace(). _data is not read. */ +/* Forward declaration so that DEFINE_MARKER_TP below can name the probe before its definition. */ +void probe_trap_entry(void *_data, struct pt_regs *regs, long id); + +DEFINE_MARKER_TP(kernel, trap_entry, trap_entry, + probe_trap_entry, "ip #p%ld trap_id #2u%u"); + +notrace void probe_trap_entry(void *_data, struct pt_regs *regs, long id) +{ + struct marker *marker; + struct serialize_long_short data; +/* Tolerate a NULL regs by recording 0 as the instruction pointer; id is narrowed to unsigned short (the format string declares that field as "#2u%u"). */ + if (likely(regs)) + data.f1 = instruction_pointer(regs); + else + data.f1 = 0UL; + data.f2 = (unsigned short)id; +/* Fetch the (kernel, trap_entry) marker and emit data. The size comes from serialize_sizeof(data); sizeof(long) is presumably the largest field alignment - TODO confirm against ltt_specialized_trace(). */ + marker = &GET_MARKER(kernel, trap_entry); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(long)); +} + +/* kernel_trap_exit specialized tracepoint probe: the marker is declared with MARK_NOARGS and is emitted with no payload (NULL, 0, 0). _data is not read. */ + +void probe_trap_exit(void *_data); + +DEFINE_MARKER_TP(kernel, trap_exit, trap_exit, + probe_trap_exit, MARK_NOARGS); + +notrace void probe_trap_exit(void *_data) +{ + struct marker *marker; + + marker = &GET_MARKER(kernel, trap_exit); + ltt_specialized_trace(marker, marker->single.probe_private, + NULL, 0, 0); +} + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Mathieu Desnoyers"); +MODULE_DESCRIPTION("Trap Tracepoint Probes"); -- 2.34.1