2 * Copyright (C) 2005,2006,2008 Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
4 * This contains the definitions for the Linux Trace Toolkit tracer.
6 * Dual LGPL v2.1/GPL v2 license.
13 #include <linux/types.h>
14 #include <linux/limits.h>
15 #include <linux/list.h>
16 #include <linux/cache.h>
17 #include <linux/kernel.h>
18 #include <linux/timex.h>
19 #include <linux/wait.h>
20 #include <linux/marker.h>
21 #include <linux/trace-clock.h>
22 #include <linux/ltt-channels.h>
23 #include <asm/atomic.h>
24 #include <asm/local.h>
26 #include "ltt-tracer-core.h"
27 #include "ltt-relay.h"
29 /* Number of bytes to log with a read/write event */
30 #define LTT_LOG_RW_SIZE 32L
32 /* Interval (in jiffies) at which the LTT per-CPU timer fires */
33 #define LTT_PERCPU_TIMER_INTERVAL 1
36 #define LTT_ARCH_TYPE LTT_ARCH_TYPE_UNDEFINED
39 #ifndef LTT_ARCH_VARIANT
40 #define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
43 struct ltt_active_marker
;
45 /* Maximum number of callbacks per marker */
46 #define LTT_NR_CALLBACKS 10
48 struct ltt_serialize_closure
{
49 ltt_serialize_cb
*callbacks
;
50 long cb_args
[LTT_NR_CALLBACKS
];
/*
 * ltt_serialize_data - declaration only; the definition is not part of this
 * header.
 *
 * Takes a channel buffer and an offset into it, a serialization closure, an
 * opaque private pointer, a stack position context, an output pointer for the
 * largest alignment, and a format string with its va_list. Returns a size_t.
 * NOTE(review): the exact meaning of the return value is not visible here;
 * confirm against the definition.
 */
size_t ltt_serialize_data(struct ltt_chanbuf *buf, size_t buf_offset,
			  struct ltt_serialize_closure *closure,
			  void *serialize_private, unsigned int stack_pos_ctx,
			  int *largest_align, const char *fmt, va_list *args);
59 struct ltt_available_probe
{
60 const char *name
; /* probe name */
62 marker_probe_func
*probe_func
;
63 ltt_serialize_cb callbacks
[LTT_NR_CALLBACKS
];
64 struct list_head node
; /* registered probes list */
70 LTT_CHANNEL_GLOBAL_STATE
,
71 LTT_CHANNEL_IRQ_STATE
,
72 LTT_CHANNEL_MODULE_STATE
,
73 LTT_CHANNEL_NETIF_STATE
,
74 LTT_CHANNEL_SOFTIRQ_STATE
,
75 LTT_CHANNEL_SWAP_STATE
,
76 LTT_CHANNEL_SYSCALL_STATE
,
77 LTT_CHANNEL_TASK_STATE
,
88 struct ltt_active_marker
{
89 struct list_head node
; /* active markers list */
93 struct ltt_available_probe
*probe
;
/*
 * Tracing entry points sharing one argument layout: marker descriptor, probe
 * private data, per-call data and a format string. ltt_vtrace() receives the
 * arguments through a va_list pointer, ltt_trace() receives them variadically.
 * Both are defined outside this header.
 */
extern void ltt_vtrace(const struct marker *mdata, void *probe_data,
		       void *call_data, const char *fmt, va_list *args);
extern void ltt_trace(const struct marker *mdata, void *probe_data,
		      void *call_data, const char *fmt, ...);
101 size_t ltt_serialize_printf(struct ltt_chanbuf
*buf
, unsigned long buf_offset
,
102 size_t *msg_size
, char *output
, size_t outlen
,
106 * Unique ID assigned to each registered probe.
enum marker_id {
	MARKER_ID_SET_MARKER_ID = 0,	/* Static IDs available (range 0-7) */
	MARKER_ID_SET_MARKER_FORMAT,
	MARKER_ID_COMPACT,		/* Compact IDs (range: 8-127) */
	MARKER_ID_DYNAMIC,		/* Dynamic IDs (range: 128-65535) */
};

/* static ids 0-1 reserved for internal use. */
#define MARKER_CORE_IDS		2

/*
 * marker_id_type - classify a numeric marker id.
 * @id: numeric marker id.
 *
 * Ids below MARKER_CORE_IDS are returned as the enum constant carrying the
 * same numeric value; every other id is reported as MARKER_ID_DYNAMIC.
 */
static __inline__ enum marker_id marker_id_type(uint16_t id)
{
	if (id < MARKER_CORE_IDS)
		return (enum marker_id)id;
	else
		return MARKER_ID_DYNAMIC;
}
125 struct user_dbg_data
{
126 unsigned long avail_size
;
131 struct ltt_trace_ops
{
132 /* First 32 bytes cache-hot cacheline */
133 void (*wakeup_channel
) (struct ltt_chan
*chan
);
134 int (*user_blocking
) (struct ltt_trace
*trace
, unsigned int index
,
135 size_t data_size
, struct user_dbg_data
*dbg
);
136 /* End of first 32 bytes cacheline */
137 int (*create_dirs
) (struct ltt_trace
*new_trace
);
138 void (*remove_dirs
) (struct ltt_trace
*new_trace
);
139 int (*create_channel
) (const char *channel_name
, struct ltt_chan
*chan
,
140 struct dentry
*parent
, size_t sb_size
,
141 size_t n_sb
, int overwrite
,
142 struct ltt_trace
*trace
);
143 void (*finish_channel
) (struct ltt_chan
*chan
);
144 void (*remove_channel
) (struct kref
*kref
);
145 void (*remove_channel_files
) (struct ltt_chan
*chan
);
146 void (*user_errors
) (struct ltt_trace
*trace
, unsigned int index
,
147 size_t data_size
, struct user_dbg_data
*dbg
,
149 void (*start_switch_timer
) (struct ltt_chan
*chan
);
150 void (*stop_switch_timer
) (struct ltt_chan
*chan
);
151 #ifdef CONFIG_HOTPLUG_CPU
152 int (*handle_cpuhp
) (struct notifier_block
*nb
, unsigned long action
,
153 void *hcpu
, struct ltt_trace
*trace
);
157 struct ltt_transport
{
159 struct module
*owner
;
160 struct list_head node
;
161 struct ltt_trace_ops ops
;
/* Recording mode of a trace. */
enum trace_mode {
	LTT_TRACE_NORMAL,
	LTT_TRACE_FLIGHT,
	LTT_TRACE_HYBRID
};
/* Per-channel flag bits. */
#define CHANNEL_FLAG_ENABLE	(1U<<0)
#define CHANNEL_FLAG_OVERWRITE	(1U<<1)
169 /* Per-trace information - each trace/flight recorder represented by one */
171 /* First 32 bytes cache-hot cacheline */
172 struct list_head list
;
173 struct ltt_chan
*channels
;
174 unsigned int nr_channels
;
176 /* Second 32 bytes cache-hot cacheline */
177 struct ltt_trace_ops
*ops
;
181 unsigned long long start_monotonic
;
182 struct timeval start_time
;
183 struct ltt_channel_setting
*settings
;
185 struct dentry
*trace_root
;
186 struct dentry
*ascii_root
;
188 struct kref kref
; /* Each channel has a kref of the trace struct */
189 struct ltt_transport
*transport
;
190 struct kref ltt_transport_kref
;
191 wait_queue_head_t kref_wq
; /* Place for ltt_trace_destroy to sleep */
192 char trace_name
[NAME_MAX
];
193 } ____cacheline_aligned
;
195 /* Hardcoded event headers
197 * event header for a trace with active heartbeat : 27 bits timestamps
199 * headers are 32-bits aligned. In order to ensure such alignment, a dynamic per
200 * trace alignment value must be computed.
202 * Remember that the C compiler does align each member on the boundary
203 * equivalent to their own size.
205 * As relay subbuffers are aligned on pages, we are sure that they are 4 and 8
206 * bytes aligned, so the buffer header and trace header are aligned.
208 * Event headers are aligned depending on the trace alignment option.
210 * Note using C structure bitfields for cross-endianness and portability
/*
 * Layout of the 32-bit event header word: LTT_EVENT_BITS bits of event id
 * above LTT_TSC_BITS bits of timestamp. LTT_RESERVED_EVENTS of the
 * (1 << LTT_EVENT_BITS) id values are reserved; the rest are free.
 */
#define LTT_RESERVED_EVENTS	3
#define LTT_EVENT_BITS		5
#define LTT_FREE_EVENTS		((1 << LTT_EVENT_BITS) - LTT_RESERVED_EVENTS)
#define LTT_TSC_BITS		27
#define LTT_TSC_MASK		((1 << LTT_TSC_BITS) - 1)
220 struct ltt_event_header
{
221 u32 id_time
; /* 5 bits event id (MSB); 27 bits time (LSB) */
/* Reservation flags */
#define LTT_RFLAG_ID			(1 << 0)
#define LTT_RFLAG_ID_SIZE		(1 << 1)
#define LTT_RFLAG_ID_SIZE_TSC		(1 << 2)

/* Largest value of the 16-bit size field; also marks an extended size. */
#define LTT_MAX_SMALL_SIZE		0xFFFFU
232 * We use asm/timex.h : cpu_khz/HZ variable in here : we might have to deal
233 * specifically with CPU frequency scaling someday, so using an interpolation
234 * between the start and end of buffer values is not flexible enough. Using an
235 * immediate frequency value makes it possible to directly calculate the times
236 * for parts of a buffer that would be before a frequency change.
238 * Keep the natural field alignment for _each field_ within this structure if
239 * you ever add/remove a field from this header. Packed attribute is not used
240 * because gcc generates poor code on at least powerpc and mips. Don't ever
241 * let gcc add padding between the structure elements.
/*
 * Header found at the start of each sub-buffer. Every field sits at its
 * natural alignment so that no padding is inserted between members;
 * header_end is a zero-sized marker used to locate the end of the header.
 */
struct ltt_subbuffer_header {
	uint64_t cycle_count_begin;	/* Cycle count at subbuffer start */
	uint64_t cycle_count_end;	/* Cycle count at subbuffer end */
	uint32_t magic_number;		/*
					 * Trace magic number.
					 * contains endianness information.
					 */
	uint8_t major_version;
	uint8_t minor_version;
	uint8_t arch_size;		/* Architecture pointer size */
	uint8_t alignment;		/* LTT data alignment */
	uint64_t start_time_sec;	/* NTP-corrected start time */
	uint64_t start_time_usec;
	uint64_t start_freq;		/*
					 * Frequency at trace start,
					 * used all along the trace.
					 */
	uint32_t freq_scale;		/* Frequency scaling (divisor) */
	uint32_t data_size;		/* Size of data in subbuffer */
	uint32_t sb_size;		/* Subbuffer size (include padding) */
	uint32_t events_lost;		/*
					 * Events lost in this subbuffer since
					 * the beginning of the trace.
					 */
	uint32_t subbuf_corrupt;	/*
					 * Corrupted (lost) subbuffers since
					 * the beginning of the trace.
					 */
	uint8_t header_end[0];		/* End of header */
};
277 * ltt_sb_header_size - called on buffer-switch to a new sub-buffer
279 * Return header size without padding after the structure. Don't use packed
280 * structure because gcc generates inefficient code on some architectures
283 static __inline__
size_t ltt_sb_header_size(void)
285 return offsetof(struct ltt_subbuffer_header
, header_end
);
289 * ltt_get_header_size
291 * Calculate alignment offset to 32-bits. This is the alignment offset of the
295 * The event header must be 32-bits. The total offset calculated here :
297 * Alignment of header struct on 32 bits (min arch size, header size)
298 * + sizeof(header struct) (32-bits)
299 * + (opt) u16 (ext. event id)
300 * + (opt) u16 (event_size)
301 * (if event_size == LTT_MAX_SMALL_SIZE, has ext. event size)
302 * + (opt) u32 (ext. event size)
303 * + (opt) u64 full TSC (aligned on min(64-bits, arch size))
305 * The payload must itself determine its own alignment from the biggest type it
309 unsigned char ltt_get_header_size(struct ltt_chan
*chan
, size_t offset
,
310 size_t data_size
, size_t *before_hdr_pad
,
313 size_t orig_offset
= offset
;
316 BUILD_BUG_ON(sizeof(struct ltt_event_header
) != sizeof(u32
));
318 padding
= ltt_align(offset
, sizeof(struct ltt_event_header
));
320 offset
+= sizeof(struct ltt_event_header
);
322 if (unlikely(rflags
)) {
324 case LTT_RFLAG_ID_SIZE_TSC
:
325 offset
+= sizeof(u16
) + sizeof(u16
);
326 if (data_size
>= LTT_MAX_SMALL_SIZE
)
327 offset
+= sizeof(u32
);
328 offset
+= ltt_align(offset
, sizeof(u64
));
329 offset
+= sizeof(u64
);
331 case LTT_RFLAG_ID_SIZE
:
332 offset
+= sizeof(u16
) + sizeof(u16
);
333 if (data_size
>= LTT_MAX_SMALL_SIZE
)
334 offset
+= sizeof(u32
);
337 offset
+= sizeof(u16
);
342 *before_hdr_pad
= padding
;
343 return offset
- orig_offset
;
347 size_t ltt_write_event_header_slow(struct ltt_chanbuf_alloc
*bufa
,
348 struct ltt_chan_alloc
*chana
,
349 long buf_offset
, u16 eID
, u32 event_size
,
350 u64 tsc
, unsigned int rflags
);
353 * ltt_write_event_header
355 * Writes the event header to the offset (already aligned on 32-bits).
357 * @bufa : buffer to write to.
358 * @chana : pointer to the channel structure.
359 * @buf_offset : buffer offset to write to (aligned on 32 bits).
361 * @event_size : size of the event, excluding the event header.
362 * @tsc : time stamp counter.
363 * @rflags : reservation flags.
365 * returns : offset where the event data must be written.
368 size_t ltt_write_event_header(struct ltt_chanbuf_alloc
*bufa
,
369 struct ltt_chan_alloc
*chana
,
370 long buf_offset
, u16 eID
, u32 event_size
, u64 tsc
,
373 struct ltt_event_header header
;
375 if (unlikely(rflags
))
378 header
.id_time
= eID
<< LTT_TSC_BITS
;
379 header
.id_time
|= (u32
)tsc
& LTT_TSC_MASK
;
380 ltt_relay_write(bufa
, chana
, buf_offset
, &header
, sizeof(header
));
381 buf_offset
+= sizeof(header
);
386 return ltt_write_event_header_slow(bufa
, chana
, buf_offset
,
387 eID
, event_size
, tsc
, rflags
);
391 * ltt_read_event_header
392 * buf_offset must be aligned on 32 bits
395 size_t ltt_read_event_header(struct ltt_chanbuf_alloc
*bufa
, long buf_offset
,
396 u64
*tsc
, u32
*event_size
, u16
*eID
,
397 unsigned int *rflags
)
399 struct ltt_event_header header
;
402 ltt_relay_read(bufa
, buf_offset
, &header
, sizeof(header
));
403 buf_offset
+= sizeof(header
);
405 *event_size
= INT_MAX
;
406 *eID
= header
.id_time
>> LTT_TSC_BITS
;
407 *tsc
= header
.id_time
& LTT_TSC_MASK
;
411 *rflags
= LTT_RFLAG_ID_SIZE_TSC
;
412 ltt_relay_read(bufa
, buf_offset
, eID
, sizeof(u16
));
413 buf_offset
+= sizeof(u16
);
414 ltt_relay_read(bufa
, buf_offset
, &small_size
, sizeof(u16
));
415 buf_offset
+= sizeof(u16
);
416 if (small_size
== LTT_MAX_SMALL_SIZE
) {
417 ltt_relay_read(bufa
, buf_offset
, event_size
,
419 buf_offset
+= sizeof(u32
);
421 *event_size
= small_size
;
422 buf_offset
+= ltt_align(buf_offset
, sizeof(u64
));
423 ltt_relay_read(bufa
, buf_offset
, tsc
, sizeof(u64
));
424 buf_offset
+= sizeof(u64
);
427 *rflags
= LTT_RFLAG_ID_SIZE
;
428 ltt_relay_read(bufa
, buf_offset
, eID
, sizeof(u16
));
429 buf_offset
+= sizeof(u16
);
430 ltt_relay_read(bufa
, buf_offset
, &small_size
, sizeof(u16
));
431 buf_offset
+= sizeof(u16
);
432 if (small_size
== LTT_MAX_SMALL_SIZE
) {
433 ltt_relay_read(bufa
, buf_offset
, event_size
,
435 buf_offset
+= sizeof(u32
);
437 *event_size
= small_size
;
440 *rflags
= LTT_RFLAG_ID
;
441 ltt_relay_read(bufa
, buf_offset
, eID
, sizeof(u16
));
442 buf_offset
+= sizeof(u16
);
454 /* Buffer offset macros */
457 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
458 * the offset, which leaves only the buffer number.
/*
 * Offset helpers. @chan is a pointer to a channel whose "a" member provides
 * buf_size, sb_size and sb_size_order. The masks assume buf_size and sb_size
 * are powers of two.
 */

/* Clear the in-buffer part of the offset. */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->a.buf_size - 1)))
/* Position within the buffer. */
#define BUFFER_OFFSET(offset, chan)	((offset) & ((chan)->a.buf_size - 1))
/* Position within the sub-buffer. */
#define SUBBUF_OFFSET(offset, chan)	((offset) & ((chan)->a.sb_size - 1))
/* Start of the sub-buffer following the one containing offset. */
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->a.sb_size) & (~((chan)->a.sb_size - 1)))
/* Start of the sub-buffer containing offset. */
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->a.sb_size - 1)))
/* Index of the sub-buffer containing offset, within its buffer. */
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->a.sb_size_order)
/* Root directory names */
#define LTT_RELAY_ROOT			"ltt"
#define LTT_RELAY_LOCKED_ROOT		"ltt-locked"

/* Channel names */
#define LTT_METADATA_CHANNEL		"metadata_state"
#define LTT_FD_STATE_CHANNEL		"fd_state"
#define LTT_GLOBAL_STATE_CHANNEL	"global_state"
#define LTT_IRQ_STATE_CHANNEL		"irq_state"
#define LTT_MODULE_STATE_CHANNEL	"module_state"
#define LTT_NETIF_STATE_CHANNEL		"netif_state"
#define LTT_SOFTIRQ_STATE_CHANNEL	"softirq_state"
#define LTT_SWAP_STATE_CHANNEL		"swap_state"
#define LTT_SYSCALL_STATE_CHANNEL	"syscall_state"
#define LTT_TASK_STATE_CHANNEL		"task_state"
#define LTT_VM_STATE_CHANNEL		"vm_state"
#define LTT_FS_CHANNEL			"fs"
#define LTT_INPUT_CHANNEL		"input"
#define LTT_IPC_CHANNEL			"ipc"
#define LTT_KERNEL_CHANNEL		"kernel"
#define LTT_MM_CHANNEL			"mm"
#define LTT_RCU_CHANNEL			"rcu"

#define LTT_FLIGHT_PREFIX		"flight-"

#define LTT_ASCII			"ascii"
/* Tracer properties */
#define LTT_DEFAULT_SUBBUF_SIZE_LOW	65536
#define LTT_DEFAULT_N_SUBBUFS_LOW	2
#define LTT_DEFAULT_SUBBUF_SIZE_MED	262144
#define LTT_DEFAULT_N_SUBBUFS_MED	2
#define LTT_DEFAULT_SUBBUF_SIZE_HIGH	1048576
#define LTT_DEFAULT_N_SUBBUFS_HIGH	2
#define LTT_TRACER_MAGIC_NUMBER		0x00D6B7ED
#define LTT_TRACER_VERSION_MAJOR	2
#define LTT_TRACER_VERSION_MINOR	6
517 * ltt_write_trace_header - Write trace header
518 * @trace: Trace information
519 * @header: Memory address where the information must be written to
522 void ltt_write_trace_header(struct ltt_trace
*trace
,
523 struct ltt_subbuffer_header
*header
)
525 header
->magic_number
= LTT_TRACER_MAGIC_NUMBER
;
526 header
->major_version
= LTT_TRACER_VERSION_MAJOR
;
527 header
->minor_version
= LTT_TRACER_VERSION_MINOR
;
528 header
->arch_size
= sizeof(void *);
529 header
->alignment
= ltt_get_alignment();
530 header
->start_time_sec
= trace
->start_time
.tv_sec
;
531 header
->start_time_usec
= trace
->start_time
.tv_usec
;
532 header
->start_freq
= trace
->start_freq
;
533 header
->freq_scale
= trace
->freq_scale
;
537 * Size reserved for high priority events (interrupts, NMI, BH) at the end of a
538 * nearly full buffer. User space won't use this last amount of space when in
539 * blocking mode. This space also includes the event header that would be
540 * written by this user space event.
542 #define LTT_RESERVE_CRITICAL 4096
544 /* Register and unregister function pointers */
/* Selects which function slot ltt_module_register()/unregister() acts on. */
enum ltt_module_function {
	LTT_FUNCTION_RUN_FILTER,
	LTT_FUNCTION_FILTER_CONTROL,
	LTT_FUNCTION_STATEDUMP
};
/*
 * Registration of a function pointer under a slot identified by @name,
 * together with its owning module, and the matching unregistration.
 * Defined outside this header.
 */
extern int ltt_module_register(enum ltt_module_function name, void *function,
			       struct module *owner);
extern void ltt_module_unregister(enum ltt_module_function name);

/* Registration and unregistration of a transport. Defined elsewhere. */
void ltt_transport_register(struct ltt_transport *transport);
void ltt_transport_unregister(struct ltt_transport *transport);
559 /* Exported control function */
561 enum ltt_control_msg
{
564 LTT_CONTROL_CREATE_TRACE
,
565 LTT_CONTROL_DESTROY_TRACE
568 union ltt_control_args
{
570 enum trace_mode mode
;
571 unsigned int subbuf_size_low
;
572 unsigned int n_subbufs_low
;
573 unsigned int subbuf_size_med
;
574 unsigned int n_subbufs_med
;
575 unsigned int subbuf_size_high
;
576 unsigned int n_subbufs_high
;
/*
 * Trace setup interface; every function identifies the trace by name.
 * NOTE(review): the leading-underscore variants presumably expect the caller
 * to already hold the traces lock - confirm against the definitions.
 */
int _ltt_trace_setup(const char *trace_name);
int ltt_trace_setup(const char *trace_name);
struct ltt_trace *_ltt_trace_find_setup(const char *trace_name);
int ltt_trace_set_type(const char *trace_name, const char *trace_type);
584 int ltt_trace_set_channel_subbufsize(const char *trace_name
,
585 const char *channel_name
,
587 int ltt_trace_set_channel_subbufcount(const char *trace_name
,
588 const char *channel_name
,
/* Per-channel settings, addressed by trace name and channel name. */
int ltt_trace_set_channel_switch_timer(const char *trace_name,
				       const char *channel_name,
				       unsigned long interval);
int ltt_trace_set_channel_enable(const char *trace_name,
				 const char *channel_name,
				 unsigned int enable);
int ltt_trace_set_channel_overwrite(const char *trace_name,
				    const char *channel_name,
				    unsigned int overwrite);

/* Trace life cycle, addressed by trace name. */
int ltt_trace_alloc(const char *trace_name);
int ltt_trace_destroy(const char *trace_name);
int ltt_trace_start(const char *trace_name);
int ltt_trace_stop(const char *trace_name);

/* Message-based control entry point; @args is passed by value. */
extern int ltt_control(enum ltt_control_msg msg, const char *trace_name,
		       const char *trace_type, union ltt_control_args args);
/* Messages accepted by ltt_filter_control(). */
enum ltt_filter_control_msg {
	LTT_FILTER_DEFAULT_ACCEPT,
	LTT_FILTER_DEFAULT_REJECT
};
612 extern int ltt_filter_control(enum ltt_filter_control_msg msg
,
613 const char *trace_name
);
615 extern struct dentry
*get_filter_root(void);
617 void ltt_core_register(int (*function
)(u8
, void *));
619 void ltt_core_unregister(void);
621 void ltt_release_trace(struct kref
*kref
);
622 void ltt_release_transport(struct kref
*kref
);
624 extern int ltt_probe_register(struct ltt_available_probe
*pdata
);
625 extern int ltt_probe_unregister(struct ltt_available_probe
*pdata
);
626 extern int ltt_marker_connect(const char *channel
, const char *mname
,
628 extern int ltt_marker_disconnect(const char *channel
, const char *mname
,
extern void ltt_dump_marker_state(struct ltt_trace *trace);

/* Take / release the lock protecting the traces. */
void ltt_lock_traces(void);
void ltt_unlock_traces(void);

/* ASCII output: per-trace directory and per-channel entries. */
extern int ltt_ascii_create_dir(struct ltt_trace *new_trace);
extern void ltt_ascii_remove_dir(struct ltt_trace *trace);
extern int ltt_ascii_create(struct ltt_chan *chan);
extern void ltt_ascii_remove(struct ltt_chan *chan);

/* Register / unregister the callback used to dump kprobes state. */
void ltt_statedump_register_kprobes_dump(void (*callback)(void *call_data));
void ltt_statedump_unregister_kprobes_dump(void (*callback)(void *call_data));

extern void ltt_dump_softirq_vec(void *call_data);
647 #ifdef CONFIG_HAVE_LTT_DUMP_TABLES
648 extern void ltt_dump_sys_call_table(void *call_data
);
649 extern void ltt_dump_idt_table(void *call_data
);
651 static inline void ltt_dump_sys_call_table(void *call_data
)
655 static inline void ltt_dump_idt_table(void *call_data
)
/* Get the next sub-buffer that can be read. */
#define RELAY_GET_SB			_IOR(0xF5, 0x00, __u32)
/* Release the oldest reserved (by "get") sub-buffer. */
#define RELAY_PUT_SB			_IOW(0xF5, 0x01, __u32)
/* returns the number of sub-buffers in the per cpu channel. */
#define RELAY_GET_N_SB			_IOR(0xF5, 0x02, __u32)
/* returns the size of the current sub-buffer. */
#define RELAY_GET_SB_SIZE		_IOR(0xF5, 0x03, __u32)
/* returns the maximum size for sub-buffers. */
#define RELAY_GET_MAX_SB_SIZE		_IOR(0xF5, 0x04, __u32)
673 #endif /* _LTT_TRACER_H */