/*
 * LTTng userspace tracer buffering system
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <ust/clock.h>

#include "usterr_signal_safe.h"
#include "tracerconst.h"
#include "tracercore.h"
/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size - 1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
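
/*
 * Worked example (illustration only; the channel values are assumed for the
 * sake of the example): with subbuf_size = 4096 (subbuf_size_order = 12),
 * 4 sub-buffers and therefore alloc_size = 16384, a linear write offset of
 * 20617 decomposes as:
 *
 *	BUFFER_TRUNC(20617, chan)  == 16384  (buffer number part only)
 *	BUFFER_OFFSET(20617, chan) ==  4233  (position within the buffer)
 *	SUBBUF_INDEX(20617, chan)  ==     1  (second sub-buffer)
 *	SUBBUF_OFFSET(20617, chan) ==   137  (position within that sub-buffer)
 *	SUBBUF_TRUNC(20617, chan)  == 20480  (start of the current sub-buffer)
 *	SUBBUF_ALIGN(20617, chan)  == 24576  (start of the next sub-buffer)
 */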
/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/
/*
 * TODO: using the "long" type for struct ust_buffer (control structure
 * shared between traced apps and the consumer) is a very bad idea when
 * we get to systems with mixed 32/64-bit processes.
 *
 * But on a 64-bit system, we want the full power of 64-bit counters,
 * which wrap less often. Therefore, it's not as easy as "use 32-bit
 * types everywhere".
 *
 * One way to deal with this is to:
 * 1) Design the 64-bit consumer so it can detect 32-bit and 64-bit apps.
 * 2) The 32-bit consumer only supports 32-bit apps.
 */
struct commit_counters {
	long cc;			/* ATOMIC */
	long cc_sb;			/* ATOMIC - Incremented _once_ at sb switch */
};
struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	long offset;			/* Current offset in the buffer *atomic* */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	long consumed;			/* Current offset in the buffer *atomic* access (shared) */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	long active_readers;		/* ATOMIC - Active readers count standard atomic access (shared) */
	long events_lost;		/* ATOMIC */
	long corrupted_subbuffers;	/* *ATOMIC* */
	/* one byte is written to this pipe when data is available, in order
	   to wake the consumer */
	/* portability: single byte writes must be as quick as possible. The
	   kernel-side buffer must be large enough so the writer doesn't block.
	   From the pipe(7) man page: since Linux 2.6.11, the pipe capacity is
	   65536 bytes. */
	int data_ready_fd_write;
	/* the reading end of the pipe */
	int data_ready_fd_read;

	/*
	 * List of buffers with an open pipe, used for fork and forced subbuffer
	 * switch.
	 */
	struct cds_list_head open_buffers_list;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodical switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	struct urcu_ref urcu_ref;
	void *buf_data;
	unsigned int cpu;

	/* commit count per subbuffer; must be at end of struct */
	long commit_seq[0];		/* ATOMIC */
} ____cacheline_aligned;
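
/*
 * Illustrative sketch (not part of this header): how a consumer thread could
 * block until the writer signals data availability through the data_ready
 * pipe described above. The single byte written by the tracer is drained,
 * then the sub-buffer can be checked for delivery. Only standard POSIX calls
 * are used; the surrounding consumer loop is assumed.
 *
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	static void wait_for_data(struct ust_buffer *buf)
 *	{
 *		struct pollfd pfd = {
 *			.fd = buf->data_ready_fd_read,
 *			.events = POLLIN,
 *		};
 *		char dummy;
 *
 *		if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *			(void) read(buf->data_ready_fd_read, &dummy, 1);
 *	}
 */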
/*
 * A switch is either done during tracing (FORCE_ACTIVE) or as a final
 * flush after tracing (FORCE_FLUSH). FORCE_FLUSH ensures we won't
 * write in the new sub-buffer.
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
		struct ust_trace *trace, size_t data_size,
		int largest_align, int cpu,
		struct ust_buffer **ret_buf,
		size_t *slot_size, long *buf_offset,
		u64 *tsc, unsigned int *rflags);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);
#ifndef HAVE_EFFICIENT_UNALIGNED_ACCESS

/*
 * Calculate the offset needed to align the type.
 * size_of_type must be non-zero.
 */
static inline unsigned int ltt_align(size_t align_drift, size_t size_of_type)
{
	size_t alignment = min(sizeof(void *), size_of_type);
	return (alignment - align_drift) & (alignment - 1);
}

/* Default arch alignment */
#define LTT_ALIGN

static inline int ltt_get_alignment(void)
{
	return sizeof(void *);
}

#else /* HAVE_EFFICIENT_UNALIGNED_ACCESS */

static inline unsigned int ltt_align(size_t align_drift,
		size_t size_of_type)
{
	return 0;
}

#define LTT_ALIGN __attribute__((packed))

static inline int ltt_get_alignment(void)
{
	return 0;
}

#endif /* HAVE_EFFICIENT_UNALIGNED_ACCESS */
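
/*
 * Worked example (illustration only): ltt_align() returns the padding needed
 * to bring the current write position to the natural alignment of the next
 * type, capped at the architecture word size. On a 64-bit build without
 * HAVE_EFFICIENT_UNALIGNED_ACCESS:
 *
 *	ltt_align(13, sizeof(u32)) == 3	(13 -> 16, 4-byte alignment)
 *	ltt_align(13, sizeof(u64)) == 3	(13 -> 16, 8-byte alignment)
 *	ltt_align(16, sizeof(u64)) == 0	(already aligned)
 *	ltt_align(17, sizeof(u8))  == 0	(1-byte types never need padding)
 *
 * When HAVE_EFFICIENT_UNALIGNED_ACCESS is defined, the function always
 * returns 0 and event fields are packed instead.
 */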
static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0: break;
	case 1: *(u8 *)dest = *u.src8;
		break;
	case 2: *(u16 *)dest = *u.src16;
		break;
	case 4: *(u32 *)dest = *u.src32;
		break;
	case 8: *(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}
static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data) + offset;
}
/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif
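
/*
 * Worked example (illustration only; the actual LTT_TSC_BITS value comes from
 * the tracer headers). On a 64-bit build, last_tsc_overflow() reports an
 * overflow whenever the distance to the previously recorded timestamp no
 * longer fits in LTT_TSC_BITS bits, which forces a full 64-bit TSC to be
 * written in the event header. Assuming LTT_TSC_BITS == 27 for the example:
 *
 *	last_tsc = 1000000
 *	tsc = 1000000 + (1UL << 27) - 1  ->  (delta >> 27) == 0, no overflow
 *	tsc = 1000000 + (1UL << 27)      ->  (delta >> 27) == 1, overflow,
 *	                                     rflags gets LTT_RFLAG_ID_SIZE_TSC
 */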
/*
 * ust_get_header_size
 *
 * Calculate alignment offset to 32-bits. This is the alignment offset of the
 * event header.
 *
 * Important note:
 * The event header must be 32-bits. The total offset calculated here:
 *
 * Alignment of header struct on 32 bits (min arch size, header size)
 * + sizeof(header struct)  (32-bits)
 * + (opt) u16 (ext. event id)
 * + (opt) u16 (event_size) (if event_size == 0xFFFFUL, has ext. event size)
 * + (opt) u32 (ext. event size)
 * + (opt) u64 full TSC (aligned on min(64-bits, arch size))
 *
 * The payload must itself determine its own alignment from the biggest type it
 * contains.
 */
static __inline__ unsigned char ust_get_header_size(
		struct ust_channel *channel,
		size_t offset,
		size_t data_size,
		size_t *before_hdr_pad,
		unsigned int rflags)
{
	size_t orig_offset = offset;
	size_t padding;

	padding = ltt_align(offset, sizeof(struct ltt_event_header));
	offset += padding;
	offset += sizeof(struct ltt_event_header);

	if (unlikely(rflags)) {
		switch (rflags) {
		case LTT_RFLAG_ID_SIZE_TSC:
			offset += sizeof(u16) + sizeof(u16);
			if (data_size >= 0xFFFFU)
				offset += sizeof(u32);
			offset += ltt_align(offset, sizeof(u64));
			offset += sizeof(u64);
			break;
		case LTT_RFLAG_ID_SIZE:
			offset += sizeof(u16) + sizeof(u16);
			if (data_size >= 0xFFFFU)
				offset += sizeof(u32);
			break;
		case LTT_RFLAG_ID:
			offset += sizeof(u16);
			break;
		}
	}

	*before_hdr_pad = padding;
	return offset - orig_offset;
}
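
/*
 * Worked example (illustration only, assuming sizeof(struct ltt_event_header)
 * == 4 as implied by the "must be 32-bits" note above): for an event reserved
 * at offset 18 with rflags == 0, the header needs 2 bytes of padding to reach
 * a 4-byte boundary plus 4 bytes of header, so ust_get_header_size() returns 6
 * and *before_hdr_pad is set to 2. With rflags == LTT_RFLAG_ID_SIZE and
 * data_size < 0xFFFFU, two extra u16 fields are appended and the result
 * becomes 10.
 */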
static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = uatomic_read(&buf->consumed);
		/*
		 * If buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by many
		 * writers in the same buffer, the writer being at the farthest
		 * write position sub-buffer index in the buffer being the one
		 * which will win this loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
			      - SUBBUF_TRUNC(consumed_old, buf->chan))
			     >= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}
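
/*
 * Worked example (illustration only): with subbuf_size = 4096 and
 * alloc_size = 16384 (4 sub-buffers), a writer calling
 * ltt_reserve_push_reader() with offset = 16500 while consumed_old = 100 sees
 * SUBBUF_TRUNC(16500) - SUBBUF_TRUNC(100) = 16384 - 0 >= alloc_size, so the
 * reader is pushed to SUBBUF_ALIGN(100) = 4096, i.e. one full sub-buffer
 * ahead, freeing the sub-buffer the writer is about to overwrite. With
 * consumed_old = 4200 the difference is only 12288 and the reader is left
 * alone.
 */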
static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	uatomic_set(&buf->commit_seq[idx], commit_count);
}
static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
			   & chan->commit_count_mask) == 0)) {
		/*
		 * If we succeeded in updating the cc_sb, we are delivering
		 * the subbuffer. Deals with concurrent updates of the "cc"
		 * value without adding a add_return atomic operation to the
		 * fast path.
		 */
		if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
					old_commit_count, commit_count)
			   == old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* wakeup consumer */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result < 0) {
				PERROR("write (in ltt_relay_buffer_flush)");
				ERR("this should never happen!");
			}
		}
	}
}
static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = uatomic_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory cmm_barrier here, since we are only interested
	 * in a statistically correct polling result. The next poll will
	 * get the data if we are racing. The mb() that ensures correct
	 * memory order is in get_subbuf.
	 */
	write_offset = uatomic_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has been
	 * already fully committed.
	 */
	if (((commit_count - chan->subbuf_size)
	     & chan->commit_count_mask)
	    - (BUFFER_TRUNC(consumed_old, buf->chan)
	       >> chan->n_subbufs_order)
	    != 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
	     - SUBBUF_TRUNC(consumed_old, buf->chan))
	    == 0)
		return 0;

	return 1;
}
/*
 * returns 0 if reserve ok, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = uatomic_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
		     > buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}
static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
		struct ust_trace *trace, size_t data_size,
		int largest_align, int cpu,
		struct ust_buffer **ret_buf,
		size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags)
{
	struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	if (unlikely(CMM_LOAD_SHARED(ltt_nesting) > 4)) {
		DBG("Dropping event because nesting is too deep.");
		uatomic_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	*buf_offset = o_begin + before_hdr_pad;
	return 0;

slow_path:
	return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
			largest_align, cpu, ret_buf,
			slot_size, buf_offset, tsc, rflags);
}
/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	return ltt_force_switch_lockless_slow(buf, mode);
}
/*
 * Commit count bookkeeping for flight recording; must be called after
 * relay_commit.
 * This function increments the subbuffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dumps.
 */
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
					commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld",
	    buf->chan->channel_name, buf->cpu, idx, commit_count);
}
/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @chan : channel structure
 * @buf : buffer to commit into
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

	uatomic_add(&buf->commit_count[endidx].cc, slot_size);
	/*
	 * commit count read can race with concurrent OOO commit count updates.
	 * This is only needed for ltt_check_deliver (for non-polling delivery
	 * only) and for ltt_write_commit_counter. The race can only cause the
	 * counter to be read with the same value more than once, which could
	 * cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the uatomic_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = uatomic_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update data_size for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}
void _ust_buffers_strncpy_fixup(struct ust_buffer *buf, size_t offset,
	size_t len, size_t copied, int terminated);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
	const void *src, size_t len)
{
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
	assert(buf_offset + len
	       <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);

	ust_buffers_do_copy(buf->buf_data + buf_offset, src, len);

	return len;
}
/*
 * ust_buffers_do_memset - write character into dest.
 * @dest: destination
 * @src: source character
 * @len: length to write
 */
static __inline__
void ust_buffers_do_memset(void *dest, char src, size_t len)
{
	/*
	 * What we really want here is an __inline__ memset, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--)
		*(u8 *)dest++ = src;
}
/*
 * ust_buffers_do_strncpy - copy a string up to a certain number of bytes
 * @dest: destination
 * @src: source
 * @len: max. length to copy
 * @terminated: output string ends with \0 (output)
 *
 * Returns the number of bytes copied. Does not finalize with \0 if len is
 * reached.
 */
static __inline__
size_t ust_buffers_do_strncpy(void *dest, const void *src, size_t len,
			      int *terminated)
{
	size_t orig_len = len;

	*terminated = 0;
	/*
	 * What we really want here is an __inline__ strncpy, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--) {
		*(u8 *)dest = CMM_LOAD_SHARED(*(const u8 *)src);
		/* Check with dest, because src may be modified concurrently */
		if (*(const u8 *)dest == '\0') {
			len--;
			*terminated = 1;
			break;
		}
		dest++;
		src++;
	}
	return orig_len - len;
}
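
/*
 * Worked example (illustration only): copying the string "ab" (2 characters
 * plus the terminating \0) with len = 8 copies 3 bytes, sets *terminated to 1
 * and returns 3; the remaining 5 bytes of the slot are untouched, which is
 * why the caller below falls back to _ust_buffers_strncpy_fixup() whenever
 * the return value is smaller than len. With len = 2 only "ab" is copied, no
 * \0 is written, *terminated stays 0 and the fixup path is also taken.
 */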
static __inline__
int ust_buffers_strncpy(struct ust_buffer *buf, size_t offset, const void *src,
		size_t len)
{
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
	size_t copied;
	int terminated;

	assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
	assert(buf_offset + len
	       <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);

	copied = ust_buffers_do_strncpy(buf->buf_data + buf_offset,
					src, len, &terminated);
	if (unlikely(copied < len || !terminated))
		_ust_buffers_strncpy_fixup(buf, offset, len, copied,
					   terminated);
	return len;
}
extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);

#endif /* _UST_BUFFERS_H */