/*
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <kcompat/kref.h>

#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"

/***** SHOULD BE REMOVED ***** */

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size - 1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)

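/*
 * Worked example (illustrative numbers only, not required by the code): with
 * subbuf_size = 4096 (subbuf_size_order = 12), 4 sub-buffers and therefore
 * alloc_size = 16384, an offset of 20613 decomposes as:
 *
 *	BUFFER_TRUNC(20613, chan)  == 16384   (buffer number part)
 *	BUFFER_OFFSET(20613, chan) ==  4229   (offset within the buffer)
 *	SUBBUF_INDEX(20613, chan)  ==     1   (sub-buffer index)
 *	SUBBUF_OFFSET(20613, chan) ==   133   (offset within that sub-buffer)
 *	SUBBUF_ALIGN(20613, chan)  == 24576   (start of the next sub-buffer)
 *
 * SUBBUF_ALIGN always advances to the next sub-buffer boundary, even when the
 * offset is already aligned.
 */
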
/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/

struct commit_counters {
	local_t cc;
	local_t cc_sb;			/* Incremented _once_ at sb switch */
};

struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	local_t offset;			/* Current offset in the buffer */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	atomic_long_t consumed;		/*
					 * Consumed offset in the buffer
					 * standard atomic access (shared)
					 */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	atomic_long_t active_readers;	/*
					 * Active readers count
					 * standard atomic access (shared)
					 */
	local_t corrupted_subbuffers;
	/* One byte is written to this pipe when data is available, in order
	   to wake the consumer (see the consumer-side sketch after this
	   struct). */
	/* portability: single-byte writes must be as quick as possible. The
	   kernel-side buffer must be large enough so the writer doesn't block.
	   From the pipe(7) man page: since Linux 2.6.11, the pipe capacity is
	   65536 bytes. */
	int data_ready_fd_write;
	/* the reading end of the pipe */
	int data_ready_fd_read;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodical switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	/* commit count per subbuffer; must be at end of struct */
	local_t commit_seq[0] ____cacheline_aligned;
} ____cacheline_aligned;

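/*
 * Consumer-side sketch of the data_ready pipe handshake described in
 * struct ust_buffer above (illustrative only; the real consumer lives
 * outside this header). The reader blocks on the read end of the pipe until
 * the tracer writes its single wake-up byte from ltt_check_deliver(), then
 * checks whether a sub-buffer can actually be taken:
 *
 *	static void wait_for_data(struct ust_buffer *buf)	// hypothetical helper
 *	{
 *		char dummy;
 *		ssize_t result;
 *
 *		result = read(buf->data_ready_fd_read, &dummy, 1);
 *		if (result < 0)
 *			PERROR("read (data_ready pipe)");
 *	}
 *
 * The wake-up byte carries no payload; the actual availability test is done
 * afterwards with ltt_poll_deliver()/ust_buffers_get_subbuf().
 */
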
/*
 * A sub-buffer switch is done either during tracing (FORCE_ACTIVE) or as a
 * final flush after tracing stops (FORCE_FLUSH), in which case nothing more
 * is written in the new sub-buffer.
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

extern int ltt_reserve_slot_lockless_slow(struct ust_trace *trace,
		struct ust_channel *ltt_channel, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);

static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0:
		break;
	case 1: *(u8 *)dest = *u.src8;
		break;
	case 2: *(u16 *)dest = *u.src16;
		break;
	case 4: *(u32 *)dest = *u.src32;
		break;
	case 8: *(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}

static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data) + offset;
}

/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
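/*
 * Illustration (assuming, for the sake of concrete numbers only, that
 * LTT_TSC_BITS is 27; the real value comes from the tracer headers): with
 * last_tsc saved for a timestamp of 0x08000123, a new event at 0x08000FFF
 * gives a delta of 0xEDC, and 0xEDC >> 27 == 0, so the compact event header
 * can carry the truncated TSC. A new event at 0x10000456 gives a delta of
 * 0x08000333, and 0x08000333 >> 27 == 1, so last_tsc_overflow() reports an
 * overflow and a full 64-bit TSC is written (LTT_RFLAG_ID_SIZE_TSC is set in
 * ltt_relay_try_reserve() below).
 */
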
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif

static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = atomic_long_read(&buf->consumed);
		/*
		 * If buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by
		 * many writers in the same buffer; the writer at the farthest
		 * write position sub-buffer index in the buffer is the one
		 * which will win this loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
				- SUBBUF_TRUNC(consumed_old, buf->chan))
				>= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(atomic_long_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}

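/*
 * A concrete instance of the push above, reusing the illustrative
 * 4 x 4096-byte sub-buffer geometry from the macro example: with the reader
 * at consumed_old == 133 (inside sub-buffer 0) and the writer reserving at
 * offset == 16517, SUBBUF_TRUNC(offset) - SUBBUF_TRUNC(consumed_old) is
 * 16384 - 0, which reaches alloc_size: the writer has wrapped around and is
 * about to reuse sub-buffer 0. The cmpxchg loop then pushes the reader to
 * consumed_new == SUBBUF_ALIGN(133) == 4096, the start of sub-buffer 1, so
 * in overwrite mode it never trails into data being overwritten.
 */
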
static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	local_set(&buf->commit_seq[idx], commit_count);
}

static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
			& chan->commit_count_mask) == 0)) {
		/*
		 * If we succeeded in updating the cc_sb, we are delivering
		 * the subbuffer. Deals with concurrent updates of the "cc"
		 * value without adding an add_return atomic operation to the
		 * fast path.
		 */
		if (likely(local_cmpxchg(&buf->commit_count[idx].cc_sb,
				old_commit_count, commit_count)
				== old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* wakeup consumer */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result < 0) {
				PERROR("write (in ltt_relay_buffer_flush)");
				ERR("this should never happen!");
			}
		}
	}
}

static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = atomic_long_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = local_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory barrier here, since we are only interested
	 * in a statistically correct polling result. The next poll will
	 * get the data if we are racing. The mb() that ensures correct
	 * memory order is in get_subbuf.
	 */
	write_offset = local_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has been
	 * already fully committed.
	 */
	if (((commit_count - chan->subbuf_size)
			& chan->commit_count_mask)
			- (BUFFER_TRUNC(consumed_old, buf->chan)
			>> chan->n_subbufs_order)
			!= 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
			- SUBBUF_TRUNC(consumed_old, buf->chan))
			== 0)
		return 0;

	return 1;
}

/*
 * returns 0 if reserve ok, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = local_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

//ust// #ifdef CONFIG_LTT_VMCORE
//ust//	prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust//	prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #else
//ust//	prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #endif
	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
			> buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}

static __inline__ int ltt_reserve_slot(struct ust_trace *trace,
		struct ust_channel *chan, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu)
{
	struct ust_buffer *buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	/* FIXME: make this really per cpu? */
	if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
		DBG("Dropping event because nesting is too deep.");
		local_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(local_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	/*
	 * Clear noref flag for this subbuffer.
	 */
//ust//	ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));

	*buf_offset = o_begin + before_hdr_pad;
	return 0;

slow_path:
	return ltt_reserve_slot_lockless_slow(trace, chan,
			transport_data, data_size, slot_size, buf_offset, tsc,
			rflags, largest_align, cpu);
}

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 *
 * Note, however, that as a local_cmpxchg is used for some atomic
 * operations, this function must be called from the CPU which owns the
 * buffer for an ACTIVE flush.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	ltt_force_switch_lockless_slow(buf, mode);
}

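/*
 * Illustrative usage (a sketch, not code taken from this tracer): after
 * tracing has been stopped, a teardown path could flush whatever is left in
 * each per-cpu buffer before the consumer performs its final read. The loop
 * assumes a hypothetical n_cpus count and the chan->buf[] indexing already
 * used by ltt_reserve_slot() below:
 *
 *	static void flush_channel_buffers(struct ust_channel *chan, int n_cpus)
 *	{
 *		int cpu;
 *
 *		for (cpu = 0; cpu < n_cpus; cpu++)
 *			ltt_force_switch(chan->buf[cpu], FORCE_FLUSH);
 *	}
 *
 * A FORCE_ACTIVE switch, by contrast, must run on the CPU owning the buffer,
 * as noted above.
 */
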
/*
 * Needed for flight recording. Must be called after relay_commit.
 * This function increments the sub-buffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dump.
 */
//ust// #ifdef CONFIG_LTT_VMCORE
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = local_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = local_cmpxchg(&buf->commit_seq[idx],
					commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld",
	    buf->chan->channel_name, buf->cpu, idx, commit_count);
}

//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
//ust//		long idx, long buf_offset, long commit_count, size_t data_size)

/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * @chan : channel structure
 * @buf : buffer to commit to
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

#ifdef LTT_NO_IPI_BARRIER
	smp_wmb();
#else
	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_mb() by the IPI
	 * sent by get_subbuf().
	 */
	barrier();
#endif
	local_add(slot_size, &buf->commit_count[endidx].cc);
	/*
	 * commit count read can race with concurrent OOO commit count updates.
	 * This is only needed for ltt_check_deliver (for non-polling delivery
	 * only) and for ltt_write_commit_counter. The race can only cause the
	 * counter to be read with the same value more than once, which could
	 * cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the local_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = local_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update data_size for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}

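/*
 * How the fast-path pieces above fit together (a sketch only, not the
 * tracer's actual serialization code, which also writes the event header;
 * see header-inline.h): a probe is expected to reserve, copy, then commit.
 * trace, chan, cpu, data_size, largest_align and payload stand for values
 * the caller already has.
 *
 *	size_t slot_size;
 *	long buf_offset;
 *	u64 tsc;
 *	unsigned int rflags = 0;
 *	void *transport_data;
 *	struct ust_buffer *buf = chan->buf[cpu];
 *
 *	if (ltt_reserve_slot(trace, chan, &transport_data, data_size,
 *			&slot_size, &buf_offset, &tsc, &rflags,
 *			largest_align, cpu) < 0)
 *		return;			// event dropped or error
 *	ust_buffers_write(buf, buf_offset, payload, data_size);
 *	ltt_commit_slot(chan, buf, buf_offset, data_size, slot_size);
 *
 * ust_buffers_write() is the inline helper defined below; the commit makes
 * the slot visible to the consumer via ltt_check_deliver().
 */
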
void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len, ssize_t cpy);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len)
{
	size_t cpy;
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size * buf->chan->subbuf_cnt);

	cpy = min_t(size_t, len, buf->buf_size - buf_offset);
	ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);

	if (unlikely(len != cpy))
		_ust_buffers_write(buf, buf_offset, src, len, cpy);
	return len;
}

int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

#endif /* _UST_BUFFERS_H */