add debug message when events are lost in a subbuffer, on buffer_end
[lttng-ust.git] / libust / buffers.h

/*
 * buffers.h
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <kcompat/kref.h>
#include <assert.h>
#include "channels.h"
#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"
#include <usterr.h>

/***** SHOULD BE REMOVED ***** */

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size-1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
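
/*
 * Worked example (hypothetical geometry, for illustration only): with
 * subbuf_size = 4096 (subbuf_size_order = 12) and alloc_size = 16384
 * (4 sub-buffers), an offset of 9300 gives:
 *   SUBBUF_OFFSET(9300, chan) = 9300 & 4095           = 1108
 *   SUBBUF_INDEX(9300, chan)  = (9300 & 16383) >> 12  = 2
 *   SUBBUF_TRUNC(9300, chan)  = 9300 & ~4095          = 8192
 *   SUBBUF_ALIGN(9300, chan)  = (9300 + 4096) & ~4095 = 12288
 * i.e. the offset falls 1108 bytes into sub-buffer 2, and aligning it skips
 * to the start of sub-buffer 3.
 */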

/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/

struct commit_counters {
	local_t cc;
	local_t cc_sb;			/* Incremented _once_ at sb switch */
};

struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	local_t offset;			/* Current offset in the buffer */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	atomic_long_t consumed;		/*
					 * Current offset in the buffer
					 * standard atomic access (shared)
					 */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	atomic_long_t active_readers;	/*
					 * Active readers count
					 * standard atomic access (shared)
					 */
	local_t events_lost;
	local_t corrupted_subbuffers;
	/* One byte is written to this pipe when data is available, in order
	   to wake up the consumer. */
	/* Portability: single-byte writes must be as quick as possible. The
	   kernel-side buffer must be large enough so the writer doesn't block.
	   From the pipe(7) man page: since Linux 2.6.11, the pipe capacity is
	   65536 bytes. */
	int data_ready_fd_write;
	/* The reading end of the pipe. */
	int data_ready_fd_read;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodical switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	struct kref kref;
	void *buf_data;
	size_t buf_size;
	int shmid;
	unsigned int cpu;

	/* commit count per subbuffer; must be at end of struct */
	local_t commit_seq[0] ____cacheline_aligned;
} ____cacheline_aligned;

/*
 * A forced sub-buffer switch is done either while tracing is active
 * (FORCE_ACTIVE) or as a final flush after tracing stops (FORCE_FLUSH, in
 * which case nothing is written in the new sub-buffer).
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

extern int ltt_reserve_slot_lockless_slow(struct ust_trace *trace,
		struct ust_channel *ltt_channel, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);

static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0:	break;
	case 1:	*(u8 *)dest = *u.src8;
		break;
	case 2:	*(u16 *)dest = *u.src16;
		break;
	case 4:	*(u32 *)dest = *u.src32;
		break;
	case 8:	*(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}
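
/*
 * Illustrative use (hypothetical caller): copying a fixed-size field such as
 *
 *	u64 seq = 42;
 *	ust_buffers_do_copy(dest, &seq, sizeof(seq));
 *
 * hits the specialized 8-byte case above rather than memcpy(), which is the
 * point of this helper on the tracing fast path.
 */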

static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data)+offset;
}

/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif
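
/*
 * Example (assuming LTT_TSC_BITS == 27, a value commonly used in this tracer
 * family): two reserves whose timestamps share the same tsc >> 27 keep the
 * compact event header; as soon as the shifted value changes,
 * last_tsc_overflow() returns 1 and ltt_relay_try_reserve() sets
 * LTT_RFLAG_ID_SIZE_TSC so the full 64-bit timestamp is written.
 */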

static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = atomic_long_read(&buf->consumed);
		/*
		 * If buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by
		 * many writers in the same buffer; the writer at the farthest
		 * write position in the buffer is the one which will win this
		 * loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
				- SUBBUF_TRUNC(consumed_old, buf->chan))
				>= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(atomic_long_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}
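
/*
 * Numeric illustration (hypothetical geometry: 4 sub-buffers of 4096 bytes,
 * alloc_size = 16384, overwrite mode): with the reader parked at
 * consumed = 4096 and a writer reserving at offset 20480 (one full buffer
 * ahead), SUBBUF_TRUNC(20480) - SUBBUF_TRUNC(4096) = 16384 >= alloc_size, so
 * the reader is pushed to SUBBUF_ALIGN(4096) = 8192, one sub-buffer forward,
 * before the writer starts overwriting sub-buffer 1.
 */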

static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	local_set(&buf->commit_seq[idx], commit_count);
}

static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
			   & chan->commit_count_mask) == 0)) {
		/*
		 * If we succeeded in updating the cc_sb, we are delivering
		 * the subbuffer. Deals with concurrent updates of the "cc"
		 * value without adding an add_return atomic operation to the
		 * fast path.
		 */
		if (likely(local_cmpxchg(&buf->commit_count[idx].cc_sb,
					 old_commit_count, commit_count)
			   == old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* wake up consumer */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result == -1) {
				PERROR("write (in ltt_relay_buffer_flush)");
				ERR("this should never happen!");
			}
		}
	}
}
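
/*
 * Illustration of the delivery check (hypothetical geometry: 4 sub-buffers
 * of 4096 bytes, n_subbufs_order = 2): for an offset taken during the second
 * pass over the buffer, BUFFER_TRUNC(offset) >> 2 = 4096. The sub-buffer is
 * fully committed once its cc reaches 8192, so old_commit_count = 4096, the
 * difference above is 0, and the cmpxchg on cc_sb elects exactly one
 * committer to deliver the sub-buffer and wake the consumer through the pipe.
 */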

static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = atomic_long_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = local_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory barrier here, since we are only interested
	 * in a statistically correct polling result. The next poll will
	 * get the data if we are racing. The mb() that ensures correct
	 * memory order is in get_subbuf.
	 */
	write_offset = local_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has already
	 * been fully committed.
	 */
	if (((commit_count - chan->subbuf_size)
	     & chan->commit_count_mask)
	    - (BUFFER_TRUNC(consumed_old, buf->chan)
	       >> chan->n_subbufs_order)
	    != 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
	     - SUBBUF_TRUNC(consumed_old, buf->chan))
	    == 0)
		return 0;

	return 1;
}

/*
 * returns 0 if reserve ok, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = local_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

//ust// #ifdef CONFIG_LTT_VMCORE
//ust//	prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust//	prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #else
//ust//	prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #endif
	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
		     > buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}
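
/*
 * Note on the fast path above: the three "return 1" cases (offset at the
 * very start of a sub-buffer, record spilling past the sub-buffer end,
 * record ending exactly on a sub-buffer boundary) are precisely the cases
 * that require a sub-buffer switch and header initialization, which is why
 * they are deferred to ltt_reserve_slot_lockless_slow().
 */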

static __inline__ int ltt_reserve_slot(struct ust_trace *trace,
		struct ust_channel *chan, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu)
{
	struct ust_buffer *buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	/* FIXME: make this really per cpu? */
	if (unlikely(__get_cpu_var(ltt_nesting) > 4)) {
		local_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(local_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full
	 * TSC events, never the opposite (missing a full TSC event when it
	 * would be needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	/*
	 * Clear noref flag for this subbuffer.
	 */
//ust//	ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));

	*buf_offset = o_begin + before_hdr_pad;
	return 0;
slow_path:
	return ltt_reserve_slot_lockless_slow(trace, chan,
			transport_data, data_size, slot_size, buf_offset, tsc,
			rflags, largest_align, cpu);
}

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 *
 * Note, however, that as a local_cmpxchg is used for some atomic
 * operations, this function must be called from the CPU which owns the
 * buffer for an ACTIVE flush.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	return ltt_force_switch_lockless_slow(buf, mode);
}

/*
 * For flight recording. Must be called after relay_commit.
 * This function increments the subbuffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dump.
 */
//ust// #ifdef CONFIG_LTT_VMCORE
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = local_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = local_cmpxchg(&buf->commit_seq[idx],
					commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
}
//ust// #else
//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
//ust//		long idx, long buf_offset, long commit_count, size_t data_size)
//ust// {
//ust// }
//ust// #endif
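
/*
 * Note: the cmpxchg loop above only ever moves commit_seq forward. If two
 * committers race to update it (say with commit counts 8192 and 12288,
 * purely hypothetical values), commit_seq ends up at the larger value
 * regardless of ordering, so a crash-dump reader never observes it moving
 * backwards.
 */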

/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @chan : channel structure
 * @buf : per-cpu buffer
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

#ifdef LTT_NO_IPI_BARRIER
	smp_wmb();
#else
	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_mb() by the IPI
	 * sent by get_subbuf().
	 */
	barrier();
#endif
	local_add(slot_size, &buf->commit_count[endidx].cc);
	/*
	 * commit count read can race with concurrent OOO commit count updates.
	 * This is only needed for ltt_check_deliver (for non-polling delivery
	 * only) and for ltt_write_commit_counter. The race can only cause the
	 * counter to be read with the same value more than once, which could
	 * cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the local_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = local_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update data_size for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after a crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}

void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
	const void *src, size_t len, ssize_t cpy);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
	const void *src, size_t len)
{
	size_t cpy;
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);

	cpy = min_t(size_t, len, buf->buf_size - buf_offset);
	ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);

	if (unlikely(len != cpy))
		_ust_buffers_write(buf, buf_offset, src, len, cpy);
	return len;
}
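
/*
 * Write-side usage sketch (illustrative only; the real probes live in the
 * tracer and also serialize the event header through the helpers from
 * header-inline.h, which is elided here; the wrapper name and "payload"
 * argument are hypothetical):
 *
 *	static void example_write_event(struct ust_trace *trace,
 *					struct ust_channel *chan, int cpu,
 *					const void *payload, size_t data_size)
 *	{
 *		struct ust_buffer *buf = chan->buf[cpu];
 *		void *transport_data;
 *		size_t slot_size;
 *		long buf_offset;
 *		u64 tsc;
 *		unsigned int rflags = 0;
 *
 *		if (ltt_reserve_slot(trace, chan, &transport_data, data_size,
 *				     &slot_size, &buf_offset, &tsc, &rflags,
 *				     sizeof(u64), cpu) < 0)
 *			return;	// slot not reserved; counted in events_lost
 *
 *		// the event header would be written here (see
 *		// header-inline.h), advancing buf_offset past the header
 *		ust_buffers_write(buf, buf_offset, payload, data_size);
 *		ltt_commit_slot(chan, buf, buf_offset, data_size, slot_size);
 *	}
 */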

int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

#endif /* _UST_BUFFERS_H */