/*
 * buffers.h
 * LTTng userspace tracer buffering system
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <assert.h>

#include <ust/core.h>

#include "usterr.h"
#include "channels.h"
#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"
/***** FIXME: SHOULD BE REMOVED ***** */

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size-1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)

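/*
 * Worked example (illustrative values only): with subbuf_size = 0x1000
 * (subbuf_size_order = 12) and 4 sub-buffers (alloc_size = 0x4000), an
 * offset of 0x2345 gives:
 *   BUFFER_OFFSET = 0x2345, SUBBUF_INDEX = 2, SUBBUF_OFFSET = 0x345,
 *   SUBBUF_TRUNC = 0x2000, SUBBUF_ALIGN = 0x3000 (start of the next
 *   sub-buffer), BUFFER_TRUNC = 0x0 (offset is still within the first
 *   pass over the buffer).
 */
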
/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/

struct commit_counters {
	long cc;		/* ATOMIC */
	long cc_sb;		/* ATOMIC - Incremented _once_ at sb switch */
};

struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	long offset;			/* Current offset in the buffer *atomic* */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	long consumed;			/* Consumed (read) offset in the buffer, *atomic* access (shared) */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	long active_readers;		/* ATOMIC - Active readers count, standard atomic access (shared) */
	long events_lost;		/* ATOMIC */
	long corrupted_subbuffers;	/* *ATOMIC* */
	/*
	 * One byte is written to this pipe when data is available, in order
	 * to wake up the consumer.
	 */
	/*
	 * Portability: single-byte writes must be as quick as possible. The
	 * kernel-side pipe buffer must be large enough so the writer doesn't
	 * block. From the pipe(7) man page: since Linux 2.6.11, the pipe
	 * capacity is 65536 bytes.
	 */
	int data_ready_fd_write;
	/* the reading end of the pipe */
	int data_ready_fd_read;
	/*
	 * List of buffers with an open pipe, used for fork and forced subbuffer
	 * switch.
	 */
	struct list_head open_buffers_list;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodical switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	struct kref kref;
	void *buf_data;
	size_t buf_size;
	int shmid;
	unsigned int cpu;

	/* commit count per subbuffer; must be at end of struct */
	long commit_seq[0];	/* ATOMIC */
} ____cacheline_aligned;
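
/*
 * Note: commit_seq[0] is a zero-length (flexible) array member, which is why
 * it must stay at the end of the structure; the allocation presumably
 * reserves one entry per sub-buffer right after struct ust_buffer.
 */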

/*
 * A sub-buffer switch is either forced while tracing is active (the writer
 * then continues in the new sub-buffer) or performed as a final flush after
 * tracing is stopped (in which case nothing is written in the new
 * sub-buffer).
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
		struct ust_trace *trace, size_t data_size,
		int largest_align, int cpu,
		struct ust_buffer **ret_buf,
		size_t *slot_size, long *buf_offset,
		u64 *tsc, unsigned int *rflags);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);

static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0:	break;
	case 1:	*(u8 *)dest = *u.src8;
		break;
	case 2:	*(u16 *)dest = *u.src16;
		break;
	case 4:	*(u32 *)dest = *u.src32;
		break;
	case 8:	*(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}
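
/*
 * Note: the fixed-size cases above dereference u16/u32/u64 pointers directly,
 * which assumes either suitably aligned event fields or an architecture that
 * tolerates unaligned accesses; other sizes fall back to memcpy().
 */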

static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data) + offset;
}

/*
 * Last TSC comparison functions. Check whether the current TSC overflows
 * LTT_TSC_BITS bits relative to the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely(tsc_shifted - ltt_buf->last_tsc))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif
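
/*
 * When last_tsc_overflow() returns 1, the reserve path sets
 * LTT_RFLAG_ID_SIZE_TSC (see ltt_relay_try_reserve() below) so that the event
 * header can carry a full timestamp rather than only its LTT_TSC_BITS
 * low-order bits.
 */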

static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = uatomic_read(&buf->consumed);
		/*
		 * If the buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by many
		 * writers in the same buffer; the writer at the farthest
		 * write position in the buffer is the one which will win this
		 * loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
				- SUBBUF_TRUNC(consumed_old, buf->chan))
				>= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}

static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	uatomic_set(&buf->commit_seq[idx], commit_count);
}

static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
				& chan->commit_count_mask) == 0)) {
		/*
		 * If we succeed in updating cc_sb, we are delivering the
		 * subbuffer. This deals with concurrent updates of the "cc"
		 * value without adding an add_return atomic operation to the
		 * fast path.
		 */
		if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
				old_commit_count, commit_count)
					== old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* Wake up the consumer. */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result == -1) {
				PERROR("write (in ltt_check_deliver)");
				ERR("this should never happen!");
			}
		}
	}
}

static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = uatomic_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory barrier here, since we are only interested in a
	 * statistically correct polling result. The next poll will get the
	 * data if we are racing. The mb() that ensures correct memory order
	 * is in get_subbuf.
	 */
	write_offset = uatomic_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has already been
	 * fully committed.
	 */
	if (((commit_count - chan->subbuf_size)
			& chan->commit_count_mask)
			- (BUFFER_TRUNC(consumed_old, buf->chan)
				>> chan->n_subbufs_order)
			!= 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
			- SUBBUF_TRUNC(consumed_old, buf->chan))
			== 0)
		return 0;

	return 1;
}

/*
 * Returns 0 if the reserve went fine, or 1 if the slow path must be taken
 * (i.e. we are at a sub-buffer boundary, or the event does not fit in the
 * current sub-buffer).
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = uatomic_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

//ust// #ifdef CONFIG_LTT_VMCORE
//ust//	prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust//	prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #else
//ust//	prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #endif
	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
			> buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}

static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
		struct ust_trace *trace, size_t data_size,
		int largest_align, int cpu,
		struct ust_buffer **ret_buf,
		size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags)
{
	struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	/* FIXME: make this really per cpu? */
	if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
		DBG("Dropping event because nesting is too deep.");
		uatomic_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	/*
	 * Clear noref flag for this subbuffer.
	 */
//ust//	ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));

	*buf_offset = o_begin + before_hdr_pad;
	return 0;
slow_path:
	return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
			largest_align, cpu, ret_buf,
			slot_size, buf_offset, tsc,
			rflags);
}
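
/*
 * Note on the fast path above: a single uatomic_cmpxchg() on buf->offset
 * claims the slot. Contention on that cmpxchg, a sub-buffer boundary, or an
 * event that does not fit in the current sub-buffer falls back to
 * ltt_reserve_slot_lockless_slow().
 */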

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	return ltt_force_switch_lockless_slow(buf, mode);
}

/*
 * For flight recording. Must be called after relay_commit.
 * This function increments the subbuffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dumps.
 */
//ust// #ifdef CONFIG_LTT_VMCORE
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
				commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld",
			buf->chan->channel_name, buf->cpu, idx, commit_count);
}
//ust// #else
//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
//ust//		long idx, long buf_offset, long commit_count, size_t data_size)
//ust// {
//ust// }
//ust// #endif


/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @chan : channel structure
 * @buf : buffer to commit into
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

	smp_wmb();

	uatomic_add(&buf->commit_count[endidx].cc, slot_size);
	/*
	 * The commit count read can race with concurrent OOO commit count
	 * updates. This is only needed for ltt_check_deliver (for non-polling
	 * delivery only) and for ltt_write_commit_counter. The race can only
	 * cause the counter to be read with the same value more than once,
	 * which could cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the uatomic_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = uatomic_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update data_size for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset,
			commit_count, data_size);
}
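
/*
 * Typical write path (simplified sketch; the real call sites live in the
 * tracer code, and error handling is omitted here):
 *
 *	ltt_reserve_slot(chan, trace, data_size, largest_align, cpu,
 *			 &buf, &slot_size, &buf_offset, &tsc, &rflags);
 *	... write the event header at buf_offset, advance buf_offset past the
 *	    header, then copy the payload with ust_buffers_write() ...
 *	ltt_commit_slot(chan, buf, buf_offset, data_size, slot_size);
 *
 * where the buf_offset passed to ltt_commit_slot() is, as documented above,
 * the offset following the event header.
 */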

void _ust_buffers_strncpy_fixup(struct ust_buffer *buf, size_t offset,
		size_t len, size_t copied, int terminated);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len)
{
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size * buf->chan->subbuf_cnt);
	assert(buf_offset + len
		<= buf->chan->subbuf_size * buf->chan->subbuf_cnt);

	ust_buffers_do_copy(buf->buf_data + buf_offset, src, len);

	return len;
}

/*
 * ust_buffers_do_memset - write character into dest.
 * @dest: destination
 * @src: source character
 * @len: length to write
 */
static __inline__
void ust_buffers_do_memset(void *dest, char src, size_t len)
{
	/*
	 * What we really want here is an __inline__ memset, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--)
		*(u8 *)dest++ = src;
}

/*
 * ust_buffers_do_strncpy - copy a string up to a certain number of bytes
 * @dest: destination
 * @src: source
 * @len: max. length to copy
 * @terminated: output string ends with \0 (output)
 *
 * returns the number of bytes copied. Does not finalize with \0 if len is
 * reached.
 */
static __inline__
size_t ust_buffers_do_strncpy(void *dest, const void *src, size_t len,
		int *terminated)
{
	size_t orig_len = len;

	*terminated = 0;
	/*
	 * What we really want here is an __inline__ strncpy, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--) {
		*(u8 *)dest = LOAD_SHARED(*(const u8 *)src);
		/* Check with dest, because src may be modified concurrently */
		if (*(const u8 *)dest == '\0') {
			len--;
			*terminated = 1;
			break;
		}
		dest++;
		src++;
	}
	return orig_len - len;
}

static __inline__
int ust_buffers_strncpy(struct ust_buffer *buf, size_t offset, const void *src,
		size_t len)
{
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
	ssize_t copied;
	int terminated;

	assert(buf_offset < buf->chan->subbuf_size * buf->chan->subbuf_cnt);
	assert(buf_offset + len
		<= buf->chan->subbuf_size * buf->chan->subbuf_cnt);

	copied = ust_buffers_do_strncpy(buf->buf_data + buf_offset,
			src, len, &terminated);
	if (unlikely(copied < len || !terminated))
		_ust_buffers_strncpy_fixup(buf, offset, len, copied,
				terminated);
	return len;
}

extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);

#endif /* _UST_BUFFERS_H */