Make only libust and libustconsumer use a signal safe usterr.h
[lttng-ust.git] / libust / buffers.h
1 /*
2 * buffers.h
3 * LTTng userspace tracer buffering system
4 *
5 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
6 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #ifndef _UST_BUFFERS_H
24 #define _UST_BUFFERS_H
25
26 #include <assert.h>
27
28 #include <ust/core.h>
29 #include <ust/clock.h>
30
31 #include "usterr_signal_safe.h"
32 #include "channels.h"
33 #include "tracerconst.h"
34 #include "tracercore.h"
35 #include "header-inline.h"
36
/***** FIXME: SHOULD BE REMOVED ***** */

/*
 * Offset arithmetic helpers. "chan" must be a pointer to something exposing
 * alloc_size, subbuf_size and subbuf_size_order.
 *
 * Every mask is built as (size - 1).
 * NOTE(review): this presumably requires alloc_size and subbuf_size to be
 * powers of two - confirm against the channel setup code.
 *
 * BUFFER_TRUNC  - zeroes the subbuffer offset and the subbuffer number parts
 *                 of the offset, which leaves only the buffer number.
 * BUFFER_OFFSET - position of the offset inside the whole buffer.
 * SUBBUF_OFFSET - position of the offset inside its subbuffer.
 * SUBBUF_ALIGN  - start of the subbuffer following the one holding the
 *                 offset (an offset already on a boundary still advances by
 *                 one full subbuffer).
 * SUBBUF_TRUNC  - start of the subbuffer holding the offset.
 * SUBBUF_INDEX  - index of the subbuffer holding the offset.
 */
#define BUFFER_TRUNC(offset, chan)				\
	((offset) & ~((chan)->alloc_size - 1))
#define BUFFER_OFFSET(offset, chan)				\
	((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan)				\
	((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan)				\
	(((offset) + (chan)->subbuf_size) & ~((chan)->subbuf_size - 1))
#define SUBBUF_TRUNC(offset, chan)				\
	((offset) & ~((chan)->subbuf_size - 1))
#define SUBBUF_INDEX(offset, chan)				\
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)

/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION	8

/**************************************/
60
/*
 * Commit accounting for one sub-buffer. Both counters are accessed with the
 * uatomic_*() primitives.
 */
struct commit_counters {
	/* ATOMIC - grows by the slot size on every commit */
	long cc;
	/* ATOMIC - Incremented _once_ at sb switch */
	long cc_sb;
};
65
66 struct ust_buffer {
67 /* First 32 bytes cache-hot cacheline */
68 long offset; /* Current offset in the buffer *atomic* */
69 struct commit_counters *commit_count; /* Commit count per sub-buffer */
70 long consumed; /* Current offset in the buffer *atomic* access (shared) */
71 unsigned long last_tsc; /*
72 * Last timestamp written in the buffer.
73 */
74 /* End of first 32 bytes cacheline */
75 long active_readers; /* ATOMIC - Active readers count standard atomic access (shared) */
76 long events_lost; /* ATOMIC */
77 long corrupted_subbuffers; /* *ATOMIC* */
78 /* one byte is written to this pipe when data is available, in order
79 to wake the consumer */
80 /* portability: Single byte writes must be as quick as possible. The kernel-side
81 buffer must be large enough so the writer doesn't block. From the pipe(7)
82 man page: Since linux 2.6.11, the pipe capacity is 65536 bytes. */
83 int data_ready_fd_write;
84 /* the reading end of the pipe */
85 int data_ready_fd_read;
86 /*
87 * List of buffers with an open pipe, used for fork and forced subbuffer
88 * switch.
89 */
90 struct cds_list_head open_buffers_list;
91
92 unsigned int finalized;
93 //ust// struct timer_list switch_timer; /* timer for periodical switch */
94 unsigned long switch_timer_interval; /* 0 = unset */
95
96 struct ust_channel *chan;
97
98 struct urcu_ref urcu_ref;
99 void *buf_data;
100 size_t buf_size;
101 int shmid;
102 unsigned int cpu;
103
104 /* commit count per subbuffer; must be at end of struct */
105 long commit_seq[0]; /* ATOMIC */
106 } ____cacheline_aligned;
107
/*
 * A switch is done during tracing or as a final flush after tracing (so it
 * won't write in the new sub-buffer).
 * FIXME: make this message clearer
 */
enum force_switch_mode {
	FORCE_ACTIVE,
	FORCE_FLUSH
};
114
115 extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
116 struct ust_trace *trace, size_t data_size,
117 int largest_align, int cpu,
118 struct ust_buffer **ret_buf,
119 size_t *slot_size, long *buf_offset,
120 u64 *tsc, unsigned int *rflags);
121
122 extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
123 enum force_switch_mode mode);
124
125
/*
 * ust_buffers_do_copy - copy len bytes from src to dest.
 * @dest: destination
 * @src: source
 * @len: number of bytes to copy
 *
 * The 1, 2, 4 and 8 byte sizes are special-cased with a constant length so
 * the compiler can expand them inline. Every size goes through byte-wise
 * copy semantics (memcpy), so neither pointer has to be aligned for a wider
 * type and no object is accessed through an incompatible pointer type.
 * The regions must not overlap.
 */
static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	switch (len) {
	case 0:
		break;
	case 1:
		*(unsigned char *)dest = *(const unsigned char *)src;
		break;
	case 2:
		memcpy(dest, src, 2);
		break;
	case 4:
		memcpy(dest, src, 4);
		break;
	case 8:
		memcpy(dest, src, 8);
		break;
	default:
		memcpy(dest, src, len);
		break;
	}
}
150
151 static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
152 {
153 return ((char *)buf->buf_data)+offset;
154 }
155
156 /*
157 * Last TSC comparison functions. Check if the current TSC overflows
158 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
159 * atomically.
160 */
161
162 /* FIXME: does this test work properly? */
163 #if (BITS_PER_LONG == 32)
164 static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
165 u64 tsc)
166 {
167 ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
168 }
169
170 static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
171 u64 tsc)
172 {
173 unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);
174
175 if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
176 return 1;
177 else
178 return 0;
179 }
180 #else
181 static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
182 u64 tsc)
183 {
184 ltt_buf->last_tsc = (unsigned long)tsc;
185 }
186
187 static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
188 u64 tsc)
189 {
190 if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
191 return 1;
192 else
193 return 0;
194 }
195 #endif
196
197 static __inline__ void ltt_reserve_push_reader(
198 struct ust_channel *rchan,
199 struct ust_buffer *buf,
200 long offset)
201 {
202 long consumed_old, consumed_new;
203
204 do {
205 consumed_old = uatomic_read(&buf->consumed);
206 /*
207 * If buffer is in overwrite mode, push the reader consumed
208 * count if the write position has reached it and we are not
209 * at the first iteration (don't push the reader farther than
210 * the writer). This operation can be done concurrently by many
211 * writers in the same buffer, the writer being at the farthest
212 * write position sub-buffer index in the buffer being the one
213 * which will win this loop.
214 * If the buffer is not in overwrite mode, pushing the reader
215 * only happens if a sub-buffer is corrupted.
216 */
217 if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
218 - SUBBUF_TRUNC(consumed_old, buf->chan))
219 >= rchan->alloc_size))
220 consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
221 else
222 return;
223 } while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
224 consumed_new) != consumed_old));
225 }
226
227 static __inline__ void ltt_vmcore_check_deliver(
228 struct ust_buffer *buf,
229 long commit_count, long idx)
230 {
231 uatomic_set(&buf->commit_seq[idx], commit_count);
232 }
233
234 static __inline__ void ltt_check_deliver(struct ust_channel *chan,
235 struct ust_buffer *buf,
236 long offset, long commit_count, long idx)
237 {
238 long old_commit_count = commit_count - chan->subbuf_size;
239
240 /* Check if all commits have been done */
241 if (unlikely((BUFFER_TRUNC(offset, chan)
242 >> chan->n_subbufs_order)
243 - (old_commit_count
244 & chan->commit_count_mask) == 0)) {
245 /*
246 * If we succeeded in updating the cc_sb, we are delivering
247 * the subbuffer. Deals with concurrent updates of the "cc"
248 * value without adding a add_return atomic operation to the
249 * fast path.
250 */
251 if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
252 old_commit_count, commit_count)
253 == old_commit_count)) {
254 int result;
255
256 /*
257 * Set noref flag for this subbuffer.
258 */
259 //ust// ltt_set_noref_flag(rchan, buf, idx);
260 ltt_vmcore_check_deliver(buf, commit_count, idx);
261
262 /* wakeup consumer */
263 result = write(buf->data_ready_fd_write, "1", 1);
264 if(result == -1) {
265 PERROR("write (in ltt_relay_buffer_flush)");
266 ERR("this should never happen!");
267 }
268 }
269 }
270 }
271
272 static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
273 {
274 long consumed_old, consumed_idx, commit_count, write_offset;
275
276 consumed_old = uatomic_read(&buf->consumed);
277 consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
278 commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
279 /*
280 * No memory cmm_barrier here, since we are only interested
281 * in a statistically correct polling result. The next poll will
282 * get the data is we are racing. The mb() that ensures correct
283 * memory order is in get_subbuf.
284 */
285 write_offset = uatomic_read(&buf->offset);
286
287 /*
288 * Check that the subbuffer we are trying to consume has been
289 * already fully committed.
290 */
291
292 if (((commit_count - chan->subbuf_size)
293 & chan->commit_count_mask)
294 - (BUFFER_TRUNC(consumed_old, buf->chan)
295 >> chan->n_subbufs_order)
296 != 0)
297 return 0;
298
299 /*
300 * Check that we are not about to read the same subbuffer in
301 * which the writer head is.
302 */
303 if ((SUBBUF_TRUNC(write_offset, buf->chan)
304 - SUBBUF_TRUNC(consumed_old, buf->chan))
305 == 0)
306 return 0;
307
308 return 1;
309
310 }
311
312 /*
313 * returns 0 if reserve ok, or 1 if the slow path must be taken.
314 */
315 static __inline__ int ltt_relay_try_reserve(
316 struct ust_channel *chan,
317 struct ust_buffer *buf,
318 size_t data_size,
319 u64 *tsc, unsigned int *rflags, int largest_align,
320 long *o_begin, long *o_end, long *o_old,
321 size_t *before_hdr_pad, size_t *size)
322 {
323 *o_begin = uatomic_read(&buf->offset);
324 *o_old = *o_begin;
325
326 *tsc = trace_clock_read64();
327
328 //ust// #ifdef CONFIG_LTT_VMCORE
329 //ust// prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
330 //ust// prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
331 //ust// #else
332 //ust// prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
333 //ust// #endif
334 if (last_tsc_overflow(buf, *tsc))
335 *rflags = LTT_RFLAG_ID_SIZE_TSC;
336
337 if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
338 return 1;
339
340 *size = ust_get_header_size(chan,
341 *o_begin, data_size,
342 before_hdr_pad, *rflags);
343 *size += ltt_align(*o_begin + *size, largest_align) + data_size;
344 if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
345 > buf->chan->subbuf_size))
346 return 1;
347
348 /*
349 * Event fits in the current buffer and we are not on a switch
350 * boundary. It's safe to write.
351 */
352 *o_end = *o_begin + *size;
353
354 if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
355 /*
356 * The offset_end will fall at the very beginning of the next
357 * subbuffer.
358 */
359 return 1;
360
361 return 0;
362 }
363
364 static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
365 struct ust_trace *trace, size_t data_size,
366 int largest_align, int cpu,
367 struct ust_buffer **ret_buf,
368 size_t *slot_size, long *buf_offset, u64 *tsc,
369 unsigned int *rflags)
370 {
371 struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
372 long o_begin, o_end, o_old;
373 size_t before_hdr_pad;
374
375 /*
376 * Perform retryable operations.
377 */
378 /* FIXME: make this really per cpu? */
379 if (unlikely(CMM_LOAD_SHARED(ltt_nesting) > 4)) {
380 DBG("Dropping event because nesting is too deep.");
381 uatomic_inc(&buf->events_lost);
382 return -EPERM;
383 }
384
385 if (unlikely(ltt_relay_try_reserve(chan, buf,
386 data_size, tsc, rflags,
387 largest_align, &o_begin, &o_end, &o_old,
388 &before_hdr_pad, slot_size)))
389 goto slow_path;
390
391 if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
392 goto slow_path;
393
394 /*
395 * Atomically update last_tsc. This update races against concurrent
396 * atomic updates, but the race will always cause supplementary full TSC
397 * events, never the opposite (missing a full TSC event when it would be
398 * needed).
399 */
400 save_last_tsc(buf, *tsc);
401
402 /*
403 * Push the reader if necessary
404 */
405 ltt_reserve_push_reader(chan, buf, o_end - 1);
406
407 /*
408 * Clear noref flag for this subbuffer.
409 */
410 //ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));
411
412 *buf_offset = o_begin + before_hdr_pad;
413 return 0;
414 slow_path:
415 return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
416 largest_align, cpu, ret_buf,
417 slot_size, buf_offset, tsc,
418 rflags);
419 }
420
421 /*
422 * Force a sub-buffer switch for a per-cpu buffer. This operation is
423 * completely reentrant : can be called while tracing is active with
424 * absolutely no lock held.
425 */
426 static __inline__ void ltt_force_switch(struct ust_buffer *buf,
427 enum force_switch_mode mode)
428 {
429 return ltt_force_switch_lockless_slow(buf, mode);
430 }
431
432 /*
433 * for flight recording. must be called after relay_commit.
434 * This function increments the subbuffers's commit_seq counter each time the
435 * commit count reaches back the reserve offset (module subbuffer size). It is
436 * useful for crash dump.
437 */
438 //ust// #ifdef CONFIG_LTT_VMCORE
439 static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
440 struct ust_buffer *buf, long idx, long buf_offset,
441 long commit_count, size_t data_size)
442 {
443 long offset;
444 long commit_seq_old;
445
446 offset = buf_offset + data_size;
447
448 /*
449 * SUBBUF_OFFSET includes commit_count_mask. We can simply
450 * compare the offsets within the subbuffer without caring about
451 * buffer full/empty mismatch because offset is never zero here
452 * (subbuffer header and event headers have non-zero length).
453 */
454 if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
455 return;
456
457 commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
458 while (commit_seq_old < commit_count)
459 commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
460 commit_seq_old, commit_count);
461
462 DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
463 }
464 //ust// #else
465 //ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
466 //ust// long idx, long buf_offset, long commit_count, size_t data_size)
467 //ust// {
468 //ust// }
469 //ust// #endif
470
471 /*
472 * Atomic unordered slot commit. Increments the commit count in the
473 * specified sub-buffer, and delivers it if necessary.
474 *
475 * Parameters:
476 *
477 * @ltt_channel : channel structure
478 * @transport_data: transport-specific data
479 * @buf_offset : offset following the event header.
480 * @data_size : size of the event data.
481 * @slot_size : size of the reserved slot.
482 */
483 static __inline__ void ltt_commit_slot(
484 struct ust_channel *chan,
485 struct ust_buffer *buf, long buf_offset,
486 size_t data_size, size_t slot_size)
487 {
488 long offset_end = buf_offset;
489 long endidx = SUBBUF_INDEX(offset_end - 1, chan);
490 long commit_count;
491
492 cmm_smp_wmb();
493
494 uatomic_add(&buf->commit_count[endidx].cc, slot_size);
495 /*
496 * commit count read can race with concurrent OOO commit count updates.
497 * This is only needed for ltt_check_deliver (for non-polling delivery
498 * only) and for ltt_write_commit_counter. The race can only cause the
499 * counter to be read with the same value more than once, which could
500 * cause :
501 * - Multiple delivery for the same sub-buffer (which is handled
502 * gracefully by the reader code) if the value is for a full
503 * sub-buffer. It's important that we can never miss a sub-buffer
504 * delivery. Re-reading the value after the uatomic_add ensures this.
505 * - Reading a commit_count with a higher value that what was actually
506 * added to it for the ltt_write_commit_counter call (again caused by
507 * a concurrent committer). It does not matter, because this function
508 * is interested in the fact that the commit count reaches back the
509 * reserve offset for a specific sub-buffer, which is completely
510 * independent of the order.
511 */
512 commit_count = uatomic_read(&buf->commit_count[endidx].cc);
513
514 ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
515 /*
516 * Update data_size for each commit. It's needed only for extracting
517 * ltt buffers from vmcore, after crash.
518 */
519 ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
520 }
521
/*
 * Out-of-line helper; only declared here. ust_buffers_strncpy() calls it
 * when fewer than len bytes were copied or when the copy was not terminated
 * by a '\0'.
 */
void _ust_buffers_strncpy_fixup(struct ust_buffer *buf,
				size_t offset,
				size_t len,
				size_t copied,
				int terminated);
524
525 static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
526 const void *src, size_t len)
527 {
528 size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
529
530 assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
531 assert(buf_offset + len
532 <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);
533
534 ust_buffers_do_copy(buf->buf_data + buf_offset, src, len);
535
536 return len;
537 }
538
/*
 * ust_buffers_do_memset - write character into dest.
 * @dest: destination
 * @src: source character
 * @len: length to write
 *
 * Writes nothing when len is 0.
 */
static __inline__
void ust_buffers_do_memset(void *dest, char src, size_t len)
{
	/*
	 * Walk the destination through a byte pointer: incrementing a
	 * void * is a compiler extension, not ISO C.
	 */
	unsigned char *out = (unsigned char *)dest;

	/*
	 * What we really want here is an __inline__ memset, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--)
		*out++ = (unsigned char)src;
}
555
556 /*
557 * ust_buffers_do_strncpy - copy a string up to a certain number of bytes
558 * @dest: destination
559 * @src: source
560 * @len: max. length to copy
561 * @terminated: output string ends with \0 (output)
562 *
563 * returns the number of bytes copied. Does not finalize with \0 if len is
564 * reached.
565 */
566 static __inline__
567 size_t ust_buffers_do_strncpy(void *dest, const void *src, size_t len,
568 int *terminated)
569 {
570 size_t orig_len = len;
571
572 *terminated = 0;
573 /*
574 * What we really want here is an __inline__ strncpy, but we
575 * don't have constants, so gcc generally uses a function call.
576 */
577 for (; len > 0; len--) {
578 *(u8 *)dest = CMM_LOAD_SHARED(*(const u8 *)src);
579 /* Check with dest, because src may be modified concurrently */
580 if (*(const u8 *)dest == '\0') {
581 len--;
582 *terminated = 1;
583 break;
584 }
585 dest++;
586 src++;
587 }
588 return orig_len - len;
589 }
590
591 static __inline__
592 int ust_buffers_strncpy(struct ust_buffer *buf, size_t offset, const void *src,
593 size_t len)
594 {
595 size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
596 ssize_t copied;
597 int terminated;
598
599 assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
600 assert(buf_offset + len
601 <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);
602
603 copied = ust_buffers_do_strncpy(buf->buf_data + buf_offset,
604 src, len, &terminated);
605 if (unlikely(copied < len || !terminated))
606 _ust_buffers_strncpy_fixup(buf, offset, len, copied,
607 terminated);
608 return len;
609 }
610
/*
 * Functions implemented outside of this header; only their prototypes are
 * visible here.
 * NOTE(review): presumably ust_buffers_get_subbuf() hands a consumed offset
 * back through @consumed and ust_buffers_put_subbuf() takes it back as
 * @uconsumed_old - confirm against the implementation.
 */
extern int ust_buffers_get_subbuf(struct ust_buffer *buf,
				  long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf,
				  unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);
615
616 #endif /* _UST_BUFFERS_H */
This page took 0.040988 seconds and 4 git commands to generate.