/*
 * buffers.h
 * LTTng userspace tracer buffering system
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <assert.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>

#include <ust/core.h>

#include "usterr.h"
#include "channels.h"
#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"

/***** FIXME: SHOULD BE REMOVED ***** */

/*
 * Offset arithmetic helpers.
 *
 * `chan` is a pointer to a structure providing alloc_size, subbuf_size and
 * subbuf_size_order. All of these macros are mask/shift based, so they only
 * give meaningful results when alloc_size and subbuf_size are powers of two
 * and subbuf_size == 1 << subbuf_size_order.
 */

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size - 1)))
/* Position of offset inside the buffer (offset modulo alloc_size). */
#define BUFFER_OFFSET(offset, chan)	((offset) & ((chan)->alloc_size - 1))
/* Position of offset inside its sub-buffer (offset modulo subbuf_size). */
#define SUBBUF_OFFSET(offset, chan)	((offset) & ((chan)->subbuf_size - 1))
/*
 * Start of the sub-buffer that follows the one containing offset. Note that
 * this always advances, even when offset is already sub-buffer aligned.
 */
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
/* Start of the sub-buffer containing offset. */
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
/* Index, inside the buffer, of the sub-buffer containing offset. */
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)

/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION		8

/**************************************/

/*
 * Commit counters kept for each sub-buffer. Both fields are only accessed
 * through the uatomic_*() primitives.
 */
struct commit_counters {
	long cc;			/* ATOMIC */
	long cc_sb;			/* ATOMIC - Incremented _once_ at sb switch */
};

65struct ust_buffer {
66 /* First 32 bytes cache-hot cacheline */
b102c2b0 67 long offset; /* Current offset in the buffer *atomic* */
b73a4c47 68 struct commit_counters *commit_count; /* Commit count per sub-buffer */
b102c2b0 69 long consumed; /* Current offset in the buffer *atomic* access (shared) */
b5b073e2
PMF
70 unsigned long last_tsc; /*
71 * Last timestamp written in the buffer.
72 */
73 /* End of first 32 bytes cacheline */
b102c2b0
PMF
74 long active_readers; /* ATOMIC - Active readers count standard atomic access (shared) */
75 long events_lost; /* ATOMIC */
76 long corrupted_subbuffers; /* *ATOMIC* */
b5b073e2
PMF
77 /* one byte is written to this pipe when data is available, in order
78 to wake the consumer */
79 /* portability: Single byte writes must be as quick as possible. The kernel-side
80 buffer must be large enough so the writer doesn't block. From the pipe(7)
81 man page: Since linux 2.6.11, the pipe capacity is 65536 bytes. */
82 int data_ready_fd_write;
83 /* the reading end of the pipe */
84 int data_ready_fd_read;
85
b73a4c47
PMF
86 unsigned int finalized;
87//ust// struct timer_list switch_timer; /* timer for periodical switch */
88 unsigned long switch_timer_interval; /* 0 = unset */
89
b5b073e2 90 struct ust_channel *chan;
b73a4c47 91
b5b073e2
PMF
92 struct kref kref;
93 void *buf_data;
94 size_t buf_size;
95 int shmid;
204141ee 96 unsigned int cpu;
b5b073e2
PMF
97
98 /* commit count per subbuffer; must be at end of struct */
b102c2b0 99 long commit_seq[0] ____cacheline_aligned; /* ATOMIC */
b5b073e2
PMF
100} ____cacheline_aligned;
101
/*
 * Mode of a forced sub-buffer switch.
 *
 * A switch is done either during tracing, or as a final flush after tracing
 * (in which case nothing is written in the new sub-buffer).
 * NOTE(review): presumably FORCE_ACTIVE is the "during tracing" case and
 * FORCE_FLUSH the final flush -- confirm against
 * ltt_force_switch_lockless_slow().
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

109extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
110 struct ust_trace *trace, size_t data_size,
111 int largest_align, int cpu,
112 struct ust_buffer **ret_buf,
113 size_t *slot_size, long *buf_offset,
114 u64 *tsc, unsigned int *rflags);
b73a4c47
PMF
115
116extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
117 enum force_switch_mode mode);
118
b5b073e2 119
/*
 * Copy len bytes from src to dest (the regions must not overlap).
 *
 * The 1/2/4/8 byte cases are spelled out with a constant size so that the
 * compiler can turn them into a single load/store when len is a compile-time
 * constant at the call site. memcpy() is used rather than a store through a
 * casted pointer because neither dest nor src is guaranteed to be suitably
 * aligned for a wider integer type, and such a store would also break the
 * strict aliasing rules.
 */
static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	switch (len) {
	case 0:
		break;
	case 1:
		memcpy(dest, src, 1);
		break;
	case 2:
		memcpy(dest, src, 2);
		break;
	case 4:
		memcpy(dest, src, 4);
		break;
	case 8:
		memcpy(dest, src, 8);
		break;
	default:
		memcpy(dest, src, len);
		break;
	}
}

145static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
146{
147 return ((char *)buf->buf_data)+offset;
148}
149
150/*
151 * Last TSC comparison functions. Check if the current TSC overflows
152 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
153 * atomically.
154 */
155
156/* FIXME: does this test work properly? */
157#if (BITS_PER_LONG == 32)
158static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
159 u64 tsc)
160{
161 ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
162}
163
164static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
165 u64 tsc)
166{
167 unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);
168
169 if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
170 return 1;
171 else
172 return 0;
173}
174#else
175static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
176 u64 tsc)
177{
178 ltt_buf->last_tsc = (unsigned long)tsc;
179}
180
181static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
182 u64 tsc)
183{
184 if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
185 return 1;
186 else
187 return 0;
188}
189#endif
190
191static __inline__ void ltt_reserve_push_reader(
192 struct ust_channel *rchan,
193 struct ust_buffer *buf,
194 long offset)
195{
196 long consumed_old, consumed_new;
197
198 do {
b102c2b0 199 consumed_old = uatomic_read(&buf->consumed);
b73a4c47
PMF
200 /*
201 * If buffer is in overwrite mode, push the reader consumed
202 * count if the write position has reached it and we are not
203 * at the first iteration (don't push the reader farther than
204 * the writer). This operation can be done concurrently by many
205 * writers in the same buffer, the writer being at the farthest
206 * write position sub-buffer index in the buffer being the one
207 * which will win this loop.
208 * If the buffer is not in overwrite mode, pushing the reader
209 * only happens if a sub-buffer is corrupted.
210 */
211 if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
212 - SUBBUF_TRUNC(consumed_old, buf->chan))
213 >= rchan->alloc_size))
214 consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
215 else
216 return;
b102c2b0 217 } while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
b73a4c47
PMF
218 consumed_new) != consumed_old));
219}
220
221static __inline__ void ltt_vmcore_check_deliver(
222 struct ust_buffer *buf,
223 long commit_count, long idx)
224{
b102c2b0 225 uatomic_set(&buf->commit_seq[idx], commit_count);
b73a4c47
PMF
226}
227
228static __inline__ void ltt_check_deliver(struct ust_channel *chan,
229 struct ust_buffer *buf,
230 long offset, long commit_count, long idx)
231{
232 long old_commit_count = commit_count - chan->subbuf_size;
233
234 /* Check if all commits have been done */
235 if (unlikely((BUFFER_TRUNC(offset, chan)
236 >> chan->n_subbufs_order)
237 - (old_commit_count
238 & chan->commit_count_mask) == 0)) {
239 /*
240 * If we succeeded in updating the cc_sb, we are delivering
241 * the subbuffer. Deals with concurrent updates of the "cc"
242 * value without adding a add_return atomic operation to the
243 * fast path.
244 */
b102c2b0 245 if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
b73a4c47
PMF
246 old_commit_count, commit_count)
247 == old_commit_count)) {
248 int result;
249
250 /*
251 * Set noref flag for this subbuffer.
252 */
253//ust// ltt_set_noref_flag(rchan, buf, idx);
254 ltt_vmcore_check_deliver(buf, commit_count, idx);
255
256 /* wakeup consumer */
257 result = write(buf->data_ready_fd_write, "1", 1);
258 if(result == -1) {
259 PERROR("write (in ltt_relay_buffer_flush)");
260 ERR("this should never happen!");
261 }
262 }
263 }
264}
265
266static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
267{
268 long consumed_old, consumed_idx, commit_count, write_offset;
269
b102c2b0 270 consumed_old = uatomic_read(&buf->consumed);
b73a4c47 271 consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
b102c2b0 272 commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
b73a4c47
PMF
273 /*
274 * No memory barrier here, since we are only interested
275 * in a statistically correct polling result. The next poll will
276 * get the data is we are racing. The mb() that ensures correct
277 * memory order is in get_subbuf.
278 */
b102c2b0 279 write_offset = uatomic_read(&buf->offset);
b73a4c47
PMF
280
281 /*
282 * Check that the subbuffer we are trying to consume has been
283 * already fully committed.
284 */
285
286 if (((commit_count - chan->subbuf_size)
287 & chan->commit_count_mask)
288 - (BUFFER_TRUNC(consumed_old, buf->chan)
289 >> chan->n_subbufs_order)
290 != 0)
291 return 0;
292
293 /*
294 * Check that we are not about to read the same subbuffer in
295 * which the writer head is.
296 */
297 if ((SUBBUF_TRUNC(write_offset, buf->chan)
298 - SUBBUF_TRUNC(consumed_old, buf->chan))
299 == 0)
300 return 0;
301
302 return 1;
303
304}
305
306/*
307 * returns 0 if reserve ok, or 1 if the slow path must be taken.
308 */
309static __inline__ int ltt_relay_try_reserve(
310 struct ust_channel *chan,
311 struct ust_buffer *buf,
312 size_t data_size,
313 u64 *tsc, unsigned int *rflags, int largest_align,
314 long *o_begin, long *o_end, long *o_old,
315 size_t *before_hdr_pad, size_t *size)
316{
b102c2b0 317 *o_begin = uatomic_read(&buf->offset);
b73a4c47
PMF
318 *o_old = *o_begin;
319
320 *tsc = trace_clock_read64();
321
322//ust// #ifdef CONFIG_LTT_VMCORE
323//ust// prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
324//ust// prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
325//ust// #else
326//ust// prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
327//ust// #endif
328 if (last_tsc_overflow(buf, *tsc))
329 *rflags = LTT_RFLAG_ID_SIZE_TSC;
330
331 if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
332 return 1;
333
334 *size = ust_get_header_size(chan,
335 *o_begin, data_size,
336 before_hdr_pad, *rflags);
337 *size += ltt_align(*o_begin + *size, largest_align) + data_size;
338 if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
339 > buf->chan->subbuf_size))
340 return 1;
341
342 /*
343 * Event fits in the current buffer and we are not on a switch
344 * boundary. It's safe to write.
345 */
346 *o_end = *o_begin + *size;
347
348 if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
349 /*
350 * The offset_end will fall at the very beginning of the next
351 * subbuffer.
352 */
353 return 1;
354
355 return 0;
356}
357
12e81b07
PMF
358static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
359 struct ust_trace *trace, size_t data_size,
360 int largest_align, int cpu,
361 struct ust_buffer **ret_buf,
362 size_t *slot_size, long *buf_offset, u64 *tsc,
363 unsigned int *rflags)
b73a4c47 364{
12e81b07 365 struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
b73a4c47
PMF
366 long o_begin, o_end, o_old;
367 size_t before_hdr_pad;
368
369 /*
370 * Perform retryable operations.
371 */
6a843332 372 /* FIXME: make this really per cpu? */
015d08b6 373 if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
e5bc3b0f 374 DBG("Dropping event because nesting is too deep.");
b102c2b0 375 uatomic_inc(&buf->events_lost);
b73a4c47
PMF
376 return -EPERM;
377 }
378
379 if (unlikely(ltt_relay_try_reserve(chan, buf,
380 data_size, tsc, rflags,
381 largest_align, &o_begin, &o_end, &o_old,
382 &before_hdr_pad, slot_size)))
383 goto slow_path;
384
b102c2b0 385 if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
b73a4c47
PMF
386 goto slow_path;
387
388 /*
389 * Atomically update last_tsc. This update races against concurrent
390 * atomic updates, but the race will always cause supplementary full TSC
391 * events, never the opposite (missing a full TSC event when it would be
392 * needed).
393 */
394 save_last_tsc(buf, *tsc);
395
396 /*
397 * Push the reader if necessary
398 */
399 ltt_reserve_push_reader(chan, buf, o_end - 1);
400
401 /*
402 * Clear noref flag for this subbuffer.
403 */
404//ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));
405
406 *buf_offset = o_begin + before_hdr_pad;
407 return 0;
408slow_path:
12e81b07
PMF
409 return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
410 largest_align, cpu, ret_buf,
411 slot_size, buf_offset, tsc,
412 rflags);
b73a4c47
PMF
413}
414
415/*
416 * Force a sub-buffer switch for a per-cpu buffer. This operation is
417 * completely reentrant : can be called while tracing is active with
418 * absolutely no lock held.
b73a4c47
PMF
419 */
420static __inline__ void ltt_force_switch(struct ust_buffer *buf,
421 enum force_switch_mode mode)
422{
423 return ltt_force_switch_lockless_slow(buf, mode);
424}
425
426/*
427 * for flight recording. must be called after relay_commit.
8c36d1ee
PMF
428 * This function increments the subbuffers's commit_seq counter each time the
429 * commit count reaches back the reserve offset (module subbuffer size). It is
430 * useful for crash dump.
b73a4c47 431 */
1e8c9e7b
PMF
432//ust// #ifdef CONFIG_LTT_VMCORE
433static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
434 struct ust_buffer *buf, long idx, long buf_offset,
435 long commit_count, size_t data_size)
b73a4c47
PMF
436{
437 long offset;
438 long commit_seq_old;
439
440 offset = buf_offset + data_size;
441
442 /*
443 * SUBBUF_OFFSET includes commit_count_mask. We can simply
444 * compare the offsets within the subbuffer without caring about
445 * buffer full/empty mismatch because offset is never zero here
446 * (subbuffer header and event headers have non-zero length).
447 */
448 if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
449 return;
450
b102c2b0 451 commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
b73a4c47 452 while (commit_seq_old < commit_count)
b102c2b0 453 commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
b73a4c47 454 commit_seq_old, commit_count);
1e8c9e7b
PMF
455
456 DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
b73a4c47 457}
1e8c9e7b
PMF
458//ust// #else
459//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
460//ust// long idx, long buf_offset, long commit_count, size_t data_size)
461//ust// {
462//ust// }
463//ust// #endif
b73a4c47
PMF
464
465/*
466 * Atomic unordered slot commit. Increments the commit count in the
467 * specified sub-buffer, and delivers it if necessary.
468 *
469 * Parameters:
470 *
471 * @ltt_channel : channel structure
472 * @transport_data: transport-specific data
473 * @buf_offset : offset following the event header.
474 * @data_size : size of the event data.
475 * @slot_size : size of the reserved slot.
476 */
477static __inline__ void ltt_commit_slot(
478 struct ust_channel *chan,
479 struct ust_buffer *buf, long buf_offset,
480 size_t data_size, size_t slot_size)
481{
482 long offset_end = buf_offset;
483 long endidx = SUBBUF_INDEX(offset_end - 1, chan);
484 long commit_count;
485
b73a4c47 486 smp_wmb();
6a843332 487
b102c2b0 488 uatomic_add(&buf->commit_count[endidx].cc, slot_size);
b73a4c47
PMF
489 /*
490 * commit count read can race with concurrent OOO commit count updates.
491 * This is only needed for ltt_check_deliver (for non-polling delivery
492 * only) and for ltt_write_commit_counter. The race can only cause the
493 * counter to be read with the same value more than once, which could
494 * cause :
495 * - Multiple delivery for the same sub-buffer (which is handled
496 * gracefully by the reader code) if the value is for a full
497 * sub-buffer. It's important that we can never miss a sub-buffer
b102c2b0 498 * delivery. Re-reading the value after the uatomic_add ensures this.
b73a4c47
PMF
499 * - Reading a commit_count with a higher value that what was actually
500 * added to it for the ltt_write_commit_counter call (again caused by
501 * a concurrent committer). It does not matter, because this function
502 * is interested in the fact that the commit count reaches back the
503 * reserve offset for a specific sub-buffer, which is completely
504 * independent of the order.
505 */
b102c2b0 506 commit_count = uatomic_read(&buf->commit_count[endidx].cc);
b73a4c47
PMF
507
508 ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
509 /*
8c36d1ee 510 * Update data_size for each commit. It's needed only for extracting
b73a4c47
PMF
511 * ltt buffers from vmcore, after crash.
512 */
1e8c9e7b 513 ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
b73a4c47
PMF
514}
515
516void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
517 const void *src, size_t len, ssize_t cpy);
518
a2fd50ef 519static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
b73a4c47 520 const void *src, size_t len)
b5b073e2
PMF
521{
522 size_t cpy;
523 size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
524
525 assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
526
527 cpy = min_t(size_t, len, buf->buf_size - buf_offset);
528 ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);
b73a4c47 529
b5b073e2
PMF
530 if (unlikely(len != cpy))
531 _ust_buffers_write(buf, buf_offset, src, len, cpy);
532 return len;
533}
534
dc284811
PMF
/*
 * Consumer-side sub-buffer access and transport registration, defined
 * outside this header.
 */
extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);

#endif /* _UST_BUFFERS_H */