/*
 * buffers.h
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <kcompat/kref.h>
#include <assert.h>
#include "channels.h"
#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"
#include <usterr.h>

/***** SHOULD BE REMOVED *****/

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size-1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
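
/*
 * Worked example (illustrative, not part of the original header): assuming a
 * hypothetical channel with subbuf_size = 4096 (subbuf_size_order = 12) and
 * alloc_size = 16384 (four sub-buffers), an offset of 9000 gives
 * SUBBUF_OFFSET = 808, SUBBUF_TRUNC = 8192 and SUBBUF_INDEX = 2.
 */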

/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/

struct commit_counters {
	long cc;			/* ATOMIC */
	long cc_sb;			/* ATOMIC - Incremented _once_ at sb switch */
};
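
/*
 * Note (not in the original header): cc accumulates the bytes committed into
 * the current sub-buffer, while cc_sb is only brought back in line with cc
 * once per sub-buffer, by the cmpxchg in ltt_check_deliver() below, which is
 * what marks the sub-buffer as fully committed and deliverable.
 */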

struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	long offset;			/* Current offset in the buffer *atomic* */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	long consumed;			/* Consumed offset (reader position), *atomic* access (shared) */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	long active_readers;		/* ATOMIC - Active readers count, standard atomic access (shared) */
	long events_lost;		/* ATOMIC */
	long corrupted_subbuffers;	/* ATOMIC */
	/* One byte is written to this pipe when data is available, in order
	   to wake the consumer */
	/* Portability: single-byte writes must be as quick as possible. The
	   kernel-side buffer must be large enough so the writer doesn't block.
	   From the pipe(7) man page: since Linux 2.6.11, the pipe capacity is
	   65536 bytes. */
	int data_ready_fd_write;
	/* the reading end of the pipe */
	int data_ready_fd_read;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodical switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	struct kref kref;
	void *buf_data;
	size_t buf_size;
	int shmid;
	unsigned int cpu;

	/* commit count per subbuffer; must be at end of struct */
	long commit_seq[0] ____cacheline_aligned;	/* ATOMIC */
} ____cacheline_aligned;

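/*
 * Note (not in the original header): commit_seq is a flexible array member,
 * so allocations of struct ust_buffer are presumably sized to hold one
 * commit_seq counter per sub-buffer beyond sizeof(struct ust_buffer).
 */
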
/*
 * A switch is either done during tracing (FORCE_ACTIVE) or as a final flush
 * after tracing (FORCE_FLUSH), in which case nothing is written into the new
 * sub-buffer.
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

extern int ltt_reserve_slot_lockless_slow(struct ust_trace *trace,
		struct ust_channel *ltt_channel, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);


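/*
 * Copy len bytes from src to dest, using a single fixed-size store for the
 * common event field sizes (1, 2, 4 or 8 bytes) and falling back to memcpy()
 * for anything else.
 */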
static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0:	break;
	case 1:	*(u8 *)dest = *u.src8;
		break;
	case 2:	*(u16 *)dest = *u.src16;
		break;
	case 4:	*(u32 *)dest = *u.src32;
		break;
	case 8:	*(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}

static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data) + offset;
}

/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif
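
/*
 * Note (illustrative, not from the original source): event headers are
 * assumed to carry only the low LTT_TSC_BITS bits of the timestamp, so
 * last_tsc_overflow() returning 1 is what forces a full 64-bit TSC to be
 * recorded (the LTT_RFLAG_ID_SIZE_TSC case in ltt_relay_try_reserve()).
 */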

static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = uatomic_read(&buf->consumed);
		/*
		 * If the buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by many
		 * writers in the same buffer; the writer at the farthest write
		 * position (highest sub-buffer index) is the one that wins
		 * this loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
				- SUBBUF_TRUNC(consumed_old, buf->chan))
				>= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}

static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	uatomic_set(&buf->commit_seq[idx], commit_count);
}

static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
			   & chan->commit_count_mask) == 0)) {
		/*
		 * If we succeeded in updating the cc_sb, we are delivering
		 * the subbuffer. Deals with concurrent updates of the "cc"
		 * value without adding an add_return atomic operation to the
		 * fast path.
		 */
		if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
				old_commit_count, commit_count)
				== old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* wakeup consumer */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result == -1) {
				PERROR("write (in ltt_check_deliver)");
				ERR("this should never happen!");
			}
		}
	}
}

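/*
 * Returns 1 if the sub-buffer at the current consumed position is fully
 * committed and the writer is not currently inside it, 0 otherwise. Intended
 * for polling only (see the comment about memory barriers below).
 */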
static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = uatomic_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory barrier here, since we are only interested
	 * in a statistically correct polling result. The next poll will
	 * get the data if we are racing. The mb() that ensures correct
	 * memory order is in get_subbuf.
	 */
	write_offset = uatomic_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has already been
	 * fully committed.
	 */

	if (((commit_count - chan->subbuf_size)
			& chan->commit_count_mask)
			- (BUFFER_TRUNC(consumed_old, buf->chan)
			   >> chan->n_subbufs_order)
			!= 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
			- SUBBUF_TRUNC(consumed_old, buf->chan))
			== 0)
		return 0;

	return 1;
}

/*
 * Returns 0 if the reserve succeeded, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = uatomic_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

//ust// #ifdef CONFIG_LTT_VMCORE
//ust//	prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust//	prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #else
//ust//	prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #endif
	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
			> buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current sub-buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}

static __inline__ int ltt_reserve_slot(struct ust_trace *trace,
		struct ust_channel *chan, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu)
{
	struct ust_buffer *buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	/* FIXME: make this really per cpu? */
	if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
		DBG("Dropping event because nesting is too deep.");
		uatomic_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	/*
	 * Clear noref flag for this subbuffer.
	 */
//ust//	ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));

	*buf_offset = o_begin + before_hdr_pad;
	return 0;
slow_path:
	return ltt_reserve_slot_lockless_slow(trace, chan,
			transport_data, data_size, slot_size, buf_offset, tsc,
			rflags, largest_align, cpu);
}
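
/*
 * Illustrative usage sketch (not part of the original header; everything
 * other than the functions and fields named above is an assumption): a
 * tracer fast path pairs ltt_reserve_slot() with ltt_commit_slot(), roughly:
 *
 *	if (ltt_reserve_slot(trace, chan, &transport_data, data_size,
 *			     &slot_size, &buf_offset, &tsc, &rflags,
 *			     largest_align, cpu) < 0)
 *		return;		(reservation failed; event dropped)
 *	... write the event header, then the payload, at buf_offset ...
 *	ltt_commit_slot(chan, chan->buf[cpu], buf_offset, data_size, slot_size);
 *
 * where the buf_offset passed to the commit is, per ltt_commit_slot()'s
 * documentation below, the offset following the event header.
 */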

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	ltt_force_switch_lockless_slow(buf, mode);
}

/*
 * For flight recording. Must be called after relay_commit.
 * This function increments the sub-buffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dumps.
 */
//ust// #ifdef CONFIG_LTT_VMCORE
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
				commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
}
//ust// #else
//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
//ust//		long idx, long buf_offset, long commit_count, size_t data_size)
//ust// {
//ust// }
//ust// #endif

/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @chan : channel structure
 * @buf : buffer to commit to
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

#ifdef LTT_NO_IPI_BARRIER
	smp_wmb();
#else
	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_mb() by the IPI
	 * sent by get_subbuf().
	 */
	barrier();
#endif
	uatomic_add(&buf->commit_count[endidx].cc, slot_size);
	/*
	 * The commit count read can race with concurrent OOO commit count
	 * updates. This is only needed for ltt_check_deliver (for non-polling
	 * delivery only) and for ltt_write_commit_counter. The race can only
	 * cause the counter to be read with the same value more than once,
	 * which could cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the uatomic_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = uatomic_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update commit_seq for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}

void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len, ssize_t cpy);

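/*
 * Write len bytes from src into the buffer at offset. The common case copies
 * directly with ust_buffers_do_copy(); when the copy would run past the end
 * of the buffer, the remainder is handed off to _ust_buffers_write().
 */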
static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len)
{
	size_t cpy;
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size * buf->chan->subbuf_cnt);

	cpy = min_t(size_t, len, buf->buf_size - buf_offset);
	ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);

	if (unlikely(len != cpy))
		_ust_buffers_write(buf, buf_offset, src, len, cpy);
	return len;
}

int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

#endif /* _UST_BUFFERS_H */