
/* LTTng user-space "fast" tracing header
 *
 * Copyright 2006 Mathieu Desnoyers
 *
 */

#ifndef _LTT_USERTRACE_FAST_H
#define _LTT_USERTRACE_FAST_H

#ifdef LTT_TRACE

#include <errno.h>
#include <asm/atomic.h>
#include <pthread.h>
#include <stdint.h>
#include <syscall.h>
#include <linux/futex.h>
#include <asm/timex.h>

#include <ltt/ltt-facility-id-user_generic.h>
#include <ltt/ltt-generic.h>

#ifndef futex
static inline __attribute__((no_instrument_function))
_syscall6(long, futex, unsigned long, uaddr, int, op, int, val,
		unsigned long, timeout, unsigned long, uaddr2, int, val2)
#endif //futex
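
/*
 * Note on usage (sketch, not from this header): the writer parks on
 * ltt_buf->writer_futex with FUTEX_WAIT further below when a sub-buffer is
 * still in use. The reader daemon is assumed to release it with the matching
 * wake-up, along the lines of:
 *
 *	atomic_set(&buf->writer_futex, 0);
 *	futex((unsigned long)&buf->writer_futex, FUTEX_WAKE, 1, 0, 0, 0);
 *
 * The exact reader-side sequence lives in the tracing daemon; treat the two
 * lines above as an assumption, not as the daemon's actual code.
 */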


#ifndef LTT_N_SUBBUFS
#define LTT_N_SUBBUFS 2
#endif //LTT_N_SUBBUFS

#ifndef LTT_SUBBUF_SIZE_PROCESS
#define LTT_SUBBUF_SIZE_PROCESS 1048576
#endif //LTT_SUBBUF_SIZE_PROCESS

#define LTT_BUF_SIZE_PROCESS (LTT_SUBBUF_SIZE_PROCESS * LTT_N_SUBBUFS)

#ifndef LTT_USERTRACE_ROOT
#define LTT_USERTRACE_ROOT "/tmp/ltt-usertrace"
#endif //LTT_USERTRACE_ROOT


/* Buffer offset macros */

#define BUFFER_OFFSET(offset, buf) (offset & (buf->alloc_size-1))
#define SUBBUF_OFFSET(offset, buf) (offset & (buf->subbuf_size-1))
#define SUBBUF_ALIGN(offset, buf) \
	(((offset) + buf->subbuf_size) & (~(buf->subbuf_size-1)))
#define SUBBUF_TRUNC(offset, buf) \
	((offset) & (~(buf->subbuf_size-1)))
#define SUBBUF_INDEX(offset, buf) \
	(BUFFER_OFFSET(offset,buf)/buf->subbuf_size)
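
/*
 * Worked example (sketch, not in the original header): the mask arithmetic
 * above assumes alloc_size and subbuf_size are powers of two. With the
 * default LTT_SUBBUF_SIZE_PROCESS = 1048576 (0x100000) and LTT_N_SUBBUFS = 2
 * (alloc_size = 0x200000), an offset of 0x150010 gives:
 *
 *	BUFFER_OFFSET(0x150010, buf) == 0x150010
 *	SUBBUF_OFFSET(0x150010, buf) == 0x050010
 *	SUBBUF_INDEX(0x150010, buf)  == 1
 *	SUBBUF_TRUNC(0x150010, buf)  == 0x100000
 *	SUBBUF_ALIGN(0x150010, buf)  == 0x200000  (start of the next sub-buffer)
 */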


#define LTT_TRACER_MAGIC_NUMBER 0x00D6B7ED
#define LTT_TRACER_VERSION_MAJOR 0
#define LTT_TRACER_VERSION_MINOR 7

#ifndef atomic_cmpxchg
#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
#endif //atomic_cmpxchg

typedef unsigned int ltt_facility_t;

struct ltt_trace_header {
	uint32_t magic_number;
	uint32_t arch_type;
	uint32_t arch_variant;
	uint32_t float_word_order; /* Only useful for user space traces */
	uint8_t arch_size;
	//uint32_t system_type;
	uint8_t major_version;
	uint8_t minor_version;
	uint8_t flight_recorder;
	uint8_t has_heartbeat;
	uint8_t has_alignment; /* Event header alignment */
	uint32_t freq_scale;
	uint64_t start_freq;
	uint64_t start_tsc;
	uint64_t start_monotonic;
	uint64_t start_time_sec;
	uint64_t start_time_usec;
} __attribute__((packed));


struct ltt_block_start_header {
	struct {
		uint64_t cycle_count;
		uint64_t freq; /* khz */
	} begin;
	struct {
		uint64_t cycle_count;
		uint64_t freq; /* khz */
	} end;
	uint32_t lost_size; /* Size unused at the end of the buffer */
	uint32_t buf_size; /* The size of this sub-buffer */
	struct ltt_trace_header trace;
} __attribute__((packed));



struct ltt_buf {
	void *start;
	atomic_t offset;
	atomic_t consumed;
	atomic_t reserve_count[LTT_N_SUBBUFS];
	atomic_t commit_count[LTT_N_SUBBUFS];

	atomic_t events_lost;
	atomic_t corrupted_subbuffers;
	atomic_t writer_futex; /* futex on which the writer waits */
	unsigned int alloc_size;
	unsigned int subbuf_size;
};

struct ltt_trace_info {
	int init;
	int filter;
	pid_t daemon_id;
	int nesting;
	struct {
		struct ltt_buf process;
		char process_buf[LTT_BUF_SIZE_PROCESS] __attribute__ ((aligned (8)));
	} channel;
};


struct ltt_event_header_nohb {
	uint64_t timestamp;
	unsigned char facility_id;
	unsigned char event_id;
	uint16_t event_size;
} __attribute__((packed));

extern __thread struct ltt_trace_info *thread_trace_info;

void ltt_thread_init(void);

void __attribute__((no_instrument_function))
	ltt_usertrace_fast_buffer_switch(void);

/* Get the offset of the channel in struct ltt_trace_info */
#define GET_CHANNEL_INDEX(chan) \
	(unsigned int)&((struct ltt_trace_info*)NULL)->channel.chan

/* ltt_get_index_from_facility
 *
 * Get channel index from facility and event id.
 *
 * @fID : facility ID
 * @eID : event number
 *
 * Get the channel index into which events must be written for the given
 * facility and event number. We get this structure offset as soon as possible
 * and remember it so we pass through this logic only once per trace call (not
 * for every trace).
 */
static inline unsigned int __attribute__((no_instrument_function))
	ltt_get_index_from_facility(ltt_facility_t fID,
		uint8_t eID)
{
	return GET_CHANNEL_INDEX(process);
}


static inline struct ltt_buf * __attribute__((no_instrument_function))
	ltt_get_channel_from_index(
		struct ltt_trace_info *trace, unsigned int index)
{
	return (struct ltt_buf *)((void*)trace+index);
}
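
/*
 * Sketch (not in the original header) of how the two helpers above compose:
 * the index is a byte offset into struct ltt_trace_info, so adding it back
 * to the trace pointer yields the per-channel ltt_buf.
 *
 *	unsigned int index = ltt_get_index_from_facility(fID, eID);
 *	struct ltt_buf *buf =
 *		ltt_get_channel_from_index(thread_trace_info, index);
 *	// buf == &thread_trace_info->channel.process for every facility and
 *	// event, since only the "process" channel exists in this fast
 *	// usertrace.
 */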


/*
 * ltt_get_header_size
 *
 * Calculate alignment offset for arch size void*. This is the
 * alignment offset of the event header.
 *
 * Important note :
 * The event header size must be a multiple of the void* size. This is
 * necessary to be able to calculate statically the alignment offset of the
 * variable length data fields that follow. The total offset calculated here :
 *
 * Alignment of header struct on arch size
 * + sizeof(header struct)
 * + padding added to end of struct to align on arch size.
 */
static inline unsigned char __attribute__((no_instrument_function))
	ltt_get_header_size(struct ltt_trace_info *trace,
		void *address,
		size_t *before_hdr_pad,
		size_t *after_hdr_pad,
		size_t *header_size)
{
	unsigned int padding;
	unsigned int header;

	header = sizeof(struct ltt_event_header_nohb);

	/* Padding before the header. Calculated dynamically */
	*before_hdr_pad = ltt_align((unsigned long)address, header);
	padding = *before_hdr_pad;

	/* Padding after header, considering header aligned on ltt_align.
	 * Calculated statically if header size is known. */
	*after_hdr_pad = ltt_align(header, sizeof(void*));
	padding += *after_hdr_pad;

	*header_size = header;

	return header+padding;
}
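
/*
 * Layout sketch (illustrative only, not part of the original header). For a
 * slot beginning at "address", the value returned above is the number of
 * bytes the event header region occupies:
 *
 *	address
 *	+-- before_hdr_pad  (dynamic pad so the header starts aligned)
 *	+-- header_size     (sizeof(struct ltt_event_header_nohb))
 *	+-- after_hdr_pad   (pad so the payload starts void*-aligned)
 *	+-- payload         (variable length event data follows)
 *
 * The caller adds data_size to this return value to obtain the full slot
 * size (see ltt_reserve_slot below).
 */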


/* ltt_write_event_header
 *
 * Writes the event header to the pointer.
 *
 * @channel : pointer to the channel structure
 * @ptr : buffer pointer
 * @fID : facility ID
 * @eID : event ID
 * @event_size : size of the event, excluding the event header.
 * @offset : offset of the beginning of the header, for alignment.
 * Calculated by ltt_get_header_size.
 * @tsc : time stamp counter.
 */
static inline void __attribute__((no_instrument_function))
	ltt_write_event_header(
		struct ltt_trace_info *trace, struct ltt_buf *buf,
		void *ptr, ltt_facility_t fID, uint32_t eID, size_t event_size,
		size_t offset, uint64_t tsc)
{
	struct ltt_event_header_nohb *nohb;

	event_size = min(event_size, 0xFFFFU);
	nohb = (struct ltt_event_header_nohb *)(ptr+offset);
	nohb->timestamp = (uint64_t)tsc;
	nohb->facility_id = fID;
	nohb->event_id = eID;
	nohb->event_size = (uint16_t)event_size;
}
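
/*
 * Field layout note (derived from struct ltt_event_header_nohb above): the
 * packed header written here is 12 bytes: a 64-bit timestamp, one byte each
 * for the facility and event ids, and a 16-bit event size. Payloads larger
 * than 0xFFFF bytes have their recorded size clamped by the min() above, so
 * such events cannot be described exactly by this header format.
 */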


static inline uint64_t __attribute__((no_instrument_function))
	ltt_get_timestamp(void)
{
	return get_cycles();
}

static inline unsigned int __attribute__((no_instrument_function))
	ltt_subbuf_header_len(struct ltt_buf *buf)
{
	return sizeof(struct ltt_block_start_header);
}


static inline void __attribute__((no_instrument_function))
	ltt_write_trace_header(struct ltt_trace_header *header)
{
	header->magic_number = LTT_TRACER_MAGIC_NUMBER;
	header->major_version = LTT_TRACER_VERSION_MAJOR;
	header->minor_version = LTT_TRACER_VERSION_MINOR;
	header->float_word_order = 0; //FIXME
	header->arch_type = 0; //FIXME LTT_ARCH_TYPE;
	header->arch_size = sizeof(void*);
	header->arch_variant = 0; //FIXME LTT_ARCH_VARIANT;
	header->flight_recorder = 0;
	header->has_heartbeat = 0;

#ifndef LTT_PACK
	header->has_alignment = sizeof(void*);
#else
	header->has_alignment = 0;
#endif

	//FIXME
	header->freq_scale = 0;
	header->start_freq = 0;
	header->start_tsc = 0;
	header->start_monotonic = 0;
	header->start_time_sec = 0;
	header->start_time_usec = 0;
}


static inline void __attribute__((no_instrument_function))
	ltt_buffer_begin_callback(struct ltt_buf *buf,
		uint64_t tsc, unsigned int subbuf_idx)
{
	struct ltt_block_start_header *header =
		(struct ltt_block_start_header*)
			(buf->start + (subbuf_idx*buf->subbuf_size));

	header->begin.cycle_count = tsc;
	header->begin.freq = 0; //ltt_frequency();

	header->lost_size = 0xFFFFFFFF; // for debugging...

	header->buf_size = buf->subbuf_size;

	ltt_write_trace_header(&header->trace);
}



static inline void __attribute__((no_instrument_function))
	ltt_buffer_end_callback(struct ltt_buf *buf,
		uint64_t tsc, unsigned int offset, unsigned int subbuf_idx)
{
	struct ltt_block_start_header *header =
		(struct ltt_block_start_header*)
			(buf->start + (subbuf_idx*buf->subbuf_size));
	/* offset is assumed to never be 0 here : never deliver a completely
	 * empty subbuffer. */
	/* The lost size is between 0 and subbuf_size-1 */
	header->lost_size = SUBBUF_OFFSET((buf->subbuf_size - offset), buf);
	header->end.cycle_count = tsc;
	header->end.freq = 0; //ltt_frequency();
}


static inline void __attribute__((no_instrument_function))
	ltt_deliver_callback(struct ltt_buf *buf,
		unsigned subbuf_idx,
		void *subbuf)
{
	ltt_usertrace_fast_buffer_switch();
}


/* ltt_reserve_slot
 *
 * Atomic slot reservation in a LTTng buffer. It will take care of
 * sub-buffer switching.
 *
 * Parameters:
 *
 * @trace : the trace structure to log to.
 * @buf : the buffer to reserve space into.
 * @data_size : size of the variable length data to log.
 * @slot_size : pointer to total size of the slot (out)
 * @tsc : pointer to the tsc at the slot reservation (out)
 * @before_hdr_pad : dynamic padding before the event header.
 * @after_hdr_pad : dynamic padding after the event header.
 * @header_size : size of the event header (out)
 *
 * Return : NULL if not enough space, else returns the pointer
 * to the beginning of the reserved slot. */
static inline void * __attribute__((no_instrument_function)) ltt_reserve_slot(
		struct ltt_trace_info *trace,
		struct ltt_buf *ltt_buf,
		unsigned int data_size,
		unsigned int *slot_size,
		uint64_t *tsc,
		size_t *before_hdr_pad,
		size_t *after_hdr_pad,
		size_t *header_size)
{
	int offset_begin, offset_end, offset_old;
	//int has_switch;
	int begin_switch, end_switch_current, end_switch_old;
	int reserve_commit_diff = 0;
	unsigned int size;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;
	int ret;

	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;
		//has_switch = 0;
		begin_switch = 0;
		end_switch_current = 0;
		end_switch_old = 0;
		*tsc = ltt_get_timestamp();
		if(*tsc == 0) {
			/* Error in getting the timestamp, event lost */
			atomic_inc(&ltt_buf->events_lost);
			return NULL;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) == 0) {
			begin_switch = 1; /* For offset_begin */
		} else {
			size = ltt_get_header_size(trace, ltt_buf->start + offset_begin,
					before_hdr_pad, after_hdr_pad, header_size)
					+ data_size;

			if((SUBBUF_OFFSET(offset_begin, ltt_buf)+size)>ltt_buf->subbuf_size) {
				//has_switch = 1;
				end_switch_old = 1; /* For offset_old */
				begin_switch = 1; /* For offset_begin */
			}
		}

		if(begin_switch) {
			if(end_switch_old) {
				offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
			}
			offset_begin = offset_begin + ltt_subbuf_header_len(ltt_buf);
			/* Test new buffer integrity */
			reserve_commit_diff =
				atomic_read(&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin,
						ltt_buf)])
				- atomic_read(&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin,
						ltt_buf)]);
			if(reserve_commit_diff == 0) {
				/* Next buffer not corrupted. */
				//if((SUBBUF_TRUNC(offset_begin, ltt_buf)
				//		- SUBBUF_TRUNC(atomic_read(&ltt_buf->consumed), ltt_buf))
				//		>= ltt_buf->alloc_size) {
				if(atomic_dec_return(&ltt_buf->writer_futex) >= 0) {
					/* non contended */
				} else {
					/* We block until the reader unblocks us */
					atomic_set(&ltt_buf->writer_futex, -1);
					/* We block until the reader tells us to wake up.
					   Signals will simply cause this loop to restart.
					 */
					do {
						ret = futex((unsigned long)&ltt_buf->writer_futex,
								FUTEX_WAIT, -1, 0, 0, 0);
					} while(ret != 0 && ret != EWOULDBLOCK);
				}
				/* go on with the write */

				//} else {
				//	/* next buffer not corrupted, we are either in overwrite mode or
				//	 * the buffer is not full. It's safe to write in this new subbuffer.*/
				//}
			} else {
				/* Next subbuffer corrupted. Force pushing reader even in normal
				 * mode. It's safe to write in this new subbuffer. */
			}
			size = ltt_get_header_size(trace, ltt_buf->start + offset_begin,
					before_hdr_pad, after_hdr_pad, header_size) + data_size;
			if((SUBBUF_OFFSET(offset_begin,ltt_buf)+size)>ltt_buf->subbuf_size) {
				/* Event too big for subbuffers, report error, don't complete
				 * the sub-buffer switch. */
				atomic_inc(&ltt_buf->events_lost);
				return NULL;
			} else {
				/* We just made a successful buffer switch and the event fits in the
				 * new subbuffer. Let's write. */
			}
		} else {
			/* Event fits in the current buffer and we are not on a switch boundary.
			 * It's safe to write */
		}
		offset_end = offset_begin + size;

		if((SUBBUF_OFFSET(offset_end, ltt_buf)) == 0) {
			/* The offset_end will fall at the very beginning of the next
			 * subbuffer. */
			end_switch_current = 1; /* For offset_begin */
		}

	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);


	/* Push the reader if necessary */
	do {
		consumed_old = atomic_read(&ltt_buf->consumed);
		/* If buffer is in overwrite mode, push the reader consumed count if
		   the write position has reached it and we are not at the first
		   iteration (don't push the reader farther than the writer).
		   This operation can be done concurrently by many writers in the
		   same buffer, the writer being at the farthest write position
		   sub-buffer index in the buffer being the one which will win this
		   loop. */
		/* If the buffer is not in overwrite mode, pushing the reader only
		   happens if a sub-buffer is corrupted */
		if((SUBBUF_TRUNC(offset_end, ltt_buf)
				- SUBBUF_TRUNC(consumed_old, ltt_buf))
				>= ltt_buf->alloc_size)
			consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
		else {
			consumed_new = consumed_old;
			break;
		}
	} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
			!= consumed_old);

	if(consumed_old != consumed_new) {
		/* Reader pushed : we are the winner of the push, we can therefore
		   re-equilibrate reserve and commit. Atomic increment of the commit
		   count permits other writers to play around with this variable
		   before us. We keep track of corrupted_subbuffers even in overwrite
		   mode : we never want to write over an incompletely committed
		   sub-buffer : possible causes : the buffer size is too low compared
		   to the unordered data input, or there is a writer who died between
		   the reserve and the commit. */
		if(reserve_commit_diff) {
			/* We have to alter the sub-buffer commit count : a sub-buffer is
			   corrupted. We do not deliver it. */
			atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
			atomic_inc(&ltt_buf->corrupted_subbuffers);
		}
	}


	if(end_switch_old) {
		/* old subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread can
		   alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themselves.
		   Not concurrency safe in overwrite mode. We detect corrupted subbuffers
		   with commit and reserve counts. We keep a corrupted sub-buffers count
		   and push the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will
		   be that the old (uncommitted) subbuffer will be declared corrupted, and
		   that the new subbuffer will be declared corrupted too because of the
		   commit count adjustment.
		   Note : offset_old should never be 0 here. */
		ltt_buffer_end_callback(ltt_buf, *tsc, offset_old,
				SUBBUF_INDEX((offset_old-1), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count =
			atomic_add_return((SUBBUF_OFFSET((offset_old-1), ltt_buf)+1),
					&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old-1), ltt_buf)]);
		if(reserve_count
				== atomic_read(&ltt_buf->commit_count[SUBBUF_INDEX((offset_old-1),
						ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old-1), ltt_buf),
					NULL);
		}
	}

	if(begin_switch) {
		/* New sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, *tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count
				== atomic_read(&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin,
						ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
		}
	}

	if(end_switch_current) {
		/* current subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread can
		   alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themselves.
		   Not concurrency safe in overwrite mode. We detect corrupted subbuffers
		   with commit and reserve counts. We keep a corrupted sub-buffers count
		   and push the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will
		   be that the old (uncommitted) subbuffer will be declared corrupted, and
		   that the new subbuffer will be declared corrupted too because of the
		   commit count adjustment. */
		ltt_buffer_end_callback(ltt_buf, *tsc, offset_end,
				SUBBUF_INDEX((offset_end-1), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count =
			atomic_add_return((SUBBUF_OFFSET((offset_end-1), ltt_buf)+1),
					&ltt_buf->reserve_count[SUBBUF_INDEX((offset_end-1), ltt_buf)]);
		if(reserve_count
				== atomic_read(&ltt_buf->commit_count[SUBBUF_INDEX((offset_end-1),
						ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_end-1), ltt_buf), NULL);
		}
	}

	*slot_size = size;

	//BUG_ON(*slot_size != (data_size + *before_hdr_pad + *after_hdr_pad + *header_size));
	//BUG_ON(*slot_size != (offset_end - offset_begin));

	return ltt_buf->start + BUFFER_OFFSET(offset_begin, ltt_buf);
}
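
/*
 * Contract sketch for the returned slot (not part of the original header):
 * on success the pointer references *slot_size bytes laid out as
 * [before_hdr_pad][event header][after_hdr_pad][data_size bytes of payload],
 * and the caller is expected to pass the same pointer and *slot_size to
 * ltt_commit_slot() below once the payload has been copied in. A NULL return
 * means the event was dropped and accounted in events_lost.
 */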


/* ltt_commit_slot
 *
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @buf : the buffer to commit to.
 * @reserved : address of the beginning of the reserved slot.
 * @slot_size : size of the reserved slot.
 *
 */
static inline void __attribute__((no_instrument_function)) ltt_commit_slot(
		struct ltt_buf *ltt_buf,
		void *reserved,
		unsigned int slot_size)
{
	unsigned int offset_begin = reserved - ltt_buf->start;
	int commit_count;

	commit_count = atomic_add_return(slot_size,
			&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin,
					ltt_buf)]);

	/* Check if all commits have been done */
	if(commit_count ==
			atomic_read(&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
		ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
	}
}
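
/*
 * End-to-end usage sketch (assumption: illustrative only, mirroring how the
 * generated ltt_trace_* probes are expected to drive this API; fID, eID and
 * data_size below are hypothetical inputs):
 *
 *	struct ltt_trace_info *trace = thread_trace_info;
 *	struct ltt_buf *buf = ltt_get_channel_from_index(trace,
 *			ltt_get_index_from_facility(fID, eID));
 *	size_t before_pad, after_pad, hdr_size;
 *	unsigned int slot_size;
 *	uint64_t tsc;
 *	void *slot = ltt_reserve_slot(trace, buf, data_size, &slot_size, &tsc,
 *			&before_pad, &after_pad, &hdr_size);
 *	if(slot != NULL) {
 *		ltt_write_event_header(trace, buf, slot, fID, eID,
 *				data_size, before_pad, tsc);
 *		// copy data_size bytes of payload at
 *		// slot + before_pad + hdr_size + after_pad
 *		ltt_commit_slot(buf, slot, slot_size);
 *	}
 */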


#endif //LTT_TRACE


#endif //_LTT_USERTRACE_FAST_H