3 * LTTng userspace tracer buffering system
5 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
6 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include <ust/kernelcompat.h>
28 #include <kcompat/kref.h>
32 #include "tracercore.h"
/* Protects ust_buffers_channels: it is held around the list_add() in
 * ust_buffers_channel_open() and the list_del() in
 * ust_buffers_channel_close(). */
35 static DEFINE_MUTEX(ust_buffers_channels_mutex
);
/* List of the channels opened through ust_buffers_channel_open(). */
36 static LIST_HEAD(ust_buffers_channels
);
/* Forward declaration: ust_buffers_open_buf() calls this function before
 * its definition further down in the file. */
38 static int ust_buffers_init_buffer(struct ltt_trace_struct
*trace
,
39 struct ust_channel
*ltt_chan
,
40 struct ust_buffer
*buf
,
41 unsigned int n_subbufs
);
/*
 * ust_buffers_alloc_buf - back a buffer with a System V shared memory segment
 * @buf:  buffer descriptor; shmid and buf_size are filled in here
 * @size: in: requested size; out: the size rounded up by PAGE_ALIGN()
 *
 * Creates a new segment keyed by getpid() (IPC_CREAT | IPC_EXCL, mode 0700),
 * attaches it with shmat() and then marks it IPC_RMID so that it disappears
 * once every user has detached. An EINVAL from shmget() gets a dedicated
 * message pointing at /proc/sys/kernel/shmmax.
 *
 * NOTE(review): the second shmctl(IPC_RMID) at the end is presumably the
 * cleanup path taken when shmat() fails -- confirm against the full source.
 */
43 static int ust_buffers_alloc_buf(struct ust_buffer
*buf
, size_t *size
)
48 *size
= PAGE_ALIGN(*size
);
50 result
= buf
->shmid
= shmget(getpid(), *size
, IPC_CREAT
| IPC_EXCL
| 0700);
51 if(result
== -1 && errno
== EINVAL
) {
52 ERR("shmget() returned EINVAL; maybe /proc/sys/kernel/shmmax should be increased.");
55 else if(result
== -1) {
60 ptr
= shmat(buf
->shmid
, NULL
, 0);
61 if(ptr
== (void *) -1) {
66 /* Already mark the shared memory for destruction. This will occur only
67 * when all users have detached.
69 result
= shmctl(buf
->shmid
, IPC_RMID
, NULL
);
76 buf
->buf_size
= *size
;
81 result
= shmctl(buf
->shmid
, IPC_RMID
, NULL
);
/*
 * ust_buffers_create_buf - allocate the data area of a channel's buffer
 * @channel: channel whose ->buf descriptor gets its memory
 *
 * Calls ust_buffers_alloc_buf() with channel->alloc_size (which that call may
 * round up), points the buffer back at its channel and takes a reference on
 * the channel for that back-pointer.
 */
89 static struct ust_buffer
*ust_buffers_create_buf(struct ust_channel
*channel
)
93 result
= ust_buffers_alloc_buf(channel
->buf
, &channel
->alloc_size
);
97 ((struct ust_buffer
*)channel
->buf
)->chan
= channel
;
98 kref_get(&channel
->kref
);
/*
 * ust_buffers_destroy_channel - kref release callback for a channel
 * @kref: the kref embedded in struct ust_channel
 *
 * Passed to kref_put() on chan->kref; recovers the enclosing channel with
 * container_of().
 */
105 static void ust_buffers_destroy_channel(struct kref
*kref
)
107 struct ust_channel
*chan
= container_of(kref
, struct ust_channel
, kref
);
/*
 * ust_buffers_destroy_buf - release a buffer's data area
 * @buf: buffer to tear down
 *
 * Unmaps buf_data (buf_size bytes) and drops the channel reference that
 * ust_buffers_create_buf() took.
 *
 * NOTE(review): ust_buffers_alloc_buf() obtains its mapping with shmat(), yet
 * it is released with munmap() here rather than shmdt() -- confirm this is
 * intended.
 */
111 static void ust_buffers_destroy_buf(struct ust_buffer
*buf
)
113 struct ust_channel
*chan
= buf
->chan
;
116 result
= munmap(buf
->buf_data
, buf
->buf_size
);
122 kref_put(&chan
->kref
, ust_buffers_destroy_channel
);
/*
 * ust_buffers_remove_buf - kref release callback for a buffer
 * @kref: the kref embedded in struct ust_buffer
 *
 * Recovers the buffer with container_of() and destroys it. It is the release
 * function ust_buffers_close_buf() hands to kref_put().
 */
125 /* called from kref_put */
126 static void ust_buffers_remove_buf(struct kref
*kref
)
128 struct ust_buffer
*buf
= container_of(kref
, struct ust_buffer
, kref
);
129 ust_buffers_destroy_buf(buf
);
/*
 * ust_buffers_open_buf - create and initialise the buffer of a channel
 * @chan: channel to open the buffer for
 *
 * Creates the data area with ust_buffers_create_buf(), initialises the
 * buffer's reference count and then runs ust_buffers_init_buffer() with the
 * channel's trace and sub-buffer count. As the FIXME below records, earlier
 * steps are not rolled back when a later one fails.
 */
132 static struct ust_buffer
*ust_buffers_open_buf(struct ust_channel
*chan
)
134 struct ust_buffer
*buf
= NULL
;
137 buf
= ust_buffers_create_buf(chan
);
141 kref_init(&buf
->kref
);
143 err
= ust_buffers_init_buffer(chan
->trace
, chan
, buf
, chan
->subbuf_cnt
);
150 /* FIXME: decrementally destroy on error? */
154 * ust_buffers_close_buf - close a channel buffer
/* Drop one reference on @buf; the last put runs ust_buffers_remove_buf(). */
157 static void ust_buffers_close_buf(struct ust_buffer
*buf
)
159 kref_put(&buf
->kref
, ust_buffers_remove_buf
);
/*
 * ust_buffers_channel_open - set up a channel's geometry and open its buffer
 * @chan:        channel to initialise
 * @subbuf_size: size of one sub-buffer; zero is tested for explicitly
 * @subbuf_cnt:  number of sub-buffers; zero is tested for explicitly
 *
 * Records version, sub-buffer count, size and size order, computes
 * alloc_size as FIX_SIZE(subbuf_size * subbuf_cnt) and initialises the
 * channel kref. Then, under ust_buffers_channels_mutex, opens the buffer and
 * adds the channel to ust_buffers_channels.
 *
 * NOTE(review): the trailing kref_put()/mutex_unlock() pair is presumably the
 * error path for a failed ust_buffers_open_buf() -- confirm against the full
 * source.
 */
162 int ust_buffers_channel_open(struct ust_channel
*chan
, size_t subbuf_size
, size_t subbuf_cnt
)
164 if(subbuf_size
== 0 || subbuf_cnt
== 0)
167 chan
->version
= UST_CHANNEL_VERSION
;
168 chan
->subbuf_cnt
= subbuf_cnt
;
169 chan
->subbuf_size
= subbuf_size
;
170 chan
->subbuf_size_order
= get_count_order(subbuf_size
);
171 chan
->alloc_size
= FIX_SIZE(subbuf_size
* subbuf_cnt
);
172 kref_init(&chan
->kref
);
174 mutex_lock(&ust_buffers_channels_mutex
);
175 chan
->buf
= ust_buffers_open_buf(chan
);
178 list_add(&chan
->list
, &ust_buffers_channels
);
179 mutex_unlock(&ust_buffers_channels_mutex
);
184 kref_put(&chan
->kref
, ust_buffers_destroy_channel
);
185 mutex_unlock(&ust_buffers_channels_mutex
);
/*
 * ust_buffers_channel_close - close a channel opened by
 * ust_buffers_channel_open()
 * @chan: channel to close
 *
 * Under ust_buffers_channels_mutex: drops the buffer reference, unlinks the
 * channel from ust_buffers_channels and drops the channel reference.
 */
189 void ust_buffers_channel_close(struct ust_channel
*chan
)
194 mutex_lock(&ust_buffers_channels_mutex
);
196 ust_buffers_close_buf(chan
->buf
);
198 list_del(&chan
->list
);
199 kref_put(&chan
->kref
, ust_buffers_destroy_channel
);
200 mutex_unlock(&ust_buffers_channels_mutex
);
203 /* _ust_buffers_write()
205 * @buf: destination buffer
206 * @offset: offset in destination
207 * @src: source buffer
208 * @len: length of source
209 * @cpy: already copied
/*
 * _ust_buffers_write - copy data into the buffer, one bounded piece at a time
 * @buf:    destination buffer
 * @offset: offset in the destination; warned about when >= buf->buf_size
 * @src:    source data
 * @len:    length of the source
 * @cpy:    number of bytes already copied by the caller
 *
 * Each pass copies at most (buf_size - offset) bytes with
 * ust_buffers_do_copy(); the loop repeats until the piece copied equals the
 * remaining length.
 */
212 void _ust_buffers_write(struct ust_buffer
*buf
, size_t offset
,
213 const void *src
, size_t len
, ssize_t cpy
)
219 WARN_ON(offset
>= buf
->buf_size
);
221 cpy
= min_t(size_t, len
, buf
->buf_size
- offset
);
222 ust_buffers_do_copy(buf
->buf_data
+ offset
, src
, cpy
);
223 } while (unlikely(len
!= cpy
));
227 * ltt_buffers_offset_address - get address of a location within the buffer
229 * @offset : offset within the buffer.
231 * Return the address where a given offset is located.
232 * Should be used to get the current subbuffer header pointer. Given we know
233 * it's never on a page boundary, it's safe to write directly to this address,
234 * as long as the write is never bigger than a page size.
/* Return the address @offset bytes into @buf's data area (byte arithmetic
 * on buf->buf_data); no bounds check is made here. */
236 void *ltt_buffers_offset_address(struct ust_buffer
*buf
, size_t offset
)
238 return ((char *)buf
->buf_data
)+offset
;
246 * Last TSC comparison functions. Check if the current TSC overflows
247 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
251 /* FIXME: does this test work properly? */
/*
 * save_last_tsc() / last_tsc_overflow(), in two word-size variants.
 *
 * BITS_PER_LONG == 32: last_tsc holds the timestamp shifted right by
 * LTT_TSC_BITS, and an overflow is flagged whenever the shifted current
 * timestamp differs from it.
 * Other variant: last_tsc holds the whole timestamp, and an overflow is
 * flagged when (tsc - last_tsc) has bits set above LTT_TSC_BITS.
 */
252 #if (BITS_PER_LONG == 32)
253 static inline void save_last_tsc(struct ust_buffer
*ltt_buf
,
256 ltt_buf
->last_tsc
= (unsigned long)(tsc
>> LTT_TSC_BITS
);
259 static inline int last_tsc_overflow(struct ust_buffer
*ltt_buf
,
262 unsigned long tsc_shifted
= (unsigned long)(tsc
>> LTT_TSC_BITS
);
264 if (unlikely((tsc_shifted
- ltt_buf
->last_tsc
)))
270 static inline void save_last_tsc(struct ust_buffer
*ltt_buf
,
273 ltt_buf
->last_tsc
= (unsigned long)tsc
;
276 static inline int last_tsc_overflow(struct ust_buffer
*ltt_buf
,
279 if (unlikely((tsc
- ltt_buf
->last_tsc
) >> LTT_TSC_BITS
))
287 * A switch is done during tracing or as a final flush after tracing (so it
288 * won't write in the new sub-buffer).
/*
 * Why a sub-buffer switch is forced:
 *  FORCE_ACTIVE - while tracing is running; ltt_relay_try_switch() reserves
 *                 the header of the next sub-buffer in this mode only.
 *  FORCE_FLUSH  - final flush, as issued by ltt_relay_buffer_flush().
 */
290 enum force_switch_mode
{ FORCE_ACTIVE
, FORCE_FLUSH
};
/* Forward declarations for functions defined later in this file. */
292 static void ust_buffers_destroy_buffer(struct ust_channel
*ltt_chan
);
294 static void ltt_force_switch(struct ust_buffer
*buf
,
295 enum force_switch_mode mode
);
/*
 * ltt_buffer_begin_callback - write the header that opens a sub-buffer
 * @buf:        buffer being written
 * @tsc:        timestamp stored as cycle_count_begin
 * @subbuf_idx: index of the sub-buffer; its header sits at
 *              subbuf_idx * subbuf_size in the data area
 *
 * Stores the begin timestamp, a 0xFFFFFFFF placeholder in lost_size and the
 * sub-buffer size, then lets ltt_write_trace_header() fill in the rest.
 * Installed as the channel's buffer_begin hook by
 * ust_buffers_create_channel().
 */
300 static void ltt_buffer_begin_callback(struct ust_buffer
*buf
,
301 u64 tsc
, unsigned int subbuf_idx
)
303 struct ust_channel
*channel
= buf
->chan
;
304 struct ltt_subbuffer_header
*header
=
305 (struct ltt_subbuffer_header
*)
306 ltt_buffers_offset_address(buf
,
307 subbuf_idx
* buf
->chan
->subbuf_size
);
309 header
->cycle_count_begin
= tsc
;
310 header
->lost_size
= 0xFFFFFFFF; /* for debugging */
311 header
->buf_size
= buf
->chan
->subbuf_size
;
312 ltt_write_trace_header(channel
->trace
, header
);
316 * offset is assumed to never be 0 here : never deliver a completely empty
317 * subbuffer. The lost size is between 0 and subbuf_size-1.
/*
 * ltt_buffer_end_callback - complete the header of a finished sub-buffer
 * @buf:        buffer being written
 * @tsc:        timestamp stored as cycle_count_end
 * @offset:     write offset at which the sub-buffer was closed
 * @subbuf_idx: index of the sub-buffer whose header is updated
 *
 * Records the unused tail (lost_size), the end timestamp and snapshots of the
 * buffer's events_lost and corrupted_subbuffers counters. Installed as the
 * channel's buffer_end hook by ust_buffers_create_channel().
 */
319 static notrace
void ltt_buffer_end_callback(struct ust_buffer
*buf
,
320 u64 tsc
, unsigned int offset
, unsigned int subbuf_idx
)
322 struct ltt_subbuffer_header
*header
=
323 (struct ltt_subbuffer_header
*)
324 ltt_buffers_offset_address(buf
,
325 subbuf_idx
* buf
->chan
->subbuf_size
);
327 header
->lost_size
= SUBBUF_OFFSET((buf
->chan
->subbuf_size
- offset
),
329 header
->cycle_count_end
= tsc
;
330 header
->events_lost
= local_read(&buf
->events_lost
);
331 header
->subbuf_corrupt
= local_read(&buf
->corrupted_subbuffers
);
/* Optional hook used to notify the consumer; NULL until one is registered
 * with relay_set_wake_consumer(). */
335 void (*wake_consumer
)(void *, int) = NULL
;
/* Register @wake as the consumer wake-up hook, replacing any previous one. */
337 void relay_set_wake_consumer(void (*wake
)(void *, int))
339 wake_consumer
= wake
;
/* Invoke the registered wake-up hook with @arg and @finished. */
342 void relay_wake_consumer(void *arg
, int finished
)
345 wake_consumer(arg
, finished
);
/*
 * ltt_deliver - hand a fully committed sub-buffer over to the consumer
 * @buf:        buffer the sub-buffer belongs to
 * @subbuf_idx: index of the sub-buffer being delivered
 *
 * Stores the commit count in commit_seq[subbuf_idx] and writes one byte to
 * data_ready_fd_write to wake the consumer.
 *
 * NOTE(review): the only allocation of commit_seq visible in this file is
 * commented out in ust_buffers_init_buffer() -- confirm it is set up
 * elsewhere before this store runs.
 * NOTE(review): the PERROR text names ltt_relay_buffer_flush although it is
 * emitted from ltt_deliver.
 */
348 static notrace
void ltt_deliver(struct ust_buffer
*buf
, unsigned int subbuf_idx
,
353 //ust// #ifdef CONFIG_LTT_VMCORE
354 local_set(&buf
->commit_seq
[subbuf_idx
], commit_count
);
357 /* wakeup consumer */
358 result
= write(buf
->data_ready_fd_write
, "1", 1);
360 PERROR("write (in ltt_relay_buffer_flush)");
361 ERR("this should never happen!");
363 //ust// atomic_set(&ltt_buf->wakeup_readers, 1);
367 * This function should not be called from NMI interrupt context
/*
 * ltt_buf_unfull - notification that a sub-buffer has been consumed
 * @buf:        buffer concerned
 * @subbuf_idx: index of the sub-buffer that was released
 *
 * Called from ust_buffers_do_put_subbuf(). Every statement visible in this
 * body is commented out (the kernel version woke up blocked writers).
 */
369 static notrace
void ltt_buf_unfull(struct ust_buffer
*buf
,
370 unsigned int subbuf_idx
,
373 //ust// struct ltt_channel_struct *ltt_channel =
374 //ust// (struct ltt_channel_struct *)buf->chan->private_data;
375 //ust// struct ltt_channel_buf_struct *ltt_buf = ltt_channel->buf;
377 //ust// ltt_relay_wake_writers(ltt_buf);
/*
 * ust_buffers_do_get_subbuf - let the consumer claim the next sub-buffer
 * @buf:           buffer to read from
 * @pconsumed_old: out: the consumed offset identifying the claimed
 *                 sub-buffer; the caller passes it back to
 *                 ust_buffers_do_put_subbuf()
 *
 * Reads the consumed offset and that sub-buffer's commit count, then the
 * write offset. Two conditions are tested: whether the sub-buffer has been
 * fully committed, and whether the writer head is still inside it.
 */
380 int ust_buffers_do_get_subbuf(struct ust_buffer
*buf
, long *pconsumed_old
)
382 struct ust_channel
*channel
= buf
->chan
;
383 long consumed_old
, consumed_idx
, commit_count
, write_offset
;
384 consumed_old
= atomic_long_read(&buf
->consumed
);
385 consumed_idx
= SUBBUF_INDEX(consumed_old
, buf
->chan
);
386 commit_count
= local_read(&buf
->commit_count
[consumed_idx
]);
388 * Make sure we read the commit count before reading the buffer
389 * data and the write offset. Correct consumed offset ordering
390 * wrt commit count is insured by the use of cmpxchg to update
391 * the consumed offset.
394 write_offset
= local_read(&buf
->offset
);
396 * Check that the subbuffer we are trying to consume has been
397 * already fully committed.
399 if (((commit_count
- buf
->chan
->subbuf_size
)
400 & channel
->commit_count_mask
)
401 - (BUFFER_TRUNC(consumed_old
, buf
->chan
)
402 >> channel
->n_subbufs_order
)
407 * Check that we are not about to read the same subbuffer in
408 * which the writer head is.
410 if ((SUBBUF_TRUNC(write_offset
, buf
->chan
)
411 - SUBBUF_TRUNC(consumed_old
, buf
->chan
))
416 *pconsumed_old
= consumed_old
;
/*
 * ust_buffers_do_put_subbuf - release a sub-buffer claimed by
 * ust_buffers_do_get_subbuf()
 * @buf:           buffer the sub-buffer belongs to
 * @uconsumed_old: low 32 bits of the consumed offset returned by the get
 *
 * Rebuilds the full consumed offset from the upper bits of the current
 * counter plus @uconsumed_old, then tries to advance the counter to the next
 * sub-buffer boundary with a cmpxchg. Per the comment in the body, a failed
 * cmpxchg means the writer pushed the reader (the data read is corrupted) or
 * the sub-buffer was never obtained. Afterwards ltt_buf_unfull() is told
 * which sub-buffer became free.
 */
420 int ust_buffers_do_put_subbuf(struct ust_buffer
*buf
, u32 uconsumed_old
)
422 long consumed_new
, consumed_old
;
424 consumed_old
= atomic_long_read(&buf
->consumed
);
425 consumed_old
= consumed_old
& (~0xFFFFFFFFL
);
426 consumed_old
= consumed_old
| uconsumed_old
;
427 consumed_new
= SUBBUF_ALIGN(consumed_old
, buf
->chan
);
429 //ust// spin_lock(&ltt_buf->full_lock);
430 if (atomic_long_cmpxchg(&buf
->consumed
, consumed_old
,
433 /* We have been pushed by the writer : the last
434 * buffer read _is_ corrupted! It can also
435 * happen if this is a buffer we never got. */
436 //ust// spin_unlock(&ltt_buf->full_lock);
439 /* tell the client that buffer is now unfull */
442 index
= SUBBUF_INDEX(consumed_old
, buf
->chan
);
443 data
= BUFFER_OFFSET(consumed_old
, buf
->chan
);
444 ltt_buf_unfull(buf
, index
, data
);
445 //ust// spin_unlock(&ltt_buf->full_lock);
/*
 * ltt_relay_print_subbuffer_errors - report one sub-buffer left unread
 * @channel:  channel being inspected
 * @cons_off: consumed offset identifying the sub-buffer
 *
 * Warns that the channel still holds unread data at @cons_off and, when the
 * sub-buffer's commit count shows it was never completely filled, logs the
 * index and commit count as errors. cons_idx and commit_count are signed
 * longs, hence the %ld conversions.
 */
450 static void ltt_relay_print_subbuffer_errors(
451 struct ust_channel
*channel
,
454 struct ust_buffer
*ltt_buf
= channel
->buf
;
455 long cons_idx
, commit_count
, write_offset
;
457 cons_idx
= SUBBUF_INDEX(cons_off
, channel
);
458 commit_count
= local_read(&ltt_buf
->commit_count
[cons_idx
]);
460 * No need to order commit_count and write_offset reads because we
461 * execute after trace is stopped when there are no readers left.
463 write_offset
= local_read(&ltt_buf
->offset
);
464 WARN( "LTT : unread channel %s offset is %ld "
465 "and cons_off : %ld\n",
466 channel
->channel_name
, write_offset
, cons_off
);
467 /* Check each sub-buffer for non filled commit count */
468 if (((commit_count
- channel
->subbuf_size
) & channel
->commit_count_mask
)
469 - (BUFFER_TRUNC(cons_off
, channel
) >> channel
->n_subbufs_order
) != 0) {
470 ERR("LTT : %s : subbuffer %ld has non filled "
471 "commit count %ld.\n",
472 channel
->channel_name
, cons_idx
, commit_count
);
474 ERR("LTT : %s : commit count : %ld, subbuf size %zd\n",
475 channel
->channel_name
, commit_count
,
476 channel
->subbuf_size
);
/*
 * ltt_relay_print_errors - report every sub-buffer still unread in a channel
 * @trace:   trace the channel belongs to
 * @channel: channel to inspect
 *
 * Walks from the consumed offset towards the write offset, one sub-buffer at
 * a time (SUBBUF_ALIGN), and calls ltt_relay_print_subbuffer_errors() for
 * each step.
 */
479 static void ltt_relay_print_errors(struct ltt_trace_struct
*trace
,
480 struct ust_channel
*channel
)
482 struct ust_buffer
*ltt_buf
= channel
->buf
;
485 for (cons_off
= atomic_long_read(&ltt_buf
->consumed
);
486 (SUBBUF_TRUNC(local_read(&ltt_buf
->offset
),
489 cons_off
= SUBBUF_ALIGN(cons_off
, channel
))
490 ltt_relay_print_subbuffer_errors(channel
, cons_off
);
/*
 * ltt_relay_print_buffer_errors - summarise the problems seen on a channel
 * @channel: channel to report on
 *
 * Prints the lost-event count and the corrupted-sub-buffer count when they
 * are non-zero, then lists unread sub-buffers through
 * ltt_relay_print_errors().
 */
493 static void ltt_relay_print_buffer_errors(struct ust_channel
*channel
)
495 struct ltt_trace_struct
*trace
= channel
->trace
;
496 struct ust_buffer
*ltt_buf
= channel
->buf
;
498 if (local_read(&ltt_buf
->events_lost
))
500 "LTT : %s : %ld events lost "
502 channel
->channel_name
,
503 local_read(&ltt_buf
->events_lost
),
504 channel
->channel_name
);
505 if (local_read(&ltt_buf
->corrupted_subbuffers
))
507 "LTT : %s : %ld corrupted subbuffers "
509 channel
->channel_name
,
510 local_read(&ltt_buf
->corrupted_subbuffers
),
511 channel
->channel_name
);
513 ltt_relay_print_errors(trace
, channel
);
/*
 * ltt_relay_release_channel - kref release callback for a channel
 * @kref: the kref embedded in struct ust_channel
 *
 * Release function passed to kref_put() by ust_buffers_destroy_buffer() and
 * ltt_relay_remove_channel(); recovers the channel with container_of().
 */
516 static void ltt_relay_release_channel(struct kref
*kref
)
518 struct ust_channel
*ltt_chan
= container_of(kref
,
519 struct ust_channel
, kref
);
526 //ust// static int ltt_relay_create_buffer(struct ltt_trace_struct *trace,
527 //ust// struct ltt_channel_struct *ltt_chan, struct rchan_buf *buf,
528 //ust// unsigned int cpu, unsigned int n_subbufs)
530 //ust// struct ltt_channel_buf_struct *ltt_buf =
531 //ust// percpu_ptr(ltt_chan->buf, cpu);
532 //ust// unsigned int j;
534 //ust// ltt_buf->commit_count =
535 //ust// kzalloc_node(sizeof(ltt_buf->commit_count) * n_subbufs,
536 //ust// GFP_KERNEL, cpu_to_node(cpu));
537 //ust// if (!ltt_buf->commit_count)
538 //ust// return -ENOMEM;
539 //ust// kref_get(&trace->kref);
540 //ust// kref_get(&trace->ltt_transport_kref);
541 //ust// kref_get(&ltt_chan->kref);
542 //ust// local_set(&ltt_buf->offset, ltt_subbuffer_header_size());
543 //ust// atomic_long_set(&ltt_buf->consumed, 0);
544 //ust// atomic_long_set(&ltt_buf->active_readers, 0);
545 //ust// for (j = 0; j < n_subbufs; j++)
546 //ust// local_set(&ltt_buf->commit_count[j], 0);
547 //ust// init_waitqueue_head(&ltt_buf->write_wait);
548 //ust// atomic_set(&ltt_buf->wakeup_readers, 0);
549 //ust// spin_lock_init(&ltt_buf->full_lock);
551 //ust// ltt_buffer_begin_callback(buf, trace->start_tsc, 0);
552 //ust// /* atomic_add made on local variable on data that belongs to
553 //ust// * various CPUs : ok because tracing not started (for this cpu). */
554 //ust// local_add(ltt_subbuffer_header_size(), &ltt_buf->commit_count[0]);
556 //ust// local_set(&ltt_buf->events_lost, 0);
557 //ust// local_set(&ltt_buf->corrupted_subbuffers, 0);
/*
 * ust_buffers_init_buffer - initialise the bookkeeping of a new buffer
 * @trace:     trace the channel belongs to; supplies start_tsc
 * @ltt_chan:  channel owning the buffer
 * @buf:       buffer to initialise
 * @n_subbufs: number of sub-buffers, i.e. number of commit counters
 *
 * Allocates one zeroed commit counter per sub-buffer (sized from the element
 * type, not from the pointer), takes references on the trace, its transport
 * and the channel, and resets the offset/consumed/active_readers state. The
 * write offset starts just past the sub-buffer header: the header of
 * sub-buffer 0 is written through ltt_buffer_begin_callback() and its size is
 * credited to commit_count[0]. Finally the two descriptors in fds[] become
 * the data-ready read/write ends, and the read end is made non-blocking.
 */
562 static int ust_buffers_init_buffer(struct ltt_trace_struct
*trace
,
563 struct ust_channel
*ltt_chan
, struct ust_buffer
*buf
,
564 unsigned int n_subbufs
)
571 zmalloc(sizeof(*buf
->commit_count
) * n_subbufs
);
572 if (!buf
->commit_count
)
574 kref_get(&trace
->kref
);
575 kref_get(&trace
->ltt_transport_kref
);
576 kref_get(&ltt_chan
->kref
);
577 local_set(&buf
->offset
, ltt_subbuffer_header_size());
578 atomic_long_set(&buf
->consumed
, 0);
579 atomic_long_set(&buf
->active_readers
, 0);
580 for (j
= 0; j
< n_subbufs
; j
++)
581 local_set(&buf
->commit_count
[j
], 0);
582 //ust// init_waitqueue_head(&buf->write_wait);
583 //ust// atomic_set(&buf->wakeup_readers, 0);
584 //ust// spin_lock_init(&buf->full_lock);
586 ltt_buffer_begin_callback(buf
, trace
->start_tsc
, 0);
588 local_add(ltt_subbuffer_header_size(), &buf
->commit_count
[0]);
590 local_set(&buf
->events_lost
, 0);
591 local_set(&buf
->corrupted_subbuffers
, 0);
598 buf
->data_ready_fd_read
= fds
[0];
599 buf
->data_ready_fd_write
= fds
[1];
601 /* FIXME: do we actually need this? */
602 result
= fcntl(fds
[0], F_SETFL
, O_NONBLOCK
);
607 //ust// buf->commit_seq = malloc(sizeof(buf->commit_seq) * n_subbufs);
608 //ust// if(!ltt_buf->commit_seq) {
612 /* FIXME: decrementally destroy on error */
/*
 * ust_buffers_destroy_buffer - undo ust_buffers_init_buffer()
 * @ltt_chan: channel whose buffer bookkeeping is torn down
 *
 * Drops the transport reference, prints the channel's error summary, frees
 * the commit counters (the pointer is cleared afterwards) and drops the
 * channel and trace references taken at initialisation time.
 */
617 /* FIXME: use this function */
618 static void ust_buffers_destroy_buffer(struct ust_channel
*ltt_chan
)
620 struct ltt_trace_struct
*trace
= ltt_chan
->trace
;
621 struct ust_buffer
*ltt_buf
= ltt_chan
->buf
;
623 kref_put(&ltt_chan
->trace
->ltt_transport_kref
,
624 ltt_release_transport
);
625 ltt_relay_print_buffer_errors(ltt_chan
);
626 //ust// free(ltt_buf->commit_seq);
627 kfree(ltt_buf
->commit_count
);
628 ltt_buf
->commit_count
= NULL
;
629 kref_put(&ltt_chan
->kref
, ltt_relay_release_channel
);
630 kref_put(&trace
->kref
, ltt_release_trace
);
631 //ust// wake_up_interruptible(&trace->kref_wq);
/*
 * ltt_chan_alloc_ltt_buf - allocate the channel's buffer descriptor in
 * System V shared memory
 * @chan: channel whose buf_shmid is filled in
 *
 * Creates a one-page segment keyed by getpid() (IPC_CREAT | IPC_EXCL, mode
 * 0700), attaches it and marks it IPC_RMID so it is destroyed once every
 * user has detached.
 *
 * NOTE(review): the function returns void, so callers cannot see a failed
 * shmget()/shmat() through the return value.
 * NOTE(review): the second shmctl(IPC_RMID) is presumably the cleanup path
 * for a failed shmat() -- confirm against the full source.
 */
634 static void ltt_chan_alloc_ltt_buf(struct ust_channel
*chan
)
640 /* FIXME: increase size if we have a seq_commit array that overflows the page */
641 size_t size
= PAGE_ALIGN(1);
643 result
= chan
->buf_shmid
= shmget(getpid(), size
, IPC_CREAT
| IPC_EXCL
| 0700);
644 if(chan
->buf_shmid
== -1) {
649 ptr
= shmat(chan
->buf_shmid
, NULL
, 0);
650 if(ptr
== (void *) -1) {
655 /* Already mark the shared memory for destruction. This will occur only
656 * when all users have detached.
658 result
= shmctl(chan
->buf_shmid
, IPC_RMID
, NULL
);
669 result
= shmctl(chan
->buf_shmid
, IPC_RMID
, NULL
);
/*
 * ust_buffers_create_channel - set up a trace channel and open its buffer
 * @trace_name:   name of the trace
 * @trace:        trace the channel is attached to
 * @channel_name: name of the channel
 * @ltt_chan:     channel structure to initialise
 * @subbuf_size:  size of one sub-buffer
 * @n_subbufs:    number of sub-buffers
 * @overwrite:    non-zero for overwrite (flight recorder) mode
 *
 * Initialises the channel kref, installs the sub-buffer begin/end callbacks,
 * derives n_subbufs_order and commit_count_mask from @n_subbufs, allocates
 * the buffer descriptor with ltt_chan_alloc_ltt_buf() and opens the channel
 * with ust_buffers_channel_open(). A failed open is logged and handled at
 * the relay_open_error label.
 */
680 static int ust_buffers_create_channel(const char *trace_name
, struct ltt_trace_struct
*trace
,
681 const char *channel_name
, struct ust_channel
*ltt_chan
,
682 unsigned int subbuf_size
, unsigned int n_subbufs
, int overwrite
)
687 kref_init(&ltt_chan
->kref
);
689 ltt_chan
->trace
= trace
;
690 ltt_chan
->buffer_begin
= ltt_buffer_begin_callback
;
691 ltt_chan
->buffer_end
= ltt_buffer_end_callback
;
692 ltt_chan
->overwrite
= overwrite
;
693 ltt_chan
->n_subbufs_order
= get_count_order(n_subbufs
);
694 ltt_chan
->commit_count_mask
= (~0UL >> ltt_chan
->n_subbufs_order
);
695 //ust// ltt_chan->buf = percpu_alloc_mask(sizeof(struct ltt_channel_buf_struct), GFP_KERNEL, cpu_possible_map);
697 ltt_chan_alloc_ltt_buf(ltt_chan
);
699 //ust// ltt_chan->buf = malloc(sizeof(struct ltt_channel_buf_struct));
702 /* FIXME: handle error of this call */
703 result
= ust_buffers_channel_open(ltt_chan
, subbuf_size
, n_subbufs
);
705 printk(KERN_ERR
"LTT : Can't open channel for trace %s\n",
707 goto relay_open_error
;
714 //ust// percpu_free(ltt_chan->buf);
722 * LTTng channel flush function.
724 * Must be called when no tracing is active in the channel, because of
725 * accesses across CPUs.
/*
 * ltt_relay_buffer_flush - flush the sub-buffer currently being written
 * @buf: buffer to flush
 *
 * Forces a FORCE_FLUSH switch of the current sub-buffer, then writes one
 * byte to data_ready_fd_write to signal the consumer.
 */
727 static notrace
void ltt_relay_buffer_flush(struct ust_buffer
*buf
)
731 //ust// buf->finalized = 1;
732 ltt_force_switch(buf
, FORCE_FLUSH
);
734 result
= write(buf
->data_ready_fd_write
, "1", 1);
736 PERROR("write (in ltt_relay_buffer_flush)");
737 ERR("this should never happen!");
/*
 * ltt_relay_async_wakeup_chan - deferred reader wake-up for a channel
 * @ltt_channel: channel concerned
 *
 * Every statement visible in this body is commented out: the kernel version
 * walked the per-CPU buffers and woke readers flagged in wakeup_readers.
 */
741 static void ltt_relay_async_wakeup_chan(struct ust_channel
*ltt_channel
)
743 //ust// unsigned int i;
744 //ust// struct rchan *rchan = ltt_channel->trans_channel_data;
746 //ust// for_each_possible_cpu(i) {
747 //ust// struct ltt_channel_buf_struct *ltt_buf =
748 //ust// percpu_ptr(ltt_channel->buf, i);
750 //ust// if (atomic_read(&ltt_buf->wakeup_readers) == 1) {
751 //ust// atomic_set(&ltt_buf->wakeup_readers, 0);
752 //ust// wake_up_interruptible(&rchan->buf[i]->read_wait);
/*
 * ltt_relay_finish_buffer - final flush of a channel's buffer
 * @channel: channel whose buffer is finished
 *
 * Flushes the buffer, then closes the write end of the data-ready
 * descriptor pair; as the comment below says, the close is what tells the
 * consumer that the buffer is finished.
 */
757 static void ltt_relay_finish_buffer(struct ust_channel
*channel
)
762 struct ust_buffer
*buf
= channel
->buf
;
763 ltt_relay_buffer_flush(buf
);
764 //ust// ltt_relay_wake_writers(ltt_buf);
765 /* closing the pipe tells the consumer the buffer is finished */
767 //result = write(ltt_buf->data_ready_fd_write, "D", 1);
769 // PERROR("write (in ltt_relay_finish_buffer)");
770 // ERR("this should never happen!");
772 close(buf
->data_ready_fd_write
);
/* Finish @channel: with the per-CPU loop commented out, this finishes the
 * channel's single buffer. */
777 static void ltt_relay_finish_channel(struct ust_channel
*channel
)
779 //ust// unsigned int i;
781 //ust// for_each_possible_cpu(i)
782 ltt_relay_finish_buffer(channel
);
/*
 * ltt_relay_remove_channel - close a channel and drop the creator's reference
 * @channel: channel to remove
 *
 * NOTE(review): ust_buffers_channel_close() already puts channel->kref with
 * ust_buffers_destroy_channel as release function, and the put here uses
 * ltt_relay_release_channel on the same kref -- confirm that the reference
 * counts and the two release callbacks are meant to pair up this way.
 */
785 static void ltt_relay_remove_channel(struct ust_channel
*channel
)
787 ust_buffers_channel_close(channel
);
788 kref_put(&channel
->kref
, ltt_relay_release_channel
);
/*
 * Working state shared by the reserve/switch helpers below.
 *
 * begin, end            - start and end offsets of the slot being reserved
 * old                   - write offset read before the attempt; it is the
 *                         comparison value of the cmpxchg on buf->offset
 * begin_switch,
 * end_switch_current,
 * end_switch_old        - 0/1 flags selecting which sub-buffer switch steps
 *                         ltt_relay_reserve_slot() runs
 * commit_count          - commit count returned by the switch helpers
 * reserve_commit_diff   - difference between the reserve position and the
 *                         commit count of the target sub-buffer; 0 means
 *                         "not corrupted" in the checks below
 * before_hdr_pad, size  - padding in front of the event header and total
 *                         slot size
 */
791 struct ltt_reserve_switch_offsets
{
792 long begin
, end
, old
;
793 long begin_switch
, end_switch_current
, end_switch_old
;
794 long commit_count
, reserve_commit_diff
;
795 size_t before_hdr_pad
, size
;
801 * !0 if execution must be aborted.
/*
 * ltt_relay_try_reserve - compute the offsets for one reservation attempt
 * @channel:       channel being written
 * @buf:           its buffer
 * @offsets:       working state, filled in here
 * @data_size:     size of the event payload
 * @tsc:           out: timestamp sampled for this attempt
 * @rflags:        out: set to LTT_RFLAG_ID_SIZE_TSC when the timestamp
 *                 overflowed since the last event
 * @largest_align: largest alignment of the event
 *
 * Nothing is published by this function: the caller commits offsets->end
 * with a cmpxchg against offsets->old and retries on failure.
 *
 * begin_switch is set when the write offset sits at the start of a
 * sub-buffer, and together with end_switch_old when the event does not fit
 * in what is left of the current one. On a switch the target sub-buffer's
 * commit count is checked; when the buffer is full in non-overwrite mode,
 * or when the event does not even fit in a fresh sub-buffer, events_lost is
 * incremented. end_switch_current is set when the slot ends exactly on a
 * sub-buffer boundary.
 */
803 static inline int ltt_relay_try_reserve(
804 struct ust_channel
*channel
, struct ust_buffer
*buf
,
805 struct ltt_reserve_switch_offsets
*offsets
, size_t data_size
,
806 u64
*tsc
, unsigned int *rflags
, int largest_align
)
808 offsets
->begin
= local_read(&buf
->offset
);
809 offsets
->old
= offsets
->begin
;
810 offsets
->begin_switch
= 0;
811 offsets
->end_switch_current
= 0;
812 offsets
->end_switch_old
= 0;
814 *tsc
= trace_clock_read64();
815 if (last_tsc_overflow(buf
, *tsc
))
816 *rflags
= LTT_RFLAG_ID_SIZE_TSC
;
818 if (SUBBUF_OFFSET(offsets
->begin
, buf
->chan
) == 0) {
819 offsets
->begin_switch
= 1; /* For offsets->begin */
821 offsets
->size
= ust_get_header_size(channel
,
822 offsets
->begin
, data_size
,
823 &offsets
->before_hdr_pad
, *rflags
);
824 offsets
->size
+= ltt_align(offsets
->begin
+ offsets
->size
,
827 if ((SUBBUF_OFFSET(offsets
->begin
, buf
->chan
) + offsets
->size
)
828 > buf
->chan
->subbuf_size
) {
829 offsets
->end_switch_old
= 1; /* For offsets->old */
830 offsets
->begin_switch
= 1; /* For offsets->begin */
833 if (offsets
->begin_switch
) {
836 if (offsets
->end_switch_old
)
837 offsets
->begin
= SUBBUF_ALIGN(offsets
->begin
,
839 offsets
->begin
= offsets
->begin
+ ltt_subbuffer_header_size();
840 /* Test new buffer integrity */
841 subbuf_index
= SUBBUF_INDEX(offsets
->begin
, buf
->chan
);
842 offsets
->reserve_commit_diff
=
843 (BUFFER_TRUNC(offsets
->begin
, buf
->chan
)
844 >> channel
->n_subbufs_order
)
845 - (local_read(&buf
->commit_count
[subbuf_index
])
846 & channel
->commit_count_mask
);
847 if (offsets
->reserve_commit_diff
== 0) {
850 consumed
= atomic_long_read(&buf
->consumed
);
852 /* Next buffer not corrupted. */
853 if (!channel
->overwrite
&&
854 (SUBBUF_TRUNC(offsets
->begin
, buf
->chan
)
855 - SUBBUF_TRUNC(consumed
, buf
->chan
))
856 >= channel
->alloc_size
) {
858 long consumed_idx
= SUBBUF_INDEX(consumed
, buf
->chan
);
859 long commit_count
= local_read(&buf
->commit_count
[consumed_idx
]);
860 if(((commit_count
- buf
->chan
->subbuf_size
) & channel
->commit_count_mask
) - (BUFFER_TRUNC(consumed
, buf
->chan
) >> channel
->n_subbufs_order
) != 0) {
861 WARN("Event dropped. Caused by non-committed event.");
864 WARN("Event dropped. Caused by non-consumed buffer.");
867 * We do not overwrite non consumed buffers
868 * and we are full : event is lost.
870 local_inc(&buf
->events_lost
);
874 * next buffer not corrupted, we are either in
875 * overwrite mode or the buffer is not full.
876 * It's safe to write in this new subbuffer.
881 * Next subbuffer corrupted. Force pushing reader even
882 * in normal mode. It's safe to write in this new
886 offsets
->size
= ust_get_header_size(channel
,
887 offsets
->begin
, data_size
,
888 &offsets
->before_hdr_pad
, *rflags
);
889 offsets
->size
+= ltt_align(offsets
->begin
+ offsets
->size
,
892 if ((SUBBUF_OFFSET(offsets
->begin
, buf
->chan
) + offsets
->size
)
893 > buf
->chan
->subbuf_size
) {
895 * Event too big for subbuffers, report error, don't
896 * complete the sub-buffer switch.
898 local_inc(&buf
->events_lost
);
902 * We just made a successful buffer switch and the event
903 * fits in the new subbuffer. Let's write.
908 * Event fits in the current buffer and we are not on a switch
909 * boundary. It's safe to write.
912 offsets
->end
= offsets
->begin
+ offsets
->size
;
914 if ((SUBBUF_OFFSET(offsets
->end
, buf
->chan
)) == 0) {
916 * The offset_end will fall at the very beginning of the next
919 offsets
->end_switch_current
= 1; /* For offsets->begin */
927 * !0 if execution must be aborted.
/*
 * ltt_relay_try_switch - compute the offsets for a forced sub-buffer switch
 * @mode:    FORCE_ACTIVE or FORCE_FLUSH
 * @channel: channel being switched
 * @buf:     its buffer
 * @offsets: working state, filled in here
 *
 * A switch is only prepared when the current sub-buffer holds data (the
 * write offset is not on a sub-buffer boundary); begin is then moved to the
 * next boundary and end_switch_old is set. In FORCE_ACTIVE mode the header
 * of the next sub-buffer is reserved too and that sub-buffer's commit count
 * is checked; when the buffer is full in non-overwrite mode the switch is
 * ignored while tracing is active. On success offsets->end equals
 * offsets->begin.
 */
929 static inline int ltt_relay_try_switch(
930 enum force_switch_mode mode
,
931 struct ust_channel
*channel
,
932 struct ust_buffer
*buf
,
933 struct ltt_reserve_switch_offsets
*offsets
,
938 offsets
->begin
= local_read(&buf
->offset
);
939 offsets
->old
= offsets
->begin
;
940 offsets
->begin_switch
= 0;
941 offsets
->end_switch_old
= 0;
943 *tsc
= trace_clock_read64();
945 if (SUBBUF_OFFSET(offsets
->begin
, buf
->chan
) != 0) {
946 offsets
->begin
= SUBBUF_ALIGN(offsets
->begin
, buf
->chan
);
947 offsets
->end_switch_old
= 1;
949 /* we do not have to switch : buffer is empty */
952 if (mode
== FORCE_ACTIVE
)
953 offsets
->begin
+= ltt_subbuffer_header_size();
955 * Always begin_switch in FORCE_ACTIVE mode.
956 * Test new buffer integrity
958 subbuf_index
= SUBBUF_INDEX(offsets
->begin
, buf
->chan
);
959 offsets
->reserve_commit_diff
=
960 (BUFFER_TRUNC(offsets
->begin
, buf
->chan
)
961 >> channel
->n_subbufs_order
)
962 - (local_read(&buf
->commit_count
[subbuf_index
])
963 & channel
->commit_count_mask
);
964 if (offsets
->reserve_commit_diff
== 0) {
965 /* Next buffer not corrupted. */
966 if (mode
== FORCE_ACTIVE
967 && !channel
->overwrite
968 && offsets
->begin
- atomic_long_read(&buf
->consumed
)
969 >= channel
->alloc_size
) {
971 * We do not overwrite non consumed buffers and we are
972 * full : ignore switch while tracing is active.
978 * Next subbuffer corrupted. Force pushing reader even in normal
982 offsets
->end
= offsets
->begin
;
/*
 * ltt_reserve_push_reader - move the consumed offset out of the writer's way
 * @channel: channel being written
 * @buf:     its buffer
 * @offsets: offsets of the reservation that was just made
 *
 * Retries a cmpxchg on buf->consumed until it succeeds. The new value is the
 * next sub-buffer boundary when the writer (offsets->end - 1) is a whole
 * buffer (alloc_size) ahead of the reader, and the unchanged old value
 * otherwise. When the reader really was pushed and reserve_commit_diff is
 * non-zero, the target sub-buffer's commit count is adjusted by that
 * difference, and the sub-buffer is counted as corrupted unless the channel
 * is in overwrite mode and the difference is exactly one sub-buffer.
 */
986 static inline void ltt_reserve_push_reader(
987 struct ust_channel
*channel
,
988 struct ust_buffer
*buf
,
989 struct ltt_reserve_switch_offsets
*offsets
)
991 long consumed_old
, consumed_new
;
994 consumed_old
= atomic_long_read(&buf
->consumed
);
996 * If buffer is in overwrite mode, push the reader consumed
997 * count if the write position has reached it and we are not
998 * at the first iteration (don't push the reader farther than
999 * the writer). This operation can be done concurrently by many
1000 * writers in the same buffer, the writer being at the farthest
1001 * write position sub-buffer index in the buffer being the one
1002 * which will win this loop.
1003 * If the buffer is not in overwrite mode, pushing the reader
1004 * only happens if a sub-buffer is corrupted.
1006 if ((SUBBUF_TRUNC(offsets
->end
-1, buf
->chan
)
1007 - SUBBUF_TRUNC(consumed_old
, buf
->chan
))
1008 >= channel
->alloc_size
)
1009 consumed_new
= SUBBUF_ALIGN(consumed_old
, buf
->chan
);
1011 consumed_new
= consumed_old
;
1014 } while (atomic_long_cmpxchg(&buf
->consumed
, consumed_old
,
1015 consumed_new
) != consumed_old
);
1017 if (consumed_old
!= consumed_new
) {
1019 * Reader pushed : we are the winner of the push, we can
1020 * therefore reequilibrate reserve and commit. Atomic increment
1021 * of the commit count permits other writers to play around
1022 * with this variable before us. We keep track of
1023 * corrupted_subbuffers even in overwrite mode :
1024 * we never want to write over a non completely committed
1025 * sub-buffer : possible causes : the buffer size is too low
1026 * compared to the unordered data input, or there is a writer
1027 * that died between the reserve and the commit.
1029 if (offsets
->reserve_commit_diff
) {
1031 * We have to alter the sub-buffer commit count.
1032 * We do not deliver the previous subbuffer, given it
1033 * was either corrupted or not consumed (overwrite
1036 local_add(offsets
->reserve_commit_diff
,
1038 SUBBUF_INDEX(offsets
->begin
,
1040 if (!channel
->overwrite
1041 || offsets
->reserve_commit_diff
1042 != channel
->subbuf_size
) {
1044 * The reserve commit diff was not subbuf_size :
1045 * it means the subbuffer was partly written to
1046 * and is therefore corrupted. If it is multiple
1047 * of subbuffer size and we are in flight
1048 * recorder mode, we are skipping over a whole
1051 local_inc(&buf
->corrupted_subbuffers
);
1059 * ltt_reserve_switch_old_subbuf: switch old subbuffer
1061 * Concurrency safe because we are the last and only thread to alter this
1062 * sub-buffer. As long as it is not delivered and read, no other thread can
1063 * alter the offset, alter the reserve_count or call the
1064 * client_buffer_end_callback on this sub-buffer.
1066 * The only remaining threads could be the ones with pending commits. They will
1067 * have to do the deliver themselves. Not concurrency safe in overwrite mode.
1068 * We detect corrupted subbuffers with commit and reserve counts. We keep a
1069 * corrupted sub-buffers count and push the readers across these sub-buffers.
1071 * Not concurrency safe if a writer is stalled in a subbuffer and another writer
1072 * switches in, finding out it's corrupted. The result will be that the old
1073 * (uncommitted) subbuffer will be declared corrupted, and that the new subbuffer
1074 * will be declared corrupted too because of the commit count adjustment.
1076 * Note : offset_old should never be 0 here.
/*
 * ltt_reserve_switch_old_subbuf - close the sub-buffer the writer just left
 * @channel: channel being written
 * @buf:     its buffer
 * @offsets: reservation state; ->old is the offset before the reservation
 * @tsc:     timestamp recorded as the sub-buffer's end
 *
 * Runs the channel's buffer_end hook on the sub-buffer containing
 * offsets->old - 1, adds the unused tail of that sub-buffer to its commit
 * count and, when the resulting count shows the sub-buffer is completely
 * committed, delivers it.
 */
1078 static inline void ltt_reserve_switch_old_subbuf(
1079 struct ust_channel
*channel
,
1080 struct ust_buffer
*buf
,
1081 struct ltt_reserve_switch_offsets
*offsets
, u64
*tsc
)
1083 long oldidx
= SUBBUF_INDEX(offsets
->old
- 1, channel
);
1085 channel
->buffer_end(buf
, *tsc
, offsets
->old
, oldidx
);
1086 /* Must write buffer end before incrementing commit count */
1088 offsets
->commit_count
=
1089 local_add_return(channel
->subbuf_size
1090 - (SUBBUF_OFFSET(offsets
->old
- 1, channel
)
1092 &buf
->commit_count
[oldidx
]);
1093 if ((BUFFER_TRUNC(offsets
->old
- 1, channel
)
1094 >> channel
->n_subbufs_order
)
1095 - ((offsets
->commit_count
- channel
->subbuf_size
)
1096 & channel
->commit_count_mask
) == 0)
1097 ltt_deliver(buf
, oldidx
, offsets
->commit_count
);
1101 * ltt_reserve_switch_new_subbuf: Populate new subbuffer.
1103 * This code can be executed unordered : writers may already have written to the
1104 * sub-buffer before this code gets executed, caution. The commit makes sure
1105 * that this code is executed before the deliver of this sub-buffer.
/*
 * ltt_reserve_switch_new_subbuf - open the sub-buffer the writer moved into
 * @channel: channel being written
 * @buf:     its buffer
 * @offsets: reservation state; ->begin lies inside the new sub-buffer
 * @tsc:     timestamp recorded as the sub-buffer's begin
 *
 * Runs the channel's buffer_begin hook, credits the header size to the
 * sub-buffer's commit count and delivers the sub-buffer if that credit
 * happened to complete it.
 */
1107 static /*inline*/ void ltt_reserve_switch_new_subbuf(
1108 struct ust_channel
*channel
,
1109 struct ust_buffer
*buf
,
1110 struct ltt_reserve_switch_offsets
*offsets
, u64
*tsc
)
1112 long beginidx
= SUBBUF_INDEX(offsets
->begin
, channel
);
1114 channel
->buffer_begin(buf
, *tsc
, beginidx
);
1115 /* Must write buffer begin before incrementing commit count */
1117 offsets
->commit_count
= local_add_return(ltt_subbuffer_header_size(),
1118 &buf
->commit_count
[beginidx
]);
1119 /* Check if the written buffer has to be delivered */
1120 if ((BUFFER_TRUNC(offsets
->begin
, channel
)
1121 >> channel
->n_subbufs_order
)
1122 - ((offsets
->commit_count
- channel
->subbuf_size
)
1123 & channel
->commit_count_mask
) == 0)
1124 ltt_deliver(buf
, beginidx
, offsets
->commit_count
);
1129 * ltt_reserve_end_switch_current: finish switching current subbuffer
1131 * Concurrency safe because we are the last and only thread to alter this
1132 * sub-buffer. As long as it is not delivered and read, no other thread can
1133 * alter the offset, alter the reserve_count or call the
1134 * client_buffer_end_callback on this sub-buffer.
1136 * The only remaining threads could be the ones with pending commits. They will
1137 * have to do the deliver themselves. Not concurrency safe in overwrite mode.
1138 * We detect corrupted subbuffers with commit and reserve counts. We keep a
1139 * corrupted sub-buffers count and push the readers across these sub-buffers.
1141 * Not concurrency safe if a writer is stalled in a subbuffer and another writer
1142 * switches in, finding out it's corrupted. The result will be that the old
1143 * (uncommitted) subbuffer will be declared corrupted, and that the new subbuffer
1144 * will be declared corrupted too because of the commit count adjustment.
/*
 * ltt_reserve_end_switch_current - close a sub-buffer that the reservation
 * filled exactly
 * @channel: channel being written
 * @buf:     its buffer
 * @offsets: reservation state; ->end lies on a sub-buffer boundary
 * @tsc:     timestamp recorded as the sub-buffer's end
 *
 * Runs the channel's buffer_end hook on the sub-buffer containing
 * offsets->end - 1, adds that sub-buffer's remaining space to its commit
 * count and delivers it when the count shows it is completely committed.
 */
1146 static inline void ltt_reserve_end_switch_current(
1147 struct ust_channel
*channel
,
1148 struct ust_buffer
*buf
,
1149 struct ltt_reserve_switch_offsets
*offsets
, u64
*tsc
)
1151 long endidx
= SUBBUF_INDEX(offsets
->end
- 1, channel
);
1153 channel
->buffer_end(buf
, *tsc
, offsets
->end
, endidx
);
1154 /* Must write buffer end before incrementing commit count */
1156 offsets
->commit_count
=
1157 local_add_return(channel
->subbuf_size
1158 - (SUBBUF_OFFSET(offsets
->end
- 1, channel
)
1160 &buf
->commit_count
[endidx
]);
1161 if ((BUFFER_TRUNC(offsets
->end
- 1, channel
)
1162 >> channel
->n_subbufs_order
)
1163 - ((offsets
->commit_count
- channel
->subbuf_size
)
1164 & channel
->commit_count_mask
) == 0)
1165 ltt_deliver(buf
, endidx
, offsets
->commit_count
);
1169 * ltt_relay_reserve_slot - Atomic slot reservation in a LTTng buffer.
1170 * @trace: the trace structure to log to.
1171 * @ltt_channel: channel structure
1172 * @transport_data: data structure specific to ltt relay
1173 * @data_size: size of the variable length data to log.
1174 * @slot_size: pointer to total size of the slot (out)
1175 * @buf_offset : pointer to reserved buffer offset (out)
1176 * @tsc: pointer to the tsc at the slot reservation (out)
1179 * Return : -ENOSPC if not enough space, else returns 0.
1180 * It will take care of sub-buffer switching.
/*
 * ltt_relay_reserve_slot - reserve room for one event in the channel buffer
 *
 * Outputs: *transport_data receives the channel's buffer, *slot_size the
 * total slot size, *buf_offset the slot start plus the padding in front of
 * the header, and *tsc the timestamp sampled for the reservation.
 *
 * The offsets are computed by ltt_relay_try_reserve() and published with a
 * cmpxchg on buf->offset; the loop retries while another writer wins the
 * race. An event is counted as lost when ltt_nesting exceeds 4. Once the
 * reservation succeeded the steps run in this order: save the timestamp,
 * push the reader, close the old sub-buffer, open the new one, and close the
 * current one if the slot filled it exactly.
 */
1182 static notrace
int ltt_relay_reserve_slot(struct ltt_trace_struct
*trace
,
1183 struct ust_channel
*channel
, void **transport_data
,
1184 size_t data_size
, size_t *slot_size
, long *buf_offset
, u64
*tsc
,
1185 unsigned int *rflags
, int largest_align
)
1187 struct ust_buffer
*buf
= *transport_data
= channel
->buf
;
1188 struct ltt_reserve_switch_offsets offsets
;
1190 offsets
.reserve_commit_diff
= 0;
1194 * Perform retryable operations.
1196 if (ltt_nesting
> 4) {
1197 local_inc(&buf
->events_lost
);
1201 if (ltt_relay_try_reserve(channel
, buf
, &offsets
, data_size
, tsc
, rflags
,
1204 } while (local_cmpxchg(&buf
->offset
, offsets
.old
,
1205 offsets
.end
) != offsets
.old
);
1208 * Atomically update last_tsc. This update races against concurrent
1209 * atomic updates, but the race will always cause supplementary full TSC
1210 * events, never the opposite (missing a full TSC event when it would be
1213 save_last_tsc(buf
, *tsc
);
1216 * Push the reader if necessary
1218 ltt_reserve_push_reader(channel
, buf
, &offsets
);
1221 * Switch old subbuffer if needed.
1223 if (offsets
.end_switch_old
)
1224 ltt_reserve_switch_old_subbuf(channel
, buf
, &offsets
, tsc
);
1227 * Populate new subbuffer.
1229 if (offsets
.begin_switch
)
1230 ltt_reserve_switch_new_subbuf(channel
, buf
, &offsets
, tsc
);
1232 if (offsets
.end_switch_current
)
1233 ltt_reserve_end_switch_current(channel
, buf
, &offsets
, tsc
);
1235 *slot_size
= offsets
.size
;
1236 *buf_offset
= offsets
.begin
+ offsets
.before_hdr_pad
;
1241 * Force a sub-buffer switch for a per-cpu buffer. This operation is
1242 * completely reentrant : can be called while tracing is active with
1243 * absolutely no lock held.
1245 * Note, however, that as a local_cmpxchg is used for some atomic
1246 * operations, this function must be called from the CPU which owns the buffer
1247 * for an ACTIVE flush.
/*
 * Visible flow: ltt_relay_try_switch() is retried until local_cmpxchg()
 * moves buf->offset from offsets.old to offsets.end; the timestamp is then
 * recorded with save_last_tsc() and the old sub-buffer is switched when
 * offsets.end_switch_old is set. Pushing the reader and populating a new
 * sub-buffer are done only when mode == FORCE_ACTIVE.
 *
 * NOTE(review): braces, the declaration of tsc, "do {" and the body of the
 * if on ltt_relay_try_switch() are missing from this excerpt; confirm
 * against the upstream file.
 */
1249 static notrace
void ltt_force_switch(struct ust_buffer
*buf
,
1250 enum force_switch_mode mode
)
1252 struct ust_channel
*channel
= buf
->chan
;
1253 struct ltt_reserve_switch_offsets offsets
;
1256 offsets
.reserve_commit_diff
= 0;
1260 * Perform retryable operations.
1263 if (ltt_relay_try_switch(mode
, channel
, buf
, &offsets
, &tsc
))
/* Loop again if buf->offset changed since offsets.old was sampled. */
1265 } while (local_cmpxchg(&buf
->offset
, offsets
.old
,
1266 offsets
.end
) != offsets
.old
);
1269 * Atomically update last_tsc. This update races against concurrent
1270 * atomic updates, but the race will always cause supplementary full TSC
1271 * events, never the opposite (missing a full TSC event when it would be
1274 save_last_tsc(buf
, tsc
);
1277 * Push the reader if necessary
1279 if (mode
== FORCE_ACTIVE
)
1280 ltt_reserve_push_reader(channel
, buf
, &offsets
);
1283 * Switch old subbuffer if needed.
1285 if (offsets
.end_switch_old
)
1286 ltt_reserve_switch_old_subbuf(channel
, buf
, &offsets
, &tsc
);
1289 * Populate new subbuffer.
1291 if (mode
== FORCE_ACTIVE
)
1292 ltt_reserve_switch_new_subbuf(channel
, buf
, &offsets
, &tsc
);
/*
 * Print diagnostics for a lost event on channel trace->channels[chan_index].
 *
 * Emits the nesting level and the write/read positions held in *dbg, then
 * overwrites dbg->write and dbg->read with the current buf->offset and
 * buf->consumed values and prints the resulting "cur" figures.
 *
 * NOTE(review): the assignment to buf (original line 1303) and the argument
 * of the "LTT avail size" printk (line 1310) are missing from this excerpt;
 * confirm against the upstream file.
 */
1295 static void ltt_relay_print_user_errors(struct ltt_trace_struct
*trace
,
1296 unsigned int chan_index
, size_t data_size
,
1297 struct user_dbg_data
*dbg
)
1299 struct ust_channel
*channel
;
1300 struct ust_buffer
*buf
;
1302 channel
= &trace
->channels
[chan_index
];
1305 printk(KERN_ERR
"Error in LTT usertrace : "
1306 "buffer full : event lost in blocking "
1307 "mode. Increase LTT_RESERVE_CRITICAL.\n");
1308 printk(KERN_ERR
"LTT nesting level is %u.\n", ltt_nesting
);
1309 printk(KERN_ERR
"LTT avail size %lu.\n",
/* Positions as supplied by the caller in *dbg. */
1311 printk(KERN_ERR
"avai write : %lu, read : %lu\n",
1312 dbg
->write
, dbg
->read
);
/* Replace the snapshot in *dbg with the buffer's current counters. */
1314 dbg
->write
= local_read(&buf
->offset
);
1315 dbg
->read
= atomic_long_read(&buf
->consumed
);
1317 printk(KERN_ERR
"LTT cur size %lu.\n",
1318 dbg
->write
+ LTT_RESERVE_CRITICAL
+ data_size
1319 - SUBBUF_TRUNC(dbg
->read
, channel
));
1320 printk(KERN_ERR
"cur write : %lu, read : %lu\n",
1321 dbg
->write
, dbg
->read
);
/*
 * Transport descriptor passed to ltt_transport_register() and
 * ltt_transport_unregister() by the constructor/destructor in this file.
 * The visible fields wire up the channel create/finish/remove/wakeup
 * callbacks, slot reservation and the user-error printer; .commit_slot is
 * left commented out.
 *
 * NOTE(review): the leading fields (original lines 1325-1326) and the
 * closing brace are missing from this excerpt.
 */
1324 static struct ltt_transport ust_relay_transport
= {
1327 .create_channel
= ust_buffers_create_channel
,
1328 .finish_channel
= ltt_relay_finish_channel
,
1329 .remove_channel
= ltt_relay_remove_channel
,
1330 .wakeup_channel
= ltt_relay_async_wakeup_chan
,
1331 // .commit_slot = ltt_relay_commit_slot,
1332 .reserve_slot
= ltt_relay_reserve_slot
,
1333 .user_errors
= ltt_relay_print_user_errors
,
1338 * for flight recording. must be called after relay_commit.
1339 * This function decrements the subbuffer's lost_size each time the commit count
1340 * reaches back the reserve offset (modulo subbuffer size). It is useful for
/*
 * Raise commit_seq[idx] to commit_count.
 *
 * offset is buf_offset + data_size. An unlikely() test on
 * SUBBUF_OFFSET(offset - commit_count, buf->chan) guards a statement that is
 * not visible in this excerpt. After it, commit_seq[idx] is read and a
 * local_cmpxchg() loop runs for as long as the stored value is smaller than
 * commit_count.
 *
 * NOTE(review): "<t_buf" below looks like an HTML-entity decoding artifact
 * of "&ltt_buf" - confirm against the upstream file. The declaration of
 * offset, the braces and the statement guarded by the unlikely() test are
 * also missing from this excerpt.
 */
1343 static /* inline */ void ltt_write_commit_counter(struct ust_buffer
*buf
,
1344 struct ust_buffer
*ltt_buf
,
1345 long idx
, long buf_offset
, long commit_count
, size_t data_size
)
1348 long commit_seq_old
;
1350 offset
= buf_offset
+ data_size
;
1353 * SUBBUF_OFFSET includes commit_count_mask. We can simply
1354 * compare the offsets within the subbuffer without caring about
1355 * buffer full/empty mismatch because offset is never zero here
1356 * (subbuffer header and event headers have non-zero length).
1358 if (unlikely(SUBBUF_OFFSET(offset
- commit_count
, buf
->chan
)))
1361 commit_seq_old
= local_read(<t_buf
->commit_seq
[idx
]);
/* Retry with the value returned by local_cmpxchg() until it is no longer below commit_count. */
1362 while (commit_seq_old
< commit_count
)
1363 commit_seq_old
= local_cmpxchg(<t_buf
->commit_seq
[idx
],
1364 commit_seq_old
, commit_count
);
1368 * Atomic unordered slot commit. Increments the commit count in the
1369 * specified sub-buffer, and delivers it if necessary.
1373 * @channel : channel structure
1374 * @transport_data: transport-specific data
1375 * @buf_offset : offset following the event header.
1376 * @data_size : size of the event data.
1377 * @slot_size : size of the reserved slot.
1379 /* FIXME: make this function static inline in the .h! */
/*
 * Visible flow: take the buffer from *transport_data, add slot_size to
 * buf->commit_count[endidx] (endidx is the sub-buffer index of
 * buf_offset - 1), call ltt_deliver() when the comparison below evaluates
 * to 0, then call ltt_write_commit_counter() with the same buffer passed
 * for both of its buffer arguments.
 *
 * NOTE(review): braces, the declaration of commit_count and the line that
 * follows the "Must write slot data" comment (original line 1391) are
 * missing from this excerpt; confirm against the upstream file.
 */
1380 /*static*/ /* inline */ notrace
void ltt_commit_slot(
1381 struct ust_channel
*channel
,
1382 void **transport_data
, long buf_offset
,
1383 size_t data_size
, size_t slot_size
)
1385 struct ust_buffer
*buf
= *transport_data
;
1386 long offset_end
= buf_offset
;
1387 long endidx
= SUBBUF_INDEX(offset_end
- 1, channel
);
1390 /* Must write slot data before incrementing commit count */
1392 commit_count
= local_add_return(slot_size
,
1393 &buf
->commit_count
[endidx
]);
1394 /* Check if all commits have been done */
1395 if ((BUFFER_TRUNC(offset_end
- 1, channel
)
1396 >> channel
->n_subbufs_order
)
1397 - ((commit_count
- channel
->subbuf_size
)
1398 & channel
->commit_count_mask
) == 0)
1399 ltt_deliver(buf
, endidx
, commit_count
);
1401 * Update lost_size for each commit. It's needed only for extracting
1402 * ltt buffers from vmcore, after crash.
1404 ltt_write_commit_counter(buf
, buf
, endidx
,
1405 buf_offset
, commit_count
, data_size
);
/*
 * File-local flag, initially 0. Its readers and writers are not visible in
 * this excerpt; presumably it guards one-time transport registration - TODO
 * confirm against the constructor body upstream.
 */
1409 static char initialized
= 0;
/*
 * Constructor-attribute function that registers ust_relay_transport through
 * ltt_transport_register().
 *
 * NOTE(review): original lines 1412-1413 and anything after 1414 are missing
 * from this excerpt, so any guard around the registration is not visible.
 */
1411 void __attribute__((constructor
)) init_ustrelay_transport(void)
1414 ltt_transport_register(&ust_relay_transport
);
/*
 * Destructor-attribute function that unregisters ust_relay_transport through
 * ltt_transport_unregister(). The braces are missing from this excerpt.
 */
1419 static void __attribute__((destructor
)) ltt_relay_exit(void)
1421 ltt_transport_unregister(&ust_relay_transport
);