1 /* LTTng user-space "fast" library
3 * This daemon is spawned by each traced thread (to share the mmap).
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
8 * It uses the control information in the shared memory area (producer/consumer
11 * When the parent thread dies (yes, those things may happen) ;), this daemon
12 * will flush the last buffer and write it to disk.
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
28 * Eventually, there will be two modes:
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
40 * Copyright 2006 Mathieu Desnoyers
44 #define inline inline __attribute__((always_inline))
48 #define LTT_TRACE_FAST
62 #include <sys/param.h>
64 #include <sys/types.h>
68 #include <linux/unistd.h>
70 // included with hack for powerpc in ltt-usertrace.h #include <asm/atomic.h>
71 #include <asm/timex.h> //for get_cycles()
73 #include <ltt/ltt-usertrace.h>
75 _syscall0(pid_t
,gettid
);
78 #define dbg_printf(...) printf(__VA_ARGS__)
80 #define dbg_printf(...)
81 #endif //LTT_SHOW_DEBUG
/* Buffer-switch mode for flush_buffer():
 * FORCE_ACTIVE - switch while a writer may still be active (header space is
 *                reserved in the new sub-buffer and the reader may be pushed).
 * FORCE_FLUSH  - final flush when the traced parent is known to be gone. */
84 enum force_switch_mode
{ FORCE_ACTIVE
, FORCE_FLUSH
};
86 /* Writer (the traced application) */
/* Per-thread pointer to the mmap'd trace info shared with this thread's
 * dumper daemon. NULL until ltt_rw_init() sets it; cleared again by
 * ltt_usertrace_fast_cleanup(). */
88 __thread
struct ltt_trace_info
*thread_trace_info
= NULL
;
/* Ask this thread's dumper daemon (daemon_id stored in the shared trace info)
 * to perform a buffer switch by sending it SIGUSR1.
 * NOTE(review): extraction appears to have dropped lines here (opening brace
 * and, presumably, a NULL check on tmp) — verify against the original file. */
90 void ltt_usertrace_fast_buffer_switch(void)
92 struct ltt_trace_info
*tmp
= thread_trace_info
;
94 kill(tmp
->daemon_id
, SIGUSR1
);
97 /* The cleanup should never be called from a signal handler */
/* Thread-exit cleanup: detach the per-thread trace info, tell the dumper
 * daemon the parent is exiting (SIGUSR2), and unmap the shared area.
 * The daemon flushes the last buffer on SIGUSR2 (see handler_sigusr2 and the
 * daemon main loop). arg is unused (pthread cleanup-handler signature). */
98 static void ltt_usertrace_fast_cleanup(void *arg
)
100 struct ltt_trace_info
*tmp
= thread_trace_info
;
/* Clear the TLS pointer first so no further tracing uses the dying mapping. */
102 thread_trace_info
= NULL
;
103 kill(tmp
->daemon_id
, SIGUSR2
);
104 munmap(tmp
, sizeof(*tmp
));
108 /* Reader (the disk dumper daemon) */
/* PID of the traced parent process; set in ltt_usertrace_fast_daemon(). */
110 static pid_t traced_pid
= 0;
/* TID of the traced parent thread; set in ltt_usertrace_fast_daemon(). */
111 static pid_t traced_tid
= 0;
/* Flag set when the parent exited; the daemon loop breaks on it. */
112 static int parent_exited
= 0;
/* Output file descriptor; -1 until open_output_files() succeeds. */
113 static int fd_process
= -1;
/* "process-<identifier>" output file name, built in open_output_files(). */
114 static char outfile_name
[PATH_MAX
];
/* "<tid>.<pid>.<cycles>" identifier, built in open_output_files(). */
115 static char identifier_name
[PATH_MAX
];
117 /* signal handling */
/* SIGUSR1: the traced parent requested a buffer switch. The handler only
 * logs; the actual read happens in the daemon loop after sigsuspend returns. */
118 static void handler_sigusr1(int signo
)
120 dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo
);
/* SIGUSR2: the traced parent is exiting. The handler only logs; the daemon
 * loop performs the final flush. NOTE(review): the line that presumably sets
 * parent_exited is not visible here — confirm against the original file. */
123 static void handler_sigusr2(int signo
)
125 dbg_printf("LTT Signal %d received : parent exited.\n", signo
);
/* SIGALRM: periodic liveness check. If getppid() no longer matches the
 * recorded traced_pid, the parent died (possibly without SIGUSR2) and the
 * daemon must clean up. */
129 static void handler_sigalarm(int signo
)
131 dbg_printf("LTT Signal %d received\n", signo
);
133 if(getppid() != traced_pid
) {
135 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid
);
141 /* Do a buffer switch. Don't switch if buffer is completely empty */
/* Lock-free forced sub-buffer switch on ltt_buf. Computes the new write
 * offset, cmpxchg-installs it, optionally pushes the reader (FORCE_ACTIVE),
 * repairs reserve/commit counts of corrupted sub-buffers, and fires the
 * end/begin/deliver callbacks.
 * NOTE(review): this listing is incomplete — loop headers, braces and some
 * statements were dropped by extraction, and every `<t_buf` below looks like
 * HTML-entity-mangled `&ltt_buf`. Restore from the original before compiling. */
142 static void flush_buffer(struct ltt_buf
*ltt_buf
, enum force_switch_mode mode
)
145 int offset_begin
, offset_end
, offset_old
;
146 int reserve_commit_diff
;
147 int consumed_old
, consumed_new
;
148 int commit_count
, reserve_count
;
/* Snapshot the current write offset; the cmpxchg below retries on races. */
152 offset_old
= atomic_read(<t_buf
->offset
);
153 offset_begin
= offset_old
;
155 tsc
= ltt_get_timestamp();
157 /* Error in getting the timestamp : should not happen : it would
158 * mean we are called from an NMI during a write seqlock on xtime. */
/* Non-zero offset within the sub-buffer: align up to the next sub-buffer. */
162 if(SUBBUF_OFFSET(offset_begin
, ltt_buf
) != 0) {
163 offset_begin
= SUBBUF_ALIGN(offset_begin
, ltt_buf
);
166 /* we do not have to switch : buffer is empty */
169 if(mode
== FORCE_ACTIVE
)
/* Reserve room for the sub-buffer header in the new sub-buffer. */
170 offset_begin
+= ltt_subbuf_header_len(ltt_buf
);
171 /* Always begin_switch in FORCE_ACTIVE mode */
173 /* Test new buffer integrity */
/* reserve - commit mismatch on the target sub-buffer means it is corrupted. */
174 reserve_commit_diff
=
176 <t_buf
->reserve_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)])
178 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
179 if(reserve_commit_diff
== 0) {
180 /* Next buffer not corrupted. */
181 if(mode
== FORCE_ACTIVE
182 && (offset_begin
-atomic_read(<t_buf
->consumed
))
183 >= ltt_buf
->alloc_size
) {
184 /* We do not overwrite non consumed buffers and we are full : ignore
185 switch while tracing is active. */
189 /* Next subbuffer corrupted. Force pushing reader even in normal mode */
192 offset_end
= offset_begin
;
/* Publish the new offset; retry the whole computation if a writer raced us. */
193 } while(atomic_cmpxchg(<t_buf
->offset
, offset_old
, offset_end
)
197 if(mode
== FORCE_ACTIVE
) {
198 /* Push the reader if necessary */
200 consumed_old
= atomic_read(<t_buf
->consumed
);
201 /* If buffer is in overwrite mode, push the reader consumed count if
202 the write position has reached it and we are not at the first
203 iteration (don't push the reader farther than the writer).
204 This operation can be done concurrently by many writers in the
205 same buffer, the writer being at the farthest write position sub-buffer
206 index in the buffer being the one which will win this loop. */
207 /* If the buffer is not in overwrite mode, pushing the reader only
208 happens if a sub-buffer is corrupted */
209 if((SUBBUF_TRUNC(offset_end
-1, ltt_buf
)
210 - SUBBUF_TRUNC(consumed_old
, ltt_buf
))
211 >= ltt_buf
->alloc_size
)
212 consumed_new
= SUBBUF_ALIGN(consumed_old
, ltt_buf
);
214 consumed_new
= consumed_old
;
217 } while(atomic_cmpxchg(<t_buf
->consumed
, consumed_old
, consumed_new
)
220 if(consumed_old
!= consumed_new
) {
221 /* Reader pushed : we are the winner of the push, we can therefore
222 reequilibrate reserve and commit. Atomic increment of the commit
223 count permits other writers to play around with this variable
224 before us. We keep track of corrupted_subbuffers even in overwrite
226 we never want to write over a non completely committed sub-buffer :
227 possible causes : the buffer size is too low compared to the unordered
228 data input, or there is a writer who died between the reserve and the
230 if(reserve_commit_diff
) {
231 /* We have to alter the sub-buffer commit count : a sub-buffer is
/* Re-equilibrate commit with reserve and account the corruption. */
233 atomic_add(reserve_commit_diff
,
234 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
235 atomic_inc(<t_buf
->corrupted_subbuffers
);
244 /* Concurrency safe because we are the last and only thread to alter this
245 sub-buffer. As long as it is not delivered and read, no other thread can
246 alter the offset, alter the reserve_count or call the
247 client_buffer_end_callback on this sub-buffer.
248 The only remaining threads could be the ones with pending commits. They
249 will have to do the deliver themselves.
250 Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
251 commit and reserve counts. We keep a corrupted sub-buffers count and push
252 the readers across these sub-buffers.
253 Not concurrency safe if a writer is stalled in a subbuffer and
254 another writer switches in, finding out it's corrupted. The result will be
255 that the old (uncommitted) subbuffer will be declared corrupted, and that
256 the new subbuffer will be declared corrupted too because of the commit
258 Offset old should never be 0. */
/* Close the old sub-buffer. */
259 ltt_buffer_end_callback(ltt_buf
, tsc
, offset_old
,
260 SUBBUF_INDEX((offset_old
), ltt_buf
));
261 /* Setting this reserve_count will allow the sub-buffer to be delivered by
262 the last committer. */
263 reserve_count
= atomic_add_return((SUBBUF_OFFSET((offset_old
-1),
265 <t_buf
->reserve_count
[SUBBUF_INDEX((offset_old
),
/* Deliver the old sub-buffer ourselves if everything is already committed. */
267 if(reserve_count
== atomic_read(
268 <t_buf
->commit_count
[SUBBUF_INDEX((offset_old
), ltt_buf
)])) {
269 ltt_deliver_callback(ltt_buf
, SUBBUF_INDEX((offset_old
), ltt_buf
), NULL
);
273 if(mode
== FORCE_ACTIVE
) {
275 /* This code can be executed unordered : writers may already have written
276 to the sub-buffer before this code gets executed, caution. */
277 /* The commit makes sure that this code is executed before the deliver
278 of this sub-buffer */
/* Open the new sub-buffer and commit its header. */
279 ltt_buffer_begin_callback(ltt_buf
, tsc
, SUBBUF_INDEX(offset_begin
, ltt_buf
));
280 commit_count
= atomic_add_return(ltt_subbuf_header_len(ltt_buf
),
281 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
282 /* Check if the written buffer has to be delivered */
283 if(commit_count
== atomic_read(
284 <t_buf
->reserve_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)])) {
285 ltt_deliver_callback(ltt_buf
, SUBBUF_INDEX(offset_begin
, ltt_buf
), NULL
);
/* Create LTT_USERTRACE_ROOT (if needed), chdir into it, and open the
 * per-process output file "process-<tid>.<pid>.<cycles>". With
 * LTT_NULL_OUTPUT_TEST the file is a symlink to /dev/null instead.
 * Returns the open file descriptor (stored in fd_process by the caller).
 * NOTE(review): several lines (error returns, closing braces) were dropped
 * by extraction — verify against the original. */
292 static int open_output_files(void)
296 /* Open output files */
298 ret
= mkdir(LTT_USERTRACE_ROOT
, 0777);
/* An already-existing root directory is fine; any other mkdir error is not. */
299 if(ret
< 0 && errno
!= EEXIST
) {
300 perror("LTT Error in creating output (mkdir)");
303 ret
= chdir(LTT_USERTRACE_ROOT
);
305 perror("LTT Error in creating output (chdir)");
/* Unique trace identity: tid.pid.timestamp-cycles. */
308 snprintf(identifier_name
, PATH_MAX
-1, "%lu.%lu.%llu",
309 traced_tid
, traced_pid
, get_cycles());
310 snprintf(outfile_name
, PATH_MAX
-1, "process-%s", identifier_name
);
312 #ifndef LTT_NULL_OUTPUT_TEST
313 fd
= creat(outfile_name
, 0644);
/* Test mode: route the output through a symlink to /dev/null. */
316 ret
= symlink("/dev/null", outfile_name
);
318 perror("error in symlink");
321 fd
= open(outfile_name
, O_WRONLY
);
323 perror("Error in open");
326 #endif //LTT_NULL_OUTPUT_TEST
/* Try to reserve the oldest full sub-buffer for reading.
 * On success, *offset is set to the consumed position to read from.
 * Fails (presumably -EAGAIN, see read_subbuffer) when the sub-buffer is not
 * fully committed yet or when the buffer is empty.
 * NOTE(review): `<t_buf` below looks like HTML-entity-mangled `&ltt_buf`,
 * and some return statements were dropped by extraction. */
330 static inline int ltt_buffer_get(struct ltt_buf
*ltt_buf
,
331 unsigned int *offset
)
333 unsigned int consumed_old
, consumed_idx
;
334 consumed_old
= atomic_read(<t_buf
->consumed
);
335 consumed_idx
= SUBBUF_INDEX(consumed_old
, ltt_buf
);
/* Sub-buffer not fully committed yet : nothing stable to read. */
337 if(atomic_read(<t_buf
->commit_count
[consumed_idx
])
338 != atomic_read(<t_buf
->reserve_count
[consumed_idx
])) {
/* Write position still in the same sub-buffer : buffer is empty. */
341 if((SUBBUF_TRUNC(atomic_read(<t_buf
->offset
), ltt_buf
)
342 -SUBBUF_TRUNC(consumed_old
, ltt_buf
)) == 0) {
346 *offset
= consumed_old
;
/* Release a sub-buffer obtained from ltt_buffer_get(): advance the consumed
 * count past it and wake the writer waiting on writer_sem.
 * NOTE(review): the second parameter line (presumably `unsigned int offset`)
 * was dropped by extraction, and `<t_buf` looks like mangled `&ltt_buf`. */
351 static inline int ltt_buffer_put(struct ltt_buf
*ltt_buf
,
354 unsigned int consumed_old
, consumed_new
;
357 consumed_old
= offset
;
358 consumed_new
= SUBBUF_ALIGN(consumed_old
, ltt_buf
);
/* cmpxchg failure means the writer pushed us past this sub-buffer. */
359 if(atomic_cmpxchg(<t_buf
->consumed
, consumed_old
, consumed_new
)
361 /* We have been pushed by the writer : the last buffer read _is_
363 * It can also happen if this is a buffer we never got. */
/* Don't post the semaphore once the traced parent is gone. */
366 if(traced_pid
== 0 || parent_exited
) return 0;
368 ret
= sem_post(<t_buf
->writer_sem
);
370 printf("error in sem_post");
/* Read one full sub-buffer from ltt_buf and write it to fd, lazily opening
 * the output file on first use. Returns via err paths when no sub-buffer is
 * available (-EAGAIN from ltt_buffer_get) or on I/O error.
 * NOTE(review): lines were dropped by extraction, and `pair->trace` in the
 * fsync call is not declared anywhere visible — likely residue from another
 * version of this function; verify against the original. */
375 static int read_subbuffer(struct ltt_buf
*ltt_buf
, int fd
)
377 unsigned int consumed_old
;
379 dbg_printf("LTT read buffer\n");
382 err
= ltt_buffer_get(ltt_buf
, &consumed_old
);
384 if(err
!= -EAGAIN
) dbg_printf("LTT Reserving sub buffer failed\n");
/* First sub-buffer: create/open the output file lazily. */
387 if(fd_process
== -1) {
388 fd_process
= fd
= open_output_files();
/* Write the whole sub-buffer, retrying on EINTR. The mask with
 * alloc_size-1 maps the consumed count onto the circular buffer. */
391 err
= TEMP_FAILURE_RETRY(write(fd
,
393 + (consumed_old
& ((ltt_buf
->alloc_size
)-1)),
394 ltt_buf
->subbuf_size
));
397 perror("Error in writing to file");
401 err
= fsync(pair
->trace
);
404 perror("Error in writing to file");
/* Hand the sub-buffer back to the writer side. */
409 err
= ltt_buffer_put(ltt_buf
, consumed_old
);
413 dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
414 /* FIXME : we may delete the last written buffer if we wish. */
423 /* This function is called by ltt_rw_init which has signals blocked */
/* Main loop of the disk-dumper daemon: install SIGUSR1/SIGUSR2/SIGALRM
 * handlers, then sigsuspend-wait and drain sub-buffers to fd_process until
 * the traced parent dies or exits; finally force-flush the last buffer,
 * destroy the writer semaphore and unmap the shared area.
 * NOTE(review): loop constructs and braces were dropped by extraction. */
424 static void ltt_usertrace_fast_daemon(struct ltt_trace_info
*shared_trace_info
,
425 sigset_t oldset
, pid_t l_traced_pid
, pthread_t l_traced_tid
)
427 struct sigaction act
;
/* Record who we are tracing, for the liveness check in handler_sigalarm. */
430 traced_pid
= l_traced_pid
;
431 traced_tid
= l_traced_tid
;
433 dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
434 shared_trace_info
->init
, getpid(), traced_pid
, traced_tid
);
/* SIGUSR1: parent requests a buffer switch. */
436 act
.sa_handler
= handler_sigusr1
;
438 sigemptyset(&(act
.sa_mask
));
439 sigaddset(&(act
.sa_mask
), SIGUSR1
);
440 sigaction(SIGUSR1
, &act
, NULL
);
/* SIGUSR2: parent is exiting. */
442 act
.sa_handler
= handler_sigusr2
;
444 sigemptyset(&(act
.sa_mask
));
445 sigaddset(&(act
.sa_mask
), SIGUSR2
);
446 sigaction(SIGUSR2
, &act
, NULL
);
/* SIGALRM: periodic parent-liveness check. */
448 act
.sa_handler
= handler_sigalarm
;
450 sigemptyset(&(act
.sa_mask
));
451 sigaddset(&(act
.sa_mask
), SIGALRM
);
452 sigaction(SIGALRM
, &act
, NULL
);
/* Atomically unblock signals and wait for one (mask from ltt_rw_init). */
457 ret
= sigsuspend(&oldset
);
459 perror("LTT Error in sigsuspend\n");
461 if(traced_pid
== 0) break; /* parent died */
462 if(parent_exited
) break;
463 dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
466 ret
= read_subbuffer(&shared_trace_info
->channel
.process
, fd_process
);
469 /* The parent thread is dead and we have finished with the buffer */
471 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
472 * there is no writer. */
473 flush_buffer(&shared_trace_info
->channel
.process
, FORCE_FLUSH
);
475 ret
= read_subbuffer(&shared_trace_info
->channel
.process
, fd_process
);
481 ret
= sem_destroy(&shared_trace_info
->channel
.process
.writer_sem
);
483 perror("error in sem_destroy");
485 munmap(shared_trace_info
, sizeof(*shared_trace_info
));
491 /* Reader-writer initialization */
/* Process role: the fork() child in ltt_rw_init() becomes LTT_ROLE_READER;
 * everyone else stays LTT_ROLE_WRITER. Checked by the destructor so only
 * the traced (writer) process runs the cleanup path. */
493 static enum ltt_process_role
{ LTT_ROLE_WRITER
, LTT_ROLE_READER
}
494 role
= LTT_ROLE_WRITER
;
/* Set up tracing for the calling thread: mmap a MAP_SHARED|MAP_ANONYMOUS
 * ltt_trace_info (shared across fork), initialize the process channel and
 * its writer semaphore, block all signals, then fork the dumper daemon.
 * Parent: records the daemon pid and restores its signal mask.
 * Child: becomes LTT_ROLE_READER, setsid()s and runs
 * ltt_usertrace_fast_daemon() (never returns).
 * NOTE(review): the fork() call, error checks and closing braces were
 * dropped by extraction — verify against the original. */
497 void ltt_rw_init(void)
500 struct ltt_trace_info
*shared_trace_info
;
502 sigset_t set
, oldset
;
503 pid_t l_traced_pid
= getpid();
504 pid_t l_traced_tid
= gettid();
506 /* parent : create the shared memory map */
507 shared_trace_info
= mmap(0, sizeof(*thread_trace_info
),
508 PROT_READ
|PROT_WRITE
, MAP_SHARED
|MAP_ANONYMOUS
, 0, 0);
509 shared_trace_info
->init
=0;
510 shared_trace_info
->filter
=0;
511 shared_trace_info
->daemon_id
=0;
512 shared_trace_info
->nesting
=0;
513 memset(&shared_trace_info
->channel
.process
, 0,
514 sizeof(shared_trace_info
->channel
.process
));
/* pshared=1: the semaphore is shared between the traced process and the
 * forked daemon through the MAP_SHARED mapping. */
516 ret
= sem_init(&shared_trace_info
->channel
.process
.writer_sem
, 1,
519 perror("error in sem_init");
521 shared_trace_info
->channel
.process
.alloc_size
= LTT_BUF_SIZE_PROCESS
;
522 shared_trace_info
->channel
.process
.subbuf_size
= LTT_SUBBUF_SIZE_PROCESS
;
523 shared_trace_info
->channel
.process
.start
=
524 shared_trace_info
->channel
.process_buf
;
525 ltt_buffer_begin_callback(&shared_trace_info
->channel
.process
,
526 ltt_get_timestamp(), 0);
/* Mark the shared area as fully initialized before forking the daemon. */
528 shared_trace_info
->init
= 1;
530 /* Disable signals */
531 ret
= sigfillset(&set
);
533 dbg_printf("LTT Error in sigfillset\n");
/* Block everything so the child daemon starts with signals masked; the
 * old mask is handed to sigsuspend() in the daemon loop. */
536 ret
= pthread_sigmask(SIG_BLOCK
, &set
, &oldset
);
538 dbg_printf("LTT Error in pthread_sigmask\n");
/* Parent path: remember the daemon's pid and publish the TLS pointer. */
544 shared_trace_info
->daemon_id
= pid
;
545 thread_trace_info
= shared_trace_info
;
/* Restore the original signal mask in the traced thread. */
548 ret
= pthread_sigmask(SIG_SETMASK
, &oldset
, NULL
);
550 dbg_printf("LTT Error in pthread_sigmask\n");
552 } else if(pid
== 0) {
/* Child path: become the reader daemon. */
555 role
= LTT_ROLE_READER
;
557 //Not a good idea to renice, unless futex wait eventually implement
558 //priority inheritance.
561 // perror("Error in nice");
564 perror("Error setting sid");
566 ltt_usertrace_fast_daemon(shared_trace_info
, oldset
, l_traced_pid
,
568 /* Should never return */
572 perror("LTT Error in forking ltt-usertrace-fast");
/* Per-thread storage for the pthread cleanup handler pushed by
 * ltt_thread_init(); must outlive the thread's cleanup stack entry. */
576 static __thread
struct _pthread_cleanup_buffer cleanup_buffer
;
/* Register ltt_usertrace_fast_cleanup() to run at thread exit so the dumper
 * daemon gets SIGUSR2 and the shared mapping is released.
 * NOTE(review): braces and any call to ltt_rw_init() were dropped by
 * extraction — verify against the original. */
578 void ltt_thread_init(void)
580 _pthread_cleanup_push(&cleanup_buffer
, ltt_usertrace_fast_cleanup
, NULL
);
/* Library constructor: runs at load time for the main thread.
 * NOTE(review): the body beyond the debug print was dropped by extraction. */
584 void __attribute__((constructor
)) __ltt_usertrace_fast_init(void)
586 dbg_printf("LTT usertrace-fast init\n");
/* Library destructor: only the traced (writer) process cleans up here — the
 * forked reader daemon (LTT_ROLE_READER) must not signal itself. */
591 void __attribute__((destructor
)) __ltt_usertrace_fast_fini(void)
593 if(role
== LTT_ROLE_WRITER
) {
594 dbg_printf("LTT usertrace-fast fini\n");
595 ltt_usertrace_fast_cleanup(NULL
);