/* LTTng user-space "fast" library
 *
 * This daemon is spawned by each traced thread (to share the mmap).
 *
 * Its job is to periodically dump this buffer to disk (when it receives a
 * SIGUSR1 from its parent).
 *
 * It uses the control information in the shared memory area (producer/consumer
 * count).
 *
 * When the parent thread dies (yes, those things may happen) ;) , this daemon
 * will flush the last buffer and write it to disk.
 *
 * Supplementary note for streaming : the daemon is responsible for flushing
 * the buffer periodically if it is streaming data.
 *
 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
 * do not use it : we will use a shared mmap() instead, passed through fork()
 * as a MAP_SHARED segment, updated when msync or munmap are called.
 *
 * Memory mapped by mmap() is preserved across fork(2), with the same
 * attributes.
 *
 * Eventually, there will be two modes :
 * * Slow thread spawn : a fork() is done for each new thread. If the process
 *   dies, the data is not lost.
 * * Fast thread spawn : a pthread_create() is done by the application for each
 *   new thread.
 *
 * We use a timer to check periodically if the parent died. I think it is less
 * intrusive than a ptrace() on the parent, which would get every signal. The
 * side effect of this is that we won't be notified if the parent does an
 * exec(). In this case, we will just sit there until the parent exits.
 *
 * Copyright 2006 Mathieu Desnoyers
 */
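
/*
 * Overview sketch of the roles wired up below (derived from this file; the
 * arrows are signals/calls, not an API) :
 *
 *	traced thread (writer)             dumper daemon (forked child)
 *	----------------------             ----------------------------
 *	ltt_rw_init() : mmap + fork ---->  ltt_usertrace_fast_daemon()
 *	buffer switch : SIGUSR1     ---->  read_subbuffer() writes to disk
 *	thread exit : SIGUSR2       ---->  flush last buffer, exit
 *	silent death (no signal)    ---->  noticed by SIGALRM getppid() poll
 */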
#define inline inline __attribute__((always_inline))

#define _GNU_SOURCE
#define LTT_TRACE_FAST
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <syscall.h>
#include <pthread.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/param.h>
#include <errno.h>
#include <semaphore.h>

// included with hack for powerpc in ltt-usertrace.h : #include <asm/atomic.h>
#include <asm/timex.h> //for get_cycles()
_syscall0(pid_t, gettid)
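
/* Note : the _syscall0() macro is obsolete in modern kernel headers. A
 * hypothetical equivalent (not part of the original build) would use
 * syscall(2) from <sys/syscall.h> :
 *
 *	static inline pid_t gettid(void)
 *	{
 *		return (pid_t)syscall(SYS_gettid);
 *	}
 */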
#include <ltt/ltt-usertrace.h>

#ifdef LTT_SHOW_DEBUG
#define dbg_printf(...) printf(__VA_ARGS__)
#else
#define dbg_printf(...)
#endif //LTT_SHOW_DEBUG
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
/* Writer (the traced application) */

__thread struct ltt_trace_info *thread_trace_info = NULL;
void ltt_usertrace_fast_buffer_switch(void)
{
	struct ltt_trace_info *tmp = thread_trace_info;
	if(tmp)
		kill(tmp->daemon_id, SIGUSR1);
}
/* The cleanup should never be called from a signal handler */
static void ltt_usertrace_fast_cleanup(void *arg)
{
	struct ltt_trace_info *tmp = thread_trace_info;
	if(tmp) {
		thread_trace_info = NULL;
		kill(tmp->daemon_id, SIGUSR2);
		munmap(tmp, sizeof(*tmp));
	}
}
/* Reader (the disk dumper daemon) */

static pid_t traced_pid = 0;
static pid_t traced_tid = 0;
static int parent_exited = 0;
static int fd_process = -1;
static char outfile_name[PATH_MAX];
static char identifier_name[PATH_MAX];
/* signal handling */

static void handler_sigusr1(int signo)
{
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}

static void handler_sigusr2(int signo)
{
	dbg_printf("LTT Signal %d received : parent exited.\n", signo);
	parent_exited = 1;
}

static void handler_sigalarm(int signo)
{
	dbg_printf("LTT Signal %d received\n", signo);

	if(getppid() != traced_pid) {
		/* Parent died */
		dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
		traced_pid = 0;
	}
	alarm(3);
}
/* Do a buffer switch. Don't switch if buffer is completely empty */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
{
	uint64_t tsc;
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;

	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;

		tsc = ltt_get_timestamp();
		if(tsc == 0) {
			/* Error in getting the timestamp : should not happen : it would
			 * mean we are called from an NMI during a write seqlock on xtime. */
			return;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
		} else {
			/* we do not have to switch : buffer is empty */
			return;
		}
		if(mode == FORCE_ACTIVE)
			offset_begin += ltt_subbuf_header_len(ltt_buf);
		/* Always begin_switch in FORCE_ACTIVE mode */

		/* Test new buffer integrity */
		reserve_commit_diff =
			atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
			- atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		if(reserve_commit_diff == 0) {
			/* Next buffer not corrupted. */
			if(mode == FORCE_ACTIVE
					&& (offset_begin - atomic_read(&ltt_buf->consumed))
						>= ltt_buf->alloc_size) {
				/* We do not overwrite non consumed buffers and we are full : ignore
				   switch while tracing is active. */
				return;
			}
		} else {
			/* Next subbuffer corrupted. Force pushing reader even in normal mode */
		}

		offset_end = offset_begin;
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);

	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		do {
			consumed_old = atomic_read(&ltt_buf->consumed);
			/* If buffer is in overwrite mode, push the reader consumed count if
			   the write position has reached it and we are not at the first
			   iteration (don't push the reader farther than the writer).
			   This operation can be done concurrently by many writers in the
			   same buffer, the writer at the farthest write position sub-buffer
			   index in the buffer being the one which will win this loop. */
			/* If the buffer is not in overwrite mode, pushing the reader only
			   happens if a sub-buffer is corrupted */
			if((SUBBUF_TRUNC(offset_end - 1, ltt_buf)
					- SUBBUF_TRUNC(consumed_old, ltt_buf))
						>= ltt_buf->alloc_size)
				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
			else
				consumed_new = consumed_old;
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
				!= consumed_old);

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   reequilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a non completely committed sub-buffer :
			   possible causes : the buffer size is too low compared to the unordered
			   data input, or there is a writer who died between the reserve and the
			   commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted */
				atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);
			}
		}
	}

	/* Concurrency safe because we are the last and only thread to alter this
	   sub-buffer. As long as it is not delivered and read, no other thread can
	   alter the offset, alter the reserve_count or call the
	   client_buffer_end_callback on this sub-buffer.
	   The only remaining threads could be the ones with pending commits. They
	   will have to do the deliver themselves.
	   Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
	   commit and reserve counts. We keep a corrupted sub-buffers count and push
	   the readers across these sub-buffers.
	   Not concurrency safe if a writer is stalled in a subbuffer and
	   another writer switches in, finding out it's corrupted. The result will be
	   that the old (uncommitted) subbuffer will be declared corrupted, and that
	   the new subbuffer will be declared corrupted too because of the commit
	   count adjustment.
	   Offset old should never be 0. */
	ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
			SUBBUF_INDEX((offset_old), ltt_buf));
	/* Setting this reserve_count will allow the sub-buffer to be delivered by
	   the last committer. */
	reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old - 1),
			ltt_buf) + 1),
			&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old), ltt_buf)]);
	if(reserve_count == atomic_read(
			&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
		ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
	}

	if(mode == FORCE_ACTIVE) {
		/* Switch in a new sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
			&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
		}
	}
}
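
/*
 * Worked example of the sub-buffer arithmetic used above, assuming the usual
 * power-of-2 mask definitions of the SUBBUF_* macros in the ltt-usertrace
 * headers : with subbuf_size = 4096 and a write offset of 5000,
 * SUBBUF_OFFSET(5000) = 5000 & 4095 = 904 (non-zero : the current sub-buffer
 * is partially filled, so a switch is needed), SUBBUF_TRUNC(5000) = 4096
 * (start of the current sub-buffer) and SUBBUF_ALIGN(5000) = 8192 (start of
 * the next one). The cmpxchg loop above therefore tries to move the write
 * offset from 5000 to 8192.
 */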
static int open_output_files(void)
{
	int fd;
	int ret;
	/* Open output files */
	ret = mkdir(LTT_USERTRACE_ROOT, 0777);
	if(ret < 0 && errno != EEXIST) {
		perror("LTT Error in creating output (mkdir)");
		exit(-1);
	}
	ret = chdir(LTT_USERTRACE_ROOT);
	if(ret < 0) {
		perror("LTT Error in creating output (chdir)");
		exit(-1);
	}
	snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
			traced_tid, traced_pid, get_cycles());
	snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);

#ifndef LTT_NULL_OUTPUT_TEST
	fd = creat(outfile_name, 0644);
#else
	ret = symlink("/dev/null", outfile_name);
	if(ret < 0) {
		perror("error in symlink");
	}
	fd = open(outfile_name, O_WRONLY);
	if(fd < 0) {
		perror("Error in open");
	}
#endif //LTT_NULL_OUTPUT_TEST
	return fd;
}
static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
		unsigned int *offset)
{
	unsigned int consumed_old, consumed_idx;
	consumed_old = atomic_read(&ltt_buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);

	if(atomic_read(&ltt_buf->commit_count[consumed_idx])
			!= atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
		return -EAGAIN;
	}
	if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
			- SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
		return -EAGAIN;
	}

	*offset = consumed_old;

	return 0;
}
static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
		unsigned int offset)
{
	unsigned int consumed_old, consumed_new;
	int ret;

	consumed_old = offset;
	consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
	if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
			!= consumed_old) {
		/* We have been pushed by the writer : the last buffer read _is_
		 * corrupted!
		 * It can also happen if this is a buffer we never got. */
		return -EIO;
	} else {
		if(traced_pid == 0 || parent_exited) return 0;

		ret = sem_post(&ltt_buf->writer_sem);
		if(ret < 0) {
			printf("error in sem_post");
		}
	}
	return 0;
}
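
/*
 * The two helpers above form the daemon's consume protocol : get a filled
 * sub-buffer, write it out, put it back so the writer may reuse it.
 * read_subbuffer() below is the real implementation; a minimal sketch of the
 * pairing (buf and fd assumed already set up) :
 *
 *	unsigned int offset;
 *	while(ltt_buffer_get(buf, &offset) == 0) {
 *		write(fd, buf->start + (offset & (buf->alloc_size - 1)),
 *				buf->subbuf_size);
 *		ltt_buffer_put(buf, offset);	// -EIO : the writer pushed us
 *	}
 */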
static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
{
	unsigned int consumed_old;
	int err;
	dbg_printf("LTT read buffer\n");

	err = ltt_buffer_get(ltt_buf, &consumed_old);
	if(err != 0) {
		if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
		goto get_error;
	}
	if(fd_process == -1) {
		fd_process = fd = open_output_files();
	}

	err = TEMP_FAILURE_RETRY(write(fd,
			ltt_buf->start
			+ (consumed_old & ((ltt_buf->alloc_size)-1)),
			ltt_buf->subbuf_size));
	if(err < 0) {
		perror("Error in writing to file");
		goto write_error;
	}
#if 0
	err = fsync(pair->trace);
	if(err < 0) {
		perror("Error in writing to file");
		goto write_error;
	}
#endif //0

write_error:
	err = ltt_buffer_put(ltt_buf, consumed_old);
	if(err != 0) {
		if(err == -EIO) {
			dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
			/* FIXME : we may delete the last written buffer if we wish. */
		}
		goto get_error;
	}

get_error:
	return err;
}
/* This function is called by ltt_rw_init which has signals blocked */
static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
		sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
{
	struct sigaction act;
	int ret;

	traced_pid = l_traced_pid;
	traced_tid = l_traced_tid;

	dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
			shared_trace_info->init, getpid(), traced_pid, traced_tid);

	act.sa_handler = handler_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR1);
	sigaction(SIGUSR1, &act, NULL);

	act.sa_handler = handler_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR2);
	sigaction(SIGUSR2, &act, NULL);

	act.sa_handler = handler_sigalarm;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGALRM);
	sigaction(SIGALRM, &act, NULL);

	alarm(3);

	while(1) {
		ret = sigsuspend(&oldset);
		if(ret != -1) {
			perror("LTT Error in sigsuspend\n");
		}
		if(traced_pid == 0) break; /* parent died */
		if(parent_exited) break;
		dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());

		do {
			ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
		} while(ret == 0);
	}
	/* The parent thread is dead and we have finished with the buffer */

	/* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
	 * there is no writer. */
	flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
	do {
		ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
	} while(ret == 0);

	if(fd_process != -1)
		close(fd_process);

	ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
	if(ret < 0) {
		perror("error in sem_destroy");
	}

	munmap(shared_trace_info, sizeof(*shared_trace_info));

	exit(0);
}
/* Reader-writer initialization */

static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER }
	role = LTT_ROLE_WRITER;
void ltt_rw_init(void)
{
	pid_t pid;
	struct ltt_trace_info *shared_trace_info;
	int ret;
	sigset_t set, oldset;
	pid_t l_traced_pid = getpid();
	pid_t l_traced_tid = gettid();

	/* parent : create the shared memory map */
	shared_trace_info = mmap(0, sizeof(*thread_trace_info),
			PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
	shared_trace_info->init = 0;
	shared_trace_info->filter = 0;
	shared_trace_info->daemon_id = 0;
	shared_trace_info->nesting = 0;
	memset(&shared_trace_info->channel.process, 0,
			sizeof(shared_trace_info->channel.process));
	ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
			LTT_N_SUBBUFS);
	if(ret < 0) {
		perror("error in sem_init");
	}
	shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
	shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
	shared_trace_info->channel.process.start =
		shared_trace_info->channel.process_buf;
	ltt_buffer_begin_callback(&shared_trace_info->channel.process,
			ltt_get_timestamp(), 0);

	shared_trace_info->init = 1;

	/* Disable signals */
	ret = sigfillset(&set);
	if(ret) {
		dbg_printf("LTT Error in sigfillset\n");
	}

	ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
	if(ret) {
		dbg_printf("LTT Error in pthread_sigmask\n");
	}

	pid = fork();
	if(pid > 0) {
		/* Parent */
		shared_trace_info->daemon_id = pid;
		thread_trace_info = shared_trace_info;

		/* Enable signals */
		ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
		if(ret) {
			dbg_printf("LTT Error in pthread_sigmask\n");
		}
	} else if(pid == 0) {
		pid_t sid;
		/* Child */
		role = LTT_ROLE_READER;
		sid = setsid();
		//Not a good idea to renice, unless futex wait eventually implements
		//priority inheritance.
		//ret = nice(1);
		//if(ret < 0) {
		//	perror("Error in nice");
		//}
		if(sid < 0) {
			perror("Error setting sid");
		}
		ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
				l_traced_tid);
		/* Should never return */
		exit(-1);
	} else if(pid < 0) {
		/* fork error */
		perror("LTT Error in forking ltt-usertrace-fast");
	}
}
static __thread struct _pthread_cleanup_buffer cleanup_buffer;

void ltt_thread_init(void)
{
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
	ltt_rw_init();
}
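
/*
 * Sketch of intended use from an instrumented pthread application (worker()
 * and its body are hypothetical; only ltt_thread_init() is from this file) :
 *
 *	void *worker(void *arg)
 *	{
 *		ltt_thread_init();	// fork the dumper daemon, register cleanup
 *		// ... traced work : probes write into the shared mmap buffer ...
 *		return NULL;		// cleanup handler sends SIGUSR2 to the daemon
 *	}
 *
 * The main thread is covered by the library constructor below.
 */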
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");

	ltt_rw_init();
}

void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
{
	if(role == LTT_ROLE_WRITER) {
		dbg_printf("LTT usertrace-fast fini\n");
		ltt_usertrace_fast_cleanup(NULL);
	}
}