1 /* LTTng user-space "fast" library
3 * This daemon is spawned by each traced thread (to share the mmap).
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
8 * It uses the control information in the shared memory area (producer/consumer
11 * When the parent thread dies (yes, those things may happen) ;), this daemon
12 * will flush the last buffer and write it to disk.
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
28 * Eventually, there will be two modes :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
40 * Copyright 2006 Mathieu Desnoyers
44 #define inline inline __attribute__((always_inline))
48 #define LTT_TRACE_FAST
62 #include <sys/param.h>
64 #include <sys/types.h>
69 #include <sys/syscall.h>
71 // included with hack for powerpc in ltt-usertrace.h #include <asm/atomic.h>
72 #include <asm/timex.h> //for get_cycles()
74 #include <ltt/ltt-usertrace.h>
76 #define gettid() syscall(__NR_gettid)
79 #define dbg_printf(...) printf(__VA_ARGS__)
81 #define dbg_printf(...)
82 #endif //LTT_SHOW_DEBUG
/* Mode for a forced sub-buffer switch:
 * FORCE_ACTIVE - the traced application may still be writing concurrently;
 * FORCE_FLUSH  - final flush, used when we know there is no writer left
 *                (see the FORCE_FLUSH call in the daemon shutdown path). */
85 enum force_switch_mode
{ FORCE_ACTIVE
, FORCE_FLUSH
};
87 /* Writer (the traced application) */
/* Per-thread pointer to the shared (mmap'ed) trace control structure.
 * NULL until ltt_rw_init() has run for this thread; reset to NULL by
 * ltt_usertrace_fast_cleanup(). */
89 __thread
struct ltt_trace_info
*thread_trace_info
= NULL
;
91 void ltt_usertrace_fast_buffer_switch(void)
93 struct ltt_trace_info
*tmp
= thread_trace_info
;
95 kill(tmp
->daemon_id
, SIGUSR1
);
98 /* The cleanup should never be called from a signal handler */
99 static void ltt_usertrace_fast_cleanup(void *arg
)
101 struct ltt_trace_info
*tmp
= thread_trace_info
;
103 thread_trace_info
= NULL
;
104 kill(tmp
->daemon_id
, SIGUSR2
);
105 munmap(tmp
, sizeof(*tmp
));
109 /* Reader (the disk dumper daemon) */
/* All of the state below lives in the forked daemon process only. */
/* pid of the traced parent thread; set to 0 once the parent is known dead
 * (checked by the daemon main loop as an exit condition). */
111 static pid_t traced_pid
= 0;
/* tid of the traced thread, used to build a unique output file name. */
112 static pid_t traced_tid
= 0;
/* Set when the parent notified us it exited (SIGUSR2); loop exit flag. */
113 static int parent_exited
= 0;
/* Output file descriptor for the "process" channel; -1 = not yet opened
 * (opened lazily on the first sub-buffer read, see read_subbuffer). */
114 static int fd_process
= -1;
/* Output file name, "process-<identifier>". */
115 static char outfile_name
[PATH_MAX
];
/* Unique identifier built from tid, pid and a cycle-counter sample. */
116 static char identifier_name
[PATH_MAX
];
118 /* signal handling */
/* SIGUSR1 from the traced parent: a buffer-switch request.  The handler
 * itself only logs; the actual sub-buffer read happens in the daemon main
 * loop once sigsuspend() returns (handlers must stay async-signal-safe). */
119 static void handler_sigusr1(int signo
)
121 dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo
);
124 static void handler_sigusr2(int signo
)
126 dbg_printf("LTT Signal %d received : parent exited.\n", signo
);
130 static void handler_sigalarm(int signo
)
132 dbg_printf("LTT Signal %d received\n", signo
);
134 if(getppid() != traced_pid
) {
136 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid
);
142 /* Do a buffer switch. Don't switch if buffer is completely empty */
/* Forces a sub-buffer switch on ltt_buf using lock-free cmpxchg retry
 * loops on the offset and consumed counters.  In FORCE_ACTIVE mode the
 * writer may still be alive, so the reader may be pushed and a new
 * sub-buffer header is reserved/committed; FORCE_FLUSH assumes no writer
 * remains (used at daemon shutdown).
 *
 * NOTE(review): this listing is mangled by extraction - "&ltt_buf" appears
 * throughout as "<t_buf" (HTML "&lt;" damage), and several original lines
 * (opening braces, the "do {" openers matched by the two "} while(...)"
 * lines, and early "return" statements) are missing from view.  Restore
 * from the pristine source before compiling; the comments below describe
 * intent only. */
143 static void flush_buffer(struct ltt_buf
*ltt_buf
, enum force_switch_mode mode
)
146 int offset_begin
, offset_end
, offset_old
;
147 int reserve_commit_diff
;
148 int consumed_old
, consumed_new
;
149 int commit_count
, reserve_count
;
/* Snapshot the current write offset; retried via cmpxchg below if a
 * concurrent writer moved it in the meantime. */
153 offset_old
= atomic_read(<t_buf
->offset
);
154 offset_begin
= offset_old
;
156 tsc
= ltt_get_timestamp();
158 /* Error in getting the timestamp : should not happen : it would
159 * mean we are called from an NMI during a write seqlock on xtime. */
/* If we are not exactly at a sub-buffer boundary, round the begin offset
 * up to the next sub-buffer; otherwise the current one is empty and there
 * is nothing to switch. */
163 if(SUBBUF_OFFSET(offset_begin
, ltt_buf
) != 0) {
164 offset_begin
= SUBBUF_ALIGN(offset_begin
, ltt_buf
);
167 /* we do not have to switch : buffer is empty */
170 if(mode
== FORCE_ACTIVE
)
171 offset_begin
+= ltt_subbuf_header_len(ltt_buf
);
172 /* Always begin_switch in FORCE_ACTIVE mode */
174 /* Test new buffer integrity */
/* reserve - commit of the target sub-buffer; non-zero means it was never
 * fully committed (corrupted), see the re-equilibration branch below. */
175 reserve_commit_diff
=
177 <t_buf
->reserve_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)])
179 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
180 if(reserve_commit_diff
== 0) {
181 /* Next buffer not corrupted. */
182 if(mode
== FORCE_ACTIVE
183 && (offset_begin
-atomic_read(<t_buf
->consumed
))
184 >= ltt_buf
->alloc_size
) {
185 /* We do not overwrite non consumed buffers and we are full : ignore
186 switch while tracing is active. */
190 /* Next subbuffer corrupted. Force pushing reader even in normal mode */
193 offset_end
= offset_begin
;
/* Publish the new offset; retry the whole computation if a concurrent
 * writer changed ltt_buf->offset since we read offset_old. */
194 } while(atomic_cmpxchg(<t_buf
->offset
, offset_old
, offset_end
)
198 if(mode
== FORCE_ACTIVE
) {
199 /* Push the reader if necessary */
201 consumed_old
= atomic_read(<t_buf
->consumed
);
202 /* If buffer is in overwrite mode, push the reader consumed count if
203 the write position has reached it and we are not at the first
204 iteration (don't push the reader farther than the writer).
205 This operation can be done concurrently by many writers in the
206 same buffer, the writer being at the farthest write position sub-buffer
207 index in the buffer being the one which will win this loop. */
208 /* If the buffer is not in overwrite mode, pushing the reader only
209 happens if a sub-buffer is corrupted */
210 if((SUBBUF_TRUNC(offset_end
-1, ltt_buf
)
211 - SUBBUF_TRUNC(consumed_old
, ltt_buf
))
212 >= ltt_buf
->alloc_size
)
213 consumed_new
= SUBBUF_ALIGN(consumed_old
, ltt_buf
);
215 consumed_new
= consumed_old
;
/* Second cmpxchg retry loop: advance the consumed counter, racing with
 * other writers doing the same push. */
218 } while(atomic_cmpxchg(<t_buf
->consumed
, consumed_old
, consumed_new
)
221 if(consumed_old
!= consumed_new
) {
222 /* Reader pushed : we are the winner of the push, we can therefore
223 re-equilibrate reserve and commit. Atomic increment of the commit
224 count permits other writers to play around with this variable
225 before us. We keep track of corrupted_subbuffers even in overwrite
227 we never want to write over a non completely committed sub-buffer :
228 possible causes : the buffer size is too low compared to the unordered
229 data input, or there is a writer who died between the reserve and the
231 if(reserve_commit_diff
) {
232 /* We have to alter the sub-buffer commit count : a sub-buffer is
234 atomic_add(reserve_commit_diff
,
235 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
236 atomic_inc(<t_buf
->corrupted_subbuffers
);
245 /* Concurrency safe because we are the last and only thread to alter this
246 sub-buffer. As long as it is not delivered and read, no other thread can
247 alter the offset, alter the reserve_count or call the
248 client_buffer_end_callback on this sub-buffer.
249 The only remaining threads could be the ones with pending commits. They
250 will have to do the deliver themselves.
251 Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
252 commit and reserve counts. We keep a corrupted sub-buffers count and push
253 the readers across these sub-buffers.
254 Not concurrency safe if a writer is stalled in a subbuffer and
255 another writer switches in, finding out it's corrupted. The result will be
256 that the old (uncommitted) subbuffer will be declared corrupted, and that
257 the new subbuffer will be declared corrupted too because of the commit
259 Offset old should never be 0. */
260 ltt_buffer_end_callback(ltt_buf
, tsc
, offset_old
,
261 SUBBUF_INDEX((offset_old
), ltt_buf
));
262 /* Setting this reserve_count will allow the sub-buffer to be delivered by
263 the last committer. */
264 reserve_count
= atomic_add_return((SUBBUF_OFFSET((offset_old
-1),
266 <t_buf
->reserve_count
[SUBBUF_INDEX((offset_old
),
/* If reserve and commit now agree, we are the last committer of the old
 * sub-buffer: deliver it ourselves. */
268 if(reserve_count
== atomic_read(
269 <t_buf
->commit_count
[SUBBUF_INDEX((offset_old
), ltt_buf
)])) {
270 ltt_deliver_callback(ltt_buf
, SUBBUF_INDEX((offset_old
), ltt_buf
), NULL
);
274 if(mode
== FORCE_ACTIVE
) {
276 /* This code can be executed unordered : writers may already have written
277 to the sub-buffer before this code gets executed, caution. */
278 /* The commit makes sure that this code is executed before the deliver
279 of this sub-buffer */
280 ltt_buffer_begin_callback(ltt_buf
, tsc
, SUBBUF_INDEX(offset_begin
, ltt_buf
));
281 commit_count
= atomic_add_return(ltt_subbuf_header_len(ltt_buf
),
282 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
283 /* Check if the written buffer has to be delivered */
284 if(commit_count
== atomic_read(
285 <t_buf
->reserve_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)])) {
286 ltt_deliver_callback(ltt_buf
, SUBBUF_INDEX(offset_begin
, ltt_buf
), NULL
);
/* Creates the trace output directory (LTT_USERTRACE_ROOT) if needed,
 * chdirs into it, builds a unique per-thread file name from
 * tid.pid.cycle-counter, and opens the output file.
 * Returns the open file descriptor (fd).
 * With LTT_NULL_OUTPUT_TEST defined, the "file" is a symlink to /dev/null
 * instead (throughput testing without disk writes).
 * NOTE(review): several original lines (returns on error, the #else of the
 * #ifndef, the final return) are missing from this listing. */
293 static int open_output_files(void)
297 /* Open output files */
299 ret
= mkdir(LTT_USERTRACE_ROOT
, 0777);
/* EEXIST is fine: the directory may already have been created by another
 * traced process. */
300 if(ret
< 0 && errno
!= EEXIST
) {
301 perror("LTT Error in creating output (mkdir)");
304 ret
= chdir(LTT_USERTRACE_ROOT
);
306 perror("LTT Error in creating output (chdir)");
/* Unique name: tid.pid.cycle-count; PATH_MAX-1 keeps room for the NUL.
 * NOTE(review): traced_tid/traced_pid are pid_t printed with %lu -
 * presumably harmless on this platform but a format mismatch; confirm. */
309 snprintf(identifier_name
, PATH_MAX
-1, "%lu.%lu.%llu",
310 traced_tid
, traced_pid
, get_cycles());
311 snprintf(outfile_name
, PATH_MAX
-1, "process-%s", identifier_name
);
313 #ifndef LTT_NULL_OUTPUT_TEST
314 fd
= creat(outfile_name
, 0644);
/* Test mode: point the output at /dev/null via a symlink. */
317 ret
= symlink("/dev/null", outfile_name
);
319 perror("error in symlink");
322 fd
= open(outfile_name
, O_WRONLY
);
324 perror("Error in open");
327 #endif //LTT_NULL_OUTPUT_TEST
/* Reader side: try to reserve the oldest filled sub-buffer for reading.
 * On success stores the consumed offset in *offset and returns 0; fails
 * (the missing lines presumably "return -EAGAIN") when the sub-buffer is
 * not fully committed yet or the buffer is empty.
 * NOTE(review): "&ltt_buf" is mangled to "<t_buf" throughout, and the
 * early-return lines are missing from this listing. */
331 static inline int ltt_buffer_get(struct ltt_buf
*ltt_buf
,
332 unsigned int *offset
)
334 unsigned int consumed_old
, consumed_idx
;
335 consumed_old
= atomic_read(<t_buf
->consumed
);
336 consumed_idx
= SUBBUF_INDEX(consumed_old
, ltt_buf
);
/* Sub-buffer not fully committed yet: writers still have pending commits
 * in it, so it cannot be read. */
338 if(atomic_read(<t_buf
->commit_count
[consumed_idx
])
339 != atomic_read(<t_buf
->reserve_count
[consumed_idx
])) {
/* Write position and consumed position are in the same sub-buffer:
 * nothing complete to read (buffer empty from the reader's view). */
342 if((SUBBUF_TRUNC(atomic_read(<t_buf
->offset
), ltt_buf
)
343 -SUBBUF_TRUNC(consumed_old
, ltt_buf
)) == 0) {
347 *offset
= consumed_old
;
/* Reader side: release a sub-buffer previously obtained by
 * ltt_buffer_get(), advancing the consumed counter to the next sub-buffer
 * boundary and waking the writer blocked on writer_sem.
 * NOTE(review): "&ltt_buf" is mangled to "<t_buf", the "offset" parameter
 * declaration line and several return lines are missing from this listing. */
352 static inline int ltt_buffer_put(struct ltt_buf
*ltt_buf
,
355 unsigned int consumed_old
, consumed_new
;
358 consumed_old
= offset
;
359 consumed_new
= SUBBUF_ALIGN(consumed_old
, ltt_buf
);
/* cmpxchg failure means the writer advanced consumed underneath us. */
360 if(atomic_cmpxchg(<t_buf
->consumed
, consumed_old
, consumed_new
)
362 /* We have been pushed by the writer : the last buffer read _is_
364 * It can also happen if this is a buffer we never got. */
/* Parent gone: no writer is waiting on the semaphore, skip the post. */
367 if(traced_pid
== 0 || parent_exited
) return 0;
369 ret
= sem_post(<t_buf
->writer_sem
);
371 printf("error in sem_post");
/* Daemon side: reserve the oldest filled sub-buffer, write it to the
 * output file (opened lazily on first use), fsync, then release it.
 * NOTE(review): error-path lines (returns, goto labels) are missing from
 * this listing, and "&ltt_buf" damage may be present in hidden lines. */
376 static int read_subbuffer(struct ltt_buf
*ltt_buf
, int fd
)
378 unsigned int consumed_old
;
380 dbg_printf("LTT read buffer\n");
383 err
= ltt_buffer_get(ltt_buf
, &consumed_old
);
/* -EAGAIN just means nothing complete to read yet - not an error. */
385 if(err
!= -EAGAIN
) dbg_printf("LTT Reserving sub buffer failed\n");
/* Lazy open: the output file is created on the first sub-buffer so that
 * completely idle threads never create a file. */
388 if(fd_process
== -1) {
389 fd_process
= fd
= open_output_files();
/* Write one full sub-buffer, retrying on EINTR; the source address is
 * the buffer start plus the consumed offset masked into the buffer. */
392 err
= TEMP_FAILURE_RETRY(write(fd
,
394 + (consumed_old
& ((ltt_buf
->alloc_size
)-1)),
395 ltt_buf
->subbuf_size
));
398 perror("Error in writing to file");
/* NOTE(review): "pair" is not declared anywhere in this function - this
 * looks like a leftover from another version; presumably it should fsync
 * the local fd. Confirm against the pristine source. */
402 err
= fsync(pair
->trace
);
405 perror("Error in writing to file");
410 err
= ltt_buffer_put(ltt_buf
, consumed_old
);
414 dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
415 /* FIXME : we may delete the last written buffer if we wish. */
424 /* This function is called by ltt_rw_init which has signals blocked */
/* Main body of the forked dumper daemon.  Installs SIGUSR1 (buffer
 * switch), SIGUSR2 (parent exited) and SIGALRM (periodic parent liveness
 * check) handlers, then loops on sigsuspend(): each wakeup drains the
 * process channel to disk.  On exit (parent dead or exited) it performs a
 * final FORCE_FLUSH, reads the last sub-buffer, destroys the writer
 * semaphore and unmaps the shared area.
 * NOTE(review): the loop construct, alarm() arming, and several
 * error-handling lines are missing from this listing. */
425 static void ltt_usertrace_fast_daemon(struct ltt_trace_info
*shared_trace_info
,
426 sigset_t oldset
, pid_t l_traced_pid
, pthread_t l_traced_tid
)
428 struct sigaction act
;
431 traced_pid
= l_traced_pid
;
432 traced_tid
= l_traced_tid
;
/* NOTE(review): pid_t/pthread_t printed with %lu - format mismatch on
 * some ABIs; confirm against pristine source. */
434 dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
435 shared_trace_info
->init
, getpid(), traced_pid
, traced_tid
);
/* Install the three handlers; each blocks its own signal while running. */
437 act
.sa_handler
= handler_sigusr1
;
439 sigemptyset(&(act
.sa_mask
));
440 sigaddset(&(act
.sa_mask
), SIGUSR1
);
441 sigaction(SIGUSR1
, &act
, NULL
);
443 act
.sa_handler
= handler_sigusr2
;
445 sigemptyset(&(act
.sa_mask
));
446 sigaddset(&(act
.sa_mask
), SIGUSR2
);
447 sigaction(SIGUSR2
, &act
, NULL
);
449 act
.sa_handler
= handler_sigalarm
;
451 sigemptyset(&(act
.sa_mask
));
452 sigaddset(&(act
.sa_mask
), SIGALRM
);
453 sigaction(SIGALRM
, &act
, NULL
);
/* Atomically unblock signals (restoring the caller's pre-block mask) and
 * wait; sigsuspend always returns -1/EINTR when a handler ran. */
458 ret
= sigsuspend(&oldset
);
460 perror("LTT Error in sigsuspend\n");
462 if(traced_pid
== 0) break; /* parent died */
463 if(parent_exited
) break;
464 dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
467 ret
= read_subbuffer(&shared_trace_info
->channel
.process
, fd_process
);
470 /* The parent thread is dead and we have finished with the buffer */
472 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
473 * there is no writer. */
474 flush_buffer(&shared_trace_info
->channel
.process
, FORCE_FLUSH
);
476 ret
= read_subbuffer(&shared_trace_info
->channel
.process
, fd_process
);
482 ret
= sem_destroy(&shared_trace_info
->channel
.process
.writer_sem
);
484 perror("error in sem_destroy");
486 munmap(shared_trace_info
, sizeof(*shared_trace_info
));
492 /* Reader-writer initialization */
/* Which side of the fork this process is.  Defaults to WRITER (the traced
 * application); the forked daemon flips it to READER so the destructor
 * (__ltt_usertrace_fast_fini) only runs cleanup in the writer. */
494 static enum ltt_process_role
{ LTT_ROLE_WRITER
, LTT_ROLE_READER
}
495 role
= LTT_ROLE_WRITER
;
/* Sets up tracing for the calling thread: creates the shared anonymous
 * mmap holding the trace control info and the process-channel buffer,
 * initializes it, blocks all signals, forks the dumper daemon (child
 * calls ltt_usertrace_fast_daemon and never returns), then restores the
 * signal mask in the parent and publishes the mapping in
 * thread_trace_info.
 * NOTE(review): the fork() call, the sem_init value argument, and several
 * error-path lines are missing from this listing. */
498 void ltt_rw_init(void)
501 struct ltt_trace_info
*shared_trace_info
;
503 sigset_t set
, oldset
;
504 pid_t l_traced_pid
= getpid();
505 pid_t l_traced_tid
= gettid();
507 /* parent : create the shared memory map */
/* MAP_SHARED|MAP_ANONYMOUS: preserved across fork, so the daemon sees the
 * same pages (see file header rationale for mmap over SysV shm).
 * NOTE(review): the mmap return value is not checked against MAP_FAILED
 * before the dereferences below - confirm/fix in the pristine source. */
508 shared_trace_info
= mmap(0, sizeof(*thread_trace_info
),
509 PROT_READ
|PROT_WRITE
, MAP_SHARED
|MAP_ANONYMOUS
, 0, 0);
510 shared_trace_info
->init
=0;
511 shared_trace_info
->filter
=0;
512 shared_trace_info
->daemon_id
=0;
513 shared_trace_info
->nesting
=0;
514 memset(&shared_trace_info
->channel
.process
, 0,
515 sizeof(shared_trace_info
->channel
.process
));
/* pshared=1: the semaphore is shared between the writer process and the
 * forked daemon through the shared mapping. */
517 ret
= sem_init(&shared_trace_info
->channel
.process
.writer_sem
, 1,
520 perror("error in sem_init");
522 shared_trace_info
->channel
.process
.alloc_size
= LTT_BUF_SIZE_PROCESS
;
523 shared_trace_info
->channel
.process
.subbuf_size
= LTT_SUBBUF_SIZE_PROCESS
;
524 shared_trace_info
->channel
.process
.start
=
525 shared_trace_info
->channel
.process_buf
;
526 ltt_buffer_begin_callback(&shared_trace_info
->channel
.process
,
527 ltt_get_timestamp(), 0);
/* Mark the shared area ready before forking the daemon. */
529 shared_trace_info
->init
= 1;
531 /* Disable signals */
/* Block everything around fork() so the child starts with a clean mask;
 * the pre-block mask (oldset) is handed to the daemon for sigsuspend. */
532 ret
= sigfillset(&set
);
534 dbg_printf("LTT Error in sigfillset\n");
537 ret
= pthread_sigmask(SIG_BLOCK
, &set
, &oldset
);
539 dbg_printf("LTT Error in pthread_sigmask\n");
/* Parent branch: remember the daemon pid and publish the mapping. */
545 shared_trace_info
->daemon_id
= pid
;
546 thread_trace_info
= shared_trace_info
;
549 ret
= pthread_sigmask(SIG_SETMASK
, &oldset
, NULL
);
551 dbg_printf("LTT Error in pthread_sigmask\n");
553 } else if(pid
== 0) {
/* Child branch: become the reader daemon in its own session. */
556 role
= LTT_ROLE_READER
;
558 //Not a good idea to renice, unless futex wait eventually implements
559 //priority inheritance.
562 // perror("Error in nice");
565 perror("Error setting sid");
567 ltt_usertrace_fast_daemon(shared_trace_info
, oldset
, l_traced_pid
,
569 /* Should never return */
573 perror("LTT Error in forking ltt-usertrace-fast");
/* Per-thread storage for the pthread cleanup handler registered by
 * ltt_thread_init (glibc-internal _pthread_cleanup_buffer). */
577 static __thread
struct _pthread_cleanup_buffer cleanup_buffer
;
/* Registers ltt_usertrace_fast_cleanup to run at thread exit, so the
 * dumper daemon is notified (SIGUSR2) and the shared mapping released
 * when the traced thread terminates.
 * NOTE(review): uses glibc-internal _pthread_cleanup_push rather than the
 * public pthread_cleanup_push macro - presumably deliberate, to escape
 * the macro's block-scope pairing requirement; confirm. */
579 void ltt_thread_init(void)
581 _pthread_cleanup_push(&cleanup_buffer
, ltt_usertrace_fast_cleanup
, NULL
);
/*
 * Library constructor: runs at load time in the traced process.
 * Performs the reader/writer initialization (shared mapping + dumper
 * daemon fork) for the initial thread.
 */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");

	/* Without this call nothing in the library ever initializes
	 * tracing for the main thread: ltt_rw_init() is referenced
	 * nowhere else at startup. */
	ltt_rw_init();
}
592 void __attribute__((destructor
)) __ltt_usertrace_fast_fini(void)
594 if(role
== LTT_ROLE_WRITER
) {
595 dbg_printf("LTT usertrace-fast fini\n");
596 ltt_usertrace_fast_cleanup(NULL
);