1 /* LTTng user-space "fast" library
3 * This daemon is spawned by each traced thread (to share the mmap).
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
8 * It uses the control information in the shared memory area (producer/consumer
11 * When the parent thread dies (yes, those things may happen) ;) , this daemon
12 * will flush the last buffer and write it to disk.
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
28 * Eventually, there will be two modes :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
40 * Copyright 2006 Mathieu Desnoyers
44 #define inline inline __attribute__((always_inline))
48 #define LTT_TRACE_FAST
62 #include <sys/param.h>
64 #include <sys/types.h>
69 #include <sys/syscall.h>
71 #include <ltt/ltt-usertrace.h>
73 #define gettid() syscall(__NR_gettid)
76 #define dbg_printf(...) printf(__VA_ARGS__)
78 #define dbg_printf(...)
79 #endif //LTT_SHOW_DEBUG
/* Modes for forcing a sub-buffer switch in flush_buffer():
 * FORCE_ACTIVE - writers may still be running: start a new sub-buffer
 *                (reserve its header) and push the reader if needed.
 * FORCE_FLUSH  - final flush when no writer is left (parent died). */
enum force_switch_mode {
	FORCE_ACTIVE,
	FORCE_FLUSH
};
/* Writer (the traced application) */

/* Per-thread pointer to the mmap()ed trace control area shared with the
 * dumper daemon; NULL until ltt_rw_init() has run in this thread. */
__thread struct ltt_trace_info *thread_trace_info = NULL;
/*
 * Writer-side hook: notify this thread's dumper daemon that a buffer
 * switch happened, by sending it SIGUSR1 (handled by handler_sigusr1()
 * on the daemon side).
 * NOTE(review): extraction lost lines here (function braces and,
 * presumably, a NULL check on tmp) — confirm against the original file.
 */
88 void ltt_usertrace_fast_buffer_switch(void)
90 struct ltt_trace_info
*tmp
= thread_trace_info
;
/* daemon_id holds the pid of the per-thread dumper daemon (stored by
 * ltt_rw_init() after the fork); SIGUSR1 asks it to dump to disk. */
92 kill(tmp
->daemon_id
, SIGUSR1
);
95 /* The cleanup should never be called from a signal handler */
/*
 * Writer-side thread-exit cleanup (registered via _pthread_cleanup_push()
 * in ltt_thread_init()): tell the daemon the traced thread is exiting
 * (SIGUSR2), then unmap the shared trace control area.  arg is unused.
 * NOTE(review): braces were lost in extraction.
 */
96 static void ltt_usertrace_fast_cleanup(void *arg
)
98 struct ltt_trace_info
*tmp
= thread_trace_info
;
/* Clear the thread-local pointer first so no later switch uses it. */
100 thread_trace_info
= NULL
;
/* SIGUSR2 = "parent exited": daemon flushes the last buffer and quits. */
101 kill(tmp
->daemon_id
, SIGUSR2
);
102 munmap(tmp
, sizeof(*tmp
));
106 /* Reader (the disk dumper daemon) */
108 static pid_t traced_pid
= 0;
109 static pid_t traced_tid
= 0;
110 static int parent_exited
= 0;
111 static int fd_process
= -1;
112 static char outfile_name
[PATH_MAX
];
113 static char identifier_name
[PATH_MAX
];
115 /* signal handling */
/* SIGUSR1 = the traced parent did a buffer switch.  As extracted the
 * handler only logs; the daemon loop reacts once sigsuspend() returns. */
116 static void handler_sigusr1(int signo
)
118 dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo
);
/* SIGUSR2 = the traced parent exited.  NOTE(review): extraction gaps —
 * this handler presumably also sets parent_exited (which the daemon loop
 * tests); confirm against the original file. */
121 static void handler_sigusr2(int signo
)
123 dbg_printf("LTT Signal %d received : parent exited.\n", signo
);
/* SIGALRM = periodic liveness check (see file header: a timer is used to
 * detect parent death).  If getppid() no longer matches traced_pid the
 * parent died (or was reparented) and the daemon must clean up.
 * NOTE(review): the cleanup statements inside the if were lost in
 * extraction — confirm against the original file. */
127 static void handler_sigalarm(int signo
)
129 dbg_printf("LTT Signal %d received\n", signo
);
131 if(getppid() != traced_pid
) {
133 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid
);
139 /* Do a buffer switch. Don't switch if buffer is completely empty */
/*
 * flush_buffer() - force a sub-buffer switch on ltt_buf.
 * mode selects FORCE_ACTIVE (writers may still run: also reserve the new
 * sub-buffer's header and possibly push the reader) or FORCE_FLUSH
 * (final flush, no writer left).
 * NOTE(review): this copy is extraction-damaged: the "do {" headers of
 * the cmpxchg retry loops, several braces/returns and the declaration of
 * tsc are missing, and "&ltt_buf" was mangled into "<t_buf" throughout.
 * Restore from the original file before compiling.
 */
140 static void flush_buffer(struct ltt_buf
*ltt_buf
, enum force_switch_mode mode
)
143 int offset_begin
, offset_end
, offset_old
;
144 int reserve_commit_diff
;
145 int consumed_old
, consumed_new
;
146 int commit_count
, reserve_count
;
/* Phase 1: compute the switched-to offset; retried via the
 * atomic_cmpxchg() on ltt_buf->offset below. */
150 offset_old
= atomic_read(<t_buf
->offset
);
151 offset_begin
= offset_old
;
153 tsc
= ltt_get_timestamp();
155 /* Error in getting the timestamp : should not happen : it would
156 * mean we are called from an NMI during a write seqlock on xtime. */
/* Non-zero offset within the sub-buffer => it holds data: align the new
 * write position to the start of the next sub-buffer. */
160 if(SUBBUF_OFFSET(offset_begin
, ltt_buf
) != 0) {
161 offset_begin
= SUBBUF_ALIGN(offset_begin
, ltt_buf
);
164 /* we do not have to switch : buffer is empty */
167 if(mode
== FORCE_ACTIVE
)
168 offset_begin
+= ltt_subbuf_header_len(ltt_buf
);
169 /* Always begin_switch in FORCE_ACTIVE mode */
171 /* Test new buffer integrity */
172 reserve_commit_diff
=
174 <t_buf
->reserve_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)])
176 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
177 if(reserve_commit_diff
== 0) {
178 /* Next buffer not corrupted. */
179 if(mode
== FORCE_ACTIVE
180 && (offset_begin
-atomic_read(<t_buf
->consumed
))
181 >= ltt_buf
->alloc_size
) {
182 /* We do not overwrite non consumed buffers and we are full : ignore
183 switch while tracing is active. */
187 /* Next subbuffer corrupted. Force pushing reader even in normal mode */
190 offset_end
= offset_begin
;
191 } while(atomic_cmpxchg(<t_buf
->offset
, offset_old
, offset_end
)
/* Phase 2 (ACTIVE only): push the reader's consumed count forward when
 * the writer has lapped it; again a cmpxchg retry loop. */
195 if(mode
== FORCE_ACTIVE
) {
196 /* Push the reader if necessary */
198 consumed_old
= atomic_read(<t_buf
->consumed
);
199 /* If buffer is in overwrite mode, push the reader consumed count if
200 the write position has reached it and we are not at the first
201 iteration (don't push the reader farther than the writer).
202 This operation can be done concurrently by many writers in the
203 same buffer, the writer being at the farthest write position sub-buffer
204 index in the buffer being the one which will win this loop. */
205 /* If the buffer is not in overwrite mode, pushing the reader only
206 happens if a sub-buffer is corrupted */
207 if((SUBBUF_TRUNC(offset_end
-1, ltt_buf
)
208 - SUBBUF_TRUNC(consumed_old
, ltt_buf
))
209 >= ltt_buf
->alloc_size
)
210 consumed_new
= SUBBUF_ALIGN(consumed_old
, ltt_buf
);
212 consumed_new
= consumed_old
;
215 } while(atomic_cmpxchg(<t_buf
->consumed
, consumed_old
, consumed_new
)
218 if(consumed_old
!= consumed_new
) {
219 /* Reader pushed : we are the winner of the push, we can therefore
220 reequilibrate reserve and commit. Atomic increment of the commit
221 count permits other writers to play around with this variable
222 before us. We keep track of corrupted_subbuffers even in overwrite
224 we never want to write over a non completely committed sub-buffer :
225 possible causes : the buffer size is too low compared to the unordered
226 data input, or there is a writer who died between the reserve and the
228 if(reserve_commit_diff
) {
229 /* We have to alter the sub-buffer commit count : a sub-buffer is
231 atomic_add(reserve_commit_diff
,
232 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
233 atomic_inc(<t_buf
->corrupted_subbuffers
);
/* Phase 3: close the old sub-buffer (end callback + reserve_count fixup)
 * and deliver it if all pending commits are already in. */
242 /* Concurrency safe because we are the last and only thread to alter this
243 sub-buffer. As long as it is not delivered and read, no other thread can
244 alter the offset, alter the reserve_count or call the
245 client_buffer_end_callback on this sub-buffer.
246 The only remaining threads could be the ones with pending commits. They
247 will have to do the deliver themselves.
248 Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
249 commit and reserve counts. We keep a corrupted sub-buffers count and push
250 the readers across these sub-buffers.
251 Not concurrency safe if a writer is stalled in a subbuffer and
252 another writer switches in, finding out it's corrupted. The result will be
253 that the old (uncommitted) subbuffer will be declared corrupted, and that
254 the new subbuffer will be declared corrupted too because of the commit
256 Offset old should never be 0. */
257 ltt_buffer_end_callback(ltt_buf
, tsc
, offset_old
,
258 SUBBUF_INDEX((offset_old
), ltt_buf
));
259 /* Setting this reserve_count will allow the sub-buffer to be delivered by
260 the last committer. */
261 reserve_count
= atomic_add_return((SUBBUF_OFFSET((offset_old
-1),
263 <t_buf
->reserve_count
[SUBBUF_INDEX((offset_old
),
265 if(reserve_count
== atomic_read(
266 <t_buf
->commit_count
[SUBBUF_INDEX((offset_old
), ltt_buf
)])) {
267 ltt_deliver_callback(ltt_buf
, SUBBUF_INDEX((offset_old
), ltt_buf
), NULL
);
/* Phase 4 (ACTIVE only): open the new sub-buffer (begin callback) and
 * commit its header; deliver immediately if already fully reserved. */
271 if(mode
== FORCE_ACTIVE
) {
273 /* This code can be executed unordered : writers may already have written
274 to the sub-buffer before this code gets executed, caution. */
275 /* The commit makes sure that this code is executed before the deliver
276 of this sub-buffer */
277 ltt_buffer_begin_callback(ltt_buf
, tsc
, SUBBUF_INDEX(offset_begin
, ltt_buf
));
278 commit_count
= atomic_add_return(ltt_subbuf_header_len(ltt_buf
),
279 <t_buf
->commit_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)]);
280 /* Check if the written buffer has to be delivered */
281 if(commit_count
== atomic_read(
282 <t_buf
->reserve_count
[SUBBUF_INDEX(offset_begin
, ltt_buf
)])) {
283 ltt_deliver_callback(ltt_buf
, SUBBUF_INDEX(offset_begin
, ltt_buf
), NULL
);
/*
 * Daemon side: create LTT_USERTRACE_ROOT (ok if it already exists),
 * chdir into it, build the unique "process-<tid>.<pid>.<cycles>" name
 * and create/open that output file.  Returns the open fd (see the
 * caller: fd_process = fd = open_output_files()).
 * With LTT_NULL_OUTPUT_TEST the name is symlinked to /dev/null and
 * opened O_WRONLY instead.
 * NOTE(review): the declarations of ret/fd, the error-path returns and
 * closing braces were lost in extraction.
 */
290 static int open_output_files(void)
294 /* Open output files */
296 ret
= mkdir(LTT_USERTRACE_ROOT
, 0777);
297 if(ret
< 0 && errno
!= EEXIST
) {
298 perror("LTT Error in creating output (mkdir)");
301 ret
= chdir(LTT_USERTRACE_ROOT
);
303 perror("LTT Error in creating output (chdir)");
306 snprintf(identifier_name
, PATH_MAX
-1, "%lu.%lu.%llu",
307 traced_tid
, traced_pid
, get_cycles());
308 snprintf(outfile_name
, PATH_MAX
-1, "process-%s", identifier_name
);
310 #ifndef LTT_NULL_OUTPUT_TEST
311 fd
= creat(outfile_name
, 0644);
314 ret
= symlink("/dev/null", outfile_name
);
316 perror("error in symlink");
319 fd
= open(outfile_name
, O_WRONLY
);
321 perror("Error in open");
324 #endif //LTT_NULL_OUTPUT_TEST
/*
 * Reader side: try to acquire the next fully-committed sub-buffer.
 * On success stores the consumed position in *offset.  The visible
 * checks bail out when the consumed sub-buffer is not fully committed
 * (commit_count != reserve_count) or when the buffer is empty (write
 * offset still in the same sub-buffer as the consumed position).
 * NOTE(review): the return statements were lost in extraction; the
 * caller (read_subbuffer) treats an -EAGAIN-style failure as "nothing to
 * read" — confirm exact return protocol against the original file.
 */
328 static inline int ltt_buffer_get(struct ltt_buf
*ltt_buf
,
329 unsigned int *offset
)
331 unsigned int consumed_old
, consumed_idx
;
332 consumed_old
= atomic_read(<t_buf
->consumed
);
333 consumed_idx
= SUBBUF_INDEX(consumed_old
, ltt_buf
);
/* Sub-buffer not fully committed yet: writers still pending. */
335 if(atomic_read(<t_buf
->commit_count
[consumed_idx
])
336 != atomic_read(<t_buf
->reserve_count
[consumed_idx
])) {
/* Same sub-buffer for writer and reader => nothing to consume. */
339 if((SUBBUF_TRUNC(atomic_read(<t_buf
->offset
), ltt_buf
)
340 -SUBBUF_TRUNC(consumed_old
, ltt_buf
)) == 0) {
344 *offset
= consumed_old
;
/*
 * Reader side: release the sub-buffer acquired by ltt_buffer_get(),
 * advancing the consumed count with a cmpxchg.  A failed cmpxchg means
 * the reader was pushed by the writer (the just-read sub-buffer was
 * overwritten, or never belonged to us).  Finally wake the writer via
 * writer_sem — skipped when the traced process is already gone, since
 * nobody is waiting.
 * NOTE(review): braces, some returns and the declaration of ret were
 * lost in extraction; "&ltt_buf" was mangled into "<t_buf".
 */
349 static inline int ltt_buffer_put(struct ltt_buf
*ltt_buf
,
352 unsigned int consumed_old
, consumed_new
;
355 consumed_old
= offset
;
356 consumed_new
= SUBBUF_ALIGN(consumed_old
, ltt_buf
);
357 if(atomic_cmpxchg(<t_buf
->consumed
, consumed_old
, consumed_new
)
359 /* We have been pushed by the writer : the last buffer read _is_
361 * It can also happen if this is a buffer we never got. */
/* Parent gone or never there: no writer to wake up. */
364 if(traced_pid
== 0 || parent_exited
) return 0;
366 ret
= sem_post(<t_buf
->writer_sem
);
368 printf("error in sem_post");
/*
 * Daemon side: read one sub-buffer from ltt_buf and write it to fd,
 * lazily opening the output file on first use (fd_process == -1).
 * The write source is the buffer start plus the consumed offset masked
 * into the allocation, for one subbuf_size chunk.
 * NOTE(review): extraction-damaged — error paths and returns missing,
 * and the fsync(pair->trace) line references "pair", which is not
 * declared anywhere in this extract; confirm against the original file.
 */
373 static int read_subbuffer(struct ltt_buf
*ltt_buf
, int fd
)
375 unsigned int consumed_old
;
377 dbg_printf("LTT read buffer\n");
380 err
= ltt_buffer_get(ltt_buf
, &consumed_old
);
382 if(err
!= -EAGAIN
) dbg_printf("LTT Reserving sub buffer failed\n");
/* Lazy open: create the trace output file on first sub-buffer. */
385 if(fd_process
== -1) {
386 fd_process
= fd
= open_output_files();
/* Dump the acquired sub-buffer; TEMP_FAILURE_RETRY restarts on EINTR. */
389 err
= TEMP_FAILURE_RETRY(write(fd
,
391 + (consumed_old
& ((ltt_buf
->alloc_size
)-1)),
392 ltt_buf
->subbuf_size
));
395 perror("Error in writing to file");
399 err
= fsync(pair
->trace
);
402 perror("Error in writing to file");
407 err
= ltt_buffer_put(ltt_buf
, consumed_old
);
411 dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
412 /* FIXME : we may delete the last written buffer if we wish. */
421 /* This function is called by ltt_rw_init which has signals blocked */
/*
 * Main loop of the forked dumper daemon: record the traced pid/tid,
 * install SIGUSR1/SIGUSR2/SIGALRM handlers, then repeatedly sigsuspend()
 * (restoring the caller's pre-block mask) and read sub-buffers until the
 * parent exits; finally force-flush the last sub-buffer (FORCE_FLUSH),
 * drain it, destroy the writer semaphore and unmap the shared area.
 * NOTE(review): loop headers, braces and the declaration of ret were
 * lost in extraction — confirm against the original file.
 */
422 static void ltt_usertrace_fast_daemon(struct ltt_trace_info
*shared_trace_info
,
423 sigset_t oldset
, pid_t l_traced_pid
, pthread_t l_traced_tid
)
425 struct sigaction act
;
428 traced_pid
= l_traced_pid
;
429 traced_tid
= l_traced_tid
;
431 dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
432 shared_trace_info
->init
, getpid(), traced_pid
, traced_tid
);
/* SIGUSR1: parent buffer switch. */
434 act
.sa_handler
= handler_sigusr1
;
436 sigemptyset(&(act
.sa_mask
));
437 sigaddset(&(act
.sa_mask
), SIGUSR1
);
438 sigaction(SIGUSR1
, &act
, NULL
);
/* SIGUSR2: parent exited. */
440 act
.sa_handler
= handler_sigusr2
;
442 sigemptyset(&(act
.sa_mask
));
443 sigaddset(&(act
.sa_mask
), SIGUSR2
);
444 sigaction(SIGUSR2
, &act
, NULL
);
/* SIGALRM: periodic check that the parent is still alive. */
446 act
.sa_handler
= handler_sigalarm
;
448 sigemptyset(&(act
.sa_mask
));
449 sigaddset(&(act
.sa_mask
), SIGALRM
);
450 sigaction(SIGALRM
, &act
, NULL
);
/* Wait for a signal with the caller's original mask, then service it. */
455 ret
= sigsuspend(&oldset
);
457 perror("LTT Error in sigsuspend\n");
459 if(traced_pid
== 0) break; /* parent died */
460 if(parent_exited
) break;
461 dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
464 ret
= read_subbuffer(&shared_trace_info
->channel
.process
, fd_process
);
467 /* The parent thread is dead and we have finished with the buffer */
469 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
470 * there is no writer. */
471 flush_buffer(&shared_trace_info
->channel
.process
, FORCE_FLUSH
);
473 ret
= read_subbuffer(&shared_trace_info
->channel
.process
, fd_process
);
479 ret
= sem_destroy(&shared_trace_info
->channel
.process
.writer_sem
);
481 perror("error in sem_destroy");
483 munmap(shared_trace_info
, sizeof(*shared_trace_info
));
/* Reader-writer initialization */

/* Role this process plays after ltt_rw_init(): the traced application
 * keeps LTT_ROLE_WRITER; the forked dumper daemon switches itself to
 * LTT_ROLE_READER (checked by the destructor to avoid cleaning up from
 * the reader side). */
static enum ltt_process_role {
	LTT_ROLE_WRITER,
	LTT_ROLE_READER
} role = LTT_ROLE_WRITER;
/*
 * Called in the traced thread: mmap() an anonymous shared trace control
 * area, initialize the "process" channel (zeroed counters, process-shared
 * writer semaphore, alloc/subbuf sizes, start pointer, first sub-buffer
 * header), then — with all signals blocked — fork the dumper daemon.
 * The parent stores the daemon pid, publishes the area via the
 * thread-local thread_trace_info and restores its signal mask; the child
 * becomes LTT_ROLE_READER, setsid()s and enters
 * ltt_usertrace_fast_daemon(), never returning.
 * NOTE(review): the fork() call itself, several error checks, the pid/ret
 * declarations and braces were lost in extraction — confirm against the
 * original file.
 */
495 void ltt_rw_init(void)
498 struct ltt_trace_info
*shared_trace_info
;
500 sigset_t set
, oldset
;
501 pid_t l_traced_pid
= getpid();
502 pid_t l_traced_tid
= gettid();
504 /* parent : create the shared memory map */
505 shared_trace_info
= mmap(0, sizeof(*thread_trace_info
),
506 PROT_READ
|PROT_WRITE
, MAP_SHARED
|MAP_ANONYMOUS
, 0, 0);
507 shared_trace_info
->init
=0;
508 shared_trace_info
->filter
=0;
509 shared_trace_info
->daemon_id
=0;
510 shared_trace_info
->nesting
=0;
511 memset(&shared_trace_info
->channel
.process
, 0,
512 sizeof(shared_trace_info
->channel
.process
));
/* pshared = 1: the semaphore is shared with the forked daemon. */
514 ret
= sem_init(&shared_trace_info
->channel
.process
.writer_sem
, 1,
517 perror("error in sem_init");
519 shared_trace_info
->channel
.process
.alloc_size
= LTT_BUF_SIZE_PROCESS
;
520 shared_trace_info
->channel
.process
.subbuf_size
= LTT_SUBBUF_SIZE_PROCESS
;
521 shared_trace_info
->channel
.process
.start
=
522 shared_trace_info
->channel
.process_buf
;
523 ltt_buffer_begin_callback(&shared_trace_info
->channel
.process
,
524 ltt_get_timestamp(), 0);
526 shared_trace_info
->init
= 1;
528 /* Disable signals */
529 ret
= sigfillset(&set
);
531 dbg_printf("LTT Error in sigfillset\n");
/* Block everything across fork(); the daemon restores via sigsuspend(),
 * the parent restores below with SIG_SETMASK. */
534 ret
= pthread_sigmask(SIG_BLOCK
, &set
, &oldset
);
536 dbg_printf("LTT Error in pthread_sigmask\n");
/* Parent path: remember the daemon's pid and publish the shared area. */
542 shared_trace_info
->daemon_id
= pid
;
543 thread_trace_info
= shared_trace_info
;
546 ret
= pthread_sigmask(SIG_SETMASK
, &oldset
, NULL
);
548 dbg_printf("LTT Error in pthread_sigmask\n");
/* Child path: become the reader daemon. */
550 } else if(pid
== 0) {
553 role
= LTT_ROLE_READER
;
555 //Not a good idea to renice, unless futex wait eventually implement
556 //priority inheritence.
559 // perror("Error in nice");
562 perror("Error setting sid");
564 ltt_usertrace_fast_daemon(shared_trace_info
, oldset
, l_traced_pid
,
566 /* Should never return */
570 perror("LTT Error in forking ltt-usertrace-fast");
/* Per-thread buffer backing _pthread_cleanup_push() in ltt_thread_init();
 * glibc-internal type from <pthread.h>. */
574 static __thread
struct _pthread_cleanup_buffer cleanup_buffer
;
/*
 * Register ltt_usertrace_fast_cleanup() to run when this thread exits,
 * so the daemon is notified and the shared area unmapped.
 * NOTE(review): function braces (and presumably a call to ltt_rw_init())
 * were lost in extraction — confirm against the original file.
 */
576 void ltt_thread_init(void)
578 _pthread_cleanup_push(&cleanup_buffer
, ltt_usertrace_fast_cleanup
, NULL
);
/*
 * Library constructor: runs at load time for the main thread.
 * NOTE(review): the body beyond the debug print (presumably the
 * ltt_rw_init() call) was lost in extraction — confirm against the
 * original file.
 */
582 void __attribute__((constructor
)) __ltt_usertrace_fast_init(void)
584 dbg_printf("LTT usertrace-fast init\n");
/*
 * Library destructor: on process exit, run the writer-side cleanup
 * (notify daemon, munmap) — but only in the traced application
 * (LTT_ROLE_WRITER), never in the forked reader daemon.
 * NOTE(review): closing braces were lost in extraction.
 */
589 void __attribute__((destructor
)) __ltt_usertrace_fast_fini(void)
591 if(role
== LTT_ROLE_WRITER
) {
592 dbg_printf("LTT usertrace-fast fini\n");
593 ltt_usertrace_fast_cleanup(NULL
);