/* LTTng user-space "fast" library
 *
 * This daemon is spawned by each traced thread (to share the mmap).
 *
 * Its job is to dump periodically this buffer to disk (when it receives a
 * SIGUSR1 from its parent).
 *
 * It uses the control information in the shared memory area (producer/consumer
 * counts).
 *
 * When the parent thread dies (yes, those thing may happen) ;) , this daemon
 * will flush the last buffer and write it to disk.
 *
 * Supplement note for streaming : the daemon is responsible for flushing
 * periodically the buffer if it is streaming data.
 *
 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
 * do not use it : we will use a shared mmap() instead, which is passed through
 * fork() as a MAP_SHARED mmap segment. Updated when msync or munmap are called.
 *
 * Memory mapped by mmap() is preserved across fork(2), with the same mapping.
 *
 * Eventually, there will be two modes :
 * * Slow thread spawn : a fork() is done for each new thread. If the process
 *   dies, the data is not lost.
 * * Fast thread spawn : a pthread_create() is done by the application for each
 *   new thread.
 *
 * We use a timer to check periodically if the parent died. I think it is less
 * intrusive than a ptrace() on the parent, which would get every signal. The
 * side effect of this is that we won't be notified if the parent does an
 * exec(). In this case, we will just sit there until the parent exits.
 *
 * Copyright 2006 Mathieu Desnoyers
 */
44 #define inline inline __attribute__((always_inline))
48 #define LTT_TRACE_FAST
49 #include <sys/types.h>
65 #include <sys/param.h>
69 #include <asm/atomic.h>
70 #include <asm/timex.h> //for get_cycles()
72 _syscall0(pid_t
,gettid
)
74 #include <ltt/ltt-usertrace.h>
77 #define dbg_printf(...) dbg_printf(__VA_ARGS__)
79 #define dbg_printf(...)
80 #endif //LTT_SHOW_DEBUG
/* How flush_buffer() is invoked: FORCE_ACTIVE while writers may still be
 * running in the buffer, FORCE_FLUSH for the final flush once the writer
 * is known dead (see the daemon teardown path). */
enum force_switch_mode {
	FORCE_ACTIVE,
	FORCE_FLUSH
};
85 /* Writer (the traced application) */
87 __thread
struct ltt_trace_info
*thread_trace_info
= NULL
;
89 void ltt_usertrace_fast_buffer_switch(void)
91 struct ltt_trace_info
*tmp
= thread_trace_info
;
93 kill(tmp
->daemon_id
, SIGUSR1
);
96 /* The cleanup should never be called from a signal handler */
97 static void ltt_usertrace_fast_cleanup(void *arg
)
99 struct ltt_trace_info
*tmp
= thread_trace_info
;
101 thread_trace_info
= NULL
;
102 kill(tmp
->daemon_id
, SIGUSR2
);
103 munmap(tmp
, sizeof(*tmp
));
/* Reader (the disk dumper daemon) */

/* pid of the traced (parent) process; 0 once it is seen dead. */
static pid_t traced_pid = 0;
/* tid of the traced thread this daemon serves. */
static pid_t traced_tid = 0;
/* Nonzero once the parent announced its exit (presumably set when
 * SIGUSR2 is received — confirm against the full source). */
static int parent_exited = 0;
/* signal handling */

/* SIGUSR1 handler: the parent asked for a buffer switch. Only logs —
 * the actual sub-buffer read is done by the daemon main loop once
 * sigsuspend() returns. */
static void handler_sigusr1(int signo)
{
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
/*
 * SIGUSR2 handler: the parent announced that it is exiting.
 * NOTE(review): lines are missing after this printf in the extraction —
 * something must record the exit (parent_exited is read elsewhere but
 * never visibly set); confirm against the full source.
 */
static void handler_sigusr2(int signo)
	dbg_printf("LTT Signal %d received : parent exited.\n", signo);
/*
 * SIGALRM handler: periodic liveness check of the traced parent.
 * NOTE(review): lines are missing from this extraction (the if-body
 * action recording the parent's death — presumably traced_pid = 0 —
 * the re-arming of the alarm, and the closing braces).
 */
static void handler_sigalarm(int signo)
	dbg_printf("LTT Signal %d received\n", signo);
	/* getppid() stops matching when the original parent dies
	 * (the daemon gets re-parented). */
	if(getppid() != traced_pid) {
		dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
/* Do a buffer switch. Don't switch if buffer is completely empty */
/*
 * Force the current sub-buffer of ltt_buf to be closed/flushed.
 * mode == FORCE_ACTIVE: writers may still be running (reader may be
 * pushed, a new sub-buffer header is begun); mode == FORCE_FLUSH: final
 * flush, no writer left.
 *
 * NOTE(review): this extraction is missing many original lines — the
 * declaration of tsc, the do { heads of the two cmpxchg retry loops,
 * "else"/"return" branches, and closing braces. Surviving tokens are
 * kept verbatim below.
 */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;

	/* Snapshot the write offset; retried through cmpxchg below. */
	offset_old = atomic_read(&ltt_buf->offset);
	offset_begin = offset_old;
	/* NOTE(review): tsc is used here and below but its declaration is
	 * missing from this extraction. */
	tsc = ltt_get_timestamp();
	/* Error in getting the timestamp : should not happen : it would
	 * mean we are called from an NMI during a write seqlock on xtime. */
	if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
		/* Sub-buffer contains data: jump to the next aligned sub-buffer. */
		offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
	/* we do not have to switch : buffer is empty */
	if(mode == FORCE_ACTIVE)
		offset_begin += ltt_subbuf_header_len(ltt_buf);
	/* Always begin_switch in FORCE_ACTIVE mode */
	/* Test new buffer integrity */
	/* NOTE(review): the atomic_read( wrappers and the "-" between the two
	 * terms are missing from this extraction. */
	reserve_commit_diff =
		&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
		&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
	if(reserve_commit_diff == 0) {
		/* Next buffer not corrupted. */
		if(mode == FORCE_ACTIVE
			&& (offset_begin - atomic_read(&ltt_buf->consumed))
				>= ltt_buf->alloc_size) {
			/* We do not overwrite non consumed buffers and we are full : ignore
			   switch while tracing is active. */
	/* Next subbuffer corrupted. Force pushing reader even in normal mode */
	offset_end = offset_begin;
	/* Retry the whole reservation if another writer moved the offset. */
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)

	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		consumed_old = atomic_read(&ltt_buf->consumed);
		/* If buffer is in overwrite mode, push the reader consumed count if
		   the write position has reached it and we are not at the first
		   iteration (don't push the reader farther than the writer).
		   This operation can be done concurrently by many writers in the
		   same buffer, the writer being at the fartest write position sub-buffer
		   index in the buffer being the one which will win this loop. */
		/* If the buffer is not in overwrite mode, pushing the reader only
		   happen if a sub-buffer is corrupted */
		if((SUBBUF_TRUNC(offset_end, ltt_buf)
			- SUBBUF_TRUNC(consumed_old, ltt_buf))
				>= ltt_buf->alloc_size)
			consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
		/* NOTE(review): the "else" introducing this assignment is missing. */
			consumed_new = consumed_old;
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   reequilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a non completely committed sub-buffer :
			   possible causes : the buffer size is too low compared to the unordered
			   data input, or there is a writer who died between the reserve and the
			   commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted. */
				atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);

	/* Concurrency safe because we are the last and only thread to alter this
	   sub-buffer. As long as it is not delivered and read, no other thread can
	   alter the offset, alter the reserve_count or call the
	   client_buffer_end_callback on this sub-buffer.
	   The only remaining threads could be the ones with pending commits. They
	   will have to do the deliver themself.
	   Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
	   commit and reserve counts. We keep a corrupted sub-buffers count and push
	   the readers across these sub-buffers.
	   Not concurrency safe if a writer is stalled in a subbuffer and
	   another writer switches in, finding out it's corrupted. The result will be
	   than the old (uncommited) subbuffer will be declared corrupted, and that
	   the new subbuffer will be declared corrupted too because of the commit
	   count mismatch.
	   Offset old should never be 0. */
	ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
		SUBBUF_INDEX((offset_old), ltt_buf));
	/* Setting this reserve_count will allow the sub-buffer to be delivered by
	   the last committer. */
	/* NOTE(review): the middle of this expression (the second argument of
	 * SUBBUF_OFFSET and the closing of atomic_add_return) is missing. */
	reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
		&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
	if(reserve_count == atomic_read(
		&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
		ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);

	if(mode == FORCE_ACTIVE) {
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
			&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
			&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
/*
 * Reader side: try to acquire a fully committed sub-buffer for reading.
 * On success, *offset receives the consumed position of that sub-buffer.
 * NOTE(review): the returns (presumably -EAGAIN on the two "not ready"
 * branches, 0 on success) and closing braces are missing from this
 * extraction — confirm against the full source.
 */
static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
	unsigned int *offset)
	unsigned int consumed_old, consumed_idx;
	consumed_old = atomic_read(&ltt_buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);
	/* Sub-buffer not fully committed yet : nothing safe to read. */
	if(atomic_read(&ltt_buf->commit_count[consumed_idx])
		!= atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
	/* Writer has not moved past the consumed position : buffer empty. */
	if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
		-SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
	*offset = consumed_old;
/*
 * Reader side: release a sub-buffer previously acquired with
 * ltt_buffer_get, advancing the consumed count past it and waking a
 * writer that may be waiting on the semaphore.
 * NOTE(review): the second parameter (an "offset" read below), the ret
 * declaration, the cmpxchg branch bodies and the returns are missing
 * from this extraction.
 */
static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
	unsigned int consumed_old, consumed_new;
	consumed_old = offset;
	consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
	/* Only succeeds if no one (e.g. a pushing writer) moved consumed. */
	if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
		/* We have been pushed by the writer : the last buffer read _is_
		 * corrupted.
		 * It can also happen if this is a buffer we never got. */
	/* Signal the writer that room was freed. */
	ret = sem_post(&ltt_buf->writer_sem);
		printf("error in sem_post");
/*
 * Dump one ready sub-buffer from the shared map to the output fd.
 * NOTE(review): the err declaration, the error-branch heads and the
 * return statements are missing from this extraction.
 */
static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
	unsigned int consumed_old;
	dbg_printf("LTT read buffer\n");
	err = ltt_buffer_get(ltt_buf, &consumed_old);
	/* -EAGAIN just means "nothing ready" — not logged as a failure. */
	if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
	/* Write exactly one sub-buffer, restarting on EINTR. */
	err = TEMP_FAILURE_RETRY(write(fd,
		/* NOTE(review): the buffer base-address operand preceding this
		 * "+" is missing from this extraction. */
		+ (consumed_old & ((ltt_buf->alloc_size)-1)),
		ltt_buf->subbuf_size));
		perror("Error in writing to file");
	/* NOTE(review): "pair->trace" matches no visible declaration here —
	 * looks like it belongs to a streaming variant; confirm against the
	 * full source. */
	err = fsync(pair->trace);
		perror("Error in writing to file");
	err = ltt_buffer_put(ltt_buf, consumed_old);
		dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
		/* FIXME : we may delete the last written buffer if we wish. */
/* This function is called by ltt_rw_init which has signals blocked */
/*
 * Disk-dumper daemon main body: install handlers for SIGUSR1 (buffer
 * switch), SIGUSR2 (parent exit) and SIGALRM (parent liveness check),
 * create the output file under LTT_USERTRACE_ROOT, then loop in
 * sigsuspend() dumping one sub-buffer per wakeup; when the parent is
 * dead, do a final FORCE_FLUSH, drain, and tear down the shared state.
 * NOTE(review): this extraction is missing the ret/fd_process
 * declarations, act.sa_flags initialization, the while() loop heads,
 * the #else of the LTT_NULL_OUTPUT_TEST conditional, the dead_parent
 * label and the closing braces.
 */
static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
	sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
	struct sigaction act;
	char outfile_name[PATH_MAX];
	char identifier_name[PATH_MAX];

	/* Publish the identity of the traced thread (read by the handlers). */
	traced_pid = l_traced_pid;
	traced_tid = l_traced_tid;
	dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
		shared_trace_info->init, getpid(), traced_pid, traced_tid);

	/* SIGUSR1 : parent requests a buffer dump. */
	act.sa_handler = handler_sigusr1;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR1);
	sigaction(SIGUSR1, &act, NULL);

	/* SIGUSR2 : parent announces its exit. */
	act.sa_handler = handler_sigusr2;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR2);
	sigaction(SIGUSR2, &act, NULL);

	/* SIGALRM : periodic parent liveness check. */
	act.sa_handler = handler_sigalarm;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGALRM);
	sigaction(SIGALRM, &act, NULL);

	/* Open output files */
	ret = mkdir(LTT_USERTRACE_ROOT, 0777);
	if(ret < 0 && errno != EEXIST) {
		perror("LTT Error in creating output (mkdir)");
	ret = chdir(LTT_USERTRACE_ROOT);
		perror("LTT Error in creating output (chdir)");

	/* Unique per-thread name: tid.pid.timestamp. */
	snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
		traced_tid, traced_pid, get_cycles());
	snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);

	/* Wait for the first signal before creating files */
	ret = sigsuspend(&oldset);
		perror("LTT Error in sigsuspend\n");
	if(traced_pid == 0 || parent_exited) goto dead_parent;

#ifndef LTT_NULL_OUTPUT_TEST
	fd_process = creat(outfile_name, 0644);
	/* Null-output test mode: route the "file" to /dev/null.
	 * NOTE(review): the #else separating this branch is missing. */
	ret = symlink("/dev/null", outfile_name);
		perror("error in symlink");
	fd_process = open(outfile_name, O_WRONLY);
		perror("Error in open");
#endif //LTT_NULL_OUTPUT_TEST

	/* Main service loop: one sub-buffer dump per wakeup. */
	if(traced_pid == 0) break; /* parent died */
	if(parent_exited) break;
	dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
	ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
	ret = sigsuspend(&oldset);
		perror("LTT Error in sigsuspend\n");

	/* The parent thread is dead and we have finished with the buffer */
	/* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
	 * there is no writer. */
	flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
	ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);

	/* Destroy the process-shared semaphore and drop the shared mapping. */
	ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
		perror("error in sem_destroy");
	munmap(shared_trace_info, sizeof(*shared_trace_info));
/* Reader-writer initialization */

/* Which side of the trace this process plays. Every process starts as
 * the writer (the traced application); the forked dumper daemon flips
 * this to reader, so the library destructor knows whether to run the
 * writer-side cleanup. */
static enum ltt_process_role {
	LTT_ROLE_WRITER,
	LTT_ROLE_READER
} role = LTT_ROLE_WRITER;
/*
 * Writer-side initialization: map the shared ltt_trace_info area,
 * initialize the process channel and its process-shared semaphore,
 * block all signals, then fork the dumper daemon; the parent records
 * the daemon pid in the shared area and restores its signal mask.
 * NOTE(review): this extraction is missing the ret/pid declarations,
 * the fork() call itself (pid is used but never visibly assigned), the
 * initial-value argument of sem_init, the error-branch heads, the
 * setsid()/nice() calls in the child, and closing braces.
 */
void ltt_rw_init(void)
	struct ltt_trace_info *shared_trace_info;
	sigset_t set, oldset;
	pid_t l_traced_pid = getpid();
	pid_t l_traced_tid = gettid();

	/* parent : create the shared memory map */
	/* NOTE(review): the mmap return is not checked against MAP_FAILED,
	 * and fd 0 with MAP_ANONYMOUS should portably be -1 — confirm. */
	shared_trace_info = mmap(0, sizeof(*thread_trace_info),
		PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
	shared_trace_info->init =0;
	shared_trace_info->filter =0;
	shared_trace_info->daemon_id =0;
	shared_trace_info->nesting =0;
	memset(&shared_trace_info->channel.process, 0,
		sizeof(shared_trace_info->channel.process));

	/* Process-shared (pshared=1) semaphore: writer waits, daemon posts. */
	ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
		perror("error in sem_init");
	shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
	shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
	shared_trace_info->channel.process.start =
		shared_trace_info->channel.process_buf;
	ltt_buffer_begin_callback(&shared_trace_info->channel.process,
		ltt_get_timestamp(), 0);

	/* Mark the shared area ready for the daemon. */
	shared_trace_info->init = 1;

	/* Disable signals */
	ret = sigfillset(&set);
		dbg_printf("LTT Error in sigfillset\n");
	ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
		dbg_printf("LTT Error in pthread_sigmask\n");

	/* Parent branch after the (missing) fork(): record the daemon pid. */
	shared_trace_info->daemon_id = pid;
	thread_trace_info = shared_trace_info;

	/* Restore the original signal mask. */
	ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
		dbg_printf("LTT Error in pthread_sigmask\n");
	} else if(pid == 0) {
		/* Child : become the dumper daemon for this thread. */
		role = LTT_ROLE_READER;
		//Not a good idea to renice, unless futex wait eventually implement
		//priority inheritence.
		// perror("Error in nice");
		perror("Error setting sid");
		ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
	/* Should never return */
	perror("LTT Error in forking ltt-usertrace-fast");
/* Per-thread cleanup context registered by ltt_thread_init() with
 * _pthread_cleanup_push. */
static __thread struct _pthread_cleanup_buffer cleanup_buffer;
/*
 * Per-thread initialization: register ltt_usertrace_fast_cleanup to run
 * at thread cancellation/exit.
 * NOTE(review): lines are missing after the push in this extraction
 * (likely the per-thread ltt_rw_init() call and the closing brace) —
 * confirm against the full source.
 */
void ltt_thread_init(void)
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
/*
 * Library constructor: runs before main() in every process linking this
 * library.
 * NOTE(review): lines are missing after the printf in this extraction
 * (likely the ltt_rw_init() call and the closing brace) — confirm
 * against the full source.
 */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
	dbg_printf("LTT usertrace-fast init\n");
587 void __attribute__((destructor
)) __ltt_usertrace_fast_fini(void)
589 if(role
== LTT_ROLE_WRITER
) {
590 dbg_printf("LTT usertrace-fast fini\n");
591 ltt_usertrace_fast_cleanup(NULL
);