X-Git-Url: http://git.lttng.org./?a=blobdiff_plain;f=usertrace-fast%2Fltt-usertrace-fast.c;h=55ba880ab8684f9038bc05f3f9ba4fba64fe0edf;hb=8b30e7bcf2fe4ab9afecf8012c180aa7d4424a34;hp=42dde3e50695f53fd73448a4bfcd1856fe1b9777;hpb=1c48e587d7940c6f370ce4a9d01dd8b3f3fd7a32;p=lttv.git diff --git a/usertrace-fast/ltt-usertrace-fast.c b/usertrace-fast/ltt-usertrace-fast.c index 42dde3e5..55ba880a 100644 --- a/usertrace-fast/ltt-usertrace-fast.c +++ b/usertrace-fast/ltt-usertrace-fast.c @@ -1,11 +1,48 @@ - -/* LTTng user-space "fast" tracing code +/* LTTng user-space "fast" library + * + * This daemon is spawned by each traced thread (to share the mmap). + * + * Its job is to dump periodically this buffer to disk (when it receives a + * SIGUSR1 from its parent). + * + * It uses the control information in the shared memory area (producer/consumer + * count). + * + * When the parent thread dies (yes, those thing may happen) ;) , this daemon + * will flush the last buffer and write it to disk. + * + * Supplement note for streaming : the daemon is responsible for flushing + * periodically the buffer if it is streaming data. + * * + * Notes : + * shm memory is typically limited to 4096 units (system wide limit SHMMNI in + * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we + * do not use it : we will use a shared mmap() instead which is passed through + * the fork(). + * MAP_SHARED mmap segment. Updated when msync or munmap are called. + * MAP_ANONYMOUS. + * Memory mapped by mmap() is preserved across fork(2), with the same + * attributes. + * + * Eventually, there will be two mode : + * * Slow thread spawn : a fork() is done for each new thread. If the process + * dies, the data is not lost. + * * Fast thread spawn : a pthread_create() is done by the application for each + * new thread. + * + * We use a timer to check periodically if the parent died. I think it is less + * intrusive than a ptrace() on the parent, which would get every signal. The + * side effect of this is that we won't be notified if the parent does an + * exec(). In this case, we will just sit there until the parent exits. + * + * * Copyright 2006 Mathieu Desnoyers * */ - +#define _GNU_SOURCE +#define LTT_TRACE #include #include #include @@ -17,61 +54,517 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include //for get_cycles() + +_syscall0(pid_t,gettid) #include "ltt-usertrace-fast.h" -/* TLS for the trace buffer - * http://www.dis.com/gnu/gcc/C--98-Thread-Local-Edits.html - * - * Add after paragraph 4 - * - * The storage for an object of thread storage duration shall be statically - * initialized before the first statement of the thread startup function. An - * object of thread storage duration shall not require dynamic - * initialization. - */ +enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH }; -__thread struct lttng_trace_info lttng_trace_info = +/* Writer (the traced application) */ + +__thread struct ltt_trace_info *thread_trace_info = NULL; + +void ltt_usertrace_fast_buffer_switch(void) { - .init = 0, - .filter = 0, - .nesting = ATOMIC_INIT(0), - .channel.facilities = - { ATOMIC_INIT(0), - ATOMIC_INIT(0), - ATOMIC_INIT(0), - ATOMIC_INIT(0) - }, - .channel.cpu = - { ATOMIC_INIT(0), - ATOMIC_INIT(0), - ATOMIC_INIT(0), - ATOMIC_INIT(0) - }, -}; - - -static void ltt_cleanup_thread(void *arg) + struct ltt_trace_info *tmp = thread_trace_info; + if(tmp) + kill(tmp->daemon_id, SIGUSR1); +} + +/* The cleanup should never be called from a signal handler */ +static void ltt_usertrace_fast_cleanup(void *arg) { - /* Flush the data in the lttng_trace_info */ + struct ltt_trace_info *tmp = thread_trace_info; + if(tmp) { + thread_trace_info = NULL; + kill(tmp->daemon_id, SIGUSR2); + munmap(tmp, sizeof(*tmp)); + } +} + +/* Reader (the disk dumper daemon) */ +static pid_t traced_pid = 0; +static pid_t traced_tid = 0; +static int parent_exited = 0; + +/* signal handling */ +static void handler_sigusr1(int signo) +{ + printf("LTT Signal %d received : parent buffer switch.\n", signo); } +static void handler_sigusr2(int signo) +{ + printf("LTT Signal %d received : parent exited.\n", signo); + parent_exited = 1; +} -void ltt_thread_init(void) +static void handler_sigalarm(int signo) { - _pthread_cleanup_push(<tng_trace_info.cleanup, - ltt_cleanup_thread, NULL); + printf("LTT Signal %d received\n", signo); + + if(getppid() != traced_pid) { + /* Parent died */ + printf("LTT Parent %lu died, cleaning up\n", traced_pid); + traced_pid = 0; + } + alarm(3); } +/* Do a buffer switch. Don't switch if buffer is completely empty */ +static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode) +{ + uint64_t tsc; + int offset_begin, offset_end, offset_old; + int reserve_commit_diff; + int consumed_old, consumed_new; + int commit_count, reserve_count; + int end_switch_old; -void __attribute__((constructor)) __ltt_usertrace_fast_init(void) + do { + offset_old = atomic_read(<t_buf->offset); + offset_begin = offset_old; + end_switch_old = 0; + tsc = ltt_get_timestamp(); + if(tsc == 0) { + /* Error in getting the timestamp : should not happen : it would + * mean we are called from an NMI during a write seqlock on xtime. */ + return; + } + + if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) { + offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf); + end_switch_old = 1; + } else { + /* we do not have to switch : buffer is empty */ + return; + } + if(mode == FORCE_ACTIVE) + offset_begin += ltt_subbuf_header_len(ltt_buf); + /* Always begin_switch in FORCE_ACTIVE mode */ + + /* Test new buffer integrity */ + reserve_commit_diff = + atomic_read( + <t_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)]) + - atomic_read( + <t_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]); + if(reserve_commit_diff == 0) { + /* Next buffer not corrupted. */ + if(mode == FORCE_ACTIVE + && (offset_begin-atomic_read(<t_buf->consumed)) + >= ltt_buf->alloc_size) { + /* We do not overwrite non consumed buffers and we are full : ignore + switch while tracing is active. */ + return; + } + } else { + /* Next subbuffer corrupted. Force pushing reader even in normal mode */ + } + + offset_end = offset_begin; + } while(atomic_cmpxchg(<t_buf->offset, offset_old, offset_end) + != offset_old); + + + if(mode == FORCE_ACTIVE) { + /* Push the reader if necessary */ + do { + consumed_old = atomic_read(<t_buf->consumed); + /* If buffer is in overwrite mode, push the reader consumed count if + the write position has reached it and we are not at the first + iteration (don't push the reader farther than the writer). + This operation can be done concurrently by many writers in the + same buffer, the writer being at the fartest write position sub-buffer + index in the buffer being the one which will win this loop. */ + /* If the buffer is not in overwrite mode, pushing the reader only + happen if a sub-buffer is corrupted */ + if((SUBBUF_TRUNC(offset_end, ltt_buf) + - SUBBUF_TRUNC(consumed_old, ltt_buf)) + >= ltt_buf->alloc_size) + consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf); + else { + consumed_new = consumed_old; + break; + } + } while(atomic_cmpxchg(<t_buf->consumed, consumed_old, consumed_new) + != consumed_old); + + if(consumed_old != consumed_new) { + /* Reader pushed : we are the winner of the push, we can therefore + reequilibrate reserve and commit. Atomic increment of the commit + count permits other writers to play around with this variable + before us. We keep track of corrupted_subbuffers even in overwrite + mode : + we never want to write over a non completely committed sub-buffer : + possible causes : the buffer size is too low compared to the unordered + data input, or there is a writer who died between the reserve and the + commit. */ + if(reserve_commit_diff) { + /* We have to alter the sub-buffer commit count : a sub-buffer is + corrupted */ + atomic_add(reserve_commit_diff, + <t_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]); + atomic_inc(<t_buf->corrupted_subbuffers); + } + } + } + + /* Always switch */ + + if(end_switch_old) { + /* old subbuffer */ + /* Concurrency safe because we are the last and only thread to alter this + sub-buffer. As long as it is not delivered and read, no other thread can + alter the offset, alter the reserve_count or call the + client_buffer_end_callback on this sub-buffer. + The only remaining threads could be the ones with pending commits. They + will have to do the deliver themself. + Not concurrency safe in overwrite mode. We detect corrupted subbuffers with + commit and reserve counts. We keep a corrupted sub-buffers count and push + the readers across these sub-buffers. + Not concurrency safe if a writer is stalled in a subbuffer and + another writer switches in, finding out it's corrupted. The result will be + than the old (uncommited) subbuffer will be declared corrupted, and that + the new subbuffer will be declared corrupted too because of the commit + count adjustment. + Offset old should never be 0. */ + ltt_buffer_end_callback(ltt_buf, tsc, offset_old, + SUBBUF_INDEX((offset_old), ltt_buf)); + /* Setting this reserve_count will allow the sub-buffer to be delivered by + the last committer. */ + reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1), + ltt_buf) + 1), + <t_buf->reserve_count[SUBBUF_INDEX((offset_old), + ltt_buf)]); + if(reserve_count == atomic_read( + <t_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) { + ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL); + } + } + + if(mode == FORCE_ACTIVE) { + /* New sub-buffer */ + /* This code can be executed unordered : writers may already have written + to the sub-buffer before this code gets executed, caution. */ + /* The commit makes sure that this code is executed before the deliver + of this sub-buffer */ + ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf)); + commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf), + <t_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]); + /* Check if the written buffer has to be delivered */ + if(commit_count == atomic_read( + <t_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) { + ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL); + } + } + +} + +static inline int ltt_buffer_get(struct ltt_buf *ltt_buf, + unsigned int *offset) +{ + unsigned int consumed_old, consumed_idx; + consumed_old = atomic_read(<t_buf->consumed); + consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf); + + if(atomic_read(<t_buf->commit_count[consumed_idx]) + != atomic_read(<t_buf->reserve_count[consumed_idx])) { + return -EAGAIN; + } + if((SUBBUF_TRUNC(atomic_read(<t_buf->offset), ltt_buf) + -SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) { + return -EAGAIN; + } + + *offset = consumed_old; + + return 0; +} + +static inline int ltt_buffer_put(struct ltt_buf *ltt_buf, + unsigned int offset) +{ + unsigned int consumed_old, consumed_new; + int ret; + + consumed_old = offset; + consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf); + if(atomic_cmpxchg(<t_buf->consumed, consumed_old, consumed_new) + != consumed_old) { + /* We have been pushed by the writer : the last buffer read _is_ + * corrupted! + * It can also happen if this is a buffer we never got. */ + return -EIO; + } else { + if(atomic_read(<t_buf->full) == 1) { + /* tell the client that buffer is now unfull */ + ret = futex((unsigned long)<t_buf->full, + FUTEX_WAKE, 1, 0, 0, 0); + if(ret != 1) { + printf("LTT warning : race condition : writer not waiting or too many writers\n"); + } + atomic_set(<t_buf->full, 0); + } + } +} + +static int read_subbuffer(struct ltt_buf *ltt_buf, int fd) { + unsigned int consumed_old; int err; + printf("LTT read buffer\n"); - printf("LTTng usertrace-fast init\n"); - ltt_thread_init(); + err = ltt_buffer_get(ltt_buf, &consumed_old); + if(err != 0) { + if(err != -EAGAIN) printf("LTT Reserving sub buffer failed\n"); + goto get_error; + } + err = TEMP_FAILURE_RETRY(write(fd, + ltt_buf->start + + (consumed_old & ((ltt_buf->alloc_size)-1)), + ltt_buf->subbuf_size)); + + if(err < 0) { + perror("Error in writing to file"); + goto write_error; + } +#if 0 + err = fsync(pair->trace); + if(err < 0) { + ret = errno; + perror("Error in writing to file"); + goto write_error; + } +#endif //0 +write_error: + err = ltt_buffer_put(ltt_buf, consumed_old); + + if(err != 0) { + if(err == -EIO) { + printf("Reader has been pushed by the writer, last subbuffer corrupted.\n"); + /* FIXME : we may delete the last written buffer if we wish. */ + } + goto get_error; + } + +get_error: + return err; +} + +/* This function is called by ltt_rw_init which has signals blocked */ +static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info, + sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid) +{ + struct sigaction act; + int ret; + int fd_fac; + int fd_cpu; + char outfile_name[PATH_MAX]; + char identifier_name[PATH_MAX]; + + + traced_pid = l_traced_pid; + traced_tid = l_traced_tid; + + printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n", + shared_trace_info->init, getpid(), traced_pid, traced_tid); + + act.sa_handler = handler_sigusr1; + act.sa_flags = 0; + sigemptyset(&(act.sa_mask)); + sigaddset(&(act.sa_mask), SIGUSR1); + sigaction(SIGUSR1, &act, NULL); + + act.sa_handler = handler_sigusr2; + act.sa_flags = 0; + sigemptyset(&(act.sa_mask)); + sigaddset(&(act.sa_mask), SIGUSR2); + sigaction(SIGUSR2, &act, NULL); + + act.sa_handler = handler_sigalarm; + act.sa_flags = 0; + sigemptyset(&(act.sa_mask)); + sigaddset(&(act.sa_mask), SIGALRM); + sigaction(SIGALRM, &act, NULL); + + /* Enable signals */ + ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); + if(ret) { + printf("LTT Error in pthread_sigmask\n"); + } + + alarm(3); + + /* Open output files */ + umask(00000); + ret = mkdir(LTT_USERTRACE_ROOT, 0777); + if(ret < 0 && errno != EEXIST) { + perror("LTT Error in creating output (mkdir)"); + exit(-1); + } + ret = chdir(LTT_USERTRACE_ROOT); + if(ret < 0) { + perror("LTT Error in creating output (chdir)"); + exit(-1); + } + snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu", + traced_tid, traced_pid, get_cycles()); + snprintf(outfile_name, PATH_MAX-1, "facilities-%s", identifier_name); + fd_fac = creat(outfile_name, 0644); + + snprintf(outfile_name, PATH_MAX-1, "cpu-%s", identifier_name); + fd_cpu = creat(outfile_name, 0644); + + + while(1) { + pause(); + if(traced_pid == 0) break; /* parent died */ + if(parent_exited) break; + printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid()); + + do { + ret = read_subbuffer(&shared_trace_info->channel.cpu, fd_cpu); + } while(ret == 0); + + do { + ret = read_subbuffer(&shared_trace_info->channel.facilities, fd_fac); + } while(ret == 0); + } + + /* The parent thread is dead and we have finished with the buffer */ + + /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know + * there is no writer. */ + flush_buffer(&shared_trace_info->channel.cpu, FORCE_FLUSH); + do { + ret = read_subbuffer(&shared_trace_info->channel.cpu, fd_cpu); + } while(ret == 0); + + + flush_buffer(&shared_trace_info->channel.facilities, FORCE_FLUSH); + do { + ret = read_subbuffer(&shared_trace_info->channel.facilities, fd_fac); + } while(ret == 0); + + close(fd_fac); + close(fd_cpu); + + munmap(shared_trace_info, sizeof(*shared_trace_info)); + + exit(0); +} + + +/* Reader-writer initialization */ + +static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER } + role = LTT_ROLE_WRITER; + + +void ltt_rw_init(void) +{ + pid_t pid; + struct ltt_trace_info *shared_trace_info; + int ret; + sigset_t set, oldset; + pid_t l_traced_pid = getpid(); + pid_t l_traced_tid = gettid(); + + /* parent : create the shared memory map */ + shared_trace_info = mmap(0, sizeof(*thread_trace_info), + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0); + memset(shared_trace_info, 0, sizeof(*shared_trace_info)); + /* Tricky semaphore : is in a shared memory space, so it's ok for a fast + * mutex (futex). */ + atomic_set(&shared_trace_info->channel.facilities.full, 0); + shared_trace_info->channel.facilities.alloc_size = LTT_BUF_SIZE_FACILITIES; + shared_trace_info->channel.facilities.subbuf_size = LTT_SUBBUF_SIZE_FACILITIES; + shared_trace_info->channel.facilities.start = + shared_trace_info->channel.facilities_buf; + ltt_buffer_begin_callback(&shared_trace_info->channel.facilities, + ltt_get_timestamp(), 0); + + atomic_set(&shared_trace_info->channel.cpu.full, 0); + shared_trace_info->channel.cpu.alloc_size = LTT_BUF_SIZE_CPU; + shared_trace_info->channel.cpu.subbuf_size = LTT_SUBBUF_SIZE_CPU; + shared_trace_info->channel.cpu.start = shared_trace_info->channel.cpu_buf; + ltt_buffer_begin_callback(&shared_trace_info->channel.cpu, + ltt_get_timestamp(), 0); + + shared_trace_info->init = 1; + + /* Disable signals */ + ret = sigfillset(&set); + if(ret) { + printf("LTT Error in sigfillset\n"); + } + + + ret = pthread_sigmask(SIG_BLOCK, &set, &oldset); + if(ret) { + printf("LTT Error in pthread_sigmask\n"); + } + + pid = fork(); + if(pid > 0) { + /* Parent */ + shared_trace_info->daemon_id = pid; + thread_trace_info = shared_trace_info; + + /* Enable signals */ + ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); + if(ret) { + printf("LTT Error in pthread_sigmask\n"); + } + } else if(pid == 0) { + /* Child */ + role = LTT_ROLE_READER; + ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid, + l_traced_tid); + /* Should never return */ + exit(-1); + } else if(pid < 0) { + /* fork error */ + perror("LTT Error in forking ltt-usertrace-fast"); + } +} + +static __thread struct _pthread_cleanup_buffer cleanup_buffer; + +void ltt_thread_init(void) +{ + _pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL); + ltt_rw_init(); +} + +void __attribute__((constructor)) __ltt_usertrace_fast_init(void) +{ + printf("LTT usertrace-fast init\n"); + + ltt_rw_init(); +} + +void __attribute__((destructor)) __ltt_usertrace_fast_fini(void) +{ + if(role == LTT_ROLE_WRITER) { + printf("LTT usertrace-fast fini\n"); + ltt_usertrace_fast_cleanup(NULL); + } }