X-Git-Url: http://git.lttng.org./?a=blobdiff_plain;f=usertrace-fast%2Fltt-usertrace-fast.c;h=55ba880ab8684f9038bc05f3f9ba4fba64fe0edf;hb=8b30e7bcf2fe4ab9afecf8012c180aa7d4424a34;hp=42dde3e50695f53fd73448a4bfcd1856fe1b9777;hpb=1c48e587d7940c6f370ce4a9d01dd8b3f3fd7a32;p=lttv.git

diff --git a/usertrace-fast/ltt-usertrace-fast.c b/usertrace-fast/ltt-usertrace-fast.c
index 42dde3e5..55ba880a 100644
--- a/usertrace-fast/ltt-usertrace-fast.c
+++ b/usertrace-fast/ltt-usertrace-fast.c
@@ -1,11 +1,48 @@
-
-/* LTTng user-space "fast" tracing code
+/* LTTng user-space "fast" library
+ *
+ * This daemon is spawned by each traced thread (to share the mmap).
+ *
+ * Its job is to dump periodically this buffer to disk (when it receives a
+ * SIGUSR1 from its parent).
+ *
+ * It uses the control information in the shared memory area (producer/consumer
+ * count).
+ *
+ * When the parent thread dies (yes, those thing may happen) ;) , this daemon
+ * will flush the last buffer and write it to disk.
+ *
+ * Supplement note for streaming : the daemon is responsible for flushing
+ * periodically the buffer if it is streaming data.
+ * 
  *
+ * Notes :
+ * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
+ * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
+ * do not use it : we will use a shared mmap() instead which is passed through
+ * the fork().
+ * MAP_SHARED mmap segment. Updated when msync or munmap are called.
+ * MAP_ANONYMOUS.
+ * Memory  mapped  by  mmap()  is  preserved across fork(2), with the same
+ *   attributes.
+ * 
+ * Eventually, there will be two mode :
+ * * Slow thread spawn : a fork() is done for each new thread. If the process
+ *   dies, the data is not lost.
+ * * Fast thread spawn : a pthread_create() is done by the application for each
+ *   new thread.
+ *
+ * We use a timer to check periodically if the parent died. I think it is less
+ * intrusive than a ptrace() on the parent, which would get every signal. The
+ * side effect of this is that we won't be notified if the parent does an
+ * exec(). In this case, we will just sit there until the parent exits.
+ * 
+ *   
  * Copyright 2006 Mathieu Desnoyers
  *
  */
 
-
+#define _GNU_SOURCE
+#define LTT_TRACE
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
@@ -17,61 +54,517 @@
 #include <pthread.h>
 #include <malloc.h>
 #include <string.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#include <asm/atomic.h>
+#include <asm/timex.h>	//for get_cycles()
+
+_syscall0(pid_t,gettid)
 
 #include "ltt-usertrace-fast.h"
 
-/* TLS for the trace buffer
- * http://www.dis.com/gnu/gcc/C--98-Thread-Local-Edits.html
- *
- * Add after paragraph 4
- *
- *     The storage for an object of thread storage duration shall be statically
- *     initialized before the first statement of the thread startup function. An
- *     object of thread storage duration shall not require dynamic
- *     initialization.
- */
+enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
 
-__thread struct lttng_trace_info lttng_trace_info =
+/* Writer (the traced application) */
+
+__thread struct ltt_trace_info *thread_trace_info = NULL;
+
+void ltt_usertrace_fast_buffer_switch(void)
 {
-	.init = 0,
-	.filter = 0,
-	.nesting = ATOMIC_INIT(0),
-	.channel.facilities = 
-		{	ATOMIC_INIT(0),
-			ATOMIC_INIT(0),
-			ATOMIC_INIT(0),
-			ATOMIC_INIT(0)
-		},
-	.channel.cpu = 
-		{ ATOMIC_INIT(0),
-			ATOMIC_INIT(0),
-			ATOMIC_INIT(0),
-			ATOMIC_INIT(0)
-		},
-};
-
-
-static void ltt_cleanup_thread(void *arg)
+	struct ltt_trace_info *tmp = thread_trace_info;
+	if(tmp)
+		kill(tmp->daemon_id, SIGUSR1);
+}
+
+/* The cleanup should never be called from a signal handler */
+static void ltt_usertrace_fast_cleanup(void *arg)
 {
-	/* Flush the data in the lttng_trace_info */
+	struct ltt_trace_info *tmp = thread_trace_info;
+	if(tmp) {
+		thread_trace_info = NULL;
+		kill(tmp->daemon_id, SIGUSR2);
+		munmap(tmp, sizeof(*tmp));
+	}
+}
+
+/* Reader (the disk dumper daemon) */
 
+static pid_t traced_pid = 0;
+static pid_t traced_tid = 0;
+static int parent_exited = 0;
+
+/* signal handling */
+static void handler_sigusr1(int signo)
+{
+	printf("LTT Signal %d received : parent buffer switch.\n", signo);
 }
 
+static void handler_sigusr2(int signo)
+{
+	printf("LTT Signal %d received : parent exited.\n", signo);
+	parent_exited = 1;
+}
 
-void ltt_thread_init(void)
+static void handler_sigalarm(int signo)
 {
-	_pthread_cleanup_push(&lttng_trace_info.cleanup,
-			ltt_cleanup_thread, NULL);
+	printf("LTT Signal %d received\n", signo);
+
+	if(getppid() != traced_pid) {
+		/* Parent died */
+		printf("LTT Parent %lu died, cleaning up\n", traced_pid);
+		traced_pid = 0;
+	}
+	alarm(3);
 }
 
+/* Do a buffer switch. Don't switch if buffer is completely empty */
+static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
+{
+	uint64_t tsc;
+	int offset_begin, offset_end, offset_old;
+	int reserve_commit_diff;
+	int consumed_old, consumed_new;
+	int commit_count, reserve_count;
+	int end_switch_old;
 
-void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
+	do {
+		offset_old = atomic_read(&ltt_buf->offset);
+		offset_begin = offset_old;
+		end_switch_old = 0;
+		tsc = ltt_get_timestamp();
+		if(tsc == 0) {
+			/* Error in getting the timestamp : should not happen : it would
+			 * mean we are called from an NMI during a write seqlock on xtime. */
+			return;
+		}
+
+		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
+			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
+			end_switch_old = 1;
+		} else {
+      /* we do not have to switch : buffer is empty */
+      return;
+    }
+		if(mode == FORCE_ACTIVE)
+			offset_begin += ltt_subbuf_header_len(ltt_buf);
+		/* Always begin_switch in FORCE_ACTIVE mode */
+
+		/* Test new buffer integrity */
+		reserve_commit_diff = 
+			atomic_read(
+				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
+			- atomic_read(
+					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
+		if(reserve_commit_diff == 0) {
+      /* Next buffer not corrupted. */ 
+      if(mode == FORCE_ACTIVE
+				&& (offset_begin-atomic_read(&ltt_buf->consumed))
+											>= ltt_buf->alloc_size) {
+      	/* We do not overwrite non consumed buffers and we are full : ignore
+        	 switch while tracing is active. */ 
+        return;
+      }   
+    } else { 
+      /* Next subbuffer corrupted. Force pushing reader even in normal mode */
+    }
+			
+		offset_end = offset_begin;
+	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
+							!= offset_old);
+
+
+	if(mode == FORCE_ACTIVE) {
+		/* Push the reader if necessary */
+		do {
+			consumed_old = atomic_read(&ltt_buf->consumed);
+			/* If buffer is in overwrite mode, push the reader consumed count if
+				 the write position has reached it and we are not at the first
+				 iteration (don't push the reader farther than the writer). 
+				 This operation can be done concurrently by many writers in the
+				 same buffer, the writer being at the fartest write position sub-buffer
+				 index in the buffer being the one which will win this loop. */
+			/* If the buffer is not in overwrite mode, pushing the reader only
+				 happen if a sub-buffer is corrupted */
+			if((SUBBUF_TRUNC(offset_end, ltt_buf) 
+					- SUBBUF_TRUNC(consumed_old, ltt_buf)) 
+							>= ltt_buf->alloc_size)
+				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
+			else {
+				consumed_new = consumed_old;
+				break;
+			}
+		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
+				!= consumed_old);
+
+		if(consumed_old != consumed_new) {
+			/* Reader pushed : we are the winner of the push, we can therefore
+				 reequilibrate reserve and commit. Atomic increment of the commit
+				 count permits other writers to play around with this variable
+				 before us. We keep track of corrupted_subbuffers even in overwrite
+				 mode :
+				 we never want to write over a non completely committed sub-buffer : 
+				 possible causes : the buffer size is too low compared to the unordered
+				 data input, or there is a writer who died between the reserve and the
+				 commit. */
+			if(reserve_commit_diff) {
+				/* We have to alter the sub-buffer commit count : a sub-buffer is
+					 corrupted */
+				atomic_add(reserve_commit_diff,
+								&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
+				atomic_inc(&ltt_buf->corrupted_subbuffers);
+			}
+		}
+	}
+
+	/* Always switch */
+
+	if(end_switch_old) {
+		/* old subbuffer */
+		/* Concurrency safe because we are the last and only thread to alter this
+			 sub-buffer. As long as it is not delivered and read, no other thread can
+			 alter the offset, alter the reserve_count or call the
+			 client_buffer_end_callback on this sub-buffer.
+			 The only remaining threads could be the ones with pending commits. They
+			 will have to do the deliver themself.
+			 Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
+			 commit and reserve counts. We keep a corrupted sub-buffers count and push
+			 the readers across these sub-buffers.
+			 Not concurrency safe if a writer is stalled in a subbuffer and
+			 another writer switches in, finding out it's corrupted. The result will be
+			 than the old (uncommited) subbuffer will be declared corrupted, and that
+			 the new subbuffer will be declared corrupted too because of the commit
+			 count adjustment.
+			 Offset old should never be 0. */
+		ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
+				SUBBUF_INDEX((offset_old), ltt_buf));
+		/* Setting this reserve_count will allow the sub-buffer to be delivered by
+			 the last committer. */
+		reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
+                                                      ltt_buf) + 1),
+										&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
+                                                          ltt_buf)]);
+		if(reserve_count == atomic_read(
+				&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
+			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
+		}
+	}
+	
+	if(mode == FORCE_ACTIVE) {
+		/* New sub-buffer */
+		/* This code can be executed unordered : writers may already have written
+			 to the sub-buffer before this code gets executed, caution. */
+		/* The commit makes sure that this code is executed before the deliver
+			 of this sub-buffer */
+		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
+		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
+								 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
+		/* Check if the written buffer has to be delivered */
+		if(commit_count	== atomic_read(
+					&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
+			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
+		}
+	}
+
+}
+
+static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
+		unsigned int *offset)
+{
+	unsigned int consumed_old, consumed_idx;
+	consumed_old = atomic_read(&ltt_buf->consumed);
+	consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);
+	
+	if(atomic_read(&ltt_buf->commit_count[consumed_idx])
+		!= atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
+		return -EAGAIN;
+	}
+	if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
+				-SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
+		return -EAGAIN;
+	}
+	
+	*offset = consumed_old;
+
+	return 0;
+}
+
+static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
+		unsigned int offset)
+{
+	unsigned int consumed_old, consumed_new;
+	int ret;
+
+	consumed_old = offset;
+	consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
+	if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
+			!= consumed_old) {
+		/* We have been pushed by the writer : the last buffer read _is_
+		 * corrupted!
+		 * It can also happen if this is a buffer we never got. */
+		return -EIO;
+	} else {
+		if(atomic_read(&ltt_buf->full) == 1) {
+			/* tell the client that buffer is now unfull */
+			ret = futex((unsigned long)&ltt_buf->full,
+					FUTEX_WAKE, 1, 0, 0, 0);
+			if(ret != 1) {
+				printf("LTT warning : race condition : writer not waiting or too many writers\n");
+			}
+			atomic_set(&ltt_buf->full, 0);
+		}
+	}
+}
+
+static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
 {
+	unsigned int consumed_old;
 	int err;
+	printf("LTT read buffer\n");
 
-  printf("LTTng usertrace-fast init\n");
 
-	ltt_thread_init();
+	err = ltt_buffer_get(ltt_buf, &consumed_old);
+	if(err != 0) {
+		if(err != -EAGAIN) printf("LTT Reserving sub buffer failed\n");
+		goto get_error;
+	}
 
+	err = TEMP_FAILURE_RETRY(write(fd,
+				ltt_buf->start 
+					+ (consumed_old & ((ltt_buf->alloc_size)-1)),
+				ltt_buf->subbuf_size));
+
+	if(err < 0) {
+		perror("Error in writing to file");
+		goto write_error;
+	}
+#if 0
+	err = fsync(pair->trace);
+	if(err < 0) {
+		ret = errno;
+		perror("Error in writing to file");
+		goto write_error;
+	}
+#endif //0
+write_error:
+	err = ltt_buffer_put(ltt_buf, consumed_old);
+
+	if(err != 0) {
+		if(err == -EIO) {
+			printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
+			/* FIXME : we may delete the last written buffer if we wish. */
+		}
+		goto get_error;
+	}
+
+get_error:
+	return err;
+}
+
+/* This function is called by ltt_rw_init which has signals blocked */
+static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
+		sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
+{
+	struct sigaction act;
+	int ret;
+	int fd_fac;
+	int fd_cpu;
+	char outfile_name[PATH_MAX];
+	char identifier_name[PATH_MAX];
+
+
+	traced_pid = l_traced_pid;
+	traced_tid = l_traced_tid;
+
+	printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
+			shared_trace_info->init, getpid(), traced_pid, traced_tid);
+
+	act.sa_handler = handler_sigusr1;
+	act.sa_flags = 0;
+	sigemptyset(&(act.sa_mask));
+	sigaddset(&(act.sa_mask), SIGUSR1);
+	sigaction(SIGUSR1, &act, NULL);
+
+	act.sa_handler = handler_sigusr2;
+	act.sa_flags = 0;
+	sigemptyset(&(act.sa_mask));
+	sigaddset(&(act.sa_mask), SIGUSR2);
+	sigaction(SIGUSR2, &act, NULL);
+
+	act.sa_handler = handler_sigalarm;
+	act.sa_flags = 0;
+	sigemptyset(&(act.sa_mask));
+	sigaddset(&(act.sa_mask), SIGALRM);
+	sigaction(SIGALRM, &act, NULL);
+
+	/* Enable signals */
+	ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+	if(ret) {
+		printf("LTT Error in pthread_sigmask\n");
+	}
+
+	alarm(3);
+
+	/* Open output files */
+	umask(00000);
+	ret = mkdir(LTT_USERTRACE_ROOT, 0777);
+	if(ret < 0 && errno != EEXIST) {
+		perror("LTT Error in creating output (mkdir)");
+		exit(-1);
+	}
+	ret = chdir(LTT_USERTRACE_ROOT);
+	if(ret < 0) {
+		perror("LTT Error in creating output (chdir)");
+		exit(-1);
+	}
+	snprintf(identifier_name, PATH_MAX-1,	"%lu.%lu.%llu",
+			traced_tid, traced_pid, get_cycles());
+	snprintf(outfile_name, PATH_MAX-1,	"facilities-%s", identifier_name);
+	fd_fac = creat(outfile_name, 0644);
+
+	snprintf(outfile_name, PATH_MAX-1,	"cpu-%s", identifier_name);
+	fd_cpu = creat(outfile_name, 0644);
+	
+	
+	while(1) {
+		pause();
+		if(traced_pid == 0) break; /* parent died */
+		if(parent_exited) break;
+		printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
+	
+		do {
+			ret = read_subbuffer(&shared_trace_info->channel.cpu, fd_cpu);
+		} while(ret == 0);
+
+		do {
+			ret = read_subbuffer(&shared_trace_info->channel.facilities, fd_fac);
+		} while(ret == 0);
+	}
+
+	/* The parent thread is dead and we have finished with the buffer */
+
+	/* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
+	 * there is no writer. */
+	flush_buffer(&shared_trace_info->channel.cpu, FORCE_FLUSH);
+	do {
+		ret = read_subbuffer(&shared_trace_info->channel.cpu, fd_cpu);
+	} while(ret == 0);
+
+
+	flush_buffer(&shared_trace_info->channel.facilities, FORCE_FLUSH);
+	do {
+		ret = read_subbuffer(&shared_trace_info->channel.facilities, fd_fac);
+	} while(ret == 0);
+
+	close(fd_fac);
+	close(fd_cpu);
+	
+	munmap(shared_trace_info, sizeof(*shared_trace_info));
+	
+	exit(0);
+}
+
+
+/* Reader-writer initialization */
+
+static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER }
+	role = LTT_ROLE_WRITER;
+
+
+void ltt_rw_init(void)
+{
+	pid_t pid;
+	struct ltt_trace_info *shared_trace_info;
+	int ret;
+	sigset_t set, oldset;
+	pid_t l_traced_pid = getpid();
+	pid_t l_traced_tid = gettid();
+
+	/* parent : create the shared memory map */
+	shared_trace_info = mmap(0, sizeof(*thread_trace_info),
+			PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
+	memset(shared_trace_info, 0, sizeof(*shared_trace_info));
+	/* Tricky semaphore : is in a shared memory space, so it's ok for a fast
+	 * mutex (futex). */
+	atomic_set(&shared_trace_info->channel.facilities.full, 0);
+	shared_trace_info->channel.facilities.alloc_size = LTT_BUF_SIZE_FACILITIES;
+	shared_trace_info->channel.facilities.subbuf_size = LTT_SUBBUF_SIZE_FACILITIES;
+	shared_trace_info->channel.facilities.start =
+		shared_trace_info->channel.facilities_buf;
+	ltt_buffer_begin_callback(&shared_trace_info->channel.facilities,
+			ltt_get_timestamp(), 0);
+
+	atomic_set(&shared_trace_info->channel.cpu.full, 0);
+	shared_trace_info->channel.cpu.alloc_size = LTT_BUF_SIZE_CPU;
+	shared_trace_info->channel.cpu.subbuf_size = LTT_SUBBUF_SIZE_CPU;
+	shared_trace_info->channel.cpu.start = shared_trace_info->channel.cpu_buf;
+	ltt_buffer_begin_callback(&shared_trace_info->channel.cpu,
+			ltt_get_timestamp(), 0);
+	
+	shared_trace_info->init = 1;
+
+	/* Disable signals */
+  ret = sigfillset(&set);
+  if(ret) {
+    printf("LTT Error in sigfillset\n");
+  } 
+	
+	
+  ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
+  if(ret) {
+    printf("LTT Error in pthread_sigmask\n");
+  }
+
+	pid = fork();
+	if(pid > 0) {
+		/* Parent */
+		shared_trace_info->daemon_id = pid;
+		thread_trace_info = shared_trace_info;
+
+		/* Enable signals */
+		ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+		if(ret) {
+			printf("LTT Error in pthread_sigmask\n");
+		}
+	} else if(pid == 0) {
+		/* Child */
+		role = LTT_ROLE_READER;
+		ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
+					l_traced_tid);
+		/* Should never return */
+		exit(-1);
+	} else if(pid < 0) {
+		/* fork error */
+		perror("LTT Error in forking ltt-usertrace-fast");
+	}
+}
+
+static __thread struct _pthread_cleanup_buffer cleanup_buffer;
+
+void ltt_thread_init(void)
+{
+	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
+	ltt_rw_init();
+}
+	
+void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
+{
+  printf("LTT usertrace-fast init\n");
+
+	ltt_rw_init();
+}
+
+void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
+{
+	if(role == LTT_ROLE_WRITER) {
+	  printf("LTT usertrace-fast fini\n");
+		ltt_usertrace_fast_cleanup(NULL);
+	}
 }