--- /dev/null
+/*
+ * lttng-context-perf-counters.c
+ *
+ * LTTng UST performance monitoring counters (perf-counters) integration.
+ *
+ * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <pthread.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/perf_event.h>
+#include <lttng/ust-events.h>
+#include <lttng/ust-tracer.h>
+#include <lttng/ringbuffer-config.h>
+#include <urcu/system.h>
+#include <urcu/arch.h>
+#include <urcu/rculist.h>
+#include <helper.h>
+#include <urcu/ref.h>
+#include <usterr-signal-safe.h>
+#include <signal.h>
+#include "lttng-tracer-core.h"
+
+/*
+ * We use a global perf counter key and iterate on per-thread RCU lists
+ * of fields in the fast path. This is not, strictly speaking, the best
+ * fast-path complexity, but it ensures that session teardown versus
+ * thread exit is handled without races.
+ *
+ * Updates and traversals of thread_field_list are protected by the UST lock.
+ * Updates to rcu_field_list are protected by the UST lock.
+ */
+
+struct lttng_perf_counter_thread_field {
+ struct lttng_perf_counter_field *field; /* Back reference */
+ struct perf_event_mmap_page *pc;
+ struct cds_list_head thread_field_node; /* Per-field list of thread fields (node) */
+ struct cds_list_head rcu_field_node; /* RCU per-thread list of fields (node) */
+};
+
+struct lttng_perf_counter_thread {
+ struct cds_list_head rcu_field_list; /* RCU per-thread list of fields */
+};
+
+struct lttng_perf_counter_field {
+ struct perf_event_attr attr;
+ struct cds_list_head thread_field_list; /* Per-field list of thread fields */
+};
+
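+/* TSD key holding the per-thread struct lttng_perf_counter_thread. */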
+static pthread_key_t perf_counter_key;
+
+static
+size_t perf_counter_get_size(size_t offset)
+{
+ size_t size = 0;
+
+ size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
+ size += sizeof(uint64_t);
+ return size;
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+
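+/*
+ * Read a hardware performance counter directly from user space with the
+ * x86 RDPMC instruction: the counter index goes in ECX, and the 64-bit
+ * value comes back in EDX:EAX.
+ */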
+static
+uint64_t rdpmc(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((uint64_t) high) << 32;
+}
+
+#else /* defined(__x86_64__) || defined(__i386__) */
+
+#error "Perf event counters are only supported on x86 so far."
+
+#endif /* #else defined(__x86_64__) || defined(__i386__) */
+
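+/*
+ * Read the current counter value from the perf mmap control page. The
+ * kernel increments pc->lock around its updates (seqcount-style), so the
+ * read is retried until pc->lock is stable. pc->index is the hardware
+ * counter index plus one; zero means the counter cannot currently be
+ * read with RDPMC, in which case 0 is returned.
+ */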
+static
+uint64_t read_perf_counter(struct perf_event_mmap_page *pc)
+{
+ uint32_t seq, idx;
+ uint64_t count;
+
+ if (caa_unlikely(!pc))
+ return 0;
+
+ do {
+ seq = CMM_LOAD_SHARED(pc->lock);
+ cmm_barrier();
+
+ idx = pc->index;
+ if (idx)
+ count = pc->offset + rdpmc(idx - 1);
+ else
+ count = 0;
+
+ cmm_barrier();
+ } while (CMM_LOAD_SHARED(pc->lock) != seq);
+
+ return count;
+}
+
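+/* perf_event_open(2) has no glibc wrapper; issue the raw syscall. */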
+static
+int sys_perf_event_open(struct perf_event_attr *attr,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags)
+{
+ return syscall(SYS_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
+
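+/*
+ * Open a counter for the calling thread on any CPU (pid = 0, cpu = -1)
+ * and map the first page of the event, which exposes the index and
+ * offset metadata needed for RDPMC-based reads.
+ */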
+static
+struct perf_event_mmap_page *setup_perf(struct perf_event_attr *attr)
+{
+ void *perf_addr;
+ int fd;
+
+ fd = sys_perf_event_open(attr, 0, -1, -1, 0);
+ if (fd < 0)
+ return NULL;
+
+ perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
+ PROT_READ, MAP_SHARED, fd, 0);
+ if (perf_addr == MAP_FAILED)
+ perf_addr = NULL;
+ /* Close the fd; a successful mapping keeps the event alive. */
+ close(fd);
+ return perf_addr;
+}
+
+static
+void unmap_perf_page(struct perf_event_mmap_page *pc)
+{
+ int ret;
+
+ if (!pc)
+ return;
+ ret = munmap(pc, sizeof(struct perf_event_mmap_page));
+ if (ret < 0) {
+ PERROR("Error in munmap");
+ abort();
+ }
+}
+
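+/*
+ * Lazily allocate the per-thread perf counter state. All signals are
+ * blocked while checking and installing the thread-specific data, so a
+ * signal handler tracing on this thread cannot race with the lazy
+ * initialization.
+ */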
+static
+struct lttng_perf_counter_thread *alloc_perf_counter_thread(void)
+{
+ struct lttng_perf_counter_thread *perf_thread;
+ sigset_t newmask, oldmask;
+ int ret;
+
+ ret = sigfillset(&newmask);
+ if (ret)
+ abort();
+ ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
+ if (ret)
+ abort();
+ /* Check again with signals disabled */
+ perf_thread = pthread_getspecific(perf_counter_key);
+ if (perf_thread)
+ goto skip;
+ perf_thread = zmalloc(sizeof(*perf_thread));
+ if (!perf_thread)
+ abort();
+ CDS_INIT_LIST_HEAD(&perf_thread->rcu_field_list);
+ ret = pthread_setspecific(perf_counter_key, perf_thread);
+ if (ret)
+ abort();
+skip:
+ ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+ if (ret)
+ abort();
+ return perf_thread;
+}
+
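+/*
+ * Set up the per-thread counter mapping for this context field and link
+ * it on both the per-thread RCU list and the per-field list, with
+ * signals blocked and under the UST lock.
+ */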
+static
+struct lttng_perf_counter_thread_field *
+ add_thread_field(struct lttng_perf_counter_field *perf_field,
+ struct lttng_perf_counter_thread *perf_thread)
+{
+ struct lttng_perf_counter_thread_field *thread_field;
+ sigset_t newmask, oldmask;
+ int ret;
+
+ ret = sigfillset(&newmask);
+ if (ret)
+ abort();
+ ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
+ if (ret)
+ abort();
+ /* Check again with signals disabled */
+ cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
+ rcu_field_node) {
+ if (thread_field->field == perf_field)
+ goto skip;
+ }
+ thread_field = zmalloc(sizeof(*thread_field));
+ if (!thread_field)
+ abort();
+ thread_field->field = perf_field;
+ thread_field->pc = setup_perf(&perf_field->attr);
+ /* Note: thread_field->pc can be NULL if setup_perf() fails. */
+ ust_lock_nocheck();
+ cds_list_add_rcu(&thread_field->rcu_field_node,
+ &perf_thread->rcu_field_list);
+ cds_list_add(&thread_field->thread_field_node,
+ &perf_field->thread_field_list);
+ ust_unlock();
+skip:
+ ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+ if (ret)
+ abort();
+ return thread_field;
+}
+
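+/*
+ * Fast-path lookup of the calling thread's counter mapping for this
+ * field, falling back to add_thread_field() on first use.
+ */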
+static
+struct lttng_perf_counter_thread_field *
+ get_thread_field(struct lttng_perf_counter_field *field)
+{
+ struct lttng_perf_counter_thread *perf_thread;
+ struct lttng_perf_counter_thread_field *thread_field;
+
+ perf_thread = pthread_getspecific(perf_counter_key);
+ if (!perf_thread)
+ perf_thread = alloc_perf_counter_thread();
+ cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
+ rcu_field_node) {
+ if (thread_field->field == field)
+ return thread_field;
+ }
+ /* perf_counter_thread_field not found, need to add one */
+ return add_thread_field(field, perf_thread);
+}
+
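+/* Read the counter value of this context field for the calling thread. */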
+static
+uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
+{
+ struct lttng_perf_counter_field *perf_field;
+ struct lttng_perf_counter_thread_field *perf_thread_field;
+
+ perf_field = field->u.perf_counter;
+ perf_thread_field = get_thread_field(perf_field);
+ return read_perf_counter(perf_thread_field->pc);
+}
+
+static
+void perf_counter_record(struct lttng_ctx_field *field,
+ struct lttng_ust_lib_ring_buffer_ctx *ctx,
+ struct lttng_channel *chan)
+{
+ uint64_t value;
+
+ value = wrapper_perf_counter_read(field);
+ lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
+ chan->ops->event_write(ctx, &value, sizeof(value));
+}
+
+static
+void perf_counter_get_value(struct lttng_ctx_field *field,
+ union lttng_ctx_value *value)
+{
+ uint64_t v;
+
+ v = wrapper_perf_counter_read(field);
+ value->s64 = v;
+}
+
+/* Called with UST lock held */
+static
+void lttng_destroy_perf_thread_field(
+ struct lttng_perf_counter_thread_field *thread_field)
+{
+ unmap_perf_page(thread_field->pc);
+ cds_list_del_rcu(&thread_field->rcu_field_node);
+ cds_list_del(&thread_field->thread_field_node);
+ free(thread_field);
+}
+
+static
+void lttng_destroy_perf_thread_key(void *_key)
+{
+ struct lttng_perf_counter_thread *perf_thread = _key;
+ struct lttng_perf_counter_thread_field *pos, *p;
+
+ ust_lock_nocheck();
+ cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
+ rcu_field_node)
+ lttng_destroy_perf_thread_field(pos);
+ ust_unlock();
+ free(perf_thread);
+}
+
+/* Called with UST lock held */
+static
+void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
+{
+ struct lttng_perf_counter_field *perf_field;
+ struct lttng_perf_counter_thread_field *pos, *p;
+
+ free((char *) field->event_field.name);
+ perf_field = field->u.perf_counter;
+ /*
+ * This teardown is performed once no thread can concurrently
+ * perform a "get" on this field, thanks to the urcu-bp grace
+ * period.
+ */
+ cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
+ thread_field_node)
+ lttng_destroy_perf_thread_field(pos);
+ free(perf_field);
+}
+
+/* Called with UST lock held */
+int lttng_add_perf_counter_to_ctx(uint32_t type,
+ uint64_t config,
+ const char *name,
+ struct lttng_ctx **ctx)
+{
+ struct lttng_ctx_field *field;
+ struct lttng_perf_counter_field *perf_field;
+ struct perf_event_mmap_page *tmp_pc;
+ char *name_alloc;
+ int ret;
+
+ name_alloc = strdup(name);
+ if (!name_alloc) {
+ ret = -ENOMEM;
+ goto name_alloc_error;
+ }
+ perf_field = zmalloc(sizeof(*perf_field));
+ if (!perf_field) {
+ ret = -ENOMEM;
+ goto perf_field_alloc_error;
+ }
+ field = lttng_append_context(ctx);
+ if (!field) {
+ ret = -ENOMEM;
+ goto append_context_error;
+ }
+ if (lttng_find_context(*ctx, name_alloc)) {
+ ret = -EEXIST;
+ goto find_error;
+ }
+
+ field->destroy = lttng_destroy_perf_counter_field;
+
+ field->event_field.name = name_alloc;
+ field->event_field.type.atype = atype_integer;
+ field->event_field.type.u.basic.integer.size =
+ sizeof(uint64_t) * CHAR_BIT;
+ field->event_field.type.u.basic.integer.alignment =
+ lttng_alignof(uint64_t) * CHAR_BIT;
+ field->event_field.type.u.basic.integer.signedness =
+ lttng_is_signed_type(uint64_t);
+ field->event_field.type.u.basic.integer.reverse_byte_order = 0;
+ field->event_field.type.u.basic.integer.base = 10;
+ field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
+ field->get_size = perf_counter_get_size;
+ field->record = perf_counter_record;
+ field->get_value = perf_counter_get_value;
+
+ perf_field->attr.type = type;
+ perf_field->attr.config = config;
+ perf_field->attr.exclude_kernel = 1;
+ CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
+ field->u.perf_counter = perf_field;
+
+ /* Ensure that this perf counter can be used in this process. */
+ tmp_pc = setup_perf(&perf_field->attr);
+ if (!tmp_pc) {
+ ret = -ENODEV;
+ goto setup_error;
+ }
+ unmap_perf_page(tmp_pc);
+
+ /*
+ * Contexts can only be added before tracing is started, so we
+ * don't have to synchronize against concurrent threads using
+ * the field here.
+ */
+
+ return 0;
+
+setup_error:
+find_error:
+ lttng_remove_context_field(ctx, field);
+append_context_error:
+ free(perf_field);
+perf_field_alloc_error:
+ free(name_alloc);
+name_alloc_error:
+ return ret;
+}
+
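+/*
+ * Create the thread-specific data key; its destructor tears down the
+ * per-thread perf counter state when a thread exits.
+ */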
+int lttng_perf_counter_init(void)
+{
+ int ret;
+
+ ret = pthread_key_create(&perf_counter_key,
+ lttng_destroy_perf_thread_key);
+ if (ret)
+ ret = -ret;
+ return ret;
+}
+
+void lttng_perf_counter_exit(void)
+{
+ int ret;
+
+ ret = pthread_key_delete(perf_counter_key);
+ if (ret) {
+ errno = ret;
+ PERROR("Error in pthread_key_delete");
+ }
+}
* probe registration.
*
* ust_exit_mutex must never nest in ust_mutex.
+ *
+ * ust_mutex_nest is a per-thread nesting counter. It allows the lazy
+ * perf counter initialization, triggered by events hit within the
+ * statedump, to take ust_mutex even though the statedump already
+ * traces with ust_mutex held.
*/
static pthread_mutex_t ust_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Allow nesting the ust_mutex within the same thread. */
+static DEFINE_URCU_TLS(int, ust_mutex_nest);
+
/*
* ust_exit_mutex protects thread_active variable wrt thread exit. It
* cannot be done by ust_mutex because pthread_cancel(), which takes an
static int lttng_ust_comm_should_quit;
/*
- * Return 0 on success, -1 if should quilt.
+ * Return 0 on success, -1 if should quit.
* The lock is taken in both cases.
+ * Signal-safe.
*/
int ust_lock(void)
{
- pthread_mutex_lock(&ust_mutex);
+ sigset_t sig_all_blocked, orig_mask;
+ int ret;
+
+ sigfillset(&sig_all_blocked);
+ ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
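+ /* Only the outermost nesting level takes the mutex. */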
+ if (!URCU_TLS(ust_mutex_nest)++)
+ pthread_mutex_lock(&ust_mutex);
+ ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
if (lttng_ust_comm_should_quit) {
return -1;
} else {
* ust_lock_nocheck() can be used in constructors/destructors, because
* they are already nested within the dynamic loader lock, and therefore
* have exclusive access against execution of liblttng-ust destructor.
+ * Signal-safe.
*/
void ust_lock_nocheck(void)
{
- pthread_mutex_lock(&ust_mutex);
+ sigset_t sig_all_blocked, orig_mask;
+ int ret;
+
+ sigfillset(&sig_all_blocked);
+ ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
+ if (!URCU_TLS(ust_mutex_nest)++)
+ pthread_mutex_lock(&ust_mutex);
+ ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
}
+/*
+ * Signal-safe.
+ */
void ust_unlock(void)
{
- pthread_mutex_unlock(&ust_mutex);
+ sigset_t sig_all_blocked, orig_mask;
+ int ret;
+
+ sigfillset(&sig_all_blocked);
+ ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
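+ /* Only the outermost nesting level releases the mutex. */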
+ if (!--URCU_TLS(ust_mutex_nest))
+ pthread_mutex_unlock(&ust_mutex);
+ ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
+ if (ret) {
+ ERR("pthread_sigmask: %s", strerror(ret));
+ }
}
/*
asm volatile ("" : : "m" (URCU_TLS(lttng_ust_nest_count)));
}
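+/*
+ * Touch the ust_mutex_nest TLS variable so it is allocated up front,
+ * rather than lazily on the ust_lock() fast path.
+ */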
+static
+void lttng_fixup_ust_mutex_nest_tls(void)
+{
+ asm volatile ("" : : "m" (URCU_TLS(ust_mutex_nest)));
+}
+
int lttng_get_notify_socket(void *owner)
{
struct sock_info *info = owner;
lttng_fixup_vtid_tls();
lttng_fixup_nest_count_tls();
lttng_fixup_procname_tls();
+ lttng_fixup_ust_mutex_nest_tls();
/*
* We want precise control over the order in which we construct
lttng_ring_buffer_client_overwrite_rt_init();
lttng_ring_buffer_client_discard_init();
lttng_ring_buffer_client_discard_rt_init();
+ lttng_perf_counter_init();
lttng_context_init();
/*
* Invoke ust malloc wrapper init before starting other threads.
lttng_ust_abi_exit();
lttng_ust_events_exit();
lttng_context_exit();
+ lttng_perf_counter_exit();
lttng_ring_buffer_client_discard_rt_exit();
lttng_ring_buffer_client_discard_exit();
lttng_ring_buffer_client_overwrite_rt_exit();