Fix: use a free running channel key between sessiond and kernel consumer
authorJulien Desfossez <jdesfossez@efficios.com>
Thu, 30 Nov 2017 20:31:27 +0000 (15:31 -0500)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Wed, 28 Feb 2018 03:32:34 +0000 (22:32 -0500)
We currently use the channel FD number opened by the session daemon to
reference a channel in the consumer. This can lead to races where the
session daemon destroys a channel and recreates one with the same FD
number before the consumer has time to cleanup everything on its side,
so all the commands in between that use that FD number has a key may end
up working on the wrong objects.

This fix introduces a free running counter as the channel key, so this
decouples the channel key in the consumer from the channel FD in the
session daemon. This fixes the race observed in stress tests.

Signed-off-by: Julien Desfossez <jdesfossez@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/bin/lttng-sessiond/cmd.c
src/bin/lttng-sessiond/kernel-consumer.c
src/bin/lttng-sessiond/kernel.c
src/bin/lttng-sessiond/trace-kernel.c
src/bin/lttng-sessiond/trace-kernel.h

index 9d742536852b4880f30d7245f71d32567a94bc5e..2c1ae3f5132ea3e8a0031dfe6525af733926c535 100644 (file)
@@ -167,14 +167,14 @@ static int get_kernel_runtime_stats(struct ltt_session *session,
                goto end;
        }
 
-       ret = consumer_get_discarded_events(session->id, kchan->fd,
+       ret = consumer_get_discarded_events(session->id, kchan->key,
                        session->kernel_session->consumer,
                        discarded_events);
        if (ret < 0) {
                goto end;
        }
 
-       ret = consumer_get_lost_packets(session->id, kchan->fd,
+       ret = consumer_get_lost_packets(session->id, kchan->key,
                        session->kernel_session->consumer,
                        lost_packets);
        if (ret < 0) {
index bc481e5c4cedbbfcac3818b85122a7aace36e8f7..89bf4596b54969ebd556d2d27faec3a66e011a98 100644 (file)
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <inttypes.h>
 
 #include <common/common.h>
 #include <common/defaults.h>
@@ -129,7 +130,7 @@ int kernel_consumer_add_channel(struct consumer_socket *sock,
        /* Prep channel message structure */
        consumer_init_channel_comm_msg(&lkm,
                        LTTNG_CONSUMER_ADD_CHANNEL,
-                       channel->fd,
+                       channel->key,
                        ksession->id,
                        pathname,
                        ksession->uid,
@@ -160,7 +161,7 @@ int kernel_consumer_add_channel(struct consumer_socket *sock,
        status = notification_thread_command_add_channel(
                        notification_thread_handle, session->name,
                        ksession->uid, ksession->gid,
-                       channel->channel->name, channel->fd,
+                       channel->channel->name, channel->key,
                        LTTNG_DOMAIN_KERNEL,
                        channel->channel->attr.subbuf_size * channel->channel->attr.num_subbuf);
        rcu_read_unlock();
@@ -284,7 +285,7 @@ int kernel_consumer_add_stream(struct consumer_socket *sock,
        /* Prep stream consumer message */
        consumer_init_stream_comm_msg(&lkm,
                        LTTNG_CONSUMER_ADD_STREAM,
-                       channel->fd,
+                       channel->key,
                        stream->fd,
                        stream->cpu);
 
@@ -438,7 +439,7 @@ int kernel_consumer_send_session(struct consumer_socket *sock,
                         * Inform the relay that all the streams for the
                         * channel were sent.
                         */
-                       ret = kernel_consumer_streams_sent(sock, session, chan->fd);
+                       ret = kernel_consumer_streams_sent(sock, session, chan->key);
                        if (ret < 0) {
                                goto error;
                        }
@@ -463,11 +464,11 @@ int kernel_consumer_destroy_channel(struct consumer_socket *socket,
        assert(channel);
        assert(socket);
 
-       DBG("Sending kernel consumer destroy channel key %d", channel->fd);
+       DBG("Sending kernel consumer destroy channel key %" PRIu64, channel->key);
 
        memset(&msg, 0, sizeof(msg));
        msg.cmd_type = LTTNG_CONSUMER_DESTROY_CHANNEL;
-       msg.u.destroy_channel.key = channel->fd;
+       msg.u.destroy_channel.key = channel->key;
 
        pthread_mutex_lock(socket->lock);
        health_code_update();
index 8892e76dc144f7963be7813647df05e5f9006da1..535a5b8e8c4035918f0e2665ac0af8f0e7ad8eda 100644 (file)
 #include "kern-modules.h"
 #include "utils.h"
 
+/*
+ * Key used to reference a channel between the sessiond and the consumer. This
+ * is only read and updated with the session_list lock held.
+ */
+static uint64_t next_kernel_channel_key;
+
 /*
  * Add context on a kernel channel.
  *
@@ -169,8 +175,10 @@ int kernel_create_channel(struct ltt_kernel_session *session,
        cds_list_add(&lkc->list, &session->channel_list.head);
        session->channel_count++;
        lkc->session = session;
+       lkc->key = ++next_kernel_channel_key;
 
-       DBG("Kernel channel %s created (fd: %d)", lkc->channel->name, lkc->fd);
+       DBG("Kernel channel %s created (fd: %d, key: %" PRIu64 ")",
+                       lkc->channel->name, lkc->fd, lkc->key);
 
        return 0;
 
@@ -291,7 +299,8 @@ int kernel_disable_channel(struct ltt_kernel_channel *chan)
        }
 
        chan->enabled = 0;
-       DBG("Kernel channel %s disabled (fd: %d)", chan->channel->name, chan->fd);
+       DBG("Kernel channel %s disabled (fd: %d, key: %" PRIu64 ")",
+                       chan->channel->name, chan->fd, chan->key);
 
        return 0;
 
@@ -315,7 +324,8 @@ int kernel_enable_channel(struct ltt_kernel_channel *chan)
        }
 
        chan->enabled = 1;
-       DBG("Kernel channel %s enabled (fd: %d)", chan->channel->name, chan->fd);
+       DBG("Kernel channel %s enabled (fd: %d, key: %" PRIu64 ")",
+                       chan->channel->name, chan->fd, chan->key);
 
        return 0;
 
index 52b8cc7e0e8d4f2d4e017d746def763f599d9420..ef5abbab4c3b20fe8d39165012e27b756234a43e 100644 (file)
@@ -551,7 +551,7 @@ void trace_kernel_destroy_channel(struct ltt_kernel_channel *channel)
                        && channel->published_to_notification_thread) {
                status = notification_thread_command_remove_channel(
                                notification_thread_handle,
-                               channel->fd, LTTNG_DOMAIN_KERNEL);
+                               channel->key, LTTNG_DOMAIN_KERNEL);
                assert(status == LTTNG_OK);
        }
        free(channel->channel->attr.extended.ptr);
index 2eb599272a0ceb15ce167b73b9e68ae37e9528f4..9c52cee0994944df07da6bcbe436a4233a39a550 100644 (file)
@@ -63,6 +63,7 @@ struct ltt_kernel_event {
 /* Kernel channel */
 struct ltt_kernel_channel {
        int fd;
+       uint64_t key; /* Key to reference this channel with the consumer. */
        int enabled;
        unsigned int stream_count;
        unsigned int event_count;
This page took 0.030859 seconds and 4 git commands to generate.