Fix: channel and stream leak in consumerd
authorDavid Goulet <dgoulet@ev0ke.net>
Tue, 18 Jun 2013 20:07:02 +0000 (16:07 -0400)
committerDavid Goulet <dgoulet@efficios.com>
Thu, 20 Jun 2013 17:20:16 +0000 (13:20 -0400)
Multiple error paths were possibly leaking channel and stream structure
in the lttng-consumerd.

This has been detected by the stress test killing the relayd randomly
during a long period of time.

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: David Goulet <dgoulet@efficios.com>
src/bin/lttng-sessiond/ust-app.c
src/common/consumer.c
src/common/ust-consumer/ust-consumer.c

index 07c75031a63e493821ee688faca8951ba3501754..1736a3e851b20b646aa0f0b95c8e65c3ddc67fd6 100644 (file)
@@ -2610,10 +2610,8 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess,
        ret = ust_consumer_ask_channel(ua_sess, metadata, consumer, socket,
                        registry);
        if (ret < 0) {
-               /*
-                * Safe because the metadata obj pointer is not set so the delete below
-                * will not put a FD back again.
-                */
+               /* Nullify the metadata key so we don't try to close it later on. */
+               registry->metadata_key = 0;
                goto error_consumer;
        }
 
@@ -2625,10 +2623,8 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess,
         */
        ret = consumer_setup_metadata(socket, metadata->key);
        if (ret < 0) {
-               /*
-                * Safe because the metadata obj pointer is not set so the delete below
-                * will not put a FD back again.
-                */
+               /* Nullify the metadata key so we don't try to close it later on. */
+               registry->metadata_key = 0;
                goto error_consumer;
        }
 
index 96bc337262d65cf525484ac14962f2b44eee19a6..116441171e5aa73d198c2bbd90175da0d9fb03fa 100644 (file)
@@ -287,6 +287,7 @@ void consumer_del_channel(struct lttng_consumer_channel *channel)
 {
        int ret;
        struct lttng_ht_iter iter;
+       struct lttng_consumer_stream *stream, *stmp;
 
        DBG("Consumer delete channel key %" PRIu64, channel->key);
 
@@ -297,6 +298,13 @@ void consumer_del_channel(struct lttng_consumer_channel *channel)
                break;
        case LTTNG_CONSUMER32_UST:
        case LTTNG_CONSUMER64_UST:
+               /* Delete streams that might have been left in the stream list. */
+               cds_list_for_each_entry_safe(stream, stmp, &channel->streams.head,
+                               send_node) {
+                       cds_list_del(&stream->send_node);
+                       lttng_ustconsumer_del_stream(stream);
+                       free(stream);
+               }
                lttng_ustconsumer_del_channel(channel);
                break;
        default:
@@ -2728,6 +2736,8 @@ restart:
                                                break;
                                        case CONSUMER_CHANNEL_DEL:
                                        {
+                                               struct lttng_consumer_stream *stream, *stmp;
+
                                                rcu_read_lock();
                                                chan = consumer_find_channel(key);
                                                if (!chan) {
@@ -2741,6 +2751,26 @@ restart:
                                                assert(ret == 0);
                                                consumer_close_channel_streams(chan);
 
+                                               switch (consumer_data.type) {
+                                               case LTTNG_CONSUMER_KERNEL:
+                                                       break;
+                                               case LTTNG_CONSUMER32_UST:
+                                               case LTTNG_CONSUMER64_UST:
+                                                       /* Delete streams that might have been left in the stream list. */
+                                                       cds_list_for_each_entry_safe(stream, stmp, &chan->streams.head,
+                                                                       send_node) {
+                                                               cds_list_del(&stream->send_node);
+                                                               lttng_ustconsumer_del_stream(stream);
+                                                               uatomic_sub(&stream->chan->refcount, 1);
+                                                               assert(&chan->refcount);
+                                                               free(stream);
+                                                       }
+                                                       break;
+                                               default:
+                                                       ERR("Unknown consumer_data type");
+                                                       assert(0);
+                                               }
+
                                                /*
                                                 * Release our own refcount. Force channel deletion even if
                                                 * streams were not initialized.
@@ -2787,6 +2817,7 @@ restart:
                                lttng_poll_del(&events, chan->wait_fd);
                                ret = lttng_ht_del(channel_ht, &iter);
                                assert(ret == 0);
+                               assert(cds_list_empty(&chan->streams.head));
                                consumer_close_channel_streams(chan);
 
                                /* Release our own refcount */
index 780a601f67e74b70b3632a6d489067d82e1359ef..f783d4058c550033983045c93cbeddcc363028d3 100644 (file)
@@ -394,7 +394,7 @@ static int send_sessiond_channel(int sock,
                struct lttng_consumer_channel *channel,
                struct lttng_consumer_local_data *ctx, int *relayd_error)
 {
-       int ret;
+       int ret, ret_code = LTTNG_OK;
        struct lttng_consumer_stream *stream;
 
        assert(channel);
@@ -403,18 +403,6 @@ static int send_sessiond_channel(int sock,
 
        DBG("UST consumer sending channel %s to sessiond", channel->name);
 
-       /* Send channel to sessiond. */
-       ret = ustctl_send_channel_to_sessiond(sock, channel->uchan);
-       if (ret < 0) {
-               goto error;
-       }
-
-       ret = ustctl_channel_close_wakeup_fd(channel->uchan);
-       if (ret < 0) {
-               goto error;
-       }
-
-       /* The channel was sent successfully to the sessiond at this point. */
        cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
                /* Try to send the stream to the relayd if one is available. */
                ret = send_stream_to_relayd(stream);
@@ -426,9 +414,33 @@ static int send_sessiond_channel(int sock,
                        if (relayd_error) {
                                *relayd_error = 1;
                        }
-                       goto error;
+                       ret_code = LTTNG_ERR_RELAYD_CONNECT_FAIL;
                }
+       }
 
+       /* Inform sessiond that we are about to send channel and streams. */
+       ret = consumer_send_status_msg(sock, ret_code);
+       if (ret < 0 || ret_code != LTTNG_OK) {
+               /*
+                * Either the session daemon is not responding or the relayd died so we
+                * stop now.
+                */
+               goto error;
+       }
+
+       /* Send channel to sessiond. */
+       ret = ustctl_send_channel_to_sessiond(sock, channel->uchan);
+       if (ret < 0) {
+               goto error;
+       }
+
+       ret = ustctl_channel_close_wakeup_fd(channel->uchan);
+       if (ret < 0) {
+               goto error;
+       }
+
+       /* The channel was sent successfully to the sessiond at this point. */
+       cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
                /* Send stream to session daemon. */
                ret = send_sessiond_stream(sock, stream);
                if (ret < 0) {
@@ -447,6 +459,9 @@ static int send_sessiond_channel(int sock,
        return 0;
 
 error:
+       if (ret_code != LTTNG_OK) {
+               ret = -1;
+       }
        return ret;
 }
 
@@ -677,7 +692,7 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key)
        if (!metadata) {
                ERR("UST consumer push metadata %" PRIu64 " not found", key);
                ret = LTTNG_ERR_UST_CHAN_NOT_FOUND;
-               goto error;
+               goto error_find;
        }
 
        /*
@@ -709,9 +724,17 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key)
        /* List MUST be empty after or else it could be reused. */
        assert(cds_list_empty(&metadata->streams.head));
 
-       ret = 0;
+       return 0;
 
 error:
+       /*
+        * Delete metadata channel on error. At this point, the metadata stream can
+        * NOT be monitored by the metadata thread thus having the guarantee that
+        * the stream is still in the local stream list of the channel. This call
+        * will make sure to clean that list.
+        */
+       consumer_del_channel(metadata);
+error_find:
        return ret;
 }
 
@@ -1007,13 +1030,6 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                        goto end_msg_sessiond;
                }
 
-               /* Inform sessiond that we are about to send channel and streams. */
-               ret = consumer_send_status_msg(sock, LTTNG_OK);
-               if (ret < 0) {
-                       /* Somehow, the session daemon is not responding anymore. */
-                       goto error_fatal;
-               }
-
                /* Send everything to sessiond. */
                ret = send_sessiond_channel(sock, channel, ctx, &relayd_err);
                if (ret < 0) {
@@ -1021,10 +1037,10 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                                /*
                                 * We were unable to send to the relayd the stream so avoid
                                 * sending back a fatal error to the thread since this is OK
-                                * and the consumer can continue its work.
+                                * and the consumer can continue its work. The above call
+                                * has sent the error status message to the sessiond.
                                 */
-                               ret_code = LTTNG_ERR_RELAYD_CONNECT_FAIL;
-                               goto end_msg_sessiond;
+                               goto end_nosignal;
                        }
                        /*
                         * The communicaton was broken hence there is a bad state between
@@ -1544,7 +1560,13 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
 
        ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket,
                        key, offset, len, channel);
-       (void) consumer_send_status_msg(ctx->consumer_metadata_socket, ret_code);
+       if (ret_code >= 0) {
+               /*
+                * Only send the status msg if the sessiond is alive meaning a positive
+                * ret code.
+                */
+               (void) consumer_send_status_msg(ctx->consumer_metadata_socket, ret_code);
+       }
        ret = 0;
 
 end:
This page took 0.03336 seconds and 4 git commands to generate.