Fix deadlock: don't take channel lock in timer
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 16 Jul 2013 13:53:33 +0000 (09:53 -0400)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 16 Jul 2013 18:58:50 +0000 (14:58 -0400)
Reviewed-by: Julien Desfossez <julien.desfossez@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
src/common/consumer-metadata-cache.c
src/common/consumer-metadata-cache.h
src/common/consumer-timer.c
src/common/consumer.c
src/common/ust-consumer/ust-consumer.c
src/common/ust-consumer/ust-consumer.h

index e21fd4c73cb0b8c4b5d8990500c5d1188909a33e..473ab231a9100a278afec61c1844738e1f027df2 100644 (file)
@@ -193,7 +193,7 @@ void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel)
  * Return 0 if everything has been flushed, 1 if there is data not flushed.
  */
 int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
-               uint64_t offset)
+               uint64_t offset, int timer)
 {
        int ret;
        struct consumer_metadata_cache *cache;
@@ -204,12 +204,15 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
        cache = channel->metadata_cache;
 
        /*
-        * XXX This consumer_data.lock should eventually be replaced by
-        * a channel lock. It protects metadata_stream read and endpoint
-        * status check.
+        * If not called from a timer handler, we have to take the
+        * channel lock to be mutually exclusive with channel teardown.
+        * Timer handler does not need to take this lock because it is
+        * already synchronized by timer stop (and, more importantly,
+        * taking this lock in a timer handler would cause a deadlock).
         */
-       pthread_mutex_lock(&consumer_data.lock);
-       pthread_mutex_lock(&channel->lock);
+       if (!timer) {
+               pthread_mutex_lock(&channel->lock);
+       }
        pthread_mutex_lock(&channel->timer_lock);
        pthread_mutex_lock(&channel->metadata_cache->lock);
 
@@ -232,8 +235,9 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
 
        pthread_mutex_unlock(&channel->metadata_cache->lock);
        pthread_mutex_unlock(&channel->timer_lock);
-       pthread_mutex_unlock(&channel->lock);
-       pthread_mutex_unlock(&consumer_data.lock);
+       if (!timer) {
+               pthread_mutex_unlock(&channel->lock);
+       }
 
        return ret;
 }
index b1a4dc2b777fad31b91a8ecf040b439f180776c3..4a671b23856874f49f12b65aa97198d0615ee8f8 100644 (file)
@@ -54,6 +54,6 @@ int consumer_metadata_cache_write(struct lttng_consumer_channel *channel,
 int consumer_metadata_cache_allocate(struct lttng_consumer_channel *channel);
 void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel);
 int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
-               uint64_t offset);
+               uint64_t offset, int timer);
 
 #endif /* CONSUMER_METADATA_CACHE_H */
index a32dbf64d2d229e730f6e87a4add490c75567a39..d6b5648b71acc0b5933f2b4eedcb91f08b439dbf 100644 (file)
@@ -86,15 +86,15 @@ static void metadata_switch_timer(struct lttng_consumer_local_data *ctx,
                 *       - channel->lock
                 *         - channel->metadata_cache->lock
                 *     - Calling consumer_metadata_cache_flushed():
-                *       - consumer_data.lock
-                *         - channel->lock
-                *           - channel->metadata_cache->lock
+                *       - channel->timer_lock
+                *         - channel->metadata_cache->lock
                 *
-                * Both consumer_data.lock and channel->lock currently
-                * cause a deadlock, since they are held while
-                * consumer_timer_switch_stop() is called.
+                * Ensure that neither consumer_data.lock nor
+                * channel->lock are taken within this function, since
+                * they are held while consumer_timer_switch_stop() is
+                * called.
                 */
-               ret = lttng_ustconsumer_request_metadata(ctx, channel);
+               ret = lttng_ustconsumer_request_metadata(ctx, channel, 1);
                if (ret < 0) {
                        channel->switch_timer_error = 1;
                }
index ce13a9835a0fa554facc474efd76a1dbff96313a..43848bc5e065076c53ca8f3453b8910e8bc16faa 100644 (file)
@@ -293,7 +293,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel)
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&channel->lock);
-       pthread_mutex_lock(&channel->timer_lock);
 
        switch (consumer_data.type) {
        case LTTNG_CONSUMER_KERNEL:
@@ -323,7 +322,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel)
 
        call_rcu(&channel->node.head, free_channel_rcu);
 end:
-       pthread_mutex_unlock(&channel->timer_lock);
        pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 }
@@ -1902,7 +1900,6 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream,
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&stream->chan->lock);
-       pthread_mutex_lock(&stream->chan->timer_lock);
        pthread_mutex_lock(&stream->lock);
 
        switch (consumer_data.type) {
@@ -1991,13 +1988,12 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream,
 end:
        /*
         * Nullify the stream reference so it is not used after deletion. The
-        * consumer data lock MUST be acquired before being able to check for a
-        * NULL pointer value.
+        * channel lock MUST be acquired before being able to check for
+        * NULL pointer value.
         */
        stream->chan->metadata_stream = NULL;
 
        pthread_mutex_unlock(&stream->lock);
-       pthread_mutex_unlock(&stream->chan->timer_lock);
        pthread_mutex_unlock(&stream->chan->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 
index f9d0817c2de5f44c217b71920fdf2d08f5ae6abc..290eb3390e80aff424d954f8d9b7a00746f62fc0 100644 (file)
@@ -654,7 +654,6 @@ static int close_metadata(uint64_t chan_key)
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&channel->lock);
-       pthread_mutex_lock(&channel->timer_lock);
 
        if (cds_lfht_is_node_deleted(&channel->node.node)) {
                goto error_unlock;
@@ -675,7 +674,6 @@ static int close_metadata(uint64_t chan_key)
        }
 
 error_unlock:
-       pthread_mutex_unlock(&channel->timer_lock);
        pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 error:
@@ -748,7 +746,8 @@ error_find:
  * Receive the metadata updates from the sessiond.
  */
 int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
-               uint64_t len, struct lttng_consumer_channel *channel)
+               uint64_t len, struct lttng_consumer_channel *channel,
+               int timer)
 {
        int ret, ret_code = LTTNG_OK;
        char *metadata_str;
@@ -770,17 +769,9 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
                goto end_free;
        }
 
-       /*
-        * XXX: The consumer data lock is acquired before calling metadata cache
-        * write which calls push metadata that MUST be protected by the consumer
-        * lock in order to be able to check the validity of the metadata stream of
-        * the channel.
-        *
-        * Note that this will be subject to change to better fine grained locking
-        * and ultimately try to get rid of this global consumer data lock.
-        */
-       pthread_mutex_lock(&consumer_data.lock);
-       pthread_mutex_lock(&channel->lock);
+       if (!timer) {
+               pthread_mutex_lock(&channel->lock);
+       }
        pthread_mutex_lock(&channel->timer_lock);
        pthread_mutex_lock(&channel->metadata_cache->lock);
        ret = consumer_metadata_cache_write(channel, offset, len, metadata_str);
@@ -794,16 +785,18 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
                 */
                pthread_mutex_unlock(&channel->metadata_cache->lock);
                pthread_mutex_unlock(&channel->timer_lock);
-               pthread_mutex_unlock(&channel->lock);
-               pthread_mutex_unlock(&consumer_data.lock);
+               if (!timer) {
+                       pthread_mutex_unlock(&channel->lock);
+               }
                goto end_free;
        }
        pthread_mutex_unlock(&channel->metadata_cache->lock);
        pthread_mutex_unlock(&channel->timer_lock);
-       pthread_mutex_unlock(&channel->lock);
-       pthread_mutex_unlock(&consumer_data.lock);
+       if (!timer) {
+               pthread_mutex_unlock(&channel->lock);
+       }
 
-       while (consumer_metadata_cache_flushed(channel, offset + len)) {
+       while (consumer_metadata_cache_flushed(channel, offset + len, timer)) {
                DBG("Waiting for metadata to be flushed");
                usleep(DEFAULT_METADATA_AVAILABILITY_WAIT_TIME);
        }
@@ -1139,7 +1132,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                }
 
                ret = lttng_ustconsumer_recv_metadata(sock, key, offset,
-                               len, channel);
+                               len, channel, 0);
                if (ret < 0) {
                        /* error receiving from sessiond */
                        goto error_fatal;
@@ -1498,7 +1491,7 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream)
  * introduces deadlocks.
  */
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
-               struct lttng_consumer_channel *channel)
+               struct lttng_consumer_channel *channel, int timer)
 {
        struct lttcomm_metadata_request_msg request;
        struct lttcomm_consumer_msg msg;
@@ -1585,7 +1578,7 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
        }
 
        ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket,
-                       key, offset, len, channel);
+                       key, offset, len, channel, timer);
        if (ret_code >= 0) {
                /*
                 * Only send the status msg if the sessiond is alive meaning a positive
index 9f0b22ddeffa7e775503957b253e098ea535b490..4ad917ca9c3d9961fcc28e26b0a7d8b9ac31d333 100644 (file)
@@ -52,11 +52,12 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream);
 void lttng_ustconsumer_close_metadata(struct lttng_ht *ht);
 void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream);
 int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
-               uint64_t len, struct lttng_consumer_channel *channel);
+               uint64_t len, struct lttng_consumer_channel *channel,
+               int timer);
 int lttng_ustconsumer_push_metadata(struct lttng_consumer_channel *metadata,
                const char *metadata_str, uint64_t target_offset, uint64_t len);
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
-               struct lttng_consumer_channel *channel);
+               struct lttng_consumer_channel *channel, int timer);
 
 #else /* HAVE_LIBLTTNG_UST_CTL */
 
@@ -164,7 +165,8 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream)
 }
 static inline
 int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
-               uint64_t len, struct lttng_consumer_channel *channel)
+               uint64_t len, struct lttng_consumer_channel *channel,
+               int timer)
 {
        return -ENOSYS;
 }
@@ -176,7 +178,7 @@ int lttng_ustconsumer_push_metadata(struct lttng_consumer_channel *metadata,
 }
 static inline
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
-               struct lttng_consumer_channel *channel)
+               struct lttng_consumer_channel *channel, int timer)
 {
        return -ENOSYS;
 }
This page took 0.029775 seconds and 4 git commands to generate.