From: Mathieu Desnoyers Date: Tue, 16 Jul 2013 13:53:33 +0000 (-0400) Subject: Fix deadlock: don't take channel lock in timer X-Git-Tag: v2.2.2~7 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=f0e1c78570c3a9f98489696672e1286fe79091fd;p=lttng-tools.git Fix deadlock: don't take channel lock in timer Reviewed-by: Julien Desfossez Signed-off-by: Mathieu Desnoyers --- diff --git a/src/common/consumer-metadata-cache.c b/src/common/consumer-metadata-cache.c index e21fd4c73..473ab231a 100644 --- a/src/common/consumer-metadata-cache.c +++ b/src/common/consumer-metadata-cache.c @@ -193,7 +193,7 @@ void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel) * Return 0 if everything has been flushed, 1 if there is data not flushed. */ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, - uint64_t offset) + uint64_t offset, int timer) { int ret; struct consumer_metadata_cache *cache; @@ -204,12 +204,15 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, cache = channel->metadata_cache; /* - * XXX This consumer_data.lock should eventually be replaced by - * a channel lock. It protects metadata_stream read and endpoint - * status check. + * If not called from a timer handler, we have to take the + * channel lock to be mutually exclusive with channel teardown. + * Timer handler does not need to take this lock because it is + * already synchronized by timer stop (and, more importantly, + * taking this lock in a timer handler would cause a deadlock). */ - pthread_mutex_lock(&consumer_data.lock); - pthread_mutex_lock(&channel->lock); + if (!timer) { + pthread_mutex_lock(&channel->lock); + } pthread_mutex_lock(&channel->timer_lock); pthread_mutex_lock(&channel->metadata_cache->lock); @@ -232,8 +235,9 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, pthread_mutex_unlock(&channel->metadata_cache->lock); pthread_mutex_unlock(&channel->timer_lock); - pthread_mutex_unlock(&channel->lock); - pthread_mutex_unlock(&consumer_data.lock); + if (!timer) { + pthread_mutex_unlock(&channel->lock); + } return ret; } diff --git a/src/common/consumer-metadata-cache.h b/src/common/consumer-metadata-cache.h index b1a4dc2b7..4a671b238 100644 --- a/src/common/consumer-metadata-cache.h +++ b/src/common/consumer-metadata-cache.h @@ -54,6 +54,6 @@ int consumer_metadata_cache_write(struct lttng_consumer_channel *channel, int consumer_metadata_cache_allocate(struct lttng_consumer_channel *channel); void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel); int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, - uint64_t offset); + uint64_t offset, int timer); #endif /* CONSUMER_METADATA_CACHE_H */ diff --git a/src/common/consumer-timer.c b/src/common/consumer-timer.c index a32dbf64d..d6b5648b7 100644 --- a/src/common/consumer-timer.c +++ b/src/common/consumer-timer.c @@ -86,15 +86,15 @@ static void metadata_switch_timer(struct lttng_consumer_local_data *ctx, * - channel->lock * - channel->metadata_cache->lock * - Calling consumer_metadata_cache_flushed(): - * - consumer_data.lock - * - channel->lock - * - channel->metadata_cache->lock + * - channel->timer_lock + * - channel->metadata_cache->lock * - * Both consumer_data.lock and channel->lock currently - * cause a deadlock, since they are held while - * consumer_timer_switch_stop() is called. + * Ensure that neither consumer_data.lock nor + * channel->lock are taken within this function, since + * they are held while consumer_timer_switch_stop() is + * called. */ - ret = lttng_ustconsumer_request_metadata(ctx, channel); + ret = lttng_ustconsumer_request_metadata(ctx, channel, 1); if (ret < 0) { channel->switch_timer_error = 1; } diff --git a/src/common/consumer.c b/src/common/consumer.c index ce13a9835..43848bc5e 100644 --- a/src/common/consumer.c +++ b/src/common/consumer.c @@ -293,7 +293,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&channel->lock); - pthread_mutex_lock(&channel->timer_lock); switch (consumer_data.type) { case LTTNG_CONSUMER_KERNEL: @@ -323,7 +322,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) call_rcu(&channel->node.head, free_channel_rcu); end: - pthread_mutex_unlock(&channel->timer_lock); pthread_mutex_unlock(&channel->lock); pthread_mutex_unlock(&consumer_data.lock); } @@ -1902,7 +1900,6 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream, pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&stream->chan->lock); - pthread_mutex_lock(&stream->chan->timer_lock); pthread_mutex_lock(&stream->lock); switch (consumer_data.type) { @@ -1991,13 +1988,12 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream, end: /* * Nullify the stream reference so it is not used after deletion. The - * consumer data lock MUST be acquired before being able to check for a - * NULL pointer value. + * channel lock MUST be acquired before being able to check for + * a NULL pointer value. */ stream->chan->metadata_stream = NULL; pthread_mutex_unlock(&stream->lock); - pthread_mutex_unlock(&stream->chan->timer_lock); pthread_mutex_unlock(&stream->chan->lock); pthread_mutex_unlock(&consumer_data.lock); diff --git a/src/common/ust-consumer/ust-consumer.c b/src/common/ust-consumer/ust-consumer.c index f9d0817c2..290eb3390 100644 --- a/src/common/ust-consumer/ust-consumer.c +++ b/src/common/ust-consumer/ust-consumer.c @@ -654,7 +654,6 @@ static int close_metadata(uint64_t chan_key) pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&channel->lock); - pthread_mutex_lock(&channel->timer_lock); if (cds_lfht_is_node_deleted(&channel->node.node)) { goto error_unlock; @@ -675,7 +674,6 @@ static int close_metadata(uint64_t chan_key) } error_unlock: - pthread_mutex_unlock(&channel->timer_lock); pthread_mutex_unlock(&channel->lock); pthread_mutex_unlock(&consumer_data.lock); error: @@ -748,7 +746,8 @@ error_find: * Receive the metadata updates from the sessiond. */ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, - uint64_t len, struct lttng_consumer_channel *channel) + uint64_t len, struct lttng_consumer_channel *channel, + int timer) { int ret, ret_code = LTTNG_OK; char *metadata_str; @@ -770,17 +769,9 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, goto end_free; } - /* - * XXX: The consumer data lock is acquired before calling metadata cache - * write which calls push metadata that MUST be protected by the consumer - * lock in order to be able to check the validity of the metadata stream of - * the channel. - * - * Note that this will be subject to change to better fine grained locking - * and ultimately try to get rid of this global consumer data lock. - */ - pthread_mutex_lock(&consumer_data.lock); - pthread_mutex_lock(&channel->lock); + if (!timer) { + pthread_mutex_lock(&channel->lock); + } pthread_mutex_lock(&channel->timer_lock); pthread_mutex_lock(&channel->metadata_cache->lock); ret = consumer_metadata_cache_write(channel, offset, len, metadata_str); @@ -794,16 +785,18 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, */ pthread_mutex_unlock(&channel->metadata_cache->lock); pthread_mutex_unlock(&channel->timer_lock); - pthread_mutex_unlock(&channel->lock); - pthread_mutex_unlock(&consumer_data.lock); + if (!timer) { + pthread_mutex_unlock(&channel->lock); + } goto end_free; } pthread_mutex_unlock(&channel->metadata_cache->lock); pthread_mutex_unlock(&channel->timer_lock); - pthread_mutex_unlock(&channel->lock); - pthread_mutex_unlock(&consumer_data.lock); + if (!timer) { + pthread_mutex_unlock(&channel->lock); + } - while (consumer_metadata_cache_flushed(channel, offset + len)) { + while (consumer_metadata_cache_flushed(channel, offset + len, timer)) { DBG("Waiting for metadata to be flushed"); usleep(DEFAULT_METADATA_AVAILABILITY_WAIT_TIME); } @@ -1139,7 +1132,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, } ret = lttng_ustconsumer_recv_metadata(sock, key, offset, - len, channel); + len, channel, 0); if (ret < 0) { /* error receiving from sessiond */ goto error_fatal; @@ -1498,7 +1491,7 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream) * introduces deadlocks. */ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, - struct lttng_consumer_channel *channel) + struct lttng_consumer_channel *channel, int timer) { struct lttcomm_metadata_request_msg request; struct lttcomm_consumer_msg msg; @@ -1585,7 +1578,7 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, } ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket, - key, offset, len, channel); + key, offset, len, channel, timer); if (ret_code >= 0) { /* * Only send the status msg if the sessiond is alive meaning a positive diff --git a/src/common/ust-consumer/ust-consumer.h b/src/common/ust-consumer/ust-consumer.h index 9f0b22dde..4ad917ca9 100644 --- a/src/common/ust-consumer/ust-consumer.h +++ b/src/common/ust-consumer/ust-consumer.h @@ -52,11 +52,12 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream); void lttng_ustconsumer_close_metadata(struct lttng_ht *ht); void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream); int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, - uint64_t len, struct lttng_consumer_channel *channel); + uint64_t len, struct lttng_consumer_channel *channel, + int timer); int lttng_ustconsumer_push_metadata(struct lttng_consumer_channel *metadata, const char *metadata_str, uint64_t target_offset, uint64_t len); int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, - struct lttng_consumer_channel *channel); + struct lttng_consumer_channel *channel, int timer); #else /* HAVE_LIBLTTNG_UST_CTL */ @@ -164,7 +165,8 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream) } static inline int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, - uint64_t len, struct lttng_consumer_channel *channel) + uint64_t len, struct lttng_consumer_channel *channel, + int timer) { return -ENOSYS; } @@ -176,7 +178,7 @@ int lttng_ustconsumer_push_metadata(struct lttng_consumer_channel *metadata, } static inline int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, - struct lttng_consumer_channel *channel) + struct lttng_consumer_channel *channel, int timer) { return -ENOSYS; }