From: Mathieu Desnoyers Date: Tue, 16 Jul 2013 13:53:33 +0000 (-0400) Subject: Fix deadlock: don't take channel lock in timer X-Git-Tag: v2.3.0-rc1~11 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=5e41ebe1bcc918cb5ff5d97489ccfbae0b7086a3;p=lttng-tools.git Fix deadlock: don't take channel lock in timer Reviewed-by: Julien Desfossez Signed-off-by: Mathieu Desnoyers --- diff --git a/src/common/consumer-metadata-cache.c b/src/common/consumer-metadata-cache.c index 5967a8eb2..d597e64e3 100644 --- a/src/common/consumer-metadata-cache.c +++ b/src/common/consumer-metadata-cache.c @@ -190,7 +190,7 @@ void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel) * Return 0 if everything has been flushed, 1 if there is data not flushed. */ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, - uint64_t offset) + uint64_t offset, int timer) { int ret = 0; struct lttng_consumer_stream *metadata_stream; @@ -199,12 +199,15 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, assert(channel->metadata_cache); /* - * XXX This consumer_data.lock should eventually be replaced by - * a channel lock. It protects metadata_stream read and endpoint - * status check. + * If not called from a timer handler, we have to take the + * channel lock to be mutually exclusive with channel teardown. + * Timer handler does not need to take this lock because it is + * already synchronized by timer stop (and, more importantly, + * taking this lock in a timer handler would cause a deadlock). */ - pthread_mutex_lock(&consumer_data.lock); - pthread_mutex_lock(&channel->lock); + if (!timer) { + pthread_mutex_lock(&channel->lock); + } pthread_mutex_lock(&channel->timer_lock); pthread_mutex_lock(&channel->metadata_cache->lock); @@ -229,8 +232,9 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, pthread_mutex_unlock(&channel->metadata_cache->lock); pthread_mutex_unlock(&channel->timer_lock); - pthread_mutex_unlock(&channel->lock); - pthread_mutex_unlock(&consumer_data.lock); + if (!timer) { + pthread_mutex_unlock(&channel->lock); + } return ret; } diff --git a/src/common/consumer-metadata-cache.h b/src/common/consumer-metadata-cache.h index 8f485d639..aaf9f24d2 100644 --- a/src/common/consumer-metadata-cache.h +++ b/src/common/consumer-metadata-cache.h @@ -54,6 +54,6 @@ int consumer_metadata_cache_write(struct lttng_consumer_channel *channel, int consumer_metadata_cache_allocate(struct lttng_consumer_channel *channel); void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel); int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel, - uint64_t offset); + uint64_t offset, int timer); #endif /* CONSUMER_METADATA_CACHE_H */ diff --git a/src/common/consumer-stream.c b/src/common/consumer-stream.c index 02887fcc4..717e0a735 100644 --- a/src/common/consumer-stream.c +++ b/src/common/consumer-stream.c @@ -281,7 +281,6 @@ void consumer_stream_destroy(struct lttng_consumer_stream *stream, if (stream->globally_visible) { pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&stream->chan->lock); - pthread_mutex_lock(&stream->chan->timer_lock); pthread_mutex_lock(&stream->lock); /* Remove every reference of the stream in the consumer. */ consumer_stream_delete(stream, ht); @@ -295,7 +294,6 @@ void consumer_stream_destroy(struct lttng_consumer_stream *stream, consumer_data.need_update = 1; pthread_mutex_unlock(&stream->lock); - pthread_mutex_unlock(&stream->chan->timer_lock); pthread_mutex_unlock(&stream->chan->lock); pthread_mutex_unlock(&consumer_data.lock); } else { diff --git a/src/common/consumer-timer.c b/src/common/consumer-timer.c index a7f4536a2..37c9861b9 100644 --- a/src/common/consumer-timer.c +++ b/src/common/consumer-timer.c @@ -84,15 +84,15 @@ static void metadata_switch_timer(struct lttng_consumer_local_data *ctx, * - Calling lttng_ustconsumer_recv_metadata(): * - channel->metadata_cache->lock * - Calling consumer_metadata_cache_flushed(): - * - consumer_data.lock - * - channel->lock - * - channel->metadata_cache->lock + * - channel->timer_lock + * - channel->metadata_cache->lock * - * Both consumer_data.lock and channel->lock currently - * cause a deadlock, since they are held while - * consumer_timer_switch_stop() is called. + * Ensure that neither consumer_data.lock nor + * channel->lock are taken within this function, since + * they are held while consumer_timer_switch_stop() is + * called. */ - ret = lttng_ustconsumer_request_metadata(ctx, channel); + ret = lttng_ustconsumer_request_metadata(ctx, channel, 1); if (ret < 0) { channel->switch_timer_error = 1; } diff --git a/src/common/consumer.c b/src/common/consumer.c index a0fb2384c..6cf85d96a 100644 --- a/src/common/consumer.c +++ b/src/common/consumer.c @@ -292,7 +292,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&channel->lock); - pthread_mutex_lock(&channel->timer_lock); /* Delete streams that might have been left in the stream list. */ cds_list_for_each_entry_safe(stream, stmp, &channel->streams.head, @@ -326,7 +325,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) call_rcu(&channel->node.head, free_channel_rcu); end: - pthread_mutex_unlock(&channel->timer_lock); pthread_mutex_unlock(&channel->lock); pthread_mutex_unlock(&consumer_data.lock); } @@ -1869,7 +1867,6 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream, pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&stream->chan->lock); - pthread_mutex_lock(&stream->chan->timer_lock); pthread_mutex_lock(&stream->lock); switch (consumer_data.type) { @@ -1964,13 +1961,12 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream, end: /* * Nullify the stream reference so it is not used after deletion. The - * consumer data lock MUST be acquired before being able to check for a - * NULL pointer value. + * channel lock MUST be acquired before being able to check for + * a NULL pointer value. */ stream->chan->metadata_stream = NULL; pthread_mutex_unlock(&stream->lock); - pthread_mutex_unlock(&stream->chan->timer_lock); pthread_mutex_unlock(&stream->chan->lock); pthread_mutex_unlock(&consumer_data.lock); diff --git a/src/common/ust-consumer/ust-consumer.c b/src/common/ust-consumer/ust-consumer.c index 61b220ee0..f8c02ee72 100644 --- a/src/common/ust-consumer/ust-consumer.c +++ b/src/common/ust-consumer/ust-consumer.c @@ -614,7 +614,6 @@ static int close_metadata(uint64_t chan_key) pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&channel->lock); - pthread_mutex_lock(&channel->timer_lock); if (cds_lfht_is_node_deleted(&channel->node.node)) { goto error_unlock; @@ -642,7 +641,6 @@ static int close_metadata(uint64_t chan_key) } error_unlock: - pthread_mutex_unlock(&channel->timer_lock); pthread_mutex_unlock(&channel->lock); pthread_mutex_unlock(&consumer_data.lock); error: @@ -759,7 +757,7 @@ static int snapshot_metadata(uint64_t key, char *path, uint64_t relayd_id, * Ask the sessiond if we have new metadata waiting and update the * consumer metadata cache. */ - ret = lttng_ustconsumer_request_metadata(ctx, metadata_channel); + ret = lttng_ustconsumer_request_metadata(ctx, metadata_channel, 0); if (ret < 0) { goto error; } @@ -982,7 +980,8 @@ error: * Receive the metadata updates from the sessiond. */ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, - uint64_t len, struct lttng_consumer_channel *channel) + uint64_t len, struct lttng_consumer_channel *channel, + int timer) { int ret, ret_code = LTTNG_OK; char *metadata_str; @@ -1019,7 +1018,7 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, } pthread_mutex_unlock(&channel->metadata_cache->lock); - while (consumer_metadata_cache_flushed(channel, offset + len)) { + while (consumer_metadata_cache_flushed(channel, offset + len, timer)) { DBG("Waiting for metadata to be flushed"); usleep(DEFAULT_METADATA_AVAILABILITY_WAIT_TIME); } @@ -1354,7 +1353,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, } ret = lttng_ustconsumer_recv_metadata(sock, key, offset, - len, channel); + len, channel, 0); if (ret < 0) { /* error receiving from sessiond */ goto error_fatal; @@ -1813,7 +1812,7 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream) * introduces deadlocks. */ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, - struct lttng_consumer_channel *channel) + struct lttng_consumer_channel *channel, int timer) { struct lttcomm_metadata_request_msg request; struct lttcomm_consumer_msg msg; @@ -1901,7 +1900,7 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, } ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket, - key, offset, len, channel); + key, offset, len, channel, timer); if (ret_code >= 0) { /* * Only send the status msg if the sessiond is alive meaning a positive diff --git a/src/common/ust-consumer/ust-consumer.h b/src/common/ust-consumer/ust-consumer.h index e65acac0b..c10cd13a2 100644 --- a/src/common/ust-consumer/ust-consumer.h +++ b/src/common/ust-consumer/ust-consumer.h @@ -54,9 +54,10 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream); void lttng_ustconsumer_close_metadata(struct lttng_ht *ht); void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream); int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, - uint64_t len, struct lttng_consumer_channel *channel); + uint64_t len, struct lttng_consumer_channel *channel, + int timer); int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, - struct lttng_consumer_channel *channel); + struct lttng_consumer_channel *channel, int timer); #else /* HAVE_LIBLTTNG_UST_CTL */ @@ -164,13 +165,14 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream) } static inline int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, - uint64_t len, struct lttng_consumer_channel *channel) + uint64_t len, struct lttng_consumer_channel *channel, + int timer) { return -ENOSYS; } static inline int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx, - struct lttng_consumer_channel *channel) + struct lttng_consumer_channel *channel, int timer) { return -ENOSYS; }