From: Mathieu Desnoyers Date: Fri, 1 Nov 2019 20:23:05 +0000 (-0400) Subject: Fix: sessiond: ust: deadlock with per-pid buffers X-Git-Tag: v2.11.1~46 X-Git-Url: https://git.lttng.org./?a=commitdiff_plain;h=7ca90d1f9f3b3098fda2bff016cd996cdbe7bd1f;p=lttng-tools.git Fix: sessiond: ust: deadlock with per-pid buffers Do not hold the registry lock while communicating with the consumerd, because doing so causes inter-process deadlocks between consumerd and sessiond with the metadata request notification. The deadlock involves both sessiond and consumerd: * lttng-sessiond: thread 11 - thread_application_management close_metadata() pthread_mutex_lock(®istry->lock); consumer_close_metadata() pthread_mutex_lock(socket->lock); thread 15 - thread_consumer_management ust_consumer_metadata_request() pthread_mutex_lock(&ust_reg->lock); thread 8 - thread_manage_clients consumer_is_data_pending pthread_mutex_lock(socket->lock); consumer_socket_recv() * lttng-consumerd: thread 4 - consumer_timer_thread sample_channel_positions() pthread_mutex_lock(&stream->lock); thread 8 - consumer_thread_sessiond_poll consumer_data_pending pthread_mutex_lock(&consumer_data.lock); pthread_mutex_lock(&stream->lock); thread 7 - consumer_thread_data_poll lttng_consumer_read_subbuffer pthread_mutex_lock(&stream->chan->lock); pthread_mutex_lock(&stream->lock); do_sync_metadata pthread_mutex_lock(&metadata->lock); lttng_ustconsumer_sync_metadata pthread_mutex_unlock(&metadata_stream->lock); lttng_ustconsumer_request_metadata() pthread_mutex_lock(&ctx->metadata_socket_lock); lttcomm_recv_unix_sock() Signed-off-by: Mathieu Desnoyers Signed-off-by: Jérémie Galarneau --- diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index 048dff430..d130f116a 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -741,6 +741,10 @@ error: * nullified. The session lock MUST be held unless the application is * in the destroy path. * + * Do not hold the registry lock while communicating with the consumerd, because + * doing so causes inter-process deadlocks between consumerd and sessiond with + * the metadata request notification. + * * Return 0 on success else a negative value. */ static int close_metadata(struct ust_registry_session *registry, @@ -748,6 +752,8 @@ static int close_metadata(struct ust_registry_session *registry, { int ret; struct consumer_socket *socket; + uint64_t metadata_key; + bool registry_was_already_closed; assert(registry); assert(consumer); @@ -755,8 +761,19 @@ static int close_metadata(struct ust_registry_session *registry, rcu_read_lock(); pthread_mutex_lock(®istry->lock); + metadata_key = registry->metadata_key; + registry_was_already_closed = registry->metadata_closed; + if (metadata_key != 0) { + /* + * Metadata closed. Even on error this means that the consumer + * is not responding or not found so either way a second close + * should NOT be emit for this registry. + */ + registry->metadata_closed = 1; + } + pthread_mutex_unlock(®istry->lock); - if (!registry->metadata_key || registry->metadata_closed) { + if (metadata_key == 0 || registry_was_already_closed) { ret = 0; goto end; } @@ -766,23 +783,15 @@ static int close_metadata(struct ust_registry_session *registry, consumer); if (!socket) { ret = -1; - goto error; + goto end; } - ret = consumer_close_metadata(socket, registry->metadata_key); + ret = consumer_close_metadata(socket, metadata_key); if (ret < 0) { - goto error; + goto end; } -error: - /* - * Metadata closed. Even on error this means that the consumer is not - * responding or not found so either way a second close should NOT be emit - * for this registry. - */ - registry->metadata_closed = 1; end: - pthread_mutex_unlock(®istry->lock); rcu_read_unlock(); return ret; }