notification-thread: drain all tracer notifications on removal
author Francis Deslauriers <francis.deslauriers@efficios.com>
Thu, 10 Dec 2020 20:41:29 +0000 (15:41 -0500)
committer Jérémie Galarneau <jeremie.galarneau@efficios.com>
Tue, 9 Mar 2021 03:55:47 +0000 (22:55 -0500)
We also force the restart of the poll loop to ensure a valid poll set
state on handle_notification_thread_command_remove_tracer_event_source.

Signed-off-by: Francis Deslauriers <francis.deslauriers@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: I0c6b94309ddcf690d2e7603d5c718076e213095c

src/bin/lttng-sessiond/notification-thread-events.c
src/bin/lttng-sessiond/notification-thread.c
src/bin/lttng-sessiond/notification-thread.h

index 823f2a7eb3b2d53a4642844738ebd808445e2f26..6bd799279b3ff6cead92c77bb2553c6117ed5afc 100644 (file)
@@ -193,6 +193,11 @@ int client_handle_transmission_status(
                enum client_transmission_status transmission_status,
                struct notification_thread_state *state);
 
+static
+int handle_one_event_notifier_notification(
+               struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain);
+
 static
 void free_lttng_trigger_ht_element_rcu(struct rcu_head *node);
 
@@ -1980,6 +1985,59 @@ end:
        return ret;
 }
 
+/*
+ * Consume the event notifier notifications that are readable on `pipe`.
+ *
+ * A temporary poll set holding only `pipe` is polled with a timeout argument
+ * of 0. Each time LPOLLIN is reported, one notification is received and
+ * dispatched through handle_one_event_notifier_notification(). The loop ends
+ * as soon as the poll reports nothing left to read.
+ *
+ * A failure to handle a single notification is logged and does not stop the
+ * draining.
+ *
+ * Returns 0 once the pipe is drained, or a negative value if the poll set
+ * could not be set up or if waiting on it failed.
+ */
+static
+int drain_event_notifier_notification_pipe(
+               struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain)
+{
+       struct lttng_poll_event events = {0};
+       int ret;
+
+       ret = lttng_poll_create(&events, 1, LTTNG_CLOEXEC);
+       if (ret < 0) {
+               ERR("[notification-thread] Error creating lttng_poll_event");
+               goto end;
+       }
+
+       ret = lttng_poll_add(&events, pipe, LPOLLIN);
+       if (ret < 0) {
+               ERR("[notification-thread] Error adding fd event notifier notification pipe to lttng_poll_event: fd = %d",
+                               pipe);
+               goto end;
+       }
+
+       while (true) {
+               /*
+                * Continue to consume notifications as long as there are new
+                * ones coming in. The tracer has been asked to stop producing
+                * them.
+                */
+               ret = lttng_poll_wait_interruptible(&events, 0);
+               if (ret < 0) {
+                       /*
+                        * Checked first: when the wait fails, the events of
+                        * the poll set must not be inspected, and the failure
+                        * must not be reported as a successful drain.
+                        */
+                       PERROR("Failed on lttng_poll_wait_interruptible() call");
+                       ret = -1;
+                       goto end;
+               }
+
+               /*
+                * `pipe` is the only fd of this poll set, so its event is at
+                * index 0.
+                *
+                * LPOLLIN is explicitly checked since LPOLLHUP is implicitly
+                * monitored (on Linux, at least) and will be returned when
+                * the pipe is closed but empty.
+                */
+               if (ret == 0 || (LTTNG_POLL_GETEV(&events, 0) & LPOLLIN) == 0) {
+                       /* No more notification to be read on this pipe. */
+                       ret = 0;
+                       goto end;
+               }
+
+               ret = handle_one_event_notifier_notification(state, pipe, domain);
+               if (ret) {
+                       /* Best effort: report the failure and keep draining. */
+                       ERR("[notification-thread] Error consuming an event notifier notification from pipe: fd = %d",
+                                       pipe);
+               }
+       }
+end:
+       lttng_poll_clean(&events);
+       return ret;
+}
+
 static
 int handle_notification_thread_command_remove_tracer_event_source(
                struct notification_thread_state *state,
@@ -2036,6 +2094,24 @@ int handle_notification_thread_command_remove_tracer_event_source(
 
        source_element->is_fd_in_poll_set = false;
 
+       ret = drain_event_notifier_notification_pipe(state, tracer_event_source_fd,
+                       source_element->domain);
+       if (ret) {
+               ERR("[notification-thread] Error draining event notifier notification: tracer_event_source_fd = %d, domain = %s",
+                               tracer_event_source_fd,
+                               lttng_domain_type_str(source_element->domain));
+               cmd_result = LTTNG_ERR_FATAL;
+               goto end;
+       }
+
+       /*
+        * The drain_event_notifier_notification_pipe() call might have read
+        * data from an fd for which we received an event in the latest
+        * _poll_wait() call. Make sure the thread calls _poll_wait() again to
+        * ensure we have a clean state.
+        */
+       state->restart_poll = true;
+
 end:
        free(source_element);
        *_cmd_result = cmd_result;
@@ -4161,7 +4237,8 @@ end:
        return ret;
 }
 
-static struct lttng_event_notifier_notification *receive_notification(
+static
+struct lttng_event_notifier_notification *recv_one_event_notifier_notification(
                int notification_pipe_read_fd, enum lttng_domain_type domain)
 {
        int ret;
@@ -4217,28 +4294,19 @@ end:
        return notification;
 }
 
-int handle_notification_thread_event_notification(struct notification_thread_state *state,
-               int pipe,
-               enum lttng_domain_type domain)
+static
+int dispatch_one_event_notifier_notification(struct notification_thread_state *state,
+               struct lttng_event_notifier_notification *notification)
 {
-       int ret;
-       enum lttng_trigger_status trigger_status;
        struct cds_lfht_node *node;
        struct cds_lfht_iter iter;
        struct notification_trigger_tokens_ht_element *element;
+       enum lttng_trigger_status trigger_status;
        struct lttng_evaluation *evaluation = NULL;
-       struct lttng_event_notifier_notification *notification = NULL;
        enum action_executor_status executor_status;
        struct notification_client_list *client_list = NULL;
        const char *trigger_name;
-
-       notification = receive_notification(pipe, domain);
-       if (notification == NULL) {
-               ERR("[notification-thread] Error receiving notification from tracer (fd = %i, domain = %s)",
-                               pipe, lttng_domain_type_str(domain));
-               ret = -1;
-               goto end;
-       }
+       int ret;
 
        /* Find triggers associated with this token. */
        rcu_read_lock();
@@ -4347,13 +4415,45 @@ next_client:
        }
 
 end_unlock:
-       lttng_event_notifier_notification_destroy(notification);
        notification_client_list_put(client_list);
        rcu_read_unlock();
+       return ret;
+}
+
+/*
+ * Receive a single event notifier notification from `pipe` and dispatch it.
+ *
+ * The notification is obtained with recv_one_event_notifier_notification()
+ * and handed to dispatch_one_event_notifier_notification(). It is destroyed
+ * before returning, whether or not the dispatch succeeded.
+ *
+ * Returns 0 on success, -1 if no notification could be received, or the
+ * non-zero value returned by the dispatch when it fails.
+ */
+static
+int handle_one_event_notifier_notification(
+               struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain)
+{
+       int ret;
+       struct lttng_event_notifier_notification *notification = NULL;
+
+       notification = recv_one_event_notifier_notification(pipe, domain);
+       if (notification == NULL) {
+               ERR("[notification-thread] Error receiving an event notifier notification from tracer: fd = %i, domain = %s",
+                               pipe, lttng_domain_type_str(domain));
+               ret = -1;
+               goto end;
+       }
+
+       ret = dispatch_one_event_notifier_notification(state, notification);
+       if (ret) {
+               ERR("[notification-thread] Error dispatching an event notifier notification from tracer: fd = %i, domain = %s",
+                               pipe, lttng_domain_type_str(domain));
+               goto end;
+       }
+
 end:
+       /* Also reached with a NULL notification when the reception failed. */
+       lttng_event_notifier_notification_destroy(notification);
        return ret;
 }
 
+/*
+ * Entry point used when a tracer event source fd is readable: consumes and
+ * dispatches exactly one event notifier notification from `pipe`.
+ *
+ * Returns the status of handle_one_event_notifier_notification().
+ */
+int handle_notification_thread_event_notification(
+               struct notification_thread_state *state,
+               int pipe,
+               enum lttng_domain_type domain)
+{
+       const int status =
+                       handle_one_event_notifier_notification(state, pipe, domain);
+
+       return status;
+}
+
 int handle_notification_thread_channel_sample(
                struct notification_thread_state *state, int pipe,
                enum lttng_domain_type domain)
index 1ba724337b67ee69a1856087a5c8b65c45a32cd8..dd1dc3e8fe50753cfe7ac10978a6f6898df25dd3 100644 (file)
@@ -510,6 +510,9 @@ int init_thread_state(struct notification_thread_handle *handle,
        if (!state->executor) {
                goto error;
        }
+
+       state->restart_poll = false;
+
        mark_thread_as_ready(handle);
 end:
        return 0;
@@ -656,6 +659,12 @@ void *thread_notification(void *data)
                        goto error;
                }
 
+               /*
+                * Reset restart_poll flag so that calls below might turn it
+                * on.
+                */
+               state.restart_poll = false;
+
                fd_count = ret;
                for (i = 0; i < fd_count; i++) {
                        int fd = LTTNG_POLL_GETFD(&state.events, i);
@@ -733,6 +742,15 @@ void *thread_notification(void *data)
                                        }
                                }
                        }
+
+                       /*
+                        * Calls above might have changed the state of the
+                        * FDs in `state.events`. Call _poll_wait() again to
+                        * ensure we have a consistent state.
+                        */
+                       if (state.restart_poll) {
+                               break;
+                       }
                }
        }
 exit:
index 83a5bea79e175240b462e0703c743220ee98ba02..8b4c7af2b0326dec1126ea2767cc1ffcf0e9761c 100644 (file)
@@ -285,6 +285,31 @@ struct notification_thread_state {
        struct cds_list_head tracer_event_sources_list;
        notification_client_id next_notification_client_id;
        struct action_executor *executor;
+
+       /*
+        * Tells the thread to break out of the poll event processing loop and
+        * call _poll_wait() again.
+        *
+        * This is necessary because some events on one fd might trigger the
+        * consumption of another fd.
+        * For example, a single _poll_wait() call can return notification
+        * thread commands and events from the tracer event source (event
+        * notifier).
+        * Picture a scenario where we receive two events:
+        *  the first one is a _REMOVE_TRACER_EVENT_SOURCE command, and
+        *  the second is a POLLIN on the tracer event source fd.
+        *
+        * The _REMOVE_TRACER_EVENT_SOURCE will read all the data of the
+        * removed tracer event source.
+        *
+        * The second event is now invalid as we consumed all the data for
+        * which we received the POLLIN.
+        *
+        * For this reason, we need to break out of the event processing loop
+        * and call _poll_wait() again to get a clean view of the activity on
+        * the fds.
+        */
+       bool restart_poll;
 };
 
 /* notification_thread_data takes ownership of the channel monitor pipes. */
This page took 0.031542 seconds and 4 git commands to generate.