Fix: join consumer timer thread
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Fri, 16 Jun 2017 21:23:13 +0000 (17:23 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Tue, 20 Jun 2017 20:53:44 +0000 (16:53 -0400)
Detaching the timer thread has the unfortunate side-effect of letting
the health management data structures be freed by main() while the timer
thread may still be using them (if, e.g., main() exits quickly).

Overcome this situation by tearing down and joining the timer thread.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/bin/lttng-consumerd/lttng-consumerd.c
src/common/consumer/consumer-timer.c
src/common/consumer/consumer-timer.h
src/common/consumer/consumer.h

index 9fb4747536ad7fb3551217f1f3d6aaa0198caae6..fef03c6f64c44060637934ceb5e873198ed70b80 100644 (file)
@@ -56,6 +56,7 @@
 
 static pthread_t channel_thread, data_thread, metadata_thread,
                sessiond_thread, metadata_timer_thread, health_thread;
+static bool metadata_timer_thread_online;
 
 /* to count the number of times the user pressed ctrl+c */
 static int sigintcount = 0;
@@ -506,6 +507,20 @@ int main(int argc, char **argv)
        }
        cmm_smp_mb();   /* Read ready before following operations */
 
+       /*
+        * Create the thread to manage the UST metadata periodic timer and
+        * live timer.
+        */
+       ret = pthread_create(&metadata_timer_thread, NULL,
+                       consumer_timer_thread, (void *) ctx);
+       if (ret) {
+               errno = ret;
+               PERROR("pthread_create");
+               retval = -1;
+               goto exit_metadata_timer_thread;
+       }
+       metadata_timer_thread_online = true;
+
        /* Create thread to manage channels */
        ret = pthread_create(&channel_thread, default_pthread_attr(),
                        consumer_thread_channel_poll,
@@ -549,34 +564,12 @@ int main(int argc, char **argv)
                goto exit_sessiond_thread;
        }
 
-       /*
-        * Create the thread to manage the UST metadata periodic timer and
-        * live timer.
-        */
-       ret = pthread_create(&metadata_timer_thread, default_pthread_attr(),
-                       consumer_timer_thread, (void *) ctx);
-       if (ret) {
-               errno = ret;
-               PERROR("pthread_create");
-               retval = -1;
-               goto exit_metadata_timer_thread;
-       }
-
-       ret = pthread_detach(metadata_timer_thread);
-       if (ret) {
-               errno = ret;
-               PERROR("pthread_detach");
-               retval = -1;
-               goto exit_metadata_timer_detach;
-       }
 
        /*
         * This is where we start awaiting program completion (e.g. through
         * signal that asks threads to teardown.
         */
 
-exit_metadata_timer_detach:
-exit_metadata_timer_thread:
        ret = pthread_join(sessiond_thread, &status);
        if (ret) {
                errno = ret;
@@ -617,6 +610,8 @@ exit_metadata_thread:
        }
 exit_channel_thread:
 
+exit_metadata_timer_thread:
+
        ret = pthread_join(health_thread, &status);
        if (ret) {
                errno = ret;
@@ -629,17 +624,38 @@ exit_health_thread:
 exit_health_pipe:
 
 exit_init_data:
-       tmp_ctx = ctx;
-       ctx = NULL;
-       cmm_barrier();  /* Clear ctx for signal handler. */
        /*
         * Wait for all pending call_rcu work to complete before tearing
         * down data structures. call_rcu worker may be trying to
         * perform lookups in those structures.
         */
        rcu_barrier();
-       lttng_consumer_destroy(tmp_ctx);
        lttng_consumer_cleanup();
+       /*
+        * Tearing down the metadata timer thread in a
+        * non-fully-symmetric fashion compared to its creation in case
+        * lttng_consumer_cleanup() ends up tearing down timers (which
+        * requires the timer thread to be alive).
+        */
+       if (metadata_timer_thread_online) {
+               /*
+                * Ensure the metadata timer thread exits only after all other
+                * threads are gone, because it is required to perform timer
+                * teardown synchronization.
+                */
+               kill(getpid(), LTTNG_CONSUMER_SIG_EXIT);
+               ret = pthread_join(metadata_timer_thread, &status);
+               if (ret) {
+                       errno = ret;
+                       PERROR("pthread_join metadata_timer_thread");
+                       retval = -1;
+               }
+               metadata_timer_thread_online = false;
+       }
+       tmp_ctx = ctx;
+       ctx = NULL;
+       cmm_barrier();  /* Clear ctx for signal handler. */
+       lttng_consumer_destroy(tmp_ctx);
 
        if (health_consumerd) {
                health_app_destroy(health_consumerd);
index 0adc5724efd6833ff22132989d45ccbde33193f7..e42940ed991fa60a6ec7e0e744ebd1033e203bf9 100644 (file)
@@ -72,6 +72,10 @@ static void setmask(sigset_t *mask)
        if (ret) {
                PERROR("sigaddset monitor");
        }
+       ret = sigaddset(mask, LTTNG_CONSUMER_SIG_EXIT);
+       if (ret) {
+               PERROR("sigaddset exit");
+       }
 }
 
 static int channel_monitor_pipe = -1;
@@ -782,7 +786,7 @@ end:
 /*
  * This thread is the sighandler for signals LTTNG_CONSUMER_SIG_SWITCH,
  * LTTNG_CONSUMER_SIG_TEARDOWN, LTTNG_CONSUMER_SIG_LIVE, and
- * LTTNG_CONSUMER_SIG_MONITOR.
+ * LTTNG_CONSUMER_SIG_MONITOR, LTTNG_CONSUMER_SIG_EXIT.
  */
 void *consumer_timer_thread(void *data)
 {
@@ -836,6 +840,9 @@ void *consumer_timer_thread(void *data)
 
                        channel = info.si_value.sival_ptr;
                        monitor_timer(ctx, channel);
+               } else if (signr == LTTNG_CONSUMER_SIG_EXIT) {
+                       assert(CMM_LOAD_SHARED(consumer_quit));
+                       goto end;
                } else {
                        ERR("Unexpected signal %d\n", info.si_signo);
                }
@@ -844,10 +851,8 @@ void *consumer_timer_thread(void *data)
 error_testpoint:
        /* Only reached in testpoint error */
        health_error();
+end:
        health_unregister(health_consumerd);
-
        rcu_unregister_thread();
-
-       /* Never return */
        return NULL;
 }
index 851a172aa13bfa4608138c951907c13c6f971549..1b5dd414c98f7c5bf897ddce4cfad21b17231fbc 100644 (file)
@@ -28,6 +28,7 @@
 #define LTTNG_CONSUMER_SIG_TEARDOWN    SIGRTMIN + 11
 #define LTTNG_CONSUMER_SIG_LIVE                SIGRTMIN + 12
 #define LTTNG_CONSUMER_SIG_MONITOR     SIGRTMIN + 13
+#define LTTNG_CONSUMER_SIG_EXIT                SIGRTMIN + 14
 
 #define CLOCKID CLOCK_MONOTONIC
 
index 220dbb4a294b0b608a88391bc72f08365d64fad5..3111471db2bfde3ce7cfb81c9f166ece3c544b3d 100644 (file)
@@ -610,6 +610,12 @@ struct lttng_consumer_global_data {
  */
 extern int consumer_quit;
 
+/*
+ * Set to nonzero when the consumer is exiting. Updated by signal
+ * handler and thread exit, read by threads.
+ */
+extern int consumer_quit;
+
 /* Flag used to temporarily pause data consumption from testpoints. */
 extern int data_consumption_paused;
 
This page took 0.035423 seconds and 4 git commands to generate.