logging: print human-readable thread names when logging
authorJérémie Galarneau <jeremie.galarneau@efficios.com>
Tue, 11 Feb 2020 23:22:13 +0000 (18:22 -0500)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Tue, 11 Aug 2020 20:50:22 +0000 (16:50 -0400)
The lttng_thread interface used by the session daemon uniquely names
all threads. This name can be used to augment the thread's logging
statement with a human-readable name rather than using the pid/tid
tuple used elsewhere.

Additionally, the thread name is set using the pthread API so that it
is visible in GDB and other tools (e.g. htop).

Invocations of pgrep in the test utilities are replaced by 'pgrep -f',
which matches against the process name.

We are not the first to encounter this problem after renaming the main
thread, see
https://github.com/mongodb/mongo/commit/726cafd713c7333640f8458ec9808ed4f678e3a7#diff-a9003101d1e4a99ac2d43d9b1b839587R122

pgrep uses the name name in /proc/$PID/status which contains the
thread name, not the executable name. In the case of the sessiond,
this is now "Main".

Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: I73dfe8683b2ea31f7ed0c2ffdfa8332f36e28f9b

src/bin/lttng-sessiond/main.c
src/bin/lttng-sessiond/thread.c
src/common/error.c
src/common/error.h
tests/utils/utils.sh

index a62719a1281fa5f508ac1c8b7fdc019357c73935..4bdac5217978d28dd2af6c036d66f0650615a9bf 100644 (file)
@@ -1302,6 +1302,7 @@ int main(int argc, char **argv)
        struct lttng_thread *notification_thread = NULL;
        struct lttng_thread *register_apps_thread = NULL;
 
+       logger_set_thread_name("Main", false);
        init_kernel_workarounds();
 
        rcu_register_thread();
index ed090f6d6b09d65570c4fced29f951d871e1c1a3..26661a36f2aae08c0b21426b9fa343ac9f820a61 100644 (file)
@@ -61,9 +61,10 @@ void *launch_thread(void *data)
        void *ret;
        struct lttng_thread *thread = (struct lttng_thread *) data;
 
-       DBG("Launching \"%s\" thread", thread->name);
+       logger_set_thread_name(thread->name, true);
+       DBG("Entering thread entry point");
        ret = thread->entry(thread->data);
-       DBG("Thread \"%s\" has returned", thread->name);
+       DBG("Thread entry point has returned");
        return ret;
 }
 
index 70063e5044400ab521f8e9b1383728f1fb71e359..82190b9fd527cea831f61e9620b43f73d1984c17 100644 (file)
@@ -7,14 +7,15 @@
 
 #define _LGPL_SOURCE
 #include <assert.h>
+#include <errno.h>
 #include <inttypes.h>
+#include <pthread.h>
 #include <stdlib.h>
 #include <string.h>
-#include <errno.h>
 
-#include <lttng/lttng-error.h>
 #include <common/common.h>
 #include <common/compat/getenv.h>
+#include <lttng/lttng-error.h>
 
 #include "error.h"
 
@@ -28,6 +29,7 @@ static int lttng_opt_abort_on_error = -1;
 
 /* TLS variable that contains the time of one single log entry. */
 DEFINE_URCU_TLS(struct log_time, error_log_time);
+DEFINE_URCU_TLS(const char *, logger_thread_name);
 
 LTTNG_HIDDEN
 const char *log_add_time(void)
@@ -66,6 +68,30 @@ error:
        return "";
 }
 
+LTTNG_HIDDEN
+void logger_set_thread_name(const char *name, bool set_pthread_name)
+{
+       int ret;
+
+       assert(name);
+       URCU_TLS(logger_thread_name) = name;
+
+       if (set_pthread_name) {
+               char pthread_name[16];
+
+               /*
+                * Truncations are expected since pthread limits thread names to
+                * a generous 16 characters.
+                */
+               strncpy(pthread_name, name, sizeof(pthread_name));
+               pthread_name[sizeof(pthread_name) - 1] = '\0';
+               ret = pthread_setname_np(pthread_self(), pthread_name);
+               if (ret) {
+                       DBG("Failed to set pthread name attribute");
+               }
+       }
+}
+
 /*
  * Human readable error message.
  */
index 33a299c4e4952e3882bdbc457e0bd55c042982bc..ea00ea5f96f7b81eb5bfb045e9ae893e5303644d 100644 (file)
@@ -15,6 +15,8 @@
 #include <stdbool.h>
 #include <urcu/tls-compat.h>
 #include <common/compat/time.h>
+#include <common/string-utils/format.h>
+#include <common/macros.h>
 
 #ifndef _GNU_SOURCE
 #error "lttng-tools error.h needs _GNU_SOURCE"
@@ -42,6 +44,7 @@ struct log_time {
        char str[19];
 };
 extern DECLARE_URCU_TLS(struct log_time, error_log_time);
+extern DECLARE_URCU_TLS(const char *, logger_thread_name);
 
 extern int lttng_opt_quiet;
 extern int lttng_opt_verbose;
@@ -139,13 +142,44 @@ static inline void __lttng_print_check_abort(enum lttng_error_level type)
        } while (0)
 
 /* Three level of debug. Use -v, -vv or -vvv for the levels */
-#define _ERRMSG(msg, type, fmt, args...) __lttng_print(type, msg \
-               " - %s [%ld/%ld]: " fmt " (in %s() at " __FILE__ ":" XSTR(__LINE__) ")\n", \
-                       log_add_time(), (long) getpid(), (long) lttng_gettid(), ## args, __func__)
+#define _ERRMSG(msg, type, fmt, args...)                                \
+       do {                                                            \
+               if (caa_unlikely(__lttng_print_check_opt(type))) {      \
+                       char generic_name[MAX_INT_DEC_LEN(long) +       \
+                                         MAX_INT_DEC_LEN(long)];       \
+                                                                        \
+                       snprintf(generic_name, sizeof(generic_name),    \
+                                       "%ld/%ld", (long) getpid(),     \
+                                       (long) lttng_gettid());         \
+                                                                        \
+                       __lttng_print(type,                             \
+                                       msg " - %s [%s]: " fmt          \
+                                           " (in %s() at " __FILE__    \
+                                           ":" XSTR(__LINE__) ")\n",   \
+                                       log_add_time(),                 \
+                                       URCU_TLS(logger_thread_name) ?: \
+                                                       generic_name,   \
+                                       ##args, __func__);              \
+               }                                                       \
+       } while (0)
 
-#define _ERRMSG_NO_LOC(msg, type, fmt, args...) __lttng_print(type, msg        \
-               " - %s [%ld/%ld]: " fmt "\n", \
-                       log_add_time(), (long) getpid(), (long) lttng_gettid(), ## args)
+#define _ERRMSG_NO_LOC(msg, type, fmt, args...)                          \
+       do {                                                             \
+               if (caa_unlikely(__lttng_print_check_opt(type))) {       \
+                       char generic_name[MAX_INT_DEC_LEN(long) +        \
+                                         MAX_INT_DEC_LEN(long)];        \
+                                                                         \
+                       snprintf(generic_name, sizeof(generic_name),     \
+                                       "%ld/%ld", (long) getpid(),      \
+                                       (long) lttng_gettid());          \
+                                                                         \
+                       __lttng_print(type, msg " - %s [%s]: " fmt "\n", \
+                                       log_add_time(),                  \
+                                       URCU_TLS(logger_thread_name) ?:  \
+                                                       generic_name,    \
+                                       ##args);                         \
+               }                                                        \
+       } while (0)
 
 #define MSG(fmt, args...) \
        __lttng_print(PRINT_MSG, fmt "\n", ## args)
@@ -158,10 +192,10 @@ static inline void __lttng_print_check_abort(enum lttng_error_level type)
 
 #define BUG(fmt, args...) _ERRMSG("BUG", PRINT_BUG, fmt, ## args)
 
-#define DBG(fmt, args...) _ERRMSG("DEBUG1", PRINT_DBG, fmt, ## args)
-#define DBG_NO_LOC(fmt, args...) _ERRMSG_NO_LOC("DEBUG1", PRINT_DBG, fmt, ## args)
-#define DBG2(fmt, args...) _ERRMSG("DEBUG2", PRINT_DBG2, fmt, ## args)
-#define DBG3(fmt, args...) _ERRMSG("DEBUG3", PRINT_DBG3, fmt, ## args)
+#define DBG(fmt, args...) _ERRMSG("DBG1", PRINT_DBG, fmt, ## args)
+#define DBG_NO_LOC(fmt, args...) _ERRMSG_NO_LOC("DBG1", PRINT_DBG, fmt, ## args)
+#define DBG2(fmt, args...) _ERRMSG("DBG2", PRINT_DBG2, fmt, ## args)
+#define DBG3(fmt, args...) _ERRMSG("DBG3", PRINT_DBG3, fmt, ## args)
 #define LOG(type, fmt, args...)                        \
        do {                                    \
                switch (type) {                 \
@@ -226,4 +260,8 @@ const char *error_get_str(int32_t code);
  */
 const char *log_add_time(void);
 
+/* Name must be a statically-allocated string. */
+LTTNG_HIDDEN
+void logger_set_thread_name(const char *name, bool set_pthread_name);
+
 #endif /* _ERROR_H */
index 8e6563d0672098e7c6d2f467bd26e4460a40b636..d65662e4265a8fc05c429549d8097c231bd39ba3 100644 (file)
@@ -401,7 +401,7 @@ function start_lttng_relayd_opt()
 
        DIR=$(readlink -f "$TESTDIR")
 
-       if [ -z $(pgrep $RELAYD_MATCH) ]; then
+       if [ -z $(pgrep -f $RELAYD_MATCH) ]; then
                # shellcheck disable=SC2086
                $DIR/../src/bin/lttng-relayd/$RELAYD_BIN $process_mode $opt 1> $OUTPUT_DEST 2> $ERROR_OUTPUT_DEST
                #$DIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt -vvv >>/tmp/relayd.log 2>&1 &
@@ -450,7 +450,7 @@ function stop_lttng_relayd_opt()
        local retval=0
        local pids=
 
-       pids=$(pgrep "$RELAYD_MATCH")
+       pids=$(pgrep -f "$RELAYD_MATCH")
        if [ -z "$pids" ]; then
                if [ "$withtap" -eq "1" ]; then
                        pass "No relay daemon to kill"
@@ -469,7 +469,7 @@ function stop_lttng_relayd_opt()
        else
                out=1
                while [ -n "$out" ]; do
-                       out=$(pgrep "$RELAYD_MATCH")
+                       out=$(pgrep -f "$RELAYD_MATCH")
                        if [ -n "$dtimeleft_s" ]; then
                                if [ $dtimeleft_s -lt 0 ]; then
                                        out=
@@ -547,7 +547,7 @@ function start_lttng_sessiond_opt()
        : "${LTTNG_SESSION_CONFIG_XSD_PATH="${DIR}/../src/common/config/"}"
        export LTTNG_SESSION_CONFIG_XSD_PATH
 
-       if [ -z "$(pgrep "${SESSIOND_MATCH}")" ]; then
+       if [ -z "$(pgrep -f "${SESSIOND_MATCH}")" ]; then
                # Have a load path ?
                if [ -n "$load_path" ]; then
                        # shellcheck disable=SC2086
@@ -599,10 +599,10 @@ function stop_lttng_sessiond_opt()
        local retval=0
 
        local runas_pids=
-       runas_pids=$(pgrep "$RUNAS_MATCH")
+       runas_pids=$(pgrep -f "$RUNAS_MATCH")
 
        local pids=
-       pids=$(pgrep "$SESSIOND_MATCH")
+       pids=$(pgrep -f "$SESSIOND_MATCH")
 
        if [ -n "$runas_pids" ]; then
                pids="$pids $runas_pids"
@@ -626,7 +626,7 @@ function stop_lttng_sessiond_opt()
        else
                out=1
                while [ -n "$out" ]; do
-                       out=$(pgrep "${SESSIOND_MATCH}")
+                       out=$(pgrep -f "${SESSIOND_MATCH}")
                        if [ -n "$dtimeleft_s" ]; then
                                if [ $dtimeleft_s -lt 0 ]; then
                                        out=
@@ -638,7 +638,7 @@ function stop_lttng_sessiond_opt()
                done
                out=1
                while [ -n "$out" ]; do
-                       out=$(pgrep "$CONSUMERD_MATCH")
+                       out=$(pgrep -f "$CONSUMERD_MATCH")
                        if [ -n "$dtimeleft_s" ]; then
                                if [ $dtimeleft_s -lt 0 ]; then
                                        out=
@@ -692,7 +692,7 @@ function sigstop_lttng_sessiond_opt()
                return
        fi
 
-       PID_SESSIOND="$(pgrep "${SESSIOND_MATCH}") $(pgrep "$RUNAS_MATCH")"
+       PID_SESSIOND="$(pgrep -f "${SESSIOND_MATCH}") $(pgrep -f "$RUNAS_MATCH")"
 
        if [ "$withtap" -eq "1" ]; then
                diag "Sending SIGSTOP to lt-$SESSIOND_BIN and $SESSIOND_BIN pids: $(echo "$PID_SESSIOND" | tr '\n' ' ')"
@@ -706,7 +706,7 @@ function sigstop_lttng_sessiond_opt()
        else
                out=1
                while [ $out -ne 0 ]; do
-                       pid="$(pgrep "$SESSIOND_MATCH")"
+                       pid="$(pgrep -f "$SESSIOND_MATCH")"
 
                        # Wait until state becomes stopped for session
                        # daemon(s).
@@ -754,7 +754,7 @@ function stop_lttng_consumerd_opt()
 
        local retval=0
 
-       PID_CONSUMERD="$(pgrep "$CONSUMERD_MATCH")"
+       PID_CONSUMERD="$(pgrep -f "$CONSUMERD_MATCH")"
 
        if [ -z "$PID_CONSUMERD" ]; then
                if [ "$withtap" -eq "1" ]; then
@@ -774,7 +774,7 @@ function stop_lttng_consumerd_opt()
        else
                out=1
                while [ $out -ne 0 ]; do
-                       pid="$(pgrep "$CONSUMERD_MATCH")"
+                       pid="$(pgrep -f "$CONSUMERD_MATCH")"
 
                        # If consumerds are still present check their status.
                        # A zombie status qualifies the consumerd as *killed*
@@ -821,7 +821,7 @@ function sigstop_lttng_consumerd_opt()
        local withtap=$1
        local signal=SIGSTOP
 
-       PID_CONSUMERD="$(pgrep "$CONSUMERD_MATCH")"
+       PID_CONSUMERD="$(pgrep -f "$CONSUMERD_MATCH")"
 
        diag "Sending SIGSTOP to $CONSUMERD_BIN pids: $(echo "$PID_CONSUMERD" | tr '\n' ' ')"
 
@@ -837,7 +837,7 @@ function sigstop_lttng_consumerd_opt()
        else
                out=1
                while [ $out -ne 0 ]; do
-                       pid="$(pgrep "$CONSUMERD_MATCH")"
+                       pid="$(pgrep -f "$CONSUMERD_MATCH")"
 
                        # Wait until state becomes stopped for all
                        # consumers.
This page took 0.033464 seconds and 4 git commands to generate.