Observed issue
==============
When tracing applications linked against lttng-ust 2.12 and lttng-ust
2.13 in parallel with a lttng-tools 2.13 into the same per-uid buffers,
with the "procname" context enabled, babeltrace fails with "Event id NN
is outside range" when reading the trace:
[14:51:58.
717006865] (+5.
927872956) x lttng_ust_statedump:start: { cpu_id = 1 }, { procname = "sample-2.13-ust" }, { }
[error] Event id 41984 is outside range.
[error] Reading event failed.
Error printing trace.
Cause
=====
Inspection of the trace reveals that the layout of the procname context
field changed from 17 bytes to 16 bytes between 2.12 and 2.13. This is
an issue when applications share a per-uid ring buffer, because context
fields are associated with channels, and need to have the same layout
across all processes tracing into a given channel.
The layout of the procname field described by the trace metadata is that
of the first application which happens to register that channel in the
session lifetime.
Therefore, the procname context field length is part of the LTTng-UST
ABI and cannot be changed without breaking the LTTng-UST ABI (bumping
LTTNG_UST_ABI_MAJOR_VERSION_OLDEST_COMPATIBLE), which is unwanted
between 2.12 and 2.13. Keeping compatibility for combined use by
different applications between lttng-ust 2.12 and 2.13 is a required
feature for this release, because lttng-ust 2.13 introduces a library
ABI break (soname bump).
An example scenario leading to this issue:
1) trace created for per-uid buffers,
2) add procname context
3) start tracing
4) Application [a] linked against lttng-ust 2.13 registers the channel to
lttng-sessiond, sending its context descriptions with a 16-byte
procname context field,
5) Application [b] linked against lttng-ust 2.12 registers the same channel
to lttng-sessiond,
6) Application [b] traces an event with the procname context, followed
by an event payload with a single "string" field.
7) A trace viewer will observe the procname context, followed by an
extra null character, and thus mistakenly consider the event payload
to be an empty string. Reading the next event header will fail
because the string payload will be expected to contain an event ID.
Solution
========
Revert the procname context field size to 17 bytes to stay compatible
with lttng-ust 2.12.
In an abundance of caution, also revert the size of the
lttng_ust_statedump:procname "procname" field to 17, so there won't be
duplicated event IDs for this event when applications linked against
lttng-ust 2.12 and 2.13 are traced concurrently for the same user ID
in per-uid tracing.
History
=======
This issue was introduced by commit
0db3d6ee9be ("port: fix
pthread_setname_np integration") within the 2.13 development cycle.
Known drawbacks
===============
Applications currently running which are linked against a liblttng-ust
2.13 without this fix should be restarted after upgrading the library to
liblttng-ust 2.13 with this fix.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I206086df8b71463c248ca186343baaff5452762b
#include "common/macros.h"
#include "common/ust-context-provider.h"
+/*
+ * The context procname length is part of the LTTng-UST ABI.
+ * TODO: At the next breaking protocol bump, all users of this macro
+ * should instead use LTTNG_UST_ABI_PROCNAME_LEN.
+ */
+#define LTTNG_UST_CONTEXT_PROCNAME_LEN 17
+
struct lttng_ust_abi_obj;
struct lttng_event_notifier_group;
* be set for a thread before the first event is logged within this
* thread.
*/
-typedef char procname_array[PROCNAME_NESTING_MAX][17];
+typedef char procname_array[PROCNAME_NESTING_MAX][LTTNG_UST_CONTEXT_PROCNAME_LEN];
static DEFINE_URCU_TLS(procname_array, cached_procname);
CMM_STORE_SHARED(URCU_TLS(procname_nesting), nesting + 1);
/* Increment nesting before updating cache. */
cmm_barrier();
- lttng_pthread_getname_np(URCU_TLS(cached_procname)[nesting], LTTNG_UST_ABI_PROCNAME_LEN);
- URCU_TLS(cached_procname)[nesting][LTTNG_UST_ABI_PROCNAME_LEN - 1] = '\0';
+ lttng_pthread_getname_np(URCU_TLS(cached_procname)[nesting], LTTNG_UST_CONTEXT_PROCNAME_LEN);
+ URCU_TLS(cached_procname)[nesting][LTTNG_UST_CONTEXT_PROCNAME_LEN - 1] = '\0';
/* Decrement nesting after updating cache. */
cmm_barrier();
CMM_STORE_SHARED(URCU_TLS(procname_nesting), nesting);
struct lttng_ust_probe_ctx *probe_ctx __attribute__((unused)),
size_t offset __attribute__((unused)))
{
- return LTTNG_UST_ABI_PROCNAME_LEN;
+ return LTTNG_UST_CONTEXT_PROCNAME_LEN;
}
static
const char *procname;
procname = wrapper_getprocname();
- chan->ops->event_write(ctx, procname, LTTNG_UST_ABI_PROCNAME_LEN, 1);
+ chan->ops->event_write(ctx, procname, LTTNG_UST_CONTEXT_PROCNAME_LEN, 1);
}
static
static const struct lttng_ust_ctx_field *ctx_field = lttng_ust_static_ctx_field(
lttng_ust_static_event_field("procname",
- lttng_ust_static_type_array_text(LTTNG_UST_ABI_PROCNAME_LEN),
+ lttng_ust_static_type_array_text(LTTNG_UST_CONTEXT_PROCNAME_LEN),
false, false),
procname_get_size,
procname_record,
int statedump_pending;
int initial_statedump_done;
/* Keep procname for statedump */
- char procname[LTTNG_UST_ABI_PROCNAME_LEN];
+ char procname[LTTNG_UST_CONTEXT_PROCNAME_LEN];
};
/* Socket from app (connect) to session daemon (listen) for communication */
}
global_apps.allowed = 1;
- lttng_pthread_getname_np(global_apps.procname, LTTNG_UST_ABI_PROCNAME_LEN);
+ lttng_pthread_getname_np(global_apps.procname, LTTNG_UST_CONTEXT_PROCNAME_LEN);
error:
return ret;
}
goto end;
}
- lttng_pthread_getname_np(local_apps.procname, LTTNG_UST_ABI_PROCNAME_LEN);
+ lttng_pthread_getname_np(local_apps.procname, LTTNG_UST_CONTEXT_PROCNAME_LEN);
end:
return ret;
}
),
LTTNG_UST_TP_FIELDS(
lttng_ust_field_unused(session)
- lttng_ust_field_array_text(char, procname, name, LTTNG_UST_ABI_PROCNAME_LEN)
+ lttng_ust_field_array_text(char, procname, name, LTTNG_UST_CONTEXT_PROCNAME_LEN)
)
)