Fix: consumerd: consumed size miscomputed during statistics sampling
[lttng-tools.git] / src / bin / lttng-sessiond / notification-thread.hpp
1 /*
2 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #ifndef NOTIFICATION_THREAD_H
9 #define NOTIFICATION_THREAD_H
10
11 #include "action-executor.hpp"
12 #include "thread.hpp"
13
14 #include <common/compat/poll.hpp>
15 #include <common/hashtable/hashtable.hpp>
16 #include <common/pipe.hpp>
17
18 #include <lttng/domain.h>
19 #include <lttng/trigger/trigger.h>
20
21 #include <pthread.h>
22 #include <semaphore.h>
23 #include <urcu.h>
24 #include <urcu/list.h>
25 #include <urcu/rculfhash.h>
26
27 using notification_client_id = uint64_t; /* Integer type of a notification client's id (see client_id_ht and next_notification_client_id). */
28
29 /*
30 * The notification thread holds no ownership of the tracer event source pipe
31 * file descriptor. The tracer management logic must remove the event source
32 * from the notification thread (see external commands) before releasing
33 * this file descriptor.
34 */
35 struct notification_event_tracer_event_source_element {
36 int fd; /* Tracer event source pipe file descriptor; not owned by the notification thread. */
37 /*
38 * A tracer event source can be removed from the notification thread's
39 * poll set before the end of its lifetime (for instance, when an error
40 * or hang-up is detected on its file descriptor). This is done to
41 * allow the notification thread to ignore follow-up events on this
42 * file descriptor.
43 *
44 * Under such circumstances, the notification thread still expects
45 * the normal clean-up to occur through the 'REMOVE_TRACER_EVENT_SOURCE'
46 * command.
47 */
48 bool is_fd_in_poll_set;
49 enum lttng_domain_type domain; /* NOTE(review): presumably the tracing domain of the tracer behind this source -- confirm. */
50 struct cds_list_head node; /* NOTE(review): presumably the linkage into tracer_event_sources_list, which holds elements of this type -- confirm. */
51 };
52
53 struct notification_trigger_tokens_ht_element {
54 uint64_t token; /* NOTE(review): presumably the tracer token associated with the trigger (cf. trigger_id.next_tracer_token) -- confirm. */
55 /* Weak reference to the trigger. */
56 struct lttng_trigger *trigger;
57 struct cds_lfht_node node; /* Hash table node. NOTE(review): presumably used by trigger_tokens_ht -- confirm. */
58 /* call_rcu delayed reclaim. */
59 struct rcu_head rcu_node;
60 };
61
62 struct notification_thread_handle {
63 /*
64 * Queue of struct notification command.
65 * event_fd must be written to (see WRITE(2)) to signal that a new
66 * command has been enqueued.
67 */
68 struct {
69 int event_fd; /* File descriptor written to when a command is enqueued. */
70 struct cds_list_head list; /* List of enqueued commands. */
71 pthread_mutex_t lock; /* NOTE(review): presumably serializes access to 'list' -- confirm. */
72 } cmd_queue;
73 /*
74 * Read side of pipes used to receive channel status info collected
75 * by the various consumer daemons.
76 */
77 struct {
78 int ust32_consumer; /* NOTE(review): presumably the 32-bit user space consumer daemon's pipe -- confirm. */
79 int ust64_consumer; /* NOTE(review): presumably the 64-bit user space consumer daemon's pipe -- confirm. */
80 int kernel_consumer; /* NOTE(review): presumably the kernel consumer daemon's pipe -- confirm. */
81 } channel_monitoring_pipes;
82 /* Used to wait for the launch of the notification thread. */
83 sem_t ready;
84 };
85
86 /**
87 * This thread maintains an internal state associating clients and triggers.
88 *
89 * In order to speed-up and simplify queries, hash tables providing the
90 * following associations are maintained:
91 *
92 * - client_socket_ht: associate a client's socket (fd) to its
93 * "struct notification_client".
94 * This hash table owns the "struct notification_client" which must
95 * thus be disposed-of on removal from the hash table.
96 *
97 * - client_id_ht: associate a client's id to its "struct notification_client"
98 * This hash table holds a _weak_ reference to the
99 * "struct notification_client".
100 *
101 * - channel_triggers_ht:
102 * associates a channel key to a list of
103 * struct lttng_trigger_list_nodes. The triggers in this list are
104 * those that have conditions that apply to a particular channel.
105 * A channel entry is only created when a channel is added; the
106 * list of triggers applying to such a channel is built at that
107 * moment.
108 * This hash table owns the list, but not the triggers themselves.
109 *
110 * - session_triggers_ht:
111 * associates a session name to a list of
112 * struct lttng_trigger_list_nodes. The triggers in this list are
113 * those that have conditions that apply to a particular session.
114 * A session entry is only created when a session is created; the
115 * list of triggers applying to this new session is built at that
116 * moment. This happens at the time of creation of a session_info.
117 * Likewise, the list is destroyed at the time of the session_info's
118 * destruction.
119 *
120 * - channel_state_ht:
121 * associates a pair (channel key, channel domain) to its last
122 * sampled state received from the consumer daemon
123 * (struct channel_state).
124 * This previous sample is kept to implement edge-triggered
125 * conditions as we need to detect the state transitions.
126 * This hash table owns the channel state.
127 *
128 * - notification_trigger_clients_ht:
129 * associates notification-emitting triggers to clients
130 * (struct notification_client_list) subscribed to those
131 * conditions.
132 * The condition's hash and match functions are used directly since
133 * all triggers in this hash table have the "notify" action.
134 * This hash table holds no ownership.
135 *
136 * - channels_ht:
137 * associates a channel_key to a struct channel_info. The hash table
138 * holds the ownership of the struct channel_info.
139 *
140 * - sessions_ht:
141 * associates a session_name (hash) to a struct session_info. The
142 * hash table holds no ownership of the struct session_info;
143 * the session_info structure is owned by the session's various
144 * channels through their struct channel_info (ref-counting is used).
145 *
146 * - triggers_ht:
147 * associates a trigger to a struct lttng_trigger_ht_element.
148 * The hash table holds the ownership of the
149 * lttng_trigger_ht_elements along with the triggers themselves.
150 * - triggers_by_name_uid_ht:
151 * associates a trigger (name, uid) tuple to
152 * a struct lttng_trigger_ht_element.
153 * The hash table does not hold any ownership and is used strictly
154 * for lookup on registration.
155 * - tracer_event_sources_list:
156 * A list of tracer event source (read side fd) of type
157 * struct notification_event_tracer_event_source_element.
158 *
159 *
160 * The thread reacts to the following internal events:
161 * 1) creation of a tracing channel,
162 * 2) destruction of a tracing channel,
163 * 3) creation of a tracing session,
164 * 4) destruction of a tracing session,
165 * 5) registration of a trigger,
166 * 6) unregistration of a trigger,
167 * 7) reception of a channel monitor sample from the consumer daemon,
168 * 8) Session rotation ongoing,
169 * 9) Session rotation completed,
170 * 10) registration of a tracer event source,
171 * 11) unregistration of a tracer event source,
172 *
173 * Events specific to notification-emitting triggers:
174 * 12) connection of a notification client,
175 * 13) disconnection of a notification client,
176 * 14) subscription of a client to a condition's notifications,
177 * 15) unsubscription of a client from a condition's notifications,
178 *
179 *
180 * 1) Creation of a tracing channel
181 * - notification_trigger_clients_ht is traversed to identify
182 * triggers which apply to this new channel,
183 * - triggers identified are added to the channel_triggers_ht.
184 * - add channel to channels_ht
185 * - if it is the first channel of a session, a session_info is created and
186 * added to the sessions_ht. A list of the triggers associated with that
187 * session is built, and it is added to session_triggers_ht.
188 *
189 * 2) Destruction of a tracing channel
190 * - remove entry from channel_triggers_ht, releasing the list wrapper and
191 * elements,
192 * - remove entry from the channel_state_ht.
193 * - remove channel from channels_ht
194 * - if it was the last known channel of a session, the session_info
195 * structure is torn down, which in turn destroys the list of triggers
196 * applying to that session.
197 *
198 * 3) Registration of a trigger
199 * - if the trigger's action is of type "notify",
200 * - traverse the list of conditions of every client to build a list of
201 * clients which have to be notified when this trigger's condition is met,
202 * - add list of clients (even if it is empty) to the
203 * notification_trigger_clients_ht,
204 * - add trigger to channel_triggers_ht (if applicable),
205 * - add trigger to session_triggers_ht (if applicable),
206 * - add trigger to triggers_by_name_uid_ht
207 * - add trigger to triggers_ht
208 * - evaluate the trigger's condition right away to react if that condition
209 * is true from the beginning.
210 *
211 * 4) Unregistration of a trigger
212 * - if the trigger's action is of type "notify",
213 * - remove the trigger from the notification_trigger_clients_ht,
214 * - remove trigger from channel_triggers_ht (if applicable),
215 * - remove trigger from session_triggers_ht (if applicable),
216 * - remove trigger from triggers_by_name_uid_ht
217 * - remove trigger from triggers_ht
218 *
219 * 5) Reception of a channel monitor sample from the consumer daemon
220 * - evaluate the conditions associated with the triggers found in
221 * the channel_triggers_ht,
222 * - if a condition evaluates to "true" and the condition is of type
223 * "notify", query the notification_trigger_clients_ht and send
224 * a notification to the clients.
225 *
226 * 6) Session rotation ongoing
227 *
228 * 7) Session rotation completed
229 *
230 * 8) Registration of a tracer event source
231 * - Add the tracer event source of the application to
232 * tracer_event_sources_list,
233 * - Add the tracer event source to the pollset.
234 *
235 * 9) Unregistration of a tracer event source
236 * - Remove the tracer event source of the application from
237 * tracer_event_sources_list,
238 * - Remove the tracer event source from the pollset.
239 *
240 * 10) Connection of a client
241 * - add client socket to the client_socket_ht,
242 * - add client socket to the client_id_ht.
243 *
244 * 11) Disconnection of a client
245 * - remove client socket from the client_id_ht,
246 * - remove client socket from the client_socket_ht,
247 * - traverse all conditions to which the client is subscribed and remove
248 * the client from the notification_trigger_clients_ht.
249 *
250 * 12) Subscription of a client to a condition's notifications
251 * - Add the condition to the client's list of subscribed conditions,
252 * - Look-up notification_trigger_clients_ht and add the client to
253 * the list of clients.
254 * - Evaluate the condition for the client that subscribed if the trigger
255 * was already registered.
256 *
257 * 13) Unsubscription of a client from a condition's notifications
258 * - Remove the condition from the client's list of subscribed conditions,
259 * - Look-up notification_trigger_clients_ht and remove the client
260 * from the list of clients.
261 */
262 struct notification_thread_state {
263 int notification_channel_socket; /* NOTE(review): presumably the socket through which notification clients reach the thread -- confirm. */
264 struct lttng_poll_event events; /* NOTE(review): presumably the notification thread's poll set -- confirm. */
265 struct cds_lfht *client_socket_ht;
266 struct cds_lfht *client_id_ht;
267 struct cds_lfht *channel_triggers_ht;
268 struct cds_lfht *session_triggers_ht;
269 struct cds_lfht *channel_state_ht;
270 struct cds_lfht *notification_trigger_clients_ht;
271 struct cds_lfht *channels_ht;
272 struct cds_lfht *sessions_ht;
273 struct cds_lfht *triggers_ht;
274 struct cds_lfht *triggers_by_name_uid_ht;
275 struct cds_lfht *trigger_tokens_ht; /* NOTE(review): presumably holds struct notification_trigger_tokens_ht_element entries keyed by token -- confirm. */
276 struct {
277 uint64_t next_tracer_token; /* NOTE(review): presumably the next token value to hand out to a trigger -- confirm. */
278 uint64_t name_offset; /* NOTE(review): purpose not visible in this header -- confirm against the implementation. */
279 } trigger_id;
280 /*
281 * Read side of the pipes used to receive tracer events. As their name
282 * implies, tracer event source activity originates from either
283 * registered applications (user space tracer) or from the kernel
284 * tracer.
285 *
286 * The list is not protected by a lock since add and remove operations
287 * are currently done only by the notification thread in
288 * response to blocking commands.
289 */
290 struct cds_list_head tracer_event_sources_list;
291 notification_client_id next_notification_client_id; /* NOTE(review): presumably the id given to the next client that connects -- confirm. */
292 struct action_executor *executor; /* NOTE(review): presumably runs the actions of triggers whose condition is met -- confirm. */
293
294 /*
295 * Tells the thread to break out of the poll event processing loop and
296 * call _poll_wait() again.
297 *
298 * This is necessary because some events on one fd might trigger the
299 * consumption of another fd.
300 * For example, a single _poll_wait() call can return notification
301 * thread commands and events from the tracer event source (event
302 * notifier).
303 * Picture a scenario where we receive two events:
304 * the first one is a _REMOVE_TRACER_EVENT_SOURCE command, and
305 * the second is a POLLIN on the tracer event source fd.
306 *
307 * The _REMOVE_TRACER_EVENT_SOURCE will read all the data of the
308 * removed tracer event source.
309 *
310 * The second event is now invalid as we consumed all the data for
311 * which we received the POLLIN.
312 *
313 * For this reason, we need to break out of the event processing loop and
314 * call _poll_wait() again to get a clean view of the activity on the
315 * fds.
316 */
317 bool restart_poll;
318 };
319
320 /* notification_thread_data takes ownership of the channel monitor pipes. NOTE(review): no 'notification_thread_data' type is visible in this header; presumably this refers to struct notification_thread_handle -- confirm. */
321 struct notification_thread_handle *
322 notification_thread_handle_create(struct lttng_pipe *ust32_channel_monitor_pipe,
323 struct lttng_pipe *ust64_channel_monitor_pipe,
324 struct lttng_pipe *kernel_channel_monitor_pipe); /* Takes one channel monitor pipe per consumer daemon and returns a handle pointer. NOTE(review): failure behaviour (e.g. a NULL return) is not visible here -- confirm. */
325 void notification_thread_handle_destroy(struct notification_thread_handle *handle); /* NOTE(review): presumably releases a handle obtained from notification_thread_handle_create() -- confirm. */
326 struct lttng_thread *launch_notification_thread(struct notification_thread_handle *handle); /* NOTE(review): presumably starts the notification thread for 'handle' and returns the corresponding lttng_thread -- confirm. */
327
328 #endif /* NOTIFICATION_THREAD_H */
This page took 0.041453 seconds and 4 git commands to generate.