1 /* SPDX-License-Identifier: GPL-2.0-only */
3 #define TRACE_SYSTEM sched
5 #if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
6 #define LTTNG_TRACE_SCHED_H
8 #include <lttng/tracepoint-event.h>
9 #include <linux/sched.h>
10 #include <linux/pid_namespace.h>
11 #include <linux/binfmts.h>
12 #include <lttng/kernel-version.h>
13 #include <linux/sched/rt.h>
15 #define LTTNG_MAX_PID_NS_LEVEL 32
17 #ifndef _TRACE_SCHED_DEF_
18 #define _TRACE_SCHED_DEF_
20 #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0) \
21 || LTTNG_RHEL_KERNEL_RANGE(5,14,0,162,0,0, 5,15,0,0,0,0))
23 static inline long __trace_sched_switch_state(bool preempt
,
24 unsigned int prev_state
,
25 struct task_struct
*p
)
29 #ifdef CONFIG_SCHED_DEBUG
31 #endif /* CONFIG_SCHED_DEBUG */
34 * Preemption ignores task state, therefore preempted tasks are always
35 * RUNNING (we will not have dequeued if state != RUNNING).
38 return TASK_REPORT_MAX
;
41 * task_state_index() uses fls() and returns a value from 0-8 range.
42 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
43 * it for left shift operation to get the correct task->state
46 state
= __task_state_index(prev_state
, p
->exit_state
);
48 return state
? (1 << (state
- 1)) : state
;
51 #elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
53 static inline long __trace_sched_switch_state(bool preempt
, struct task_struct
*p
)
57 #ifdef CONFIG_SCHED_DEBUG
59 #endif /* CONFIG_SCHED_DEBUG */
62 * Preemption ignores task state, therefore preempted tasks are always
63 * RUNNING (we will not have dequeued if state != RUNNING).
66 return TASK_REPORT_MAX
;
69 * task_state_index() uses fls() and returns a value from 0-8 range.
70 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
71 * it for left shift operation to get the correct task->state
74 state
= task_state_index(p
);
76 return state
? (1 << (state
- 1)) : state
;
79 #elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,14,0))
81 static inline long __trace_sched_switch_state(bool preempt
, struct task_struct
*p
)
85 #ifdef CONFIG_SCHED_DEBUG
87 #endif /* CONFIG_SCHED_DEBUG */
90 * Preemption ignores task state, therefore preempted tasks are always
91 * RUNNING (we will not have dequeued if state != RUNNING).
94 return TASK_REPORT_MAX
;
97 * __get_task_state() uses fls() and returns a value from 0-8 range.
98 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
99 * it for left shift operation to get the correct task->state
102 state
= __get_task_state(p
);
104 return state
? (1 << (state
- 1)) : state
;
109 static inline long __trace_sched_switch_state(bool preempt
, struct task_struct
*p
)
111 #ifdef CONFIG_SCHED_DEBUG
112 BUG_ON(p
!= current
);
113 #endif /* CONFIG_SCHED_DEBUG */
115 * Preemption ignores task state, therefore preempted tasks are always RUNNING
116 * (we will not have dequeued if state != RUNNING).
118 return preempt
? TASK_RUNNING
| TASK_STATE_MAX
: p
->state
;
122 #endif /* _TRACE_SCHED_DEF_ */
#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
/*
 * Enumeration of the task state bitmask.
 * Only bit flags are enumerated here, not composition of states.
 */
LTTNG_TRACEPOINT_ENUM(task_state,
	TP_ENUM_VALUES(
		ctf_enum_value("TASK_RUNNING", TASK_RUNNING)
		ctf_enum_value("TASK_INTERRUPTIBLE", TASK_INTERRUPTIBLE)
		ctf_enum_value("TASK_UNINTERRUPTIBLE", TASK_UNINTERRUPTIBLE)
		ctf_enum_value("TASK_STOPPED", __TASK_STOPPED)
		ctf_enum_value("TASK_TRACED", __TASK_TRACED)
		ctf_enum_value("EXIT_DEAD", EXIT_DEAD)
		ctf_enum_value("EXIT_ZOMBIE", EXIT_ZOMBIE)
		ctf_enum_value("TASK_PARKED", TASK_PARKED)
		ctf_enum_value("TASK_DEAD", TASK_DEAD)
		ctf_enum_value("TASK_WAKEKILL", TASK_WAKEKILL)
		ctf_enum_value("TASK_WAKING", TASK_WAKING)
		ctf_enum_value("TASK_NOLOAD", TASK_NOLOAD)
#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,8,0))
		ctf_enum_value("TASK_NEW", TASK_NEW)
#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,8,0)) */
		ctf_enum_value("TASK_STATE_MAX", TASK_STATE_MAX)
	)
)
#endif /* CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM */
154 * Tracepoint for calling kthread_stop, performed to end a kthread:
156 LTTNG_TRACEPOINT_EVENT(sched_kthread_stop
,
158 TP_PROTO(struct task_struct
*t
),
163 ctf_array_text(char, comm
, t
->comm
, TASK_COMM_LEN
)
164 ctf_integer(pid_t
, tid
, t
->pid
)
169 * Tracepoint for the return value of the kthread stopping:
171 LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret
,
178 ctf_integer(int, ret
, ret
)
183 * Tracepoint for waking up a task:
185 LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template
,
187 TP_PROTO(struct task_struct
*p
),
192 ctf_array_text(char, comm
, p
->comm
, TASK_COMM_LEN
)
193 ctf_integer(pid_t
, tid
, p
->pid
)
194 ctf_integer(int, prio
, p
->prio
- MAX_RT_PRIO
)
195 ctf_integer(int, target_cpu
, task_cpu(p
))
200 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
201 * called from the waking context.
203 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template
, sched_waking
,
204 TP_PROTO(struct task_struct
*p
),
208 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG.
209 * It it not always called from the waking context.
211 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template
, sched_wakeup
,
212 TP_PROTO(struct task_struct
*p
),
216 * Tracepoint for waking up a new task:
218 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template
, sched_wakeup_new
,
219 TP_PROTO(struct task_struct
*p
),
223 * Tracepoint for task switches, performed by the scheduler:
226 #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0) \
227 || LTTNG_RHEL_KERNEL_RANGE(5,14,0,162,0,0, 5,15,0,0,0,0))
228 LTTNG_TRACEPOINT_EVENT(sched_switch
,
230 TP_PROTO(bool preempt
,
231 struct task_struct
*prev
,
232 struct task_struct
*next
,
233 unsigned int prev_state
),
235 TP_ARGS(preempt
, prev
, next
, prev_state
),
238 ctf_array_text(char, prev_comm
, prev
->comm
, TASK_COMM_LEN
)
239 ctf_integer(pid_t
, prev_tid
, prev
->pid
)
240 ctf_integer(int, prev_prio
, prev
->prio
- MAX_RT_PRIO
)
241 #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
242 ctf_enum(task_state
, long, prev_state
, __trace_sched_switch_state(preempt
, prev_state
, prev
))
244 ctf_integer(long, prev_state
, __trace_sched_switch_state(preempt
, prev_state
, prev
))
246 ctf_array_text(char, next_comm
, next
->comm
, TASK_COMM_LEN
)
247 ctf_integer(pid_t
, next_tid
, next
->pid
)
248 ctf_integer(int, next_prio
, next
->prio
- MAX_RT_PRIO
)
254 LTTNG_TRACEPOINT_EVENT(sched_switch
,
256 TP_PROTO(bool preempt
,
257 struct task_struct
*prev
,
258 struct task_struct
*next
),
260 TP_ARGS(preempt
, prev
, next
),
263 ctf_array_text(char, prev_comm
, prev
->comm
, TASK_COMM_LEN
)
264 ctf_integer(pid_t
, prev_tid
, prev
->pid
)
265 ctf_integer(int, prev_prio
, prev
->prio
- MAX_RT_PRIO
)
266 #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
267 ctf_enum(task_state
, long, prev_state
, __trace_sched_switch_state(preempt
, prev
))
269 ctf_integer(long, prev_state
, __trace_sched_switch_state(preempt
, prev
))
271 ctf_array_text(char, next_comm
, next
->comm
, TASK_COMM_LEN
)
272 ctf_integer(pid_t
, next_tid
, next
->pid
)
273 ctf_integer(int, next_prio
, next
->prio
- MAX_RT_PRIO
)
279 * Tracepoint for a task being migrated:
281 LTTNG_TRACEPOINT_EVENT(sched_migrate_task
,
283 TP_PROTO(struct task_struct
*p
, int dest_cpu
),
285 TP_ARGS(p
, dest_cpu
),
288 ctf_array_text(char, comm
, p
->comm
, TASK_COMM_LEN
)
289 ctf_integer(pid_t
, tid
, p
->pid
)
290 ctf_integer(int, prio
, p
->prio
- MAX_RT_PRIO
)
291 ctf_integer(int, orig_cpu
, task_cpu(p
))
292 ctf_integer(int, dest_cpu
, dest_cpu
)
296 LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template
,
298 TP_PROTO(struct task_struct
*p
),
303 ctf_array_text(char, comm
, p
->comm
, TASK_COMM_LEN
)
304 ctf_integer(pid_t
, tid
, p
->pid
)
305 ctf_integer(int, prio
, p
->prio
- MAX_RT_PRIO
)
310 * Tracepoint for freeing a task:
312 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template
, sched_process_free
,
313 TP_PROTO(struct task_struct
*p
),
318 * Tracepoint for a task exiting:
320 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template
, sched_process_exit
,
321 TP_PROTO(struct task_struct
*p
),
325 * Tracepoint for waiting on task to unschedule:
327 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template
, sched_wait_task
,
328 TP_PROTO(struct task_struct
*p
),
332 * Tracepoint for a waiting task:
334 LTTNG_TRACEPOINT_EVENT(sched_process_wait
,
336 TP_PROTO(struct pid
*pid
),
341 ctf_array_text(char, comm
, current
->comm
, TASK_COMM_LEN
)
342 ctf_integer(pid_t
, tid
, pid_nr(pid
))
343 ctf_integer(int, prio
, current
->prio
- MAX_RT_PRIO
)
348 * Tracepoint for do_fork.
349 * Saving both TID and PID information, especially for the child, allows
350 * trace analyzers to distinguish between creation of a new process and
351 * creation of a new thread. Newly created processes will have child_tid
352 * == child_pid, while creation of a thread yields to child_tid !=
355 LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork
,
357 TP_PROTO(struct task_struct
*parent
, struct task_struct
*child
),
359 TP_ARGS(parent
, child
),
362 pid_t vtids
[LTTNG_MAX_PID_NS_LEVEL
];
363 unsigned int ns_level
;
368 struct pid
*child_pid
;
371 child_pid
= task_pid(child
);
372 tp_locvar
->ns_level
=
373 min_t(unsigned int, child_pid
->level
+ 1,
374 LTTNG_MAX_PID_NS_LEVEL
);
375 for (i
= 0; i
< tp_locvar
->ns_level
; i
++)
376 tp_locvar
->vtids
[i
] = child_pid
->numbers
[i
].nr
;
381 ctf_array_text(char, parent_comm
, parent
->comm
, TASK_COMM_LEN
)
382 ctf_integer(pid_t
, parent_tid
, parent
->pid
)
383 ctf_integer(pid_t
, parent_pid
, parent
->tgid
)
384 ctf_integer(unsigned int, parent_ns_inum
,
386 unsigned int parent_ns_inum
= 0;
389 struct pid_namespace
*pid_ns
;
391 pid_ns
= task_active_pid_ns(parent
);
398 ctf_array_text(char, child_comm
, child
->comm
, TASK_COMM_LEN
)
399 ctf_integer(pid_t
, child_tid
, child
->pid
)
400 ctf_sequence(pid_t
, vtids
, tp_locvar
->vtids
, u8
, tp_locvar
->ns_level
)
401 ctf_integer(pid_t
, child_pid
, child
->tgid
)
402 ctf_integer(unsigned int, child_ns_inum
,
404 unsigned int child_ns_inum
= 0;
407 struct pid_namespace
*pid_ns
;
409 pid_ns
= task_active_pid_ns(child
);
422 * Tracepoint for exec:
424 LTTNG_TRACEPOINT_EVENT(sched_process_exec
,
426 TP_PROTO(struct task_struct
*p
, pid_t old_pid
,
427 struct linux_binprm
*bprm
),
429 TP_ARGS(p
, old_pid
, bprm
),
432 ctf_string(filename
, bprm
->filename
)
433 ctf_integer(pid_t
, tid
, p
->pid
)
434 ctf_integer(pid_t
, old_tid
, old_pid
)
439 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
440 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
442 LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template
,
444 TP_PROTO(struct task_struct
*tsk
, u64 delay
),
449 ctf_array_text(char, comm
, tsk
->comm
, TASK_COMM_LEN
)
450 ctf_integer(pid_t
, tid
, tsk
->pid
)
451 ctf_integer(u64
, delay
, delay
)
457 * Tracepoint for accounting wait time (time the task is runnable
458 * but not actually running due to scheduler contention).
460 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template
, sched_stat_wait
,
461 TP_PROTO(struct task_struct
*tsk
, u64 delay
),
465 * Tracepoint for accounting sleep time (time the task is not runnable,
466 * including iowait, see below).
468 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template
, sched_stat_sleep
,
469 TP_PROTO(struct task_struct
*tsk
, u64 delay
),
473 * Tracepoint for accounting iowait time (time the task is not runnable
474 * due to waiting on IO to complete).
476 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template
, sched_stat_iowait
,
477 TP_PROTO(struct task_struct
*tsk
, u64 delay
),
481 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
483 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template
, sched_stat_blocked
,
484 TP_PROTO(struct task_struct
*tsk
, u64 delay
),
487 #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,8,0))
489 * Tracepoint for accounting runtime (time the task is executing
492 LTTNG_TRACEPOINT_EVENT(sched_stat_runtime
,
494 TP_PROTO(struct task_struct
*tsk
, u64 runtime
),
496 TP_ARGS(tsk
, runtime
),
499 ctf_array_text(char, comm
, tsk
->comm
, TASK_COMM_LEN
)
500 ctf_integer(pid_t
, tid
, tsk
->pid
)
501 ctf_integer(u64
, runtime
, runtime
)
506 * Tracepoint for accounting runtime (time the task is executing
509 LTTNG_TRACEPOINT_EVENT(sched_stat_runtime
,
511 TP_PROTO(struct task_struct
*tsk
, u64 runtime
, u64 vruntime
),
513 TP_ARGS(tsk
, runtime
, vruntime
),
516 ctf_array_text(char, comm
, tsk
->comm
, TASK_COMM_LEN
)
517 ctf_integer(pid_t
, tid
, tsk
->pid
)
518 ctf_integer(u64
, runtime
, runtime
)
519 ctf_integer(u64
, vruntime
, vruntime
)
524 #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,12,0) || \
525 LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
526 LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
528 * Tracepoint for showing priority inheritance modifying a tasks
531 LTTNG_TRACEPOINT_EVENT(sched_pi_setprio
,
533 TP_PROTO(struct task_struct
*tsk
, struct task_struct
*pi_task
),
535 TP_ARGS(tsk
, pi_task
),
538 ctf_array_text(char, comm
, tsk
->comm
, TASK_COMM_LEN
)
539 ctf_integer(pid_t
, tid
, tsk
->pid
)
540 ctf_integer(int, oldprio
, tsk
->prio
- MAX_RT_PRIO
)
541 ctf_integer(int, newprio
, pi_task
? pi_task
->prio
- MAX_RT_PRIO
: tsk
->prio
- MAX_RT_PRIO
)
546 * Tracepoint for showing priority inheritance modifying a tasks
549 LTTNG_TRACEPOINT_EVENT(sched_pi_setprio
,
551 TP_PROTO(struct task_struct
*tsk
, int newprio
),
553 TP_ARGS(tsk
, newprio
),
556 ctf_array_text(char, comm
, tsk
->comm
, TASK_COMM_LEN
)
557 ctf_integer(pid_t
, tid
, tsk
->pid
)
558 ctf_integer(int, oldprio
, tsk
->prio
- MAX_RT_PRIO
)
559 ctf_integer(int, newprio
, newprio
- MAX_RT_PRIO
)
564 #endif /* LTTNG_TRACE_SCHED_H */
566 /* This part must be outside protection */
567 #include <lttng/define_trace.h>