1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PSI_TYPES_H
3 #define _LINUX_PSI_TYPES_H
5 #include <linux/kthread.h>
6 #include <linux/seqlock.h>
7 #include <linux/types.h>
8 #include <linux/kref.h>
9 #include <linux/wait.h>
13 /* Tracked task states */
19 * For IO and CPU stalls the presence of running/oncpu tasks
20 * in the domain means a partial rather than a full stall.
21 * For memory it's not so simple because of page reclaimers:
22 * they are running/oncpu while representing a stall. To tell
23 * whether a domain has productivity left or not, we need to
24 * distinguish between regular running (i.e. productive)
25 * threads and memstall ones.
28 NR_PSI_TASK_COUNTS = 4,
31 /* Task state bitmasks: each flag is 1 shifted left by its matching NR_* constant */
32 #define TSK_IOWAIT (1 << NR_IOWAIT)
33 #define TSK_MEMSTALL (1 << NR_MEMSTALL)
34 #define TSK_RUNNING (1 << NR_RUNNING)
35 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
37 /* Only one task can be scheduled, so there is no corresponding task count; the flag takes bit NR_PSI_TASK_COUNTS instead */
38 #define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS)
40 /* Resources that workloads could be stalled on */
45 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
52 * Pressure states for each resource:
54 * SOME: Stalled tasks & working tasks
55 * FULL: Stalled tasks & no working tasks
64 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
67 /* Only per-CPU, to weigh the CPU in the global average: */
72 /* Use one bit in the state mask to track TSK_ONCPU: the bit at index NR_PSI_STATES */
73 #define PSI_ONCPU (1 << NR_PSI_STATES)
75 enum psi_aggregators {
81 struct psi_group_cpu {
82 /* 1st cacheline updated by the scheduler */
84 /* Aggregator needs to know of concurrent changes */
85 seqcount_t seq ____cacheline_aligned_in_smp;
87 /* States of the tasks belonging to this group */
88 unsigned int tasks[NR_PSI_TASK_COUNTS];
90 /* Aggregate pressure state derived from the tasks */
93 /* Period time sampling buckets for each state of interest (ns) */
94 u32 times[NR_PSI_STATES];
96 /* Time of last task change in this group (rq_clock) */
99 /* 2nd cacheline updated by the aggregator */
101 /* Delta detection against the sampling buckets */
102 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
103 ____cacheline_aligned_in_smp;
106 /* PSI growth tracking window */
108 /* Window size in ns */
111 /* Start time of the current window in ns */
114 /* Value at the start of the window */
117 /* Value growth in the previous window */
122 /* PSI state being monitored by the trigger */
123 enum psi_states state;
125 /* User-specified threshold in ns */
128 /* List node inside triggers list */
129 struct list_head node;
131 /* Backpointer needed during trigger destruction */
132 struct psi_group *group;
134 /* Wait queue for polling */
135 wait_queue_head_t event_wait;
137 /* Pending event flag */
140 /* Tracking window */
141 struct psi_window win;
144 * Time last event was generated. Used for rate-limiting
145 * events to one per window
149 /* Deferred event(s) from previous ratelimit window */
154 struct psi_group *parent;
157 /* Protects data used by the aggregator */
158 struct mutex avgs_lock;
160 /* Per-cpu task state & time tracking */
161 struct psi_group_cpu __percpu *pcpu;
163 /* Running pressure averages */
164 u64 avg_total[NR_PSI_STATES - 1];
168 /* Aggregator work control */
169 struct delayed_work avgs_work;
171 /* Total stall times and sampled pressure averages */
172 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
173 unsigned long avg[NR_PSI_STATES - 1][3];
175 /* Monitor work control */
176 struct task_struct __rcu *poll_task;
177 struct timer_list poll_timer;
178 wait_queue_head_t poll_wait;
179 atomic_t poll_wakeup;
181 /* Protects data used by the monitor */
182 struct mutex trigger_lock;
184 /* Configured polling triggers */
185 struct list_head triggers;
186 u32 nr_triggers[NR_PSI_STATES - 1];
190 /* Total stall times at the start of monitor activation */
191 u64 polling_total[NR_PSI_STATES - 1];
192 u64 polling_next_update;
196 #else /* CONFIG_PSI */
198 #define NR_PSI_RESOURCES 0 /* CONFIG_PSI is off: zero resources */
200 struct psi_group { }; /* memberless stand-in for !CONFIG_PSI; presumably keeps code that names the type building */
202 #endif /* CONFIG_PSI */
204 #endif /* _LINUX_PSI_TYPES_H */