tools/perf/util/bpf_skel/lock_contention.bpf.c
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <asm-generic/errno-base.h>

#include "lock_data.h"

/* for collect_lock_syms().  4096 was rejected by the verifier */
#define MAX_CPUS  1024

/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN      (1U << 0)
#define LCB_F_READ      (1U << 1)
#define LCB_F_WRITE     (1U << 2)
#define LCB_F_RT        (1U << 3)
#define LCB_F_PERCPU    (1U << 4)
#define LCB_F_MUTEX     (1U << 5)

struct tstamp_data {
        __u64 timestamp;
        __u64 lock;
        __u32 flags;
        __s32 stack_id;
};

/* callstack storage */
struct {
        __uint(type, BPF_MAP_TYPE_STACK_TRACE);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u64));
        __uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

/* maintain timestamp at the beginning of contention */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, int);
        __type(value, struct tstamp_data);
        __uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");

/* actual lock contention statistics */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(struct contention_key));
        __uint(value_size, sizeof(struct contention_data));
        __uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

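/* task command names, keyed by pid (see update_task_data()) */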
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(struct contention_task_data));
        __uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");

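/* known lock addresses mapped to a lock class, filled by collect_lock_syms() */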
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u32));
        __uint(max_entries, MAX_ENTRIES);
} lock_syms SEC(".maps");

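/*
 * Filter maps: recording is allowed only when the current CPU, task,
 * lock type or lock address has an entry here (see can_record()).
 */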
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} type_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} addr_filter SEC(".maps");

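/* the rw_semaphore owner field changed from a task_struct pointer to atomic_long_t */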
struct rw_semaphore___old {
        struct task_struct *owner;
} __attribute__((preserve_access_index));

struct rw_semaphore___new {
        atomic_long_t owner;
} __attribute__((preserve_access_index));

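/* mm_struct's mmap_sem was renamed to mmap_lock */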
struct mm_struct___old {
        struct rw_semaphore mmap_sem;
} __attribute__((preserve_access_index));

struct mm_struct___new {
        struct rw_semaphore mmap_lock;
} __attribute__((preserve_access_index));

/* control flags */
int enabled;
int has_cpu;
int has_task;
int has_type;
int has_addr;
int needs_callstack;
int stack_skip;
int lock_owner;

/* determine the key of lock stat */
int aggr_mode;

/* error stat */
int task_fail;
int stack_fail;
int time_fail;
int data_fail;

int task_map_full;
int data_map_full;

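/* check the optional CPU/task/type/address filters; returns 0 when the event should be skipped */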
static inline int can_record(u64 *ctx)
{
        if (has_cpu) {
                __u32 cpu = bpf_get_smp_processor_id();
                __u8 *ok;

                ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
                if (!ok)
                        return 0;
        }

        if (has_task) {
                __u8 *ok;
                __u32 pid = bpf_get_current_pid_tgid();

                ok = bpf_map_lookup_elem(&task_filter, &pid);
                if (!ok)
                        return 0;
        }

        if (has_type) {
                __u8 *ok;
                __u32 flags = (__u32)ctx[1];

                ok = bpf_map_lookup_elem(&type_filter, &flags);
                if (!ok)
                        return 0;
        }

        if (has_addr) {
                __u8 *ok;
                __u64 addr = ctx[0];

                ok = bpf_map_lookup_elem(&addr_filter, &addr);
                if (!ok)
                        return 0;
        }

        return 1;
}

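/* cache the task's comm in the task_data map, keyed by pid */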
static inline int update_task_data(struct task_struct *task)
{
        struct contention_task_data *p;
        int pid, err;

        err = bpf_core_read(&pid, sizeof(pid), &task->pid);
        if (err)
                return -1;

        p = bpf_map_lookup_elem(&task_data, &pid);
        if (p == NULL && !task_map_full) {
                struct contention_task_data data = {};

                BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
                if (bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST) == -E2BIG)
                        task_map_full = 1;
        }

        return 0;
}

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

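/* find the owner task of a contended mutex or rwsem from its owner field */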
static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
{
        struct task_struct *task;
        __u64 owner = 0;

        if (flags & LCB_F_MUTEX) {
                struct mutex *mutex = (void *)lock;
                owner = BPF_CORE_READ(mutex, owner.counter);
        } else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
        /*
         * Support for the BPF_TYPE_MATCHES argument to the
         * __builtin_preserve_type_info builtin was added at some point during
         * development of clang 15 and it's what is needed for
         * bpf_core_type_matches.
         */
#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
                if (bpf_core_type_matches(struct rw_semaphore___old)) {
                        struct rw_semaphore___old *rwsem = (void *)lock;
                        owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
                } else if (bpf_core_type_matches(struct rw_semaphore___new)) {
                        struct rw_semaphore___new *rwsem = (void *)lock;
                        owner = BPF_CORE_READ(rwsem, owner.counter);
                }
#else
                /* assume new struct */
                struct rw_semaphore *rwsem = (void *)lock;
                owner = BPF_CORE_READ(rwsem, owner.counter);
#endif
        }

        if (!owner)
                return NULL;

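        /* the low 3 bits of the owner word are flag bits; clear them to get the task pointer */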
        task = (void *)(owner & ~7UL);
        return task;
}

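/* tag well-known locks (mmap_lock, siglock) so they can be shown by name */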
static inline __u32 check_lock_type(__u64 lock, __u32 flags)
{
        struct task_struct *curr;
        struct mm_struct___old *mm_old;
        struct mm_struct___new *mm_new;

        switch (flags) {
        case LCB_F_READ:  /* rwsem */
        case LCB_F_WRITE:
                curr = bpf_get_current_task_btf();
                if (curr->mm == NULL)
                        break;
                mm_new = (void *)curr->mm;
                if (bpf_core_field_exists(mm_new->mmap_lock)) {
                        if (&mm_new->mmap_lock == (void *)lock)
                                return LCD_F_MMAP_LOCK;
                        break;
                }
                mm_old = (void *)curr->mm;
                if (bpf_core_field_exists(mm_old->mmap_sem)) {
                        if (&mm_old->mmap_sem == (void *)lock)
                                return LCD_F_MMAP_LOCK;
                }
                break;
        case LCB_F_SPIN:  /* spinlock */
                curr = bpf_get_current_task_btf();
                if (&curr->sighand->siglock == (void *)lock)
                        return LCD_F_SIGHAND_LOCK;
                break;
        default:
                break;
        }
        return 0;
}

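/*
 * Record the timestamp (and, if requested, the callstack or lock owner)
 * when a task starts contending on a lock.
 */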
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
        __u32 pid;
        struct tstamp_data *pelem;

        if (!enabled || !can_record(ctx))
                return 0;

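        /* the lower 32 bits of pid_tgid hold the thread id */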
        pid = bpf_get_current_pid_tgid();
        pelem = bpf_map_lookup_elem(&tstamp, &pid);
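        /* nested contention is not tracked; keep the entry for the outermost lock */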
        if (pelem && pelem->lock)
                return 0;

        if (pelem == NULL) {
                struct tstamp_data zero = {};

                bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
                pelem = bpf_map_lookup_elem(&tstamp, &pid);
                if (pelem == NULL) {
                        __sync_fetch_and_add(&task_fail, 1);
                        return 0;
                }
        }

        pelem->timestamp = bpf_ktime_get_ns();
        pelem->lock = (__u64)ctx[0];
        pelem->flags = (__u32)ctx[1];

        if (needs_callstack) {
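                /* stack_skip fills the skip-nr-frames field of the flags argument */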
                pelem->stack_id = bpf_get_stackid(ctx, &stacks,
                                                  BPF_F_FAST_STACK_CMP | stack_skip);
                if (pelem->stack_id < 0)
                        __sync_fetch_and_add(&stack_fail, 1);
        } else if (aggr_mode == LOCK_AGGR_TASK) {
                struct task_struct *task;

                if (lock_owner) {
                        task = get_lock_owner(pelem->lock, pelem->flags);

                        /* The flags field is not used anymore.  Pass the owner pid instead. */
                        if (task)
                                pelem->flags = BPF_CORE_READ(task, pid);
                        else
                                pelem->flags = -1U;

                } else {
                        task = bpf_get_current_task_btf();
                }

                if (task) {
                        if (update_task_data(task) < 0 && lock_owner)
                                pelem->flags = -1U;
                }
        }

        return 0;
}

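/*
 * Compute the contention duration and fold it into the matching
 * lock_stat entry when the contention ends.
 */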
SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
        __u32 pid;
        struct tstamp_data *pelem;
        struct contention_key key = {};
        struct contention_data *data;
        __u64 duration;

        if (!enabled)
                return 0;

        pid = bpf_get_current_pid_tgid();
        pelem = bpf_map_lookup_elem(&tstamp, &pid);
        if (!pelem || pelem->lock != ctx[0])
                return 0;

        duration = bpf_ktime_get_ns() - pelem->timestamp;
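        /* a negative duration indicates a broken timestamp; discard the entry */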
        if ((__s64)duration < 0) {
                bpf_map_delete_elem(&tstamp, &pid);
                __sync_fetch_and_add(&time_fail, 1);
                return 0;
        }

        switch (aggr_mode) {
        case LOCK_AGGR_CALLER:
                key.stack_id = pelem->stack_id;
                break;
        case LOCK_AGGR_TASK:
                if (lock_owner)
                        key.pid = pelem->flags;
                else
                        key.pid = pid;
                if (needs_callstack)
                        key.stack_id = pelem->stack_id;
                break;
        case LOCK_AGGR_ADDR:
                key.lock_addr = pelem->lock;
                if (needs_callstack)
                        key.stack_id = pelem->stack_id;
                break;
        default:
                /* should not happen */
                return 0;
        }

        data = bpf_map_lookup_elem(&lock_stat, &key);
        if (!data) {
                if (data_map_full) {
                        bpf_map_delete_elem(&tstamp, &pid);
                        __sync_fetch_and_add(&data_fail, 1);
                        return 0;
                }

                struct contention_data first = {
                        .total_time = duration,
                        .max_time = duration,
                        .min_time = duration,
                        .count = 1,
                        .flags = pelem->flags,
                };
                int err;

                if (aggr_mode == LOCK_AGGR_ADDR)
                        first.flags |= check_lock_type(pelem->lock, pelem->flags);

                err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
                if (err < 0) {
                        if (err == -E2BIG)
                                data_map_full = 1;
                        __sync_fetch_and_add(&data_fail, 1);
                }
                bpf_map_delete_elem(&tstamp, &pid);
                return 0;
        }

        __sync_fetch_and_add(&data->total_time, duration);
        __sync_fetch_and_add(&data->count, 1);

        /* FIXME: need atomic operations */
        if (data->max_time < duration)
                data->max_time = duration;
        if (data->min_time > duration)
                data->min_time = duration;

        bpf_map_delete_elem(&tstamp, &pid);
        return 0;
}

extern struct rq runqueues __ksym;

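/* the runqueue lock was renamed from 'lock' to '__lock'; probe both via CO-RE */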
struct rq___old {
        raw_spinlock_t lock;
} __attribute__((preserve_access_index));

struct rq___new {
        raw_spinlock_t __lock;
} __attribute__((preserve_access_index));

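/*
 * Record each CPU's runqueue lock address in lock_syms.  The raw tracepoint
 * here is only a placeholder; userspace is expected to trigger this program
 * once (e.g. with a BPF test run) after loading.
 */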
SEC("raw_tp/bpf_test_finish")
int BPF_PROG(collect_lock_syms)
{
        __u64 lock_addr, lock_off;
        __u32 lock_flag;

        if (bpf_core_field_exists(struct rq___new, __lock))
                lock_off = offsetof(struct rq___new, __lock);
        else
                lock_off = offsetof(struct rq___old, lock);

        for (int i = 0; i < MAX_CPUS; i++) {
                struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);

                if (rq == NULL)
                        break;

                lock_addr = (__u64)(void *)rq + lock_off;
                lock_flag = LOCK_CLASS_RQLOCK;
                bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
        }
        return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";