Merge tag 'pull-nfsd-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[platform/kernel/linux-starfive.git] / tools / perf / builtin-lock.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <inttypes.h>
4 #include "builtin.h"
5 #include "perf.h"
6
7 #include "util/evlist.h" // for struct evsel_str_handler
8 #include "util/evsel.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/target.h"
13 #include "util/callchain.h"
14 #include "util/lock-contention.h"
15 #include "util/bpf_skel/lock_data.h"
16
17 #include <subcmd/pager.h>
18 #include <subcmd/parse-options.h>
19 #include "util/trace-event.h"
20 #include "util/tracepoint.h"
21
22 #include "util/debug.h"
23 #include "util/session.h"
24 #include "util/tool.h"
25 #include "util/data.h"
26 #include "util/string2.h"
27 #include "util/map.h"
28 #include "util/util.h"
29
30 #include <stdio.h>
31 #include <sys/types.h>
32 #include <sys/prctl.h>
33 #include <semaphore.h>
34 #include <math.h>
35 #include <limits.h>
36 #include <ctype.h>
37
38 #include <linux/list.h>
39 #include <linux/hash.h>
40 #include <linux/kernel.h>
41 #include <linux/zalloc.h>
42 #include <linux/err.h>
43 #include <linux/stringify.h>
44
45 static struct perf_session *session;
46 static struct target target;
47
48 /* based on kernel/lockdep.c */
49 #define LOCKHASH_BITS           12
50 #define LOCKHASH_SIZE           (1UL << LOCKHASH_BITS)
51
52 static struct hlist_head *lockhash_table;
53
54 #define __lockhashfn(key)       hash_long((unsigned long)key, LOCKHASH_BITS)
55 #define lockhashentry(key)      (lockhash_table + __lockhashfn((key)))
56
57 static struct rb_root           thread_stats;
58
59 static bool combine_locks;
60 static bool show_thread_stats;
61 static bool show_lock_addrs;
62 static bool show_lock_owner;
63 static bool use_bpf;
64 static unsigned long bpf_map_entries = MAX_ENTRIES;
65 static int max_stack_depth = CONTENTION_STACK_DEPTH;
66 static int stack_skip = CONTENTION_STACK_SKIP;
67 static int print_nr_entries = INT_MAX / 2;
68 static LIST_HEAD(callstack_filters);
69 static const char *output_name = NULL;
70 static FILE *lock_output;
71
72 struct callstack_filter {
73         struct list_head list;
74         char name[];
75 };
76
77 static struct lock_filter filters;
78
79 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
80
81 static bool needs_callstack(void)
82 {
83         return !list_empty(&callstack_filters);
84 }
85
86 static struct thread_stat *thread_stat_find(u32 tid)
87 {
88         struct rb_node *node;
89         struct thread_stat *st;
90
91         node = thread_stats.rb_node;
92         while (node) {
93                 st = container_of(node, struct thread_stat, rb);
94                 if (st->tid == tid)
95                         return st;
96                 else if (tid < st->tid)
97                         node = node->rb_left;
98                 else
99                         node = node->rb_right;
100         }
101
102         return NULL;
103 }
104
105 static void thread_stat_insert(struct thread_stat *new)
106 {
107         struct rb_node **rb = &thread_stats.rb_node;
108         struct rb_node *parent = NULL;
109         struct thread_stat *p;
110
111         while (*rb) {
112                 p = container_of(*rb, struct thread_stat, rb);
113                 parent = *rb;
114
115                 if (new->tid < p->tid)
116                         rb = &(*rb)->rb_left;
117                 else if (new->tid > p->tid)
118                         rb = &(*rb)->rb_right;
119                 else
120                         BUG_ON("inserting invalid thread_stat\n");
121         }
122
123         rb_link_node(&new->rb, parent, rb);
124         rb_insert_color(&new->rb, &thread_stats);
125 }
126
127 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
128 {
129         struct thread_stat *st;
130
131         st = thread_stat_find(tid);
132         if (st)
133                 return st;
134
135         st = zalloc(sizeof(struct thread_stat));
136         if (!st) {
137                 pr_err("memory allocation failed\n");
138                 return NULL;
139         }
140
141         st->tid = tid;
142         INIT_LIST_HEAD(&st->seq_list);
143
144         thread_stat_insert(st);
145
146         return st;
147 }
148
149 static struct thread_stat *thread_stat_findnew_first(u32 tid);
150 static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
151         thread_stat_findnew_first;
152
153 static struct thread_stat *thread_stat_findnew_first(u32 tid)
154 {
155         struct thread_stat *st;
156
157         st = zalloc(sizeof(struct thread_stat));
158         if (!st) {
159                 pr_err("memory allocation failed\n");
160                 return NULL;
161         }
162         st->tid = tid;
163         INIT_LIST_HEAD(&st->seq_list);
164
165         rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
166         rb_insert_color(&st->rb, &thread_stats);
167
168         thread_stat_findnew = thread_stat_findnew_after_first;
169         return st;
170 }
171
172 /* build simple key function one is bigger than two */
173 #define SINGLE_KEY(member)                                              \
174         static int lock_stat_key_ ## member(struct lock_stat *one,      \
175                                          struct lock_stat *two)         \
176         {                                                               \
177                 return one->member > two->member;                       \
178         }
179
180 SINGLE_KEY(nr_acquired)
181 SINGLE_KEY(nr_contended)
182 SINGLE_KEY(avg_wait_time)
183 SINGLE_KEY(wait_time_total)
184 SINGLE_KEY(wait_time_max)
185
186 static int lock_stat_key_wait_time_min(struct lock_stat *one,
187                                         struct lock_stat *two)
188 {
189         u64 s1 = one->wait_time_min;
190         u64 s2 = two->wait_time_min;
191         if (s1 == ULLONG_MAX)
192                 s1 = 0;
193         if (s2 == ULLONG_MAX)
194                 s2 = 0;
195         return s1 > s2;
196 }
197
198 struct lock_key {
199         /*
200          * name: the value for specify by user
201          * this should be simpler than raw name of member
202          * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
203          */
204         const char              *name;
205         /* header: the string printed on the header line */
206         const char              *header;
207         /* len: the printing width of the field */
208         int                     len;
209         /* key: a pointer to function to compare two lock stats for sorting */
210         int                     (*key)(struct lock_stat*, struct lock_stat*);
211         /* print: a pointer to function to print a given lock stats */
212         void                    (*print)(struct lock_key*, struct lock_stat*);
213         /* list: list entry to link this */
214         struct list_head        list;
215 };
216
217 static void lock_stat_key_print_time(unsigned long long nsec, int len)
218 {
219         static const struct {
220                 float base;
221                 const char *unit;
222         } table[] = {
223                 { 1e9 * 3600, "h " },
224                 { 1e9 * 60, "m " },
225                 { 1e9, "s " },
226                 { 1e6, "ms" },
227                 { 1e3, "us" },
228                 { 0, NULL },
229         };
230
231         /* for CSV output */
232         if (len == 0) {
233                 fprintf(lock_output, "%llu", nsec);
234                 return;
235         }
236
237         for (int i = 0; table[i].unit; i++) {
238                 if (nsec < table[i].base)
239                         continue;
240
241                 fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
242                 return;
243         }
244
245         fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
246 }
247
248 #define PRINT_KEY(member)                                               \
249 static void lock_stat_key_print_ ## member(struct lock_key *key,        \
250                                            struct lock_stat *ls)        \
251 {                                                                       \
252         fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
253 }
254
255 #define PRINT_TIME(member)                                              \
256 static void lock_stat_key_print_ ## member(struct lock_key *key,        \
257                                            struct lock_stat *ls)        \
258 {                                                                       \
259         lock_stat_key_print_time((unsigned long long)ls->member, key->len);     \
260 }
261
262 PRINT_KEY(nr_acquired)
263 PRINT_KEY(nr_contended)
264 PRINT_TIME(avg_wait_time)
265 PRINT_TIME(wait_time_total)
266 PRINT_TIME(wait_time_max)
267
268 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
269                                               struct lock_stat *ls)
270 {
271         u64 wait_time = ls->wait_time_min;
272
273         if (wait_time == ULLONG_MAX)
274                 wait_time = 0;
275
276         lock_stat_key_print_time(wait_time, key->len);
277 }
278
279
280 static const char               *sort_key = "acquired";
281
282 static int                      (*compare)(struct lock_stat *, struct lock_stat *);
283
284 static struct rb_root           sorted; /* place to store intermediate data */
285 static struct rb_root           result; /* place to store sorted data */
286
287 static LIST_HEAD(lock_keys);
288 static const char               *output_fields;
289
290 #define DEF_KEY_LOCK(name, header, fn_suffix, len)                      \
291         { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
292 static struct lock_key report_keys[] = {
293         DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
294         DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
295         DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
296         DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
297         DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
298         DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
299
300         /* extra comparisons much complicated should be here */
301         { }
302 };
303
304 static struct lock_key contention_keys[] = {
305         DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
306         DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
307         DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
308         DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
309         DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
310
311         /* extra comparisons much complicated should be here */
312         { }
313 };
314
315 static int select_key(bool contention)
316 {
317         int i;
318         struct lock_key *keys = report_keys;
319
320         if (contention)
321                 keys = contention_keys;
322
323         for (i = 0; keys[i].name; i++) {
324                 if (!strcmp(keys[i].name, sort_key)) {
325                         compare = keys[i].key;
326
327                         /* selected key should be in the output fields */
328                         if (list_empty(&keys[i].list))
329                                 list_add_tail(&keys[i].list, &lock_keys);
330
331                         return 0;
332                 }
333         }
334
335         pr_err("Unknown compare key: %s\n", sort_key);
336         return -1;
337 }
338
339 static int add_output_field(bool contention, char *name)
340 {
341         int i;
342         struct lock_key *keys = report_keys;
343
344         if (contention)
345                 keys = contention_keys;
346
347         for (i = 0; keys[i].name; i++) {
348                 if (strcmp(keys[i].name, name))
349                         continue;
350
351                 /* prevent double link */
352                 if (list_empty(&keys[i].list))
353                         list_add_tail(&keys[i].list, &lock_keys);
354
355                 return 0;
356         }
357
358         pr_err("Unknown output field: %s\n", name);
359         return -1;
360 }
361
362 static int setup_output_field(bool contention, const char *str)
363 {
364         char *tok, *tmp, *orig;
365         int i, ret = 0;
366         struct lock_key *keys = report_keys;
367
368         if (contention)
369                 keys = contention_keys;
370
371         /* no output field given: use all of them */
372         if (str == NULL) {
373                 for (i = 0; keys[i].name; i++)
374                         list_add_tail(&keys[i].list, &lock_keys);
375                 return 0;
376         }
377
378         for (i = 0; keys[i].name; i++)
379                 INIT_LIST_HEAD(&keys[i].list);
380
381         orig = tmp = strdup(str);
382         if (orig == NULL)
383                 return -ENOMEM;
384
385         while ((tok = strsep(&tmp, ",")) != NULL){
386                 ret = add_output_field(contention, tok);
387                 if (ret < 0)
388                         break;
389         }
390         free(orig);
391
392         return ret;
393 }
394
395 static void combine_lock_stats(struct lock_stat *st)
396 {
397         struct rb_node **rb = &sorted.rb_node;
398         struct rb_node *parent = NULL;
399         struct lock_stat *p;
400         int ret;
401
402         while (*rb) {
403                 p = container_of(*rb, struct lock_stat, rb);
404                 parent = *rb;
405
406                 if (st->name && p->name)
407                         ret = strcmp(st->name, p->name);
408                 else
409                         ret = !!st->name - !!p->name;
410
411                 if (ret == 0) {
412                         p->nr_acquired += st->nr_acquired;
413                         p->nr_contended += st->nr_contended;
414                         p->wait_time_total += st->wait_time_total;
415
416                         if (p->nr_contended)
417                                 p->avg_wait_time = p->wait_time_total / p->nr_contended;
418
419                         if (p->wait_time_min > st->wait_time_min)
420                                 p->wait_time_min = st->wait_time_min;
421                         if (p->wait_time_max < st->wait_time_max)
422                                 p->wait_time_max = st->wait_time_max;
423
424                         p->broken |= st->broken;
425                         st->combined = 1;
426                         return;
427                 }
428
429                 if (ret < 0)
430                         rb = &(*rb)->rb_left;
431                 else
432                         rb = &(*rb)->rb_right;
433         }
434
435         rb_link_node(&st->rb, parent, rb);
436         rb_insert_color(&st->rb, &sorted);
437 }
438
439 static void insert_to_result(struct lock_stat *st,
440                              int (*bigger)(struct lock_stat *, struct lock_stat *))
441 {
442         struct rb_node **rb = &result.rb_node;
443         struct rb_node *parent = NULL;
444         struct lock_stat *p;
445
446         if (combine_locks && st->combined)
447                 return;
448
449         while (*rb) {
450                 p = container_of(*rb, struct lock_stat, rb);
451                 parent = *rb;
452
453                 if (bigger(st, p))
454                         rb = &(*rb)->rb_left;
455                 else
456                         rb = &(*rb)->rb_right;
457         }
458
459         rb_link_node(&st->rb, parent, rb);
460         rb_insert_color(&st->rb, &result);
461 }
462
463 /* returns left most element of result, and erase it */
464 static struct lock_stat *pop_from_result(void)
465 {
466         struct rb_node *node = result.rb_node;
467
468         if (!node)
469                 return NULL;
470
471         while (node->rb_left)
472                 node = node->rb_left;
473
474         rb_erase(node, &result);
475         return container_of(node, struct lock_stat, rb);
476 }
477
478 struct lock_stat *lock_stat_find(u64 addr)
479 {
480         struct hlist_head *entry = lockhashentry(addr);
481         struct lock_stat *ret;
482
483         hlist_for_each_entry(ret, entry, hash_entry) {
484                 if (ret->addr == addr)
485                         return ret;
486         }
487         return NULL;
488 }
489
490 struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
491 {
492         struct hlist_head *entry = lockhashentry(addr);
493         struct lock_stat *ret, *new;
494
495         hlist_for_each_entry(ret, entry, hash_entry) {
496                 if (ret->addr == addr)
497                         return ret;
498         }
499
500         new = zalloc(sizeof(struct lock_stat));
501         if (!new)
502                 goto alloc_failed;
503
504         new->addr = addr;
505         new->name = strdup(name);
506         if (!new->name) {
507                 free(new);
508                 goto alloc_failed;
509         }
510
511         new->flags = flags;
512         new->wait_time_min = ULLONG_MAX;
513
514         hlist_add_head(&new->hash_entry, entry);
515         return new;
516
517 alloc_failed:
518         pr_err("memory allocation failed\n");
519         return NULL;
520 }
521
522 bool match_callstack_filter(struct machine *machine, u64 *callstack)
523 {
524         struct map *kmap;
525         struct symbol *sym;
526         u64 ip;
527
528         if (list_empty(&callstack_filters))
529                 return true;
530
531         for (int i = 0; i < max_stack_depth; i++) {
532                 struct callstack_filter *filter;
533
534                 if (!callstack || !callstack[i])
535                         break;
536
537                 ip = callstack[i];
538                 sym = machine__find_kernel_symbol(machine, ip, &kmap);
539                 if (sym == NULL)
540                         continue;
541
542                 list_for_each_entry(filter, &callstack_filters, list) {
543                         if (strstr(sym->name, filter->name))
544                                 return true;
545                 }
546         }
547         return false;
548 }
549
550 struct trace_lock_handler {
551         /* it's used on CONFIG_LOCKDEP */
552         int (*acquire_event)(struct evsel *evsel,
553                              struct perf_sample *sample);
554
555         /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
556         int (*acquired_event)(struct evsel *evsel,
557                               struct perf_sample *sample);
558
559         /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
560         int (*contended_event)(struct evsel *evsel,
561                                struct perf_sample *sample);
562
563         /* it's used on CONFIG_LOCKDEP */
564         int (*release_event)(struct evsel *evsel,
565                              struct perf_sample *sample);
566
567         /* it's used when CONFIG_LOCKDEP is off */
568         int (*contention_begin_event)(struct evsel *evsel,
569                                       struct perf_sample *sample);
570
571         /* it's used when CONFIG_LOCKDEP is off */
572         int (*contention_end_event)(struct evsel *evsel,
573                                     struct perf_sample *sample);
574 };
575
576 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
577 {
578         struct lock_seq_stat *seq;
579
580         list_for_each_entry(seq, &ts->seq_list, list) {
581                 if (seq->addr == addr)
582                         return seq;
583         }
584
585         seq = zalloc(sizeof(struct lock_seq_stat));
586         if (!seq) {
587                 pr_err("memory allocation failed\n");
588                 return NULL;
589         }
590         seq->state = SEQ_STATE_UNINITIALIZED;
591         seq->addr = addr;
592
593         list_add(&seq->list, &ts->seq_list);
594         return seq;
595 }
596
597 enum broken_state {
598         BROKEN_ACQUIRE,
599         BROKEN_ACQUIRED,
600         BROKEN_CONTENDED,
601         BROKEN_RELEASE,
602         BROKEN_MAX,
603 };
604
605 static int bad_hist[BROKEN_MAX];
606
607 enum acquire_flags {
608         TRY_LOCK = 1,
609         READ_LOCK = 2,
610 };
611
612 static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
613 {
614         switch (aggr_mode) {
615         case LOCK_AGGR_ADDR:
616                 *key = addr;
617                 break;
618         case LOCK_AGGR_TASK:
619                 *key = tid;
620                 break;
621         case LOCK_AGGR_CALLER:
622         default:
623                 pr_err("Invalid aggregation mode: %d\n", aggr_mode);
624                 return -EINVAL;
625         }
626         return 0;
627 }
628
629 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);
630
631 static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
632                                  struct perf_sample *sample)
633 {
634         if (aggr_mode == LOCK_AGGR_CALLER) {
635                 *key = callchain_id(evsel, sample);
636                 return 0;
637         }
638         return get_key_by_aggr_mode_simple(key, addr, sample->tid);
639 }
640
641 static int report_lock_acquire_event(struct evsel *evsel,
642                                      struct perf_sample *sample)
643 {
644         struct lock_stat *ls;
645         struct thread_stat *ts;
646         struct lock_seq_stat *seq;
647         const char *name = evsel__strval(evsel, sample, "name");
648         u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
649         int flag = evsel__intval(evsel, sample, "flags");
650         u64 key;
651         int ret;
652
653         ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
654         if (ret < 0)
655                 return ret;
656
657         ls = lock_stat_findnew(key, name, 0);
658         if (!ls)
659                 return -ENOMEM;
660
661         ts = thread_stat_findnew(sample->tid);
662         if (!ts)
663                 return -ENOMEM;
664
665         seq = get_seq(ts, addr);
666         if (!seq)
667                 return -ENOMEM;
668
669         switch (seq->state) {
670         case SEQ_STATE_UNINITIALIZED:
671         case SEQ_STATE_RELEASED:
672                 if (!flag) {
673                         seq->state = SEQ_STATE_ACQUIRING;
674                 } else {
675                         if (flag & TRY_LOCK)
676                                 ls->nr_trylock++;
677                         if (flag & READ_LOCK)
678                                 ls->nr_readlock++;
679                         seq->state = SEQ_STATE_READ_ACQUIRED;
680                         seq->read_count = 1;
681                         ls->nr_acquired++;
682                 }
683                 break;
684         case SEQ_STATE_READ_ACQUIRED:
685                 if (flag & READ_LOCK) {
686                         seq->read_count++;
687                         ls->nr_acquired++;
688                         goto end;
689                 } else {
690                         goto broken;
691                 }
692                 break;
693         case SEQ_STATE_ACQUIRED:
694         case SEQ_STATE_ACQUIRING:
695         case SEQ_STATE_CONTENDED:
696 broken:
697                 /* broken lock sequence */
698                 if (!ls->broken) {
699                         ls->broken = 1;
700                         bad_hist[BROKEN_ACQUIRE]++;
701                 }
702                 list_del_init(&seq->list);
703                 free(seq);
704                 goto end;
705         default:
706                 BUG_ON("Unknown state of lock sequence found!\n");
707                 break;
708         }
709
710         ls->nr_acquire++;
711         seq->prev_event_time = sample->time;
712 end:
713         return 0;
714 }
715
716 static int report_lock_acquired_event(struct evsel *evsel,
717                                       struct perf_sample *sample)
718 {
719         struct lock_stat *ls;
720         struct thread_stat *ts;
721         struct lock_seq_stat *seq;
722         u64 contended_term;
723         const char *name = evsel__strval(evsel, sample, "name");
724         u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
725         u64 key;
726         int ret;
727
728         ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
729         if (ret < 0)
730                 return ret;
731
732         ls = lock_stat_findnew(key, name, 0);
733         if (!ls)
734                 return -ENOMEM;
735
736         ts = thread_stat_findnew(sample->tid);
737         if (!ts)
738                 return -ENOMEM;
739
740         seq = get_seq(ts, addr);
741         if (!seq)
742                 return -ENOMEM;
743
744         switch (seq->state) {
745         case SEQ_STATE_UNINITIALIZED:
746                 /* orphan event, do nothing */
747                 return 0;
748         case SEQ_STATE_ACQUIRING:
749                 break;
750         case SEQ_STATE_CONTENDED:
751                 contended_term = sample->time - seq->prev_event_time;
752                 ls->wait_time_total += contended_term;
753                 if (contended_term < ls->wait_time_min)
754                         ls->wait_time_min = contended_term;
755                 if (ls->wait_time_max < contended_term)
756                         ls->wait_time_max = contended_term;
757                 break;
758         case SEQ_STATE_RELEASED:
759         case SEQ_STATE_ACQUIRED:
760         case SEQ_STATE_READ_ACQUIRED:
761                 /* broken lock sequence */
762                 if (!ls->broken) {
763                         ls->broken = 1;
764                         bad_hist[BROKEN_ACQUIRED]++;
765                 }
766                 list_del_init(&seq->list);
767                 free(seq);
768                 goto end;
769         default:
770                 BUG_ON("Unknown state of lock sequence found!\n");
771                 break;
772         }
773
774         seq->state = SEQ_STATE_ACQUIRED;
775         ls->nr_acquired++;
776         ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
777         seq->prev_event_time = sample->time;
778 end:
779         return 0;
780 }
781
782 static int report_lock_contended_event(struct evsel *evsel,
783                                        struct perf_sample *sample)
784 {
785         struct lock_stat *ls;
786         struct thread_stat *ts;
787         struct lock_seq_stat *seq;
788         const char *name = evsel__strval(evsel, sample, "name");
789         u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
790         u64 key;
791         int ret;
792
793         ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
794         if (ret < 0)
795                 return ret;
796
797         ls = lock_stat_findnew(key, name, 0);
798         if (!ls)
799                 return -ENOMEM;
800
801         ts = thread_stat_findnew(sample->tid);
802         if (!ts)
803                 return -ENOMEM;
804
805         seq = get_seq(ts, addr);
806         if (!seq)
807                 return -ENOMEM;
808
809         switch (seq->state) {
810         case SEQ_STATE_UNINITIALIZED:
811                 /* orphan event, do nothing */
812                 return 0;
813         case SEQ_STATE_ACQUIRING:
814                 break;
815         case SEQ_STATE_RELEASED:
816         case SEQ_STATE_ACQUIRED:
817         case SEQ_STATE_READ_ACQUIRED:
818         case SEQ_STATE_CONTENDED:
819                 /* broken lock sequence */
820                 if (!ls->broken) {
821                         ls->broken = 1;
822                         bad_hist[BROKEN_CONTENDED]++;
823                 }
824                 list_del_init(&seq->list);
825                 free(seq);
826                 goto end;
827         default:
828                 BUG_ON("Unknown state of lock sequence found!\n");
829                 break;
830         }
831
832         seq->state = SEQ_STATE_CONTENDED;
833         ls->nr_contended++;
834         ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
835         seq->prev_event_time = sample->time;
836 end:
837         return 0;
838 }
839
840 static int report_lock_release_event(struct evsel *evsel,
841                                      struct perf_sample *sample)
842 {
843         struct lock_stat *ls;
844         struct thread_stat *ts;
845         struct lock_seq_stat *seq;
846         const char *name = evsel__strval(evsel, sample, "name");
847         u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
848         u64 key;
849         int ret;
850
851         ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
852         if (ret < 0)
853                 return ret;
854
855         ls = lock_stat_findnew(key, name, 0);
856         if (!ls)
857                 return -ENOMEM;
858
859         ts = thread_stat_findnew(sample->tid);
860         if (!ts)
861                 return -ENOMEM;
862
863         seq = get_seq(ts, addr);
864         if (!seq)
865                 return -ENOMEM;
866
867         switch (seq->state) {
868         case SEQ_STATE_UNINITIALIZED:
869                 goto end;
870         case SEQ_STATE_ACQUIRED:
871                 break;
872         case SEQ_STATE_READ_ACQUIRED:
873                 seq->read_count--;
874                 BUG_ON(seq->read_count < 0);
875                 if (seq->read_count) {
876                         ls->nr_release++;
877                         goto end;
878                 }
879                 break;
880         case SEQ_STATE_ACQUIRING:
881         case SEQ_STATE_CONTENDED:
882         case SEQ_STATE_RELEASED:
883                 /* broken lock sequence */
884                 if (!ls->broken) {
885                         ls->broken = 1;
886                         bad_hist[BROKEN_RELEASE]++;
887                 }
888                 goto free_seq;
889         default:
890                 BUG_ON("Unknown state of lock sequence found!\n");
891                 break;
892         }
893
894         ls->nr_release++;
895 free_seq:
896         list_del_init(&seq->list);
897         free(seq);
898 end:
899         return 0;
900 }
901
902 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
903                                   char *buf, int size)
904 {
905         u64 offset;
906
907         if (map == NULL || sym == NULL) {
908                 buf[0] = '\0';
909                 return 0;
910         }
911
912         offset = map__map_ip(map, ip) - sym->start;
913
914         if (offset)
915                 return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
916         else
917                 return strlcpy(buf, sym->name, size);
918 }
919 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
920                                   char *buf, int size)
921 {
922         struct thread *thread;
923         struct callchain_cursor *cursor;
924         struct machine *machine = &session->machines.host;
925         struct symbol *sym;
926         int skip = 0;
927         int ret;
928
929         /* lock names will be replaced to task name later */
930         if (show_thread_stats)
931                 return -1;
932
933         thread = machine__findnew_thread(machine, -1, sample->pid);
934         if (thread == NULL)
935                 return -1;
936
937         cursor = get_tls_callchain_cursor();
938
939         /* use caller function name from the callchain */
940         ret = thread__resolve_callchain(thread, cursor, evsel, sample,
941                                         NULL, NULL, max_stack_depth);
942         if (ret != 0) {
943                 thread__put(thread);
944                 return -1;
945         }
946
947         callchain_cursor_commit(cursor);
948         thread__put(thread);
949
950         while (true) {
951                 struct callchain_cursor_node *node;
952
953                 node = callchain_cursor_current(cursor);
954                 if (node == NULL)
955                         break;
956
957                 /* skip first few entries - for lock functions */
958                 if (++skip <= stack_skip)
959                         goto next;
960
961                 sym = node->ms.sym;
962                 if (sym && !machine__is_lock_function(machine, node->ip)) {
963                         get_symbol_name_offset(node->ms.map, sym, node->ip,
964                                                buf, size);
965                         return 0;
966                 }
967
968 next:
969                 callchain_cursor_advance(cursor);
970         }
971         return -1;
972 }
973
974 static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
975 {
976         struct callchain_cursor *cursor;
977         struct machine *machine = &session->machines.host;
978         struct thread *thread;
979         u64 hash = 0;
980         int skip = 0;
981         int ret;
982
983         thread = machine__findnew_thread(machine, -1, sample->pid);
984         if (thread == NULL)
985                 return -1;
986
987         cursor = get_tls_callchain_cursor();
988         /* use caller function name from the callchain */
989         ret = thread__resolve_callchain(thread, cursor, evsel, sample,
990                                         NULL, NULL, max_stack_depth);
991         thread__put(thread);
992
993         if (ret != 0)
994                 return -1;
995
996         callchain_cursor_commit(cursor);
997
998         while (true) {
999                 struct callchain_cursor_node *node;
1000
1001                 node = callchain_cursor_current(cursor);
1002                 if (node == NULL)
1003                         break;
1004
1005                 /* skip first few entries - for lock functions */
1006                 if (++skip <= stack_skip)
1007                         goto next;
1008
1009                 if (node->ms.sym && machine__is_lock_function(machine, node->ip))
1010                         goto next;
1011
1012                 hash ^= hash_long((unsigned long)node->ip, 64);
1013
1014 next:
1015                 callchain_cursor_advance(cursor);
1016         }
1017         return hash;
1018 }
1019
1020 static u64 *get_callstack(struct perf_sample *sample, int max_stack)
1021 {
1022         u64 *callstack;
1023         u64 i;
1024         int c;
1025
1026         callstack = calloc(max_stack, sizeof(*callstack));
1027         if (callstack == NULL)
1028                 return NULL;
1029
1030         for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
1031                 u64 ip = sample->callchain->ips[i];
1032
1033                 if (ip >= PERF_CONTEXT_MAX)
1034                         continue;
1035
1036                 callstack[c++] = ip;
1037         }
1038         return callstack;
1039 }
1040
1041 static int report_lock_contention_begin_event(struct evsel *evsel,
1042                                               struct perf_sample *sample)
1043 {
1044         struct lock_stat *ls;
1045         struct thread_stat *ts;
1046         struct lock_seq_stat *seq;
1047         u64 addr = evsel__intval(evsel, sample, "lock_addr");
1048         unsigned int flags = evsel__intval(evsel, sample, "flags");
1049         u64 key;
1050         int i, ret;
1051         static bool kmap_loaded;
1052         struct machine *machine = &session->machines.host;
1053         struct map *kmap;
1054         struct symbol *sym;
1055
1056         ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1057         if (ret < 0)
1058                 return ret;
1059
1060         if (!kmap_loaded) {
1061                 unsigned long *addrs;
1062
1063                 /* make sure it loads the kernel map to find lock symbols */
1064                 map__load(machine__kernel_map(machine));
1065                 kmap_loaded = true;
1066
1067                 /* convert (kernel) symbols to addresses */
1068                 for (i = 0; i < filters.nr_syms; i++) {
1069                         sym = machine__find_kernel_symbol_by_name(machine,
1070                                                                   filters.syms[i],
1071                                                                   &kmap);
1072                         if (sym == NULL) {
1073                                 pr_warning("ignore unknown symbol: %s\n",
1074                                            filters.syms[i]);
1075                                 continue;
1076                         }
1077
1078                         addrs = realloc(filters.addrs,
1079                                         (filters.nr_addrs + 1) * sizeof(*addrs));
1080                         if (addrs == NULL) {
1081                                 pr_warning("memory allocation failure\n");
1082                                 return -ENOMEM;
1083                         }
1084
1085                         addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
1086                         filters.addrs = addrs;
1087                 }
1088         }
1089
1090         ls = lock_stat_find(key);
1091         if (!ls) {
1092                 char buf[128];
1093                 const char *name = "";
1094
1095                 switch (aggr_mode) {
1096                 case LOCK_AGGR_ADDR:
1097                         sym = machine__find_kernel_symbol(machine, key, &kmap);
1098                         if (sym)
1099                                 name = sym->name;
1100                         break;
1101                 case LOCK_AGGR_CALLER:
1102                         name = buf;
1103                         if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
1104                                 name = "Unknown";
1105                         break;
1106                 case LOCK_AGGR_TASK:
1107                 default:
1108                         break;
1109                 }
1110
1111                 ls = lock_stat_findnew(key, name, flags);
1112                 if (!ls)
1113                         return -ENOMEM;
1114         }
1115
1116         if (filters.nr_types) {
1117                 bool found = false;
1118
1119                 for (i = 0; i < filters.nr_types; i++) {
1120                         if (flags == filters.types[i]) {
1121                                 found = true;
1122                                 break;
1123                         }
1124                 }
1125
1126                 if (!found)
1127                         return 0;
1128         }
1129
1130         if (filters.nr_addrs) {
1131                 bool found = false;
1132
1133                 for (i = 0; i < filters.nr_addrs; i++) {
1134                         if (addr == filters.addrs[i]) {
1135                                 found = true;
1136                                 break;
1137                         }
1138                 }
1139
1140                 if (!found)
1141                         return 0;
1142         }
1143
1144         if (needs_callstack()) {
1145                 u64 *callstack = get_callstack(sample, max_stack_depth);
1146                 if (callstack == NULL)
1147                         return -ENOMEM;
1148
1149                 if (!match_callstack_filter(machine, callstack)) {
1150                         free(callstack);
1151                         return 0;
1152                 }
1153
1154                 if (ls->callstack == NULL)
1155                         ls->callstack = callstack;
1156                 else
1157                         free(callstack);
1158         }
1159
1160         ts = thread_stat_findnew(sample->tid);
1161         if (!ts)
1162                 return -ENOMEM;
1163
1164         seq = get_seq(ts, addr);
1165         if (!seq)
1166                 return -ENOMEM;
1167
1168         switch (seq->state) {
1169         case SEQ_STATE_UNINITIALIZED:
1170         case SEQ_STATE_ACQUIRED:
1171                 break;
1172         case SEQ_STATE_CONTENDED:
1173                 /*
1174                  * It can have nested contention begin with mutex spinning,
1175                  * then we would use the original contention begin event and
1176                  * ignore the second one.
1177                  */
1178                 goto end;
1179         case SEQ_STATE_ACQUIRING:
1180         case SEQ_STATE_READ_ACQUIRED:
1181         case SEQ_STATE_RELEASED:
1182                 /* broken lock sequence */
1183                 if (!ls->broken) {
1184                         ls->broken = 1;
1185                         bad_hist[BROKEN_CONTENDED]++;
1186                 }
1187                 list_del_init(&seq->list);
1188                 free(seq);
1189                 goto end;
1190         default:
1191                 BUG_ON("Unknown state of lock sequence found!\n");
1192                 break;
1193         }
1194
1195         if (seq->state != SEQ_STATE_CONTENDED) {
1196                 seq->state = SEQ_STATE_CONTENDED;
1197                 seq->prev_event_time = sample->time;
1198                 ls->nr_contended++;
1199         }
1200 end:
1201         return 0;
1202 }
1203
1204 static int report_lock_contention_end_event(struct evsel *evsel,
1205                                             struct perf_sample *sample)
1206 {
1207         struct lock_stat *ls;
1208         struct thread_stat *ts;
1209         struct lock_seq_stat *seq;
1210         u64 contended_term;
1211         u64 addr = evsel__intval(evsel, sample, "lock_addr");
1212         u64 key;
1213         int ret;
1214
1215         ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
1216         if (ret < 0)
1217                 return ret;
1218
1219         ls = lock_stat_find(key);
1220         if (!ls)
1221                 return 0;
1222
1223         ts = thread_stat_find(sample->tid);
1224         if (!ts)
1225                 return 0;
1226
1227         seq = get_seq(ts, addr);
1228         if (!seq)
1229                 return -ENOMEM;
1230
1231         switch (seq->state) {
1232         case SEQ_STATE_UNINITIALIZED:
1233                 goto end;
1234         case SEQ_STATE_CONTENDED:
1235                 contended_term = sample->time - seq->prev_event_time;
1236                 ls->wait_time_total += contended_term;
1237                 if (contended_term < ls->wait_time_min)
1238                         ls->wait_time_min = contended_term;
1239                 if (ls->wait_time_max < contended_term)
1240                         ls->wait_time_max = contended_term;
1241                 break;
1242         case SEQ_STATE_ACQUIRING:
1243         case SEQ_STATE_ACQUIRED:
1244         case SEQ_STATE_READ_ACQUIRED:
1245         case SEQ_STATE_RELEASED:
1246                 /* broken lock sequence */
1247                 if (!ls->broken) {
1248                         ls->broken = 1;
1249                         bad_hist[BROKEN_ACQUIRED]++;
1250                 }
1251                 list_del_init(&seq->list);
1252                 free(seq);
1253                 goto end;
1254         default:
1255                 BUG_ON("Unknown state of lock sequence found!\n");
1256                 break;
1257         }
1258
1259         seq->state = SEQ_STATE_ACQUIRED;
1260         ls->nr_acquired++;
1261         ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1262 end:
1263         return 0;
1264 }
1265
1266 /* lock oriented handlers */
1267 /* TODO: handlers for CPU oriented, thread oriented */
1268 static struct trace_lock_handler report_lock_ops  = {
1269         .acquire_event          = report_lock_acquire_event,
1270         .acquired_event         = report_lock_acquired_event,
1271         .contended_event        = report_lock_contended_event,
1272         .release_event          = report_lock_release_event,
1273         .contention_begin_event = report_lock_contention_begin_event,
1274         .contention_end_event   = report_lock_contention_end_event,
1275 };
1276
1277 static struct trace_lock_handler contention_lock_ops  = {
1278         .contention_begin_event = report_lock_contention_begin_event,
1279         .contention_end_event   = report_lock_contention_end_event,
1280 };
1281
1282
1283 static struct trace_lock_handler *trace_handler;
1284
1285 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1286 {
1287         if (trace_handler->acquire_event)
1288                 return trace_handler->acquire_event(evsel, sample);
1289         return 0;
1290 }
1291
1292 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1293 {
1294         if (trace_handler->acquired_event)
1295                 return trace_handler->acquired_event(evsel, sample);
1296         return 0;
1297 }
1298
1299 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1300 {
1301         if (trace_handler->contended_event)
1302                 return trace_handler->contended_event(evsel, sample);
1303         return 0;
1304 }
1305
1306 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1307 {
1308         if (trace_handler->release_event)
1309                 return trace_handler->release_event(evsel, sample);
1310         return 0;
1311 }
1312
1313 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1314 {
1315         if (trace_handler->contention_begin_event)
1316                 return trace_handler->contention_begin_event(evsel, sample);
1317         return 0;
1318 }
1319
1320 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1321 {
1322         if (trace_handler->contention_end_event)
1323                 return trace_handler->contention_end_event(evsel, sample);
1324         return 0;
1325 }
1326
1327 static void print_bad_events(int bad, int total)
1328 {
1329         /* Output for debug, this have to be removed */
1330         int i;
1331         int broken = 0;
1332         const char *name[4] =
1333                 { "acquire", "acquired", "contended", "release" };
1334
1335         for (i = 0; i < BROKEN_MAX; i++)
1336                 broken += bad_hist[i];
1337
1338         if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1339                 return;
1340
1341         fprintf(lock_output, "\n=== output for debug ===\n\n");
1342         fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1343         fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1344         fprintf(lock_output, "histogram of events caused bad sequence\n");
1345         for (i = 0; i < BROKEN_MAX; i++)
1346                 fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1347 }
1348
1349 /* TODO: various way to print, coloring, nano or milli sec */
1350 static void print_result(void)
1351 {
1352         struct lock_stat *st;
1353         struct lock_key *key;
1354         char cut_name[20];
1355         int bad, total, printed;
1356
1357         if (!quiet) {
1358                 fprintf(lock_output, "%20s ", "Name");
1359                 list_for_each_entry(key, &lock_keys, list)
1360                         fprintf(lock_output, "%*s ", key->len, key->header);
1361                 fprintf(lock_output, "\n\n");
1362         }
1363
1364         bad = total = printed = 0;
1365         while ((st = pop_from_result())) {
1366                 total++;
1367                 if (st->broken)
1368                         bad++;
1369                 if (!st->nr_acquired)
1370                         continue;
1371
1372                 bzero(cut_name, 20);
1373
1374                 if (strlen(st->name) < 20) {
1375                         /* output raw name */
1376                         const char *name = st->name;
1377
1378                         if (show_thread_stats) {
1379                                 struct thread *t;
1380
1381                                 /* st->addr contains tid of thread */
1382                                 t = perf_session__findnew(session, st->addr);
1383                                 name = thread__comm_str(t);
1384                         }
1385
1386                         fprintf(lock_output, "%20s ", name);
1387                 } else {
1388                         strncpy(cut_name, st->name, 16);
1389                         cut_name[16] = '.';
1390                         cut_name[17] = '.';
1391                         cut_name[18] = '.';
1392                         cut_name[19] = '\0';
1393                         /* cut off name for saving output style */
1394                         fprintf(lock_output, "%20s ", cut_name);
1395                 }
1396
1397                 list_for_each_entry(key, &lock_keys, list) {
1398                         key->print(key, st);
1399                         fprintf(lock_output, " ");
1400                 }
1401                 fprintf(lock_output, "\n");
1402
1403                 if (++printed >= print_nr_entries)
1404                         break;
1405         }
1406
1407         print_bad_events(bad, total);
1408 }
1409
1410 static bool info_threads, info_map;
1411
1412 static void dump_threads(void)
1413 {
1414         struct thread_stat *st;
1415         struct rb_node *node;
1416         struct thread *t;
1417
1418         fprintf(lock_output, "%10s: comm\n", "Thread ID");
1419
1420         node = rb_first(&thread_stats);
1421         while (node) {
1422                 st = container_of(node, struct thread_stat, rb);
1423                 t = perf_session__findnew(session, st->tid);
1424                 fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1425                 node = rb_next(node);
1426                 thread__put(t);
1427         }
1428 }
1429
1430 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1431 {
1432         int ret;
1433
1434         if (a->name && b->name)
1435                 ret = strcmp(a->name, b->name);
1436         else
1437                 ret = !!a->name - !!b->name;
1438
1439         if (!ret)
1440                 return a->addr < b->addr;
1441         else
1442                 return ret < 0;
1443 }
1444
1445 static void dump_map(void)
1446 {
1447         unsigned int i;
1448         struct lock_stat *st;
1449
1450         fprintf(lock_output, "Address of instance: name of class\n");
1451         for (i = 0; i < LOCKHASH_SIZE; i++) {
1452                 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1453                         insert_to_result(st, compare_maps);
1454                 }
1455         }
1456
1457         while ((st = pop_from_result()))
1458                 fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1459 }
1460
1461 static int dump_info(void)
1462 {
1463         int rc = 0;
1464
1465         if (info_threads)
1466                 dump_threads();
1467         else if (info_map)
1468                 dump_map();
1469         else {
1470                 rc = -1;
1471                 pr_err("Unknown type of information\n");
1472         }
1473
1474         return rc;
1475 }
1476
1477 static const struct evsel_str_handler lock_tracepoints[] = {
1478         { "lock:lock_acquire",   evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
1479         { "lock:lock_acquired",  evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1480         { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
1481         { "lock:lock_release",   evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
1482 };
1483
1484 static const struct evsel_str_handler contention_tracepoints[] = {
1485         { "lock:contention_begin", evsel__process_contention_begin, },
1486         { "lock:contention_end",   evsel__process_contention_end,   },
1487 };
1488
1489 static int process_event_update(struct perf_tool *tool,
1490                                 union perf_event *event,
1491                                 struct evlist **pevlist)
1492 {
1493         int ret;
1494
1495         ret = perf_event__process_event_update(tool, event, pevlist);
1496         if (ret < 0)
1497                 return ret;
1498
1499         /* this can return -EEXIST since we call it for each evsel */
1500         perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1501         perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1502         return 0;
1503 }
1504
1505 typedef int (*tracepoint_handler)(struct evsel *evsel,
1506                                   struct perf_sample *sample);
1507
1508 static int process_sample_event(struct perf_tool *tool __maybe_unused,
1509                                 union perf_event *event,
1510                                 struct perf_sample *sample,
1511                                 struct evsel *evsel,
1512                                 struct machine *machine)
1513 {
1514         int err = 0;
1515         struct thread *thread = machine__findnew_thread(machine, sample->pid,
1516                                                         sample->tid);
1517
1518         if (thread == NULL) {
1519                 pr_debug("problem processing %d event, skipping it.\n",
1520                         event->header.type);
1521                 return -1;
1522         }
1523
1524         if (evsel->handler != NULL) {
1525                 tracepoint_handler f = evsel->handler;
1526                 err = f(evsel, sample);
1527         }
1528
1529         thread__put(thread);
1530
1531         return err;
1532 }
1533
1534 static void combine_result(void)
1535 {
1536         unsigned int i;
1537         struct lock_stat *st;
1538
1539         if (!combine_locks)
1540                 return;
1541
1542         for (i = 0; i < LOCKHASH_SIZE; i++) {
1543                 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1544                         combine_lock_stats(st);
1545                 }
1546         }
1547 }
1548
1549 static void sort_result(void)
1550 {
1551         unsigned int i;
1552         struct lock_stat *st;
1553
1554         for (i = 0; i < LOCKHASH_SIZE; i++) {
1555                 hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1556                         insert_to_result(st, compare);
1557                 }
1558         }
1559 }
1560
1561 static const struct {
1562         unsigned int flags;
1563         const char *str;
1564         const char *name;
1565 } lock_type_table[] = {
1566         { 0,                            "semaphore",    "semaphore" },
1567         { LCB_F_SPIN,                   "spinlock",     "spinlock" },
1568         { LCB_F_SPIN | LCB_F_READ,      "rwlock:R",     "rwlock" },
1569         { LCB_F_SPIN | LCB_F_WRITE,     "rwlock:W",     "rwlock" },
1570         { LCB_F_READ,                   "rwsem:R",      "rwsem" },
1571         { LCB_F_WRITE,                  "rwsem:W",      "rwsem" },
1572         { LCB_F_RT,                     "rt-mutex",     "rt-mutex" },
1573         { LCB_F_RT | LCB_F_READ,        "rwlock-rt:R",  "rwlock-rt" },
1574         { LCB_F_RT | LCB_F_WRITE,       "rwlock-rt:W",  "rwlock-rt" },
1575         { LCB_F_PERCPU | LCB_F_READ,    "pcpu-sem:R",   "percpu-rwsem" },
1576         { LCB_F_PERCPU | LCB_F_WRITE,   "pcpu-sem:W",   "percpu-rwsem" },
1577         { LCB_F_MUTEX,                  "mutex",        "mutex" },
1578         { LCB_F_MUTEX | LCB_F_SPIN,     "mutex",        "mutex" },
1579         /* alias for get_type_flag() */
1580         { LCB_F_MUTEX | LCB_F_SPIN,     "mutex-spin",   "mutex" },
1581 };
1582
1583 static const char *get_type_str(unsigned int flags)
1584 {
1585         flags &= LCB_F_MAX_FLAGS - 1;
1586
1587         for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1588                 if (lock_type_table[i].flags == flags)
1589                         return lock_type_table[i].str;
1590         }
1591         return "unknown";
1592 }
1593
1594 static const char *get_type_name(unsigned int flags)
1595 {
1596         flags &= LCB_F_MAX_FLAGS - 1;
1597
1598         for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1599                 if (lock_type_table[i].flags == flags)
1600                         return lock_type_table[i].name;
1601         }
1602         return "unknown";
1603 }
1604
1605 static unsigned int get_type_flag(const char *str)
1606 {
1607         for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1608                 if (!strcmp(lock_type_table[i].name, str))
1609                         return lock_type_table[i].flags;
1610         }
1611         for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1612                 if (!strcmp(lock_type_table[i].str, str))
1613                         return lock_type_table[i].flags;
1614         }
1615         return UINT_MAX;
1616 }
1617
1618 static void lock_filter_finish(void)
1619 {
1620         zfree(&filters.types);
1621         filters.nr_types = 0;
1622
1623         zfree(&filters.addrs);
1624         filters.nr_addrs = 0;
1625
1626         for (int i = 0; i < filters.nr_syms; i++)
1627                 free(filters.syms[i]);
1628
1629         zfree(&filters.syms);
1630         filters.nr_syms = 0;
1631 }
1632
1633 static void sort_contention_result(void)
1634 {
1635         sort_result();
1636 }
1637
1638 static void print_header_stdio(void)
1639 {
1640         struct lock_key *key;
1641
1642         list_for_each_entry(key, &lock_keys, list)
1643                 fprintf(lock_output, "%*s ", key->len, key->header);
1644
1645         switch (aggr_mode) {
1646         case LOCK_AGGR_TASK:
1647                 fprintf(lock_output, "  %10s   %s\n\n", "pid",
1648                         show_lock_owner ? "owner" : "comm");
1649                 break;
1650         case LOCK_AGGR_CALLER:
1651                 fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
1652                 break;
1653         case LOCK_AGGR_ADDR:
1654                 fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
1655                 break;
1656         default:
1657                 break;
1658         }
1659 }
1660
1661 static void print_header_csv(const char *sep)
1662 {
1663         struct lock_key *key;
1664
1665         fprintf(lock_output, "# output: ");
1666         list_for_each_entry(key, &lock_keys, list)
1667                 fprintf(lock_output, "%s%s ", key->header, sep);
1668
1669         switch (aggr_mode) {
1670         case LOCK_AGGR_TASK:
1671                 fprintf(lock_output, "%s%s %s\n", "pid", sep,
1672                         show_lock_owner ? "owner" : "comm");
1673                 break;
1674         case LOCK_AGGR_CALLER:
1675                 fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1676                 if (verbose > 0)
1677                         fprintf(lock_output, "%s %s", sep, "stacktrace");
1678                 fprintf(lock_output, "\n");
1679                 break;
1680         case LOCK_AGGR_ADDR:
1681                 fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1682                 break;
1683         default:
1684                 break;
1685         }
1686 }
1687
1688 static void print_header(void)
1689 {
1690         if (!quiet) {
1691                 if (symbol_conf.field_sep)
1692                         print_header_csv(symbol_conf.field_sep);
1693                 else
1694                         print_header_stdio();
1695         }
1696 }
1697
1698 static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1699 {
1700         struct lock_key *key;
1701         struct thread *t;
1702         int pid;
1703
1704         list_for_each_entry(key, &lock_keys, list) {
1705                 key->print(key, st);
1706                 fprintf(lock_output, " ");
1707         }
1708
1709         switch (aggr_mode) {
1710         case LOCK_AGGR_CALLER:
1711                 fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
1712                 break;
1713         case LOCK_AGGR_TASK:
1714                 pid = st->addr;
1715                 t = perf_session__findnew(session, pid);
1716                 fprintf(lock_output, "  %10d   %s\n",
1717                         pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1718                 break;
1719         case LOCK_AGGR_ADDR:
1720                 fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1721                         st->name, get_type_name(st->flags));
1722                 break;
1723         default:
1724                 break;
1725         }
1726
1727         if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1728                 struct map *kmap;
1729                 struct symbol *sym;
1730                 char buf[128];
1731                 u64 ip;
1732
1733                 for (int i = 0; i < max_stack_depth; i++) {
1734                         if (!st->callstack || !st->callstack[i])
1735                                 break;
1736
1737                         ip = st->callstack[i];
1738                         sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1739                         get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1740                         fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1741                 }
1742         }
1743 }
1744
1745 static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1746                                 const char *sep)
1747 {
1748         struct lock_key *key;
1749         struct thread *t;
1750         int pid;
1751
1752         list_for_each_entry(key, &lock_keys, list) {
1753                 key->print(key, st);
1754                 fprintf(lock_output, "%s ", sep);
1755         }
1756
1757         switch (aggr_mode) {
1758         case LOCK_AGGR_CALLER:
1759                 fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
1760                 if (verbose <= 0)
1761                         fprintf(lock_output, "\n");
1762                 break;
1763         case LOCK_AGGR_TASK:
1764                 pid = st->addr;
1765                 t = perf_session__findnew(session, pid);
1766                 fprintf(lock_output, "%d%s %s\n", pid, sep,
1767                         pid == -1 ? "Unknown" : thread__comm_str(t));
1768                 break;
1769         case LOCK_AGGR_ADDR:
1770                 fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1771                         st->name, sep, get_type_name(st->flags));
1772                 break;
1773         default:
1774                 break;
1775         }
1776
1777         if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1778                 struct map *kmap;
1779                 struct symbol *sym;
1780                 char buf[128];
1781                 u64 ip;
1782
1783                 for (int i = 0; i < max_stack_depth; i++) {
1784                         if (!st->callstack || !st->callstack[i])
1785                                 break;
1786
1787                         ip = st->callstack[i];
1788                         sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1789                         get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1790                         fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1791                 }
1792                 fprintf(lock_output, "\n");
1793         }
1794 }
1795
1796 static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1797 {
1798         if (symbol_conf.field_sep)
1799                 print_lock_stat_csv(con, st, symbol_conf.field_sep);
1800         else
1801                 print_lock_stat_stdio(con, st);
1802 }
1803
1804 static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1805 {
1806         /* Output for debug, this have to be removed */
1807         int broken = fails->task + fails->stack + fails->time + fails->data;
1808
1809         if (!use_bpf)
1810                 print_bad_events(bad, total);
1811
1812         if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1813                 return;
1814
1815         total += broken;
1816         fprintf(lock_output, "\n=== output for debug ===\n\n");
1817         fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1818         fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
1819
1820         fprintf(lock_output, "histogram of failure reasons\n");
1821         fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1822         fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1823         fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1824         fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1825 }
1826
1827 static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1828                              const char *sep)
1829 {
1830         /* Output for debug, this have to be removed */
1831         if (use_bpf)
1832                 bad = fails->task + fails->stack + fails->time + fails->data;
1833
1834         if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1835                 return;
1836
1837         total += bad;
1838         fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1839
1840         if (use_bpf) {
1841                 fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1842                 fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1843                 fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1844                 fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1845         } else {
1846                 int i;
1847                 const char *name[4] = { "acquire", "acquired", "contended", "release" };
1848
1849                 for (i = 0; i < BROKEN_MAX; i++)
1850                         fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1851         }
1852         fprintf(lock_output, "\n");
1853 }
1854
1855 static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1856 {
1857         if (symbol_conf.field_sep)
1858                 print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1859         else
1860                 print_footer_stdio(total, bad, fails);
1861 }
1862
1863 static void print_contention_result(struct lock_contention *con)
1864 {
1865         struct lock_stat *st;
1866         int bad, total, printed;
1867
1868         if (!quiet)
1869                 print_header();
1870
1871         bad = total = printed = 0;
1872
1873         while ((st = pop_from_result())) {
1874                 total += use_bpf ? st->nr_contended : 1;
1875                 if (st->broken)
1876                         bad++;
1877
1878                 if (!st->wait_time_total)
1879                         continue;
1880
1881                 print_lock_stat(con, st);
1882
1883                 if (++printed >= print_nr_entries)
1884                         break;
1885         }
1886
1887         if (print_nr_entries) {
1888                 /* update the total/bad stats */
1889                 while ((st = pop_from_result())) {
1890                         total += use_bpf ? st->nr_contended : 1;
1891                         if (st->broken)
1892                                 bad++;
1893                 }
1894         }
1895         /* some entries are collected but hidden by the callstack filter */
1896         total += con->nr_filtered;
1897
1898         print_footer(total, bad, &con->fails);
1899 }
1900
1901 static bool force;
1902
1903 static int __cmd_report(bool display_info)
1904 {
1905         int err = -EINVAL;
1906         struct perf_tool eops = {
1907                 .attr            = perf_event__process_attr,
1908                 .event_update    = process_event_update,
1909                 .sample          = process_sample_event,
1910                 .comm            = perf_event__process_comm,
1911                 .mmap            = perf_event__process_mmap,
1912                 .namespaces      = perf_event__process_namespaces,
1913                 .tracing_data    = perf_event__process_tracing_data,
1914                 .ordered_events  = true,
1915         };
1916         struct perf_data data = {
1917                 .path  = input_name,
1918                 .mode  = PERF_DATA_MODE_READ,
1919                 .force = force,
1920         };
1921
1922         session = perf_session__new(&data, &eops);
1923         if (IS_ERR(session)) {
1924                 pr_err("Initializing perf session failed\n");
1925                 return PTR_ERR(session);
1926         }
1927
1928         symbol_conf.allow_aliases = true;
1929         symbol__init(&session->header.env);
1930
1931         if (!data.is_pipe) {
1932                 if (!perf_session__has_traces(session, "lock record"))
1933                         goto out_delete;
1934
1935                 if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1936                         pr_err("Initializing perf session tracepoint handlers failed\n");
1937                         goto out_delete;
1938                 }
1939
1940                 if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1941                         pr_err("Initializing perf session tracepoint handlers failed\n");
1942                         goto out_delete;
1943                 }
1944         }
1945
1946         if (setup_output_field(false, output_fields))
1947                 goto out_delete;
1948
1949         if (select_key(false))
1950                 goto out_delete;
1951
1952         if (show_thread_stats)
1953                 aggr_mode = LOCK_AGGR_TASK;
1954
1955         err = perf_session__process_events(session);
1956         if (err)
1957                 goto out_delete;
1958
1959         setup_pager();
1960         if (display_info) /* used for info subcommand */
1961                 err = dump_info();
1962         else {
1963                 combine_result();
1964                 sort_result();
1965                 print_result();
1966         }
1967
1968 out_delete:
1969         perf_session__delete(session);
1970         return err;
1971 }
1972
1973 static void sighandler(int sig __maybe_unused)
1974 {
1975 }
1976
1977 static int check_lock_contention_options(const struct option *options,
1978                                          const char * const *usage)
1979
1980 {
1981         if (show_thread_stats && show_lock_addrs) {
1982                 pr_err("Cannot use thread and addr mode together\n");
1983                 parse_options_usage(usage, options, "threads", 0);
1984                 parse_options_usage(NULL, options, "lock-addr", 0);
1985                 return -1;
1986         }
1987
1988         if (show_lock_owner && !use_bpf) {
1989                 pr_err("Lock owners are available only with BPF\n");
1990                 parse_options_usage(usage, options, "lock-owner", 0);
1991                 parse_options_usage(NULL, options, "use-bpf", 0);
1992                 return -1;
1993         }
1994
1995         if (show_lock_owner && show_lock_addrs) {
1996                 pr_err("Cannot use owner and addr mode together\n");
1997                 parse_options_usage(usage, options, "lock-owner", 0);
1998                 parse_options_usage(NULL, options, "lock-addr", 0);
1999                 return -1;
2000         }
2001
2002         if (symbol_conf.field_sep) {
2003                 if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
2004                     strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
2005                     strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
2006                         pr_err("Cannot use the separator that is already used\n");
2007                         parse_options_usage(usage, options, "x", 1);
2008                         return -1;
2009                 }
2010         }
2011
2012         if (show_lock_owner)
2013                 show_thread_stats = true;
2014
2015         return 0;
2016 }
2017
2018 static int __cmd_contention(int argc, const char **argv)
2019 {
2020         int err = -EINVAL;
2021         struct perf_tool eops = {
2022                 .attr            = perf_event__process_attr,
2023                 .event_update    = process_event_update,
2024                 .sample          = process_sample_event,
2025                 .comm            = perf_event__process_comm,
2026                 .mmap            = perf_event__process_mmap,
2027                 .tracing_data    = perf_event__process_tracing_data,
2028                 .ordered_events  = true,
2029         };
2030         struct perf_data data = {
2031                 .path  = input_name,
2032                 .mode  = PERF_DATA_MODE_READ,
2033                 .force = force,
2034         };
2035         struct lock_contention con = {
2036                 .target = &target,
2037                 .map_nr_entries = bpf_map_entries,
2038                 .max_stack = max_stack_depth,
2039                 .stack_skip = stack_skip,
2040                 .filters = &filters,
2041                 .save_callstack = needs_callstack(),
2042                 .owner = show_lock_owner,
2043         };
2044
2045         lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2046         if (!lockhash_table)
2047                 return -ENOMEM;
2048
2049         con.result = &lockhash_table[0];
2050
2051         session = perf_session__new(use_bpf ? NULL : &data, &eops);
2052         if (IS_ERR(session)) {
2053                 pr_err("Initializing perf session failed\n");
2054                 err = PTR_ERR(session);
2055                 session = NULL;
2056                 goto out_delete;
2057         }
2058
2059         con.machine = &session->machines.host;
2060
2061         con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2062                 show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;
2063
2064         if (con.aggr_mode == LOCK_AGGR_CALLER)
2065                 con.save_callstack = true;
2066
2067         symbol_conf.allow_aliases = true;
2068         symbol__init(&session->header.env);
2069
2070         if (use_bpf) {
2071                 err = target__validate(&target);
2072                 if (err) {
2073                         char errbuf[512];
2074
2075                         target__strerror(&target, err, errbuf, 512);
2076                         pr_err("%s\n", errbuf);
2077                         goto out_delete;
2078                 }
2079
2080                 signal(SIGINT, sighandler);
2081                 signal(SIGCHLD, sighandler);
2082                 signal(SIGTERM, sighandler);
2083
2084                 con.evlist = evlist__new();
2085                 if (con.evlist == NULL) {
2086                         err = -ENOMEM;
2087                         goto out_delete;
2088                 }
2089
2090                 err = evlist__create_maps(con.evlist, &target);
2091                 if (err < 0)
2092                         goto out_delete;
2093
2094                 if (argc) {
2095                         err = evlist__prepare_workload(con.evlist, &target,
2096                                                        argv, false, NULL);
2097                         if (err < 0)
2098                                 goto out_delete;
2099                 }
2100
2101                 if (lock_contention_prepare(&con) < 0) {
2102                         pr_err("lock contention BPF setup failed\n");
2103                         goto out_delete;
2104                 }
2105         } else if (!data.is_pipe) {
2106                 if (!perf_session__has_traces(session, "lock record"))
2107                         goto out_delete;
2108
2109                 if (!evlist__find_evsel_by_str(session->evlist,
2110                                                "lock:contention_begin")) {
2111                         pr_err("lock contention evsel not found\n");
2112                         goto out_delete;
2113                 }
2114
2115                 if (perf_session__set_tracepoints_handlers(session,
2116                                                 contention_tracepoints)) {
2117                         pr_err("Initializing perf session tracepoint handlers failed\n");
2118                         goto out_delete;
2119                 }
2120         }
2121
2122         if (setup_output_field(true, output_fields))
2123                 goto out_delete;
2124
2125         if (select_key(true))
2126                 goto out_delete;
2127
2128         if (symbol_conf.field_sep) {
2129                 int i;
2130                 struct lock_key *keys = contention_keys;
2131
2132                 /* do not align output in CSV format */
2133                 for (i = 0; keys[i].name; i++)
2134                         keys[i].len = 0;
2135         }
2136
2137         if (use_bpf) {
2138                 lock_contention_start();
2139                 if (argc)
2140                         evlist__start_workload(con.evlist);
2141
2142                 /* wait for signal */
2143                 pause();
2144
2145                 lock_contention_stop();
2146                 lock_contention_read(&con);
2147         } else {
2148                 err = perf_session__process_events(session);
2149                 if (err)
2150                         goto out_delete;
2151         }
2152
2153         setup_pager();
2154
2155         sort_contention_result();
2156         print_contention_result(&con);
2157
2158 out_delete:
2159         lock_filter_finish();
2160         evlist__delete(con.evlist);
2161         lock_contention_finish();
2162         perf_session__delete(session);
2163         zfree(&lockhash_table);
2164         return err;
2165 }
2166
2167
2168 static int __cmd_record(int argc, const char **argv)
2169 {
2170         const char *record_args[] = {
2171                 "record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2172         };
2173         const char *callgraph_args[] = {
2174                 "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2175         };
2176         unsigned int rec_argc, i, j, ret;
2177         unsigned int nr_tracepoints;
2178         unsigned int nr_callgraph_args = 0;
2179         const char **rec_argv;
2180         bool has_lock_stat = true;
2181
2182         for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2183                 if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2184                         pr_debug("tracepoint %s is not enabled. "
2185                                  "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2186                                  lock_tracepoints[i].name);
2187                         has_lock_stat = false;
2188                         break;
2189                 }
2190         }
2191
2192         if (has_lock_stat)
2193                 goto setup_args;
2194
2195         for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2196                 if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2197                         pr_err("tracepoint %s is not enabled.\n",
2198                                contention_tracepoints[i].name);
2199                         return 1;
2200                 }
2201         }
2202
2203         nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2204
2205 setup_args:
2206         rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2207
2208         if (has_lock_stat)
2209                 nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2210         else
2211                 nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2212
2213         /* factor of 2 is for -e in front of each tracepoint */
2214         rec_argc += 2 * nr_tracepoints;
2215
2216         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2217         if (!rec_argv)
2218                 return -ENOMEM;
2219
2220         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2221                 rec_argv[i] = strdup(record_args[i]);
2222
2223         for (j = 0; j < nr_tracepoints; j++) {
2224                 const char *ev_name;
2225
2226                 if (has_lock_stat)
2227                         ev_name = strdup(lock_tracepoints[j].name);
2228                 else
2229                         ev_name = strdup(contention_tracepoints[j].name);
2230
2231                 if (!ev_name)
2232                         return -ENOMEM;
2233
2234                 rec_argv[i++] = "-e";
2235                 rec_argv[i++] = ev_name;
2236         }
2237
2238         for (j = 0; j < nr_callgraph_args; j++, i++)
2239                 rec_argv[i] = callgraph_args[j];
2240
2241         for (j = 1; j < (unsigned int)argc; j++, i++)
2242                 rec_argv[i] = argv[j];
2243
2244         BUG_ON(i != rec_argc);
2245
2246         ret = cmd_record(i, rec_argv);
2247         free(rec_argv);
2248         return ret;
2249 }
2250
2251 static int parse_map_entry(const struct option *opt, const char *str,
2252                             int unset __maybe_unused)
2253 {
2254         unsigned long *len = (unsigned long *)opt->value;
2255         unsigned long val;
2256         char *endptr;
2257
2258         errno = 0;
2259         val = strtoul(str, &endptr, 0);
2260         if (*endptr != '\0' || errno != 0) {
2261                 pr_err("invalid BPF map length: %s\n", str);
2262                 return -1;
2263         }
2264
2265         *len = val;
2266         return 0;
2267 }
2268
2269 static int parse_max_stack(const struct option *opt, const char *str,
2270                            int unset __maybe_unused)
2271 {
2272         unsigned long *len = (unsigned long *)opt->value;
2273         long val;
2274         char *endptr;
2275
2276         errno = 0;
2277         val = strtol(str, &endptr, 0);
2278         if (*endptr != '\0' || errno != 0) {
2279                 pr_err("invalid max stack depth: %s\n", str);
2280                 return -1;
2281         }
2282
2283         if (val < 0 || val > sysctl__max_stack()) {
2284                 pr_err("invalid max stack depth: %ld\n", val);
2285                 return -1;
2286         }
2287
2288         *len = val;
2289         return 0;
2290 }
2291
2292 static bool add_lock_type(unsigned int flags)
2293 {
2294         unsigned int *tmp;
2295
2296         tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2297         if (tmp == NULL)
2298                 return false;
2299
2300         tmp[filters.nr_types++] = flags;
2301         filters.types = tmp;
2302         return true;
2303 }
2304
2305 static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2306                            int unset __maybe_unused)
2307 {
2308         char *s, *tmp, *tok;
2309         int ret = 0;
2310
2311         s = strdup(str);
2312         if (s == NULL)
2313                 return -1;
2314
2315         for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2316                 unsigned int flags = get_type_flag(tok);
2317
2318                 if (flags == -1U) {
2319                         pr_err("Unknown lock flags: %s\n", tok);
2320                         ret = -1;
2321                         break;
2322                 }
2323
2324                 if (!add_lock_type(flags)) {
2325                         ret = -1;
2326                         break;
2327                 }
2328         }
2329
2330         free(s);
2331         return ret;
2332 }
2333
2334 static bool add_lock_addr(unsigned long addr)
2335 {
2336         unsigned long *tmp;
2337
2338         tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2339         if (tmp == NULL) {
2340                 pr_err("Memory allocation failure\n");
2341                 return false;
2342         }
2343
2344         tmp[filters.nr_addrs++] = addr;
2345         filters.addrs = tmp;
2346         return true;
2347 }
2348
2349 static bool add_lock_sym(char *name)
2350 {
2351         char **tmp;
2352         char *sym = strdup(name);
2353
2354         if (sym == NULL) {
2355                 pr_err("Memory allocation failure\n");
2356                 return false;
2357         }
2358
2359         tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2360         if (tmp == NULL) {
2361                 pr_err("Memory allocation failure\n");
2362                 free(sym);
2363                 return false;
2364         }
2365
2366         tmp[filters.nr_syms++] = sym;
2367         filters.syms = tmp;
2368         return true;
2369 }
2370
2371 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2372                            int unset __maybe_unused)
2373 {
2374         char *s, *tmp, *tok;
2375         int ret = 0;
2376         u64 addr;
2377
2378         s = strdup(str);
2379         if (s == NULL)
2380                 return -1;
2381
2382         for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2383                 char *end;
2384
2385                 addr = strtoul(tok, &end, 16);
2386                 if (*end == '\0') {
2387                         if (!add_lock_addr(addr)) {
2388                                 ret = -1;
2389                                 break;
2390                         }
2391                         continue;
2392                 }
2393
2394                 /*
2395                  * At this moment, we don't have kernel symbols.  Save the symbols
2396                  * in a separate list and resolve them to addresses later.
2397                  */
2398                 if (!add_lock_sym(tok)) {
2399                         ret = -1;
2400                         break;
2401                 }
2402         }
2403
2404         free(s);
2405         return ret;
2406 }
2407
2408 static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
2409                            int unset __maybe_unused)
2410 {
2411         char *s, *tmp, *tok;
2412         int ret = 0;
2413
2414         s = strdup(str);
2415         if (s == NULL)
2416                 return -1;
2417
2418         for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2419                 struct callstack_filter *entry;
2420
2421                 entry = malloc(sizeof(*entry) + strlen(tok) + 1);
2422                 if (entry == NULL) {
2423                         pr_err("Memory allocation failure\n");
2424                         return -1;
2425                 }
2426
2427                 strcpy(entry->name, tok);
2428                 list_add_tail(&entry->list, &callstack_filters);
2429         }
2430
2431         free(s);
2432         return ret;
2433 }
2434
2435 static int parse_output(const struct option *opt __maybe_unused, const char *str,
2436                         int unset __maybe_unused)
2437 {
2438         const char **name = (const char **)opt->value;
2439
2440         if (str == NULL)
2441                 return -1;
2442
2443         lock_output = fopen(str, "w");
2444         if (lock_output == NULL) {
2445                 pr_err("Cannot open %s\n", str);
2446                 return -1;
2447         }
2448
2449         *name = str;
2450         return 0;
2451 }
2452
2453 int cmd_lock(int argc, const char **argv)
2454 {
2455         const struct option lock_options[] = {
2456         OPT_STRING('i', "input", &input_name, "file", "input file name"),
2457         OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
2458         OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2459         OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2460         OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
2461         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2462                    "file", "vmlinux pathname"),
2463         OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2464                    "file", "kallsyms pathname"),
2465         OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
2466         OPT_END()
2467         };
2468
2469         const struct option info_options[] = {
2470         OPT_BOOLEAN('t', "threads", &info_threads,
2471                     "dump thread list in perf.data"),
2472         OPT_BOOLEAN('m', "map", &info_map,
2473                     "map of lock instances (address:name table)"),
2474         OPT_PARENT(lock_options)
2475         };
2476
2477         const struct option report_options[] = {
2478         OPT_STRING('k', "key", &sort_key, "acquired",
2479                     "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2480         OPT_STRING('F', "field", &output_fields, NULL,
2481                     "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2482         /* TODO: type */
2483         OPT_BOOLEAN('c', "combine-locks", &combine_locks,
2484                     "combine locks in the same class"),
2485         OPT_BOOLEAN('t', "threads", &show_thread_stats,
2486                     "show per-thread lock stats"),
2487         OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2488         OPT_PARENT(lock_options)
2489         };
2490
2491         struct option contention_options[] = {
2492         OPT_STRING('k', "key", &sort_key, "wait_total",
2493                     "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2494         OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2495                     "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2496         OPT_BOOLEAN('t', "threads", &show_thread_stats,
2497                     "show per-thread lock stats"),
2498         OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2499         OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2500                     "System-wide collection from all CPUs"),
2501         OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2502                     "List of cpus to monitor"),
2503         OPT_STRING('p', "pid", &target.pid, "pid",
2504                    "Trace on existing process id"),
2505         OPT_STRING(0, "tid", &target.tid, "tid",
2506                    "Trace on existing thread id (exclusive to --pid)"),
2507         OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
2508                      "Max number of BPF map entries", parse_map_entry),
2509         OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
2510                      "Set the maximum stack depth when collecting lock contention, "
2511                      "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2512         OPT_INTEGER(0, "stack-skip", &stack_skip,
2513                     "Set the number of stack depth to skip when finding a lock caller, "
2514                     "Default: " __stringify(CONTENTION_STACK_SKIP)),
2515         OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
2516         OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
2517         OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
2518                      "Filter specific type of locks", parse_lock_type),
2519         OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
2520                      "Filter specific address/symbol of locks", parse_lock_addr),
2521         OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
2522                      "Filter specific function in the callstack", parse_call_stack),
2523         OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2524         OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
2525                    "print result in CSV format with custom separator"),
2526         OPT_PARENT(lock_options)
2527         };
2528
2529         const char * const info_usage[] = {
2530                 "perf lock info [<options>]",
2531                 NULL
2532         };
2533         const char *const lock_subcommands[] = { "record", "report", "script",
2534                                                  "info", "contention", NULL };
2535         const char *lock_usage[] = {
2536                 NULL,
2537                 NULL
2538         };
2539         const char * const report_usage[] = {
2540                 "perf lock report [<options>]",
2541                 NULL
2542         };
2543         const char * const contention_usage[] = {
2544                 "perf lock contention [<options>]",
2545                 NULL
2546         };
2547         unsigned int i;
2548         int rc = 0;
2549
2550         lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2551         if (!lockhash_table)
2552                 return -ENOMEM;
2553
2554         for (i = 0; i < LOCKHASH_SIZE; i++)
2555                 INIT_HLIST_HEAD(lockhash_table + i);
2556
2557         lock_output = stderr;
2558         argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2559                                         lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2560         if (!argc)
2561                 usage_with_options(lock_usage, lock_options);
2562
2563         if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2564                 return __cmd_record(argc, argv);
2565         } else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
2566                 trace_handler = &report_lock_ops;
2567                 if (argc) {
2568                         argc = parse_options(argc, argv,
2569                                              report_options, report_usage, 0);
2570                         if (argc)
2571                                 usage_with_options(report_usage, report_options);
2572                 }
2573                 rc = __cmd_report(false);
2574         } else if (!strcmp(argv[0], "script")) {
2575                 /* Aliased to 'perf script' */
2576                 rc = cmd_script(argc, argv);
2577         } else if (!strcmp(argv[0], "info")) {
2578                 if (argc) {
2579                         argc = parse_options(argc, argv,
2580                                              info_options, info_usage, 0);
2581                         if (argc)
2582                                 usage_with_options(info_usage, info_options);
2583                 }
2584                 /* recycling report_lock_ops */
2585                 trace_handler = &report_lock_ops;
2586                 rc = __cmd_report(true);
2587         } else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
2588                 trace_handler = &contention_lock_ops;
2589                 sort_key = "wait_total";
2590                 output_fields = "contended,wait_total,wait_max,avg_wait";
2591
2592 #ifndef HAVE_BPF_SKEL
2593                 set_option_nobuild(contention_options, 'b', "use-bpf",
2594                                    "no BUILD_BPF_SKEL=1", false);
2595 #endif
2596                 if (argc) {
2597                         argc = parse_options(argc, argv, contention_options,
2598                                              contention_usage, 0);
2599                 }
2600
2601                 if (check_lock_contention_options(contention_options,
2602                                                   contention_usage) < 0)
2603                         return -1;
2604
2605                 rc = __cmd_contention(argc, argv);
2606         } else {
2607                 usage_with_options(lock_usage, lock_options);
2608         }
2609
2610         zfree(&lockhash_table);
2611         return rc;
2612 }