Merge tag 'writeback-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg...
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/mman.h>
17 #include <linux/futex.h>
18
19 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
20                                          unsigned long arg,
21                                          u8 arg_idx __maybe_unused,
22                                          u8 *arg_mask __maybe_unused)
23 {
24         return scnprintf(bf, size, "%#lx", arg);
25 }
26
27 #define SCA_HEX syscall_arg__scnprintf_hex
28
29 static size_t syscall_arg__scnprintf_whence(char *bf, size_t size,
30                                             unsigned long arg,
31                                             u8 arg_idx __maybe_unused,
32                                             u8 *arg_mask __maybe_unused)
33 {
34         int whence = arg;
35
36         switch (whence) {
37 #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
38         P_WHENCE(SET);
39         P_WHENCE(CUR);
40         P_WHENCE(END);
41 #ifdef SEEK_DATA
42         P_WHENCE(DATA);
43 #endif
44 #ifdef SEEK_HOLE
45         P_WHENCE(HOLE);
46 #endif
47 #undef P_WHENCE
48         default: break;
49         }
50
51         return scnprintf(bf, size, "%#x", whence);
52 }
53
54 #define SCA_WHENCE syscall_arg__scnprintf_whence
55
56 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
57                                                unsigned long arg,
58                                                u8 arg_idx __maybe_unused,
59                                                u8 *arg_mask __maybe_unused)
60 {
61         int printed = 0, prot = arg;
62
63         if (prot == PROT_NONE)
64                 return scnprintf(bf, size, "NONE");
65 #define P_MMAP_PROT(n) \
66         if (prot & PROT_##n) { \
67                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
68                 prot &= ~PROT_##n; \
69         }
70
71         P_MMAP_PROT(EXEC);
72         P_MMAP_PROT(READ);
73         P_MMAP_PROT(WRITE);
74 #ifdef PROT_SEM
75         P_MMAP_PROT(SEM);
76 #endif
77         P_MMAP_PROT(GROWSDOWN);
78         P_MMAP_PROT(GROWSUP);
79 #undef P_MMAP_PROT
80
81         if (prot)
82                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
83
84         return printed;
85 }
86
87 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
88
89 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
90                                                 unsigned long arg, u8 arg_idx __maybe_unused,
91                                                 u8 *arg_mask __maybe_unused)
92 {
93         int printed = 0, flags = arg;
94
95 #define P_MMAP_FLAG(n) \
96         if (flags & MAP_##n) { \
97                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
98                 flags &= ~MAP_##n; \
99         }
100
101         P_MMAP_FLAG(SHARED);
102         P_MMAP_FLAG(PRIVATE);
103 #ifdef MAP_32BIT
104         P_MMAP_FLAG(32BIT);
105 #endif
106         P_MMAP_FLAG(ANONYMOUS);
107         P_MMAP_FLAG(DENYWRITE);
108         P_MMAP_FLAG(EXECUTABLE);
109         P_MMAP_FLAG(FILE);
110         P_MMAP_FLAG(FIXED);
111         P_MMAP_FLAG(GROWSDOWN);
112 #ifdef MAP_HUGETLB
113         P_MMAP_FLAG(HUGETLB);
114 #endif
115         P_MMAP_FLAG(LOCKED);
116         P_MMAP_FLAG(NONBLOCK);
117         P_MMAP_FLAG(NORESERVE);
118         P_MMAP_FLAG(POPULATE);
119         P_MMAP_FLAG(STACK);
120 #ifdef MAP_UNINITIALIZED
121         P_MMAP_FLAG(UNINITIALIZED);
122 #endif
123 #undef P_MMAP_FLAG
124
125         if (flags)
126                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
127
128         return printed;
129 }
130
131 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
132
133 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
134                                                       unsigned long arg, u8 arg_idx __maybe_unused,
135                                                       u8 *arg_mask __maybe_unused)
136 {
137         int behavior = arg;
138
139         switch (behavior) {
140 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
141         P_MADV_BHV(NORMAL);
142         P_MADV_BHV(RANDOM);
143         P_MADV_BHV(SEQUENTIAL);
144         P_MADV_BHV(WILLNEED);
145         P_MADV_BHV(DONTNEED);
146         P_MADV_BHV(REMOVE);
147         P_MADV_BHV(DONTFORK);
148         P_MADV_BHV(DOFORK);
149         P_MADV_BHV(HWPOISON);
150 #ifdef MADV_SOFT_OFFLINE
151         P_MADV_BHV(SOFT_OFFLINE);
152 #endif
153         P_MADV_BHV(MERGEABLE);
154         P_MADV_BHV(UNMERGEABLE);
155 #ifdef MADV_HUGEPAGE
156         P_MADV_BHV(HUGEPAGE);
157 #endif
158 #ifdef MADV_NOHUGEPAGE
159         P_MADV_BHV(NOHUGEPAGE);
160 #endif
161 #ifdef MADV_DONTDUMP
162         P_MADV_BHV(DONTDUMP);
163 #endif
164 #ifdef MADV_DODUMP
165         P_MADV_BHV(DODUMP);
166 #endif
167 #undef P_MADV_PHV
168         default: break;
169         }
170
171         return scnprintf(bf, size, "%#x", behavior);
172 }
173
174 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
175
176 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg,
177                                               u8 arg_idx __maybe_unused, u8 *arg_mask)
178 {
179         enum syscall_futex_args {
180                 SCF_UADDR   = (1 << 0),
181                 SCF_OP      = (1 << 1),
182                 SCF_VAL     = (1 << 2),
183                 SCF_TIMEOUT = (1 << 3),
184                 SCF_UADDR2  = (1 << 4),
185                 SCF_VAL3    = (1 << 5),
186         };
187         int op = arg;
188         int cmd = op & FUTEX_CMD_MASK;
189         size_t printed = 0;
190
191         switch (cmd) {
192 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
193         P_FUTEX_OP(WAIT);           *arg_mask |= SCF_VAL3|SCF_UADDR2;             break;
194         P_FUTEX_OP(WAKE);           *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
195         P_FUTEX_OP(FD);             *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
196         P_FUTEX_OP(REQUEUE);        *arg_mask |= SCF_VAL3|SCF_TIMEOUT;            break;
197         P_FUTEX_OP(CMP_REQUEUE);    *arg_mask |= SCF_TIMEOUT;                     break;
198         P_FUTEX_OP(CMP_REQUEUE_PI); *arg_mask |= SCF_TIMEOUT;                     break;
199         P_FUTEX_OP(WAKE_OP);                                                      break;
200         P_FUTEX_OP(LOCK_PI);        *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
201         P_FUTEX_OP(UNLOCK_PI);      *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
202         P_FUTEX_OP(TRYLOCK_PI);     *arg_mask |= SCF_VAL3|SCF_UADDR2;             break;
203         P_FUTEX_OP(WAIT_BITSET);    *arg_mask |= SCF_UADDR2;                      break;
204         P_FUTEX_OP(WAKE_BITSET);    *arg_mask |= SCF_UADDR2;                      break;
205         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
206         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
207         }
208
209         if (op & FUTEX_PRIVATE_FLAG)
210                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
211
212         if (op & FUTEX_CLOCK_REALTIME)
213                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
214
215         return printed;
216 }
217
218 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
219
220 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
221                                                unsigned long arg,
222                                                u8 arg_idx, u8 *arg_mask)
223 {
224         int printed = 0, flags = arg;
225
226         if (!(flags & O_CREAT))
227                 *arg_mask |= 1 << (arg_idx + 1); /* Mask the mode parm */
228
229         if (flags == 0)
230                 return scnprintf(bf, size, "RDONLY");
231 #define P_FLAG(n) \
232         if (flags & O_##n) { \
233                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
234                 flags &= ~O_##n; \
235         }
236
237         P_FLAG(APPEND);
238         P_FLAG(ASYNC);
239         P_FLAG(CLOEXEC);
240         P_FLAG(CREAT);
241         P_FLAG(DIRECT);
242         P_FLAG(DIRECTORY);
243         P_FLAG(EXCL);
244         P_FLAG(LARGEFILE);
245         P_FLAG(NOATIME);
246         P_FLAG(NOCTTY);
247 #ifdef O_NONBLOCK
248         P_FLAG(NONBLOCK);
249 #elif O_NDELAY
250         P_FLAG(NDELAY);
251 #endif
252 #ifdef O_PATH
253         P_FLAG(PATH);
254 #endif
255         P_FLAG(RDWR);
256 #ifdef O_DSYNC
257         if ((flags & O_SYNC) == O_SYNC)
258                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
259         else {
260                 P_FLAG(DSYNC);
261         }
262 #else
263         P_FLAG(SYNC);
264 #endif
265         P_FLAG(TRUNC);
266         P_FLAG(WRONLY);
267 #undef P_FLAG
268
269         if (flags)
270                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
271
272         return printed;
273 }
274
275 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
276
277 static struct syscall_fmt {
278         const char *name;
279         const char *alias;
280         size_t     (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 arg_idx, u8 *arg_mask);
281         bool       errmsg;
282         bool       timeout;
283         bool       hexret;
284 } syscall_fmts[] = {
285         { .name     = "access",     .errmsg = true, },
286         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
287         { .name     = "brk",        .hexret = true,
288           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
289         { .name     = "mmap",       .hexret = true, },
290         { .name     = "connect",    .errmsg = true, },
291         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
292         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
293         { .name     = "futex",      .errmsg = true,
294           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
295         { .name     = "ioctl",      .errmsg = true,
296           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
297         { .name     = "lseek",      .errmsg = true,
298           .arg_scnprintf = { [2] = SCA_WHENCE, /* whence */ }, },
299         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
300         { .name     = "madvise",    .errmsg = true,
301           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
302                              [2] = SCA_MADV_BHV, /* behavior */ }, },
303         { .name     = "mmap",       .hexret = true,
304           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
305                              [2] = SCA_MMAP_PROT, /* prot */
306                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
307         { .name     = "mprotect",   .errmsg = true,
308           .arg_scnprintf = { [0] = SCA_HEX, /* start */
309                              [2] = SCA_MMAP_PROT, /* prot */ }, },
310         { .name     = "mremap",     .hexret = true,
311           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
312                              [4] = SCA_HEX, /* new_addr */ }, },
313         { .name     = "munmap",     .errmsg = true,
314           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
315         { .name     = "open",       .errmsg = true,
316           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
317         { .name     = "open_by_handle_at", .errmsg = true,
318           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
319         { .name     = "openat",     .errmsg = true,
320           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
321         { .name     = "poll",       .errmsg = true, .timeout = true, },
322         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
323         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
324         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
325         { .name     = "read",       .errmsg = true, },
326         { .name     = "recvfrom",   .errmsg = true, },
327         { .name     = "select",     .errmsg = true, .timeout = true, },
328         { .name     = "socket",     .errmsg = true, },
329         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
330         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
331 };
332
333 static int syscall_fmt__cmp(const void *name, const void *fmtp)
334 {
335         const struct syscall_fmt *fmt = fmtp;
336         return strcmp(name, fmt->name);
337 }
338
339 static struct syscall_fmt *syscall_fmt__find(const char *name)
340 {
341         const int nmemb = ARRAY_SIZE(syscall_fmts);
342         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
343 }
344
345 struct syscall {
346         struct event_format *tp_format;
347         const char          *name;
348         bool                filtered;
349         struct syscall_fmt  *fmt;
350         size_t              (**arg_scnprintf)(char *bf, size_t size,
351                                               unsigned long arg, u8 arg_idx, u8 *args_mask);
352 };
353
354 static size_t fprintf_duration(unsigned long t, FILE *fp)
355 {
356         double duration = (double)t / NSEC_PER_MSEC;
357         size_t printed = fprintf(fp, "(");
358
359         if (duration >= 1.0)
360                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
361         else if (duration >= 0.01)
362                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
363         else
364                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
365         return printed + fprintf(fp, "): ");
366 }
367
368 struct thread_trace {
369         u64               entry_time;
370         u64               exit_time;
371         bool              entry_pending;
372         unsigned long     nr_events;
373         char              *entry_str;
374         double            runtime_ms;
375 };
376
377 static struct thread_trace *thread_trace__new(void)
378 {
379         return zalloc(sizeof(struct thread_trace));
380 }
381
382 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
383 {
384         struct thread_trace *ttrace;
385
386         if (thread == NULL)
387                 goto fail;
388
389         if (thread->priv == NULL)
390                 thread->priv = thread_trace__new();
391                 
392         if (thread->priv == NULL)
393                 goto fail;
394
395         ttrace = thread->priv;
396         ++ttrace->nr_events;
397
398         return ttrace;
399 fail:
400         color_fprintf(fp, PERF_COLOR_RED,
401                       "WARNING: not enough memory, dropping samples!\n");
402         return NULL;
403 }
404
405 struct trace {
406         struct perf_tool        tool;
407         int                     audit_machine;
408         struct {
409                 int             max;
410                 struct syscall  *table;
411         } syscalls;
412         struct perf_record_opts opts;
413         struct machine          host;
414         u64                     base_time;
415         FILE                    *output;
416         unsigned long           nr_events;
417         struct strlist          *ev_qualifier;
418         bool                    not_ev_qualifier;
419         struct intlist          *tid_list;
420         struct intlist          *pid_list;
421         bool                    sched;
422         bool                    multiple_threads;
423         double                  duration_filter;
424         double                  runtime_ms;
425 };
426
427 static bool trace__filter_duration(struct trace *trace, double t)
428 {
429         return t < (trace->duration_filter * NSEC_PER_MSEC);
430 }
431
432 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
433 {
434         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
435
436         return fprintf(fp, "%10.3f ", ts);
437 }
438
439 static bool done = false;
440
441 static void sig_handler(int sig __maybe_unused)
442 {
443         done = true;
444 }
445
446 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
447                                         u64 duration, u64 tstamp, FILE *fp)
448 {
449         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
450         printed += fprintf_duration(duration, fp);
451
452         if (trace->multiple_threads)
453                 printed += fprintf(fp, "%d ", thread->tid);
454
455         return printed;
456 }
457
458 static int trace__process_event(struct trace *trace, struct machine *machine,
459                                 union perf_event *event)
460 {
461         int ret = 0;
462
463         switch (event->header.type) {
464         case PERF_RECORD_LOST:
465                 color_fprintf(trace->output, PERF_COLOR_RED,
466                               "LOST %" PRIu64 " events!\n", event->lost.lost);
467                 ret = machine__process_lost_event(machine, event);
468         default:
469                 ret = machine__process_event(machine, event);
470                 break;
471         }
472
473         return ret;
474 }
475
476 static int trace__tool_process(struct perf_tool *tool,
477                                union perf_event *event,
478                                struct perf_sample *sample __maybe_unused,
479                                struct machine *machine)
480 {
481         struct trace *trace = container_of(tool, struct trace, tool);
482         return trace__process_event(trace, machine, event);
483 }
484
485 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
486 {
487         int err = symbol__init();
488
489         if (err)
490                 return err;
491
492         machine__init(&trace->host, "", HOST_KERNEL_ID);
493         machine__create_kernel_maps(&trace->host);
494
495         if (perf_target__has_task(&trace->opts.target)) {
496                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
497                                                         trace__tool_process,
498                                                         &trace->host);
499         } else {
500                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
501                                                      &trace->host);
502         }
503
504         if (err)
505                 symbol__exit();
506
507         return err;
508 }
509
510 static int syscall__set_arg_fmts(struct syscall *sc)
511 {
512         struct format_field *field;
513         int idx = 0;
514
515         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
516         if (sc->arg_scnprintf == NULL)
517                 return -1;
518
519         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
520                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
521                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
522                 else if (field->flags & FIELD_IS_POINTER)
523                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
524                 ++idx;
525         }
526
527         return 0;
528 }
529
530 static int trace__read_syscall_info(struct trace *trace, int id)
531 {
532         char tp_name[128];
533         struct syscall *sc;
534         const char *name = audit_syscall_to_name(id, trace->audit_machine);
535
536         if (name == NULL)
537                 return -1;
538
539         if (id > trace->syscalls.max) {
540                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
541
542                 if (nsyscalls == NULL)
543                         return -1;
544
545                 if (trace->syscalls.max != -1) {
546                         memset(nsyscalls + trace->syscalls.max + 1, 0,
547                                (id - trace->syscalls.max) * sizeof(*sc));
548                 } else {
549                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
550                 }
551
552                 trace->syscalls.table = nsyscalls;
553                 trace->syscalls.max   = id;
554         }
555
556         sc = trace->syscalls.table + id;
557         sc->name = name;
558
559         if (trace->ev_qualifier) {
560                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
561
562                 if (!(in ^ trace->not_ev_qualifier)) {
563                         sc->filtered = true;
564                         /*
565                          * No need to do read tracepoint information since this will be
566                          * filtered out.
567                          */
568                         return 0;
569                 }
570         }
571
572         sc->fmt  = syscall_fmt__find(sc->name);
573
574         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
575         sc->tp_format = event_format__new("syscalls", tp_name);
576
577         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
578                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
579                 sc->tp_format = event_format__new("syscalls", tp_name);
580         }
581
582         if (sc->tp_format == NULL)
583                 return -1;
584
585         return syscall__set_arg_fmts(sc);
586 }
587
588 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
589                                       unsigned long *args)
590 {
591         int i = 0;
592         size_t printed = 0;
593
594         if (sc->tp_format != NULL) {
595                 struct format_field *field;
596                 u8 mask = 0, bit = 1;
597
598                 for (field = sc->tp_format->format.fields->next; field;
599                      field = field->next, ++i, bit <<= 1) {
600                         if (mask & bit)
601                                 continue;
602
603                         printed += scnprintf(bf + printed, size - printed,
604                                              "%s%s: ", printed ? ", " : "", field->name);
605
606                         if (sc->arg_scnprintf && sc->arg_scnprintf[i]) {
607                                 printed += sc->arg_scnprintf[i](bf + printed, size - printed,
608                                                                 args[i], i, &mask);
609                         } else {
610                                 printed += scnprintf(bf + printed, size - printed,
611                                                      "%ld", args[i]);
612                         }
613                 }
614         } else {
615                 while (i < 6) {
616                         printed += scnprintf(bf + printed, size - printed,
617                                              "%sarg%d: %ld",
618                                              printed ? ", " : "", i, args[i]);
619                         ++i;
620                 }
621         }
622
623         return printed;
624 }
625
626 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
627                                   struct perf_sample *sample);
628
629 static struct syscall *trace__syscall_info(struct trace *trace,
630                                            struct perf_evsel *evsel,
631                                            struct perf_sample *sample)
632 {
633         int id = perf_evsel__intval(evsel, sample, "id");
634
635         if (id < 0) {
636
637                 /*
638                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
639                  * before that, leaving at a higher verbosity level till that is
640                  * explained. Reproduced with plain ftrace with:
641                  *
642                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
643                  * grep "NR -1 " /t/trace_pipe
644                  *
645                  * After generating some load on the machine.
646                  */
647                 if (verbose > 1) {
648                         static u64 n;
649                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
650                                 id, perf_evsel__name(evsel), ++n);
651                 }
652                 return NULL;
653         }
654
655         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
656             trace__read_syscall_info(trace, id))
657                 goto out_cant_read;
658
659         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
660                 goto out_cant_read;
661
662         return &trace->syscalls.table[id];
663
664 out_cant_read:
665         if (verbose) {
666                 fprintf(trace->output, "Problems reading syscall %d", id);
667                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
668                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
669                 fputs(" information\n", trace->output);
670         }
671         return NULL;
672 }
673
674 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
675                             struct perf_sample *sample)
676 {
677         char *msg;
678         void *args;
679         size_t printed = 0;
680         struct thread *thread;
681         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
682         struct thread_trace *ttrace;
683
684         if (sc == NULL)
685                 return -1;
686
687         if (sc->filtered)
688                 return 0;
689
690         thread = machine__findnew_thread(&trace->host, sample->pid,
691                                          sample->tid);
692         ttrace = thread__trace(thread, trace->output);
693         if (ttrace == NULL)
694                 return -1;
695
696         args = perf_evsel__rawptr(evsel, sample, "args");
697         if (args == NULL) {
698                 fprintf(trace->output, "Problems reading syscall arguments\n");
699                 return -1;
700         }
701
702         ttrace = thread->priv;
703
704         if (ttrace->entry_str == NULL) {
705                 ttrace->entry_str = malloc(1024);
706                 if (!ttrace->entry_str)
707                         return -1;
708         }
709
710         ttrace->entry_time = sample->time;
711         msg = ttrace->entry_str;
712         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
713
714         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
715
716         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
717                 if (!trace->duration_filter) {
718                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
719                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
720                 }
721         } else
722                 ttrace->entry_pending = true;
723
724         return 0;
725 }
726
727 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
728                            struct perf_sample *sample)
729 {
730         int ret;
731         u64 duration = 0;
732         struct thread *thread;
733         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
734         struct thread_trace *ttrace;
735
736         if (sc == NULL)
737                 return -1;
738
739         if (sc->filtered)
740                 return 0;
741
742         thread = machine__findnew_thread(&trace->host, sample->pid,
743                                          sample->tid);
744         ttrace = thread__trace(thread, trace->output);
745         if (ttrace == NULL)
746                 return -1;
747
748         ret = perf_evsel__intval(evsel, sample, "ret");
749
750         ttrace = thread->priv;
751
752         ttrace->exit_time = sample->time;
753
754         if (ttrace->entry_time) {
755                 duration = sample->time - ttrace->entry_time;
756                 if (trace__filter_duration(trace, duration))
757                         goto out;
758         } else if (trace->duration_filter)
759                 goto out;
760
761         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
762
763         if (ttrace->entry_pending) {
764                 fprintf(trace->output, "%-70s", ttrace->entry_str);
765         } else {
766                 fprintf(trace->output, " ... [");
767                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
768                 fprintf(trace->output, "]: %s()", sc->name);
769         }
770
771         if (sc->fmt == NULL) {
772 signed_print:
773                 fprintf(trace->output, ") = %d", ret);
774         } else if (ret < 0 && sc->fmt->errmsg) {
775                 char bf[256];
776                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
777                            *e = audit_errno_to_name(-ret);
778
779                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
780         } else if (ret == 0 && sc->fmt->timeout)
781                 fprintf(trace->output, ") = 0 Timeout");
782         else if (sc->fmt->hexret)
783                 fprintf(trace->output, ") = %#x", ret);
784         else
785                 goto signed_print;
786
787         fputc('\n', trace->output);
788 out:
789         ttrace->entry_pending = false;
790
791         return 0;
792 }
793
794 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
795                                      struct perf_sample *sample)
796 {
797         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
798         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
799         struct thread *thread = machine__findnew_thread(&trace->host,
800                                                         sample->pid,
801                                                         sample->tid);
802         struct thread_trace *ttrace = thread__trace(thread, trace->output);
803
804         if (ttrace == NULL)
805                 goto out_dump;
806
807         ttrace->runtime_ms += runtime_ms;
808         trace->runtime_ms += runtime_ms;
809         return 0;
810
811 out_dump:
812         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
813                evsel->name,
814                perf_evsel__strval(evsel, sample, "comm"),
815                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
816                runtime,
817                perf_evsel__intval(evsel, sample, "vruntime"));
818         return 0;
819 }
820
821 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
822 {
823         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
824             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
825                 return false;
826
827         if (trace->pid_list || trace->tid_list)
828                 return true;
829
830         return false;
831 }
832
833 static int trace__process_sample(struct perf_tool *tool,
834                                  union perf_event *event __maybe_unused,
835                                  struct perf_sample *sample,
836                                  struct perf_evsel *evsel,
837                                  struct machine *machine __maybe_unused)
838 {
839         struct trace *trace = container_of(tool, struct trace, tool);
840         int err = 0;
841
842         tracepoint_handler handler = evsel->handler.func;
843
844         if (skip_sample(trace, sample))
845                 return 0;
846
847         if (trace->base_time == 0)
848                 trace->base_time = sample->time;
849
850         if (handler)
851                 handler(trace, evsel, sample);
852
853         return err;
854 }
855
856 static bool
857 perf_session__has_tp(struct perf_session *session, const char *name)
858 {
859         struct perf_evsel *evsel;
860
861         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
862
863         return evsel != NULL;
864 }
865
866 static int parse_target_str(struct trace *trace)
867 {
868         if (trace->opts.target.pid) {
869                 trace->pid_list = intlist__new(trace->opts.target.pid);
870                 if (trace->pid_list == NULL) {
871                         pr_err("Error parsing process id string\n");
872                         return -EINVAL;
873                 }
874         }
875
876         if (trace->opts.target.tid) {
877                 trace->tid_list = intlist__new(trace->opts.target.tid);
878                 if (trace->tid_list == NULL) {
879                         pr_err("Error parsing thread id string\n");
880                         return -EINVAL;
881                 }
882         }
883
884         return 0;
885 }
886
887 static int trace__run(struct trace *trace, int argc, const char **argv)
888 {
889         struct perf_evlist *evlist = perf_evlist__new();
890         struct perf_evsel *evsel;
891         int err = -1, i;
892         unsigned long before;
893         const bool forks = argc > 0;
894
895         if (evlist == NULL) {
896                 fprintf(trace->output, "Not enough memory to run!\n");
897                 goto out;
898         }
899
900         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
901             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
902                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
903                 goto out_delete_evlist;
904         }
905
906         if (trace->sched &&
907             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
908                                    trace__sched_stat_runtime)) {
909                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
910                 goto out_delete_evlist;
911         }
912
913         err = perf_evlist__create_maps(evlist, &trace->opts.target);
914         if (err < 0) {
915                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
916                 goto out_delete_evlist;
917         }
918
919         err = trace__symbols_init(trace, evlist);
920         if (err < 0) {
921                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
922                 goto out_delete_maps;
923         }
924
925         perf_evlist__config(evlist, &trace->opts);
926
927         signal(SIGCHLD, sig_handler);
928         signal(SIGINT, sig_handler);
929
930         if (forks) {
931                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
932                                                     argv, false, false);
933                 if (err < 0) {
934                         fprintf(trace->output, "Couldn't run the workload!\n");
935                         goto out_delete_maps;
936                 }
937         }
938
939         err = perf_evlist__open(evlist);
940         if (err < 0) {
941                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
942                 goto out_delete_maps;
943         }
944
945         err = perf_evlist__mmap(evlist, UINT_MAX, false);
946         if (err < 0) {
947                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
948                 goto out_close_evlist;
949         }
950
951         perf_evlist__enable(evlist);
952
953         if (forks)
954                 perf_evlist__start_workload(evlist);
955
956         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
957 again:
958         before = trace->nr_events;
959
960         for (i = 0; i < evlist->nr_mmaps; i++) {
961                 union perf_event *event;
962
963                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
964                         const u32 type = event->header.type;
965                         tracepoint_handler handler;
966                         struct perf_sample sample;
967
968                         ++trace->nr_events;
969
970                         err = perf_evlist__parse_sample(evlist, event, &sample);
971                         if (err) {
972                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
973                                 continue;
974                         }
975
976                         if (trace->base_time == 0)
977                                 trace->base_time = sample.time;
978
979                         if (type != PERF_RECORD_SAMPLE) {
980                                 trace__process_event(trace, &trace->host, event);
981                                 continue;
982                         }
983
984                         evsel = perf_evlist__id2evsel(evlist, sample.id);
985                         if (evsel == NULL) {
986                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
987                                 continue;
988                         }
989
990                         if (sample.raw_data == NULL) {
991                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
992                                        perf_evsel__name(evsel), sample.tid,
993                                        sample.cpu, sample.raw_size);
994                                 continue;
995                         }
996
997                         handler = evsel->handler.func;
998                         handler(trace, evsel, &sample);
999
1000                         if (done)
1001                                 goto out_unmap_evlist;
1002                 }
1003         }
1004
1005         if (trace->nr_events == before) {
1006                 if (done)
1007                         goto out_unmap_evlist;
1008
1009                 poll(evlist->pollfd, evlist->nr_fds, -1);
1010         }
1011
1012         if (done)
1013                 perf_evlist__disable(evlist);
1014
1015         goto again;
1016
1017 out_unmap_evlist:
1018         perf_evlist__munmap(evlist);
1019 out_close_evlist:
1020         perf_evlist__close(evlist);
1021 out_delete_maps:
1022         perf_evlist__delete_maps(evlist);
1023 out_delete_evlist:
1024         perf_evlist__delete(evlist);
1025 out:
1026         return err;
1027 }
1028
1029 static int trace__replay(struct trace *trace)
1030 {
1031         const struct perf_evsel_str_handler handlers[] = {
1032                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1033                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1034         };
1035
1036         struct perf_session *session;
1037         int err = -1;
1038
1039         trace->tool.sample        = trace__process_sample;
1040         trace->tool.mmap          = perf_event__process_mmap;
1041         trace->tool.comm          = perf_event__process_comm;
1042         trace->tool.exit          = perf_event__process_exit;
1043         trace->tool.fork          = perf_event__process_fork;
1044         trace->tool.attr          = perf_event__process_attr;
1045         trace->tool.tracing_data = perf_event__process_tracing_data;
1046         trace->tool.build_id      = perf_event__process_build_id;
1047
1048         trace->tool.ordered_samples = true;
1049         trace->tool.ordering_requires_timestamps = true;
1050
1051         /* add tid to output */
1052         trace->multiple_threads = true;
1053
1054         if (symbol__init() < 0)
1055                 return -1;
1056
1057         session = perf_session__new(input_name, O_RDONLY, 0, false,
1058                                     &trace->tool);
1059         if (session == NULL)
1060                 return -ENOMEM;
1061
1062         err = perf_session__set_tracepoints_handlers(session, handlers);
1063         if (err)
1064                 goto out;
1065
1066         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1067                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1068                 goto out;
1069         }
1070
1071         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1072                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1073                 goto out;
1074         }
1075
1076         err = parse_target_str(trace);
1077         if (err != 0)
1078                 goto out;
1079
1080         setup_pager();
1081
1082         err = perf_session__process_events(session, &trace->tool);
1083         if (err)
1084                 pr_err("Failed to process events, error %d", err);
1085
1086 out:
1087         perf_session__delete(session);
1088
1089         return err;
1090 }
1091
1092 static size_t trace__fprintf_threads_header(FILE *fp)
1093 {
1094         size_t printed;
1095
1096         printed  = fprintf(fp, "\n _____________________________________________________________________\n");
1097         printed += fprintf(fp," __)    Summary of events    (__\n\n");
1098         printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
1099         printed += fprintf(fp," _____________________________________________________________________\n\n");
1100
1101         return printed;
1102 }
1103
1104 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1105 {
1106         size_t printed = trace__fprintf_threads_header(fp);
1107         struct rb_node *nd;
1108
1109         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1110                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1111                 struct thread_trace *ttrace = thread->priv;
1112                 const char *color;
1113                 double ratio;
1114
1115                 if (ttrace == NULL)
1116                         continue;
1117
1118                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1119
1120                 color = PERF_COLOR_NORMAL;
1121                 if (ratio > 50.0)
1122                         color = PERF_COLOR_RED;
1123                 else if (ratio > 25.0)
1124                         color = PERF_COLOR_GREEN;
1125                 else if (ratio > 5.0)
1126                         color = PERF_COLOR_YELLOW;
1127
1128                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1129                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1130                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1131                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1132         }
1133
1134         return printed;
1135 }
1136
1137 static int trace__set_duration(const struct option *opt, const char *str,
1138                                int unset __maybe_unused)
1139 {
1140         struct trace *trace = opt->value;
1141
1142         trace->duration_filter = atof(str);
1143         return 0;
1144 }
1145
1146 static int trace__open_output(struct trace *trace, const char *filename)
1147 {
1148         struct stat st;
1149
1150         if (!stat(filename, &st) && st.st_size) {
1151                 char oldname[PATH_MAX];
1152
1153                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1154                 unlink(oldname);
1155                 rename(filename, oldname);
1156         }
1157
1158         trace->output = fopen(filename, "w");
1159
1160         return trace->output == NULL ? -errno : 0;
1161 }
1162
1163 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1164 {
1165         const char * const trace_usage[] = {
1166                 "perf trace [<options>] [<command>]",
1167                 "perf trace [<options>] -- <command> [<options>]",
1168                 NULL
1169         };
1170         struct trace trace = {
1171                 .audit_machine = audit_detect_machine(),
1172                 .syscalls = {
1173                         . max = -1,
1174                 },
1175                 .opts = {
1176                         .target = {
1177                                 .uid       = UINT_MAX,
1178                                 .uses_mmap = true,
1179                         },
1180                         .user_freq     = UINT_MAX,
1181                         .user_interval = ULLONG_MAX,
1182                         .no_delay      = true,
1183                         .mmap_pages    = 1024,
1184                 },
1185                 .output = stdout,
1186         };
1187         const char *output_name = NULL;
1188         const char *ev_qualifier_str = NULL;
1189         const struct option trace_options[] = {
1190         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1191                     "list of events to trace"),
1192         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1193         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1194         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1195                     "trace events on existing process id"),
1196         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1197                     "trace events on existing thread id"),
1198         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1199                     "system-wide collection from all CPUs"),
1200         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1201                     "list of cpus to monitor"),
1202         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1203                     "child tasks do not inherit counters"),
1204         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1205                      "number of mmap data pages"),
1206         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1207                    "user to profile"),
1208         OPT_CALLBACK(0, "duration", &trace, "float",
1209                      "show only events with duration > N.M ms",
1210                      trace__set_duration),
1211         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1212         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1213         OPT_END()
1214         };
1215         int err;
1216         char bf[BUFSIZ];
1217
1218         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1219
1220         if (output_name != NULL) {
1221                 err = trace__open_output(&trace, output_name);
1222                 if (err < 0) {
1223                         perror("failed to create output file");
1224                         goto out;
1225                 }
1226         }
1227
1228         if (ev_qualifier_str != NULL) {
1229                 const char *s = ev_qualifier_str;
1230
1231                 trace.not_ev_qualifier = *s == '!';
1232                 if (trace.not_ev_qualifier)
1233                         ++s;
1234                 trace.ev_qualifier = strlist__new(true, s);
1235                 if (trace.ev_qualifier == NULL) {
1236                         fputs("Not enough memory to parse event qualifier",
1237                               trace.output);
1238                         err = -ENOMEM;
1239                         goto out_close;
1240                 }
1241         }
1242
1243         err = perf_target__validate(&trace.opts.target);
1244         if (err) {
1245                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1246                 fprintf(trace.output, "%s", bf);
1247                 goto out_close;
1248         }
1249
1250         err = perf_target__parse_uid(&trace.opts.target);
1251         if (err) {
1252                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1253                 fprintf(trace.output, "%s", bf);
1254                 goto out_close;
1255         }
1256
1257         if (!argc && perf_target__none(&trace.opts.target))
1258                 trace.opts.target.system_wide = true;
1259
1260         if (input_name)
1261                 err = trace__replay(&trace);
1262         else
1263                 err = trace__run(&trace, argc, argv);
1264
1265         if (trace.sched && !err)
1266                 trace__fprintf_thread_summary(&trace, trace.output);
1267
1268 out_close:
1269         if (output_name != NULL)
1270                 fclose(trace.output);
1271 out:
1272         return err;
1273 }