fd485340472790498720470e35ce675da6e0109b
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/mman.h>
17 #include <linux/futex.h>
18
19 /* For older distros: */
20 #ifndef MAP_STACK
21 # define MAP_STACK              0x20000
22 #endif
23
24 #ifndef MADV_HWPOISON
25 # define MADV_HWPOISON          100
26 #endif
27
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE         12
30 #endif
31
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE       13
34 #endif
35
36 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
37                                          unsigned long arg,
38                                          u8 arg_idx __maybe_unused,
39                                          u8 *arg_mask __maybe_unused)
40 {
41         return scnprintf(bf, size, "%#lx", arg);
42 }
43
44 #define SCA_HEX syscall_arg__scnprintf_hex
45
46 static size_t syscall_arg__scnprintf_whence(char *bf, size_t size,
47                                             unsigned long arg,
48                                             u8 arg_idx __maybe_unused,
49                                             u8 *arg_mask __maybe_unused)
50 {
51         int whence = arg;
52
53         switch (whence) {
54 #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
55         P_WHENCE(SET);
56         P_WHENCE(CUR);
57         P_WHENCE(END);
58 #ifdef SEEK_DATA
59         P_WHENCE(DATA);
60 #endif
61 #ifdef SEEK_HOLE
62         P_WHENCE(HOLE);
63 #endif
64 #undef P_WHENCE
65         default: break;
66         }
67
68         return scnprintf(bf, size, "%#x", whence);
69 }
70
71 #define SCA_WHENCE syscall_arg__scnprintf_whence
72
73 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
74                                                unsigned long arg,
75                                                u8 arg_idx __maybe_unused,
76                                                u8 *arg_mask __maybe_unused)
77 {
78         int printed = 0, prot = arg;
79
80         if (prot == PROT_NONE)
81                 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83         if (prot & PROT_##n) { \
84                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
85                 prot &= ~PROT_##n; \
86         }
87
88         P_MMAP_PROT(EXEC);
89         P_MMAP_PROT(READ);
90         P_MMAP_PROT(WRITE);
91 #ifdef PROT_SEM
92         P_MMAP_PROT(SEM);
93 #endif
94         P_MMAP_PROT(GROWSDOWN);
95         P_MMAP_PROT(GROWSUP);
96 #undef P_MMAP_PROT
97
98         if (prot)
99                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
100
101         return printed;
102 }
103
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
105
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107                                                 unsigned long arg, u8 arg_idx __maybe_unused,
108                                                 u8 *arg_mask __maybe_unused)
109 {
110         int printed = 0, flags = arg;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       unsigned long arg, u8 arg_idx __maybe_unused,
152                                                       u8 *arg_mask __maybe_unused)
153 {
154         int behavior = arg;
155
156         switch (behavior) {
157 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158         P_MADV_BHV(NORMAL);
159         P_MADV_BHV(RANDOM);
160         P_MADV_BHV(SEQUENTIAL);
161         P_MADV_BHV(WILLNEED);
162         P_MADV_BHV(DONTNEED);
163         P_MADV_BHV(REMOVE);
164         P_MADV_BHV(DONTFORK);
165         P_MADV_BHV(DOFORK);
166         P_MADV_BHV(HWPOISON);
167 #ifdef MADV_SOFT_OFFLINE
168         P_MADV_BHV(SOFT_OFFLINE);
169 #endif
170         P_MADV_BHV(MERGEABLE);
171         P_MADV_BHV(UNMERGEABLE);
172 #ifdef MADV_HUGEPAGE
173         P_MADV_BHV(HUGEPAGE);
174 #endif
175 #ifdef MADV_NOHUGEPAGE
176         P_MADV_BHV(NOHUGEPAGE);
177 #endif
178 #ifdef MADV_DONTDUMP
179         P_MADV_BHV(DONTDUMP);
180 #endif
181 #ifdef MADV_DODUMP
182         P_MADV_BHV(DODUMP);
183 #endif
184 #undef P_MADV_PHV
185         default: break;
186         }
187
188         return scnprintf(bf, size, "%#x", behavior);
189 }
190
191 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
192
193 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg,
194                                               u8 arg_idx __maybe_unused, u8 *arg_mask)
195 {
196         enum syscall_futex_args {
197                 SCF_UADDR   = (1 << 0),
198                 SCF_OP      = (1 << 1),
199                 SCF_VAL     = (1 << 2),
200                 SCF_TIMEOUT = (1 << 3),
201                 SCF_UADDR2  = (1 << 4),
202                 SCF_VAL3    = (1 << 5),
203         };
204         int op = arg;
205         int cmd = op & FUTEX_CMD_MASK;
206         size_t printed = 0;
207
208         switch (cmd) {
209 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
210         P_FUTEX_OP(WAIT);           *arg_mask |= SCF_VAL3|SCF_UADDR2;             break;
211         P_FUTEX_OP(WAKE);           *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
212         P_FUTEX_OP(FD);             *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
213         P_FUTEX_OP(REQUEUE);        *arg_mask |= SCF_VAL3|SCF_TIMEOUT;            break;
214         P_FUTEX_OP(CMP_REQUEUE);    *arg_mask |= SCF_TIMEOUT;                     break;
215         P_FUTEX_OP(CMP_REQUEUE_PI); *arg_mask |= SCF_TIMEOUT;                     break;
216         P_FUTEX_OP(WAKE_OP);                                                      break;
217         P_FUTEX_OP(LOCK_PI);        *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
218         P_FUTEX_OP(UNLOCK_PI);      *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
219         P_FUTEX_OP(TRYLOCK_PI);     *arg_mask |= SCF_VAL3|SCF_UADDR2;             break;
220         P_FUTEX_OP(WAIT_BITSET);    *arg_mask |= SCF_UADDR2;                      break;
221         P_FUTEX_OP(WAKE_BITSET);    *arg_mask |= SCF_UADDR2;                      break;
222         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
223         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
224         }
225
226         if (op & FUTEX_PRIVATE_FLAG)
227                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
228
229         if (op & FUTEX_CLOCK_REALTIME)
230                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
231
232         return printed;
233 }
234
235 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
236
237 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
238                                                unsigned long arg,
239                                                u8 arg_idx, u8 *arg_mask)
240 {
241         int printed = 0, flags = arg;
242
243         if (!(flags & O_CREAT))
244                 *arg_mask |= 1 << (arg_idx + 1); /* Mask the mode parm */
245
246         if (flags == 0)
247                 return scnprintf(bf, size, "RDONLY");
248 #define P_FLAG(n) \
249         if (flags & O_##n) { \
250                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
251                 flags &= ~O_##n; \
252         }
253
254         P_FLAG(APPEND);
255         P_FLAG(ASYNC);
256         P_FLAG(CLOEXEC);
257         P_FLAG(CREAT);
258         P_FLAG(DIRECT);
259         P_FLAG(DIRECTORY);
260         P_FLAG(EXCL);
261         P_FLAG(LARGEFILE);
262         P_FLAG(NOATIME);
263         P_FLAG(NOCTTY);
264 #ifdef O_NONBLOCK
265         P_FLAG(NONBLOCK);
266 #elif O_NDELAY
267         P_FLAG(NDELAY);
268 #endif
269 #ifdef O_PATH
270         P_FLAG(PATH);
271 #endif
272         P_FLAG(RDWR);
273 #ifdef O_DSYNC
274         if ((flags & O_SYNC) == O_SYNC)
275                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
276         else {
277                 P_FLAG(DSYNC);
278         }
279 #else
280         P_FLAG(SYNC);
281 #endif
282         P_FLAG(TRUNC);
283         P_FLAG(WRONLY);
284 #undef P_FLAG
285
286         if (flags)
287                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
288
289         return printed;
290 }
291
292 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
293
294 static struct syscall_fmt {
295         const char *name;
296         const char *alias;
297         size_t     (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 arg_idx, u8 *arg_mask);
298         bool       errmsg;
299         bool       timeout;
300         bool       hexret;
301 } syscall_fmts[] = {
302         { .name     = "access",     .errmsg = true, },
303         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
304         { .name     = "brk",        .hexret = true,
305           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
306         { .name     = "mmap",       .hexret = true, },
307         { .name     = "connect",    .errmsg = true, },
308         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
309         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
310         { .name     = "futex",      .errmsg = true,
311           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
312         { .name     = "ioctl",      .errmsg = true,
313           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
314         { .name     = "lseek",      .errmsg = true,
315           .arg_scnprintf = { [2] = SCA_WHENCE, /* whence */ }, },
316         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
317         { .name     = "madvise",    .errmsg = true,
318           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
319                              [2] = SCA_MADV_BHV, /* behavior */ }, },
320         { .name     = "mmap",       .hexret = true,
321           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
322                              [2] = SCA_MMAP_PROT, /* prot */
323                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
324         { .name     = "mprotect",   .errmsg = true,
325           .arg_scnprintf = { [0] = SCA_HEX, /* start */
326                              [2] = SCA_MMAP_PROT, /* prot */ }, },
327         { .name     = "mremap",     .hexret = true,
328           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
329                              [4] = SCA_HEX, /* new_addr */ }, },
330         { .name     = "munmap",     .errmsg = true,
331           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
332         { .name     = "open",       .errmsg = true,
333           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
334         { .name     = "open_by_handle_at", .errmsg = true,
335           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
336         { .name     = "openat",     .errmsg = true,
337           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
338         { .name     = "poll",       .errmsg = true, .timeout = true, },
339         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
340         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
341         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
342         { .name     = "read",       .errmsg = true, },
343         { .name     = "recvfrom",   .errmsg = true, },
344         { .name     = "select",     .errmsg = true, .timeout = true, },
345         { .name     = "socket",     .errmsg = true, },
346         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
347         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
348 };
349
350 static int syscall_fmt__cmp(const void *name, const void *fmtp)
351 {
352         const struct syscall_fmt *fmt = fmtp;
353         return strcmp(name, fmt->name);
354 }
355
356 static struct syscall_fmt *syscall_fmt__find(const char *name)
357 {
358         const int nmemb = ARRAY_SIZE(syscall_fmts);
359         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
360 }
361
362 struct syscall {
363         struct event_format *tp_format;
364         const char          *name;
365         bool                filtered;
366         struct syscall_fmt  *fmt;
367         size_t              (**arg_scnprintf)(char *bf, size_t size,
368                                               unsigned long arg, u8 arg_idx, u8 *args_mask);
369 };
370
371 static size_t fprintf_duration(unsigned long t, FILE *fp)
372 {
373         double duration = (double)t / NSEC_PER_MSEC;
374         size_t printed = fprintf(fp, "(");
375
376         if (duration >= 1.0)
377                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
378         else if (duration >= 0.01)
379                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
380         else
381                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
382         return printed + fprintf(fp, "): ");
383 }
384
385 struct thread_trace {
386         u64               entry_time;
387         u64               exit_time;
388         bool              entry_pending;
389         unsigned long     nr_events;
390         char              *entry_str;
391         double            runtime_ms;
392 };
393
394 static struct thread_trace *thread_trace__new(void)
395 {
396         return zalloc(sizeof(struct thread_trace));
397 }
398
399 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
400 {
401         struct thread_trace *ttrace;
402
403         if (thread == NULL)
404                 goto fail;
405
406         if (thread->priv == NULL)
407                 thread->priv = thread_trace__new();
408                 
409         if (thread->priv == NULL)
410                 goto fail;
411
412         ttrace = thread->priv;
413         ++ttrace->nr_events;
414
415         return ttrace;
416 fail:
417         color_fprintf(fp, PERF_COLOR_RED,
418                       "WARNING: not enough memory, dropping samples!\n");
419         return NULL;
420 }
421
422 struct trace {
423         struct perf_tool        tool;
424         int                     audit_machine;
425         struct {
426                 int             max;
427                 struct syscall  *table;
428         } syscalls;
429         struct perf_record_opts opts;
430         struct machine          host;
431         u64                     base_time;
432         FILE                    *output;
433         unsigned long           nr_events;
434         struct strlist          *ev_qualifier;
435         bool                    not_ev_qualifier;
436         struct intlist          *tid_list;
437         struct intlist          *pid_list;
438         bool                    sched;
439         bool                    multiple_threads;
440         double                  duration_filter;
441         double                  runtime_ms;
442 };
443
444 static bool trace__filter_duration(struct trace *trace, double t)
445 {
446         return t < (trace->duration_filter * NSEC_PER_MSEC);
447 }
448
449 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
450 {
451         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
452
453         return fprintf(fp, "%10.3f ", ts);
454 }
455
456 static bool done = false;
457
458 static void sig_handler(int sig __maybe_unused)
459 {
460         done = true;
461 }
462
463 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
464                                         u64 duration, u64 tstamp, FILE *fp)
465 {
466         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
467         printed += fprintf_duration(duration, fp);
468
469         if (trace->multiple_threads)
470                 printed += fprintf(fp, "%d ", thread->tid);
471
472         return printed;
473 }
474
475 static int trace__process_event(struct trace *trace, struct machine *machine,
476                                 union perf_event *event)
477 {
478         int ret = 0;
479
480         switch (event->header.type) {
481         case PERF_RECORD_LOST:
482                 color_fprintf(trace->output, PERF_COLOR_RED,
483                               "LOST %" PRIu64 " events!\n", event->lost.lost);
484                 ret = machine__process_lost_event(machine, event);
485         default:
486                 ret = machine__process_event(machine, event);
487                 break;
488         }
489
490         return ret;
491 }
492
493 static int trace__tool_process(struct perf_tool *tool,
494                                union perf_event *event,
495                                struct perf_sample *sample __maybe_unused,
496                                struct machine *machine)
497 {
498         struct trace *trace = container_of(tool, struct trace, tool);
499         return trace__process_event(trace, machine, event);
500 }
501
502 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
503 {
504         int err = symbol__init();
505
506         if (err)
507                 return err;
508
509         machine__init(&trace->host, "", HOST_KERNEL_ID);
510         machine__create_kernel_maps(&trace->host);
511
512         if (perf_target__has_task(&trace->opts.target)) {
513                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
514                                                         trace__tool_process,
515                                                         &trace->host);
516         } else {
517                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
518                                                      &trace->host);
519         }
520
521         if (err)
522                 symbol__exit();
523
524         return err;
525 }
526
527 static int syscall__set_arg_fmts(struct syscall *sc)
528 {
529         struct format_field *field;
530         int idx = 0;
531
532         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
533         if (sc->arg_scnprintf == NULL)
534                 return -1;
535
536         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
537                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
538                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
539                 else if (field->flags & FIELD_IS_POINTER)
540                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
541                 ++idx;
542         }
543
544         return 0;
545 }
546
547 static int trace__read_syscall_info(struct trace *trace, int id)
548 {
549         char tp_name[128];
550         struct syscall *sc;
551         const char *name = audit_syscall_to_name(id, trace->audit_machine);
552
553         if (name == NULL)
554                 return -1;
555
556         if (id > trace->syscalls.max) {
557                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
558
559                 if (nsyscalls == NULL)
560                         return -1;
561
562                 if (trace->syscalls.max != -1) {
563                         memset(nsyscalls + trace->syscalls.max + 1, 0,
564                                (id - trace->syscalls.max) * sizeof(*sc));
565                 } else {
566                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
567                 }
568
569                 trace->syscalls.table = nsyscalls;
570                 trace->syscalls.max   = id;
571         }
572
573         sc = trace->syscalls.table + id;
574         sc->name = name;
575
576         if (trace->ev_qualifier) {
577                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
578
579                 if (!(in ^ trace->not_ev_qualifier)) {
580                         sc->filtered = true;
581                         /*
582                          * No need to do read tracepoint information since this will be
583                          * filtered out.
584                          */
585                         return 0;
586                 }
587         }
588
589         sc->fmt  = syscall_fmt__find(sc->name);
590
591         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
592         sc->tp_format = event_format__new("syscalls", tp_name);
593
594         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
595                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
596                 sc->tp_format = event_format__new("syscalls", tp_name);
597         }
598
599         if (sc->tp_format == NULL)
600                 return -1;
601
602         return syscall__set_arg_fmts(sc);
603 }
604
605 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
606                                       unsigned long *args)
607 {
608         int i = 0;
609         size_t printed = 0;
610
611         if (sc->tp_format != NULL) {
612                 struct format_field *field;
613                 u8 mask = 0, bit = 1;
614
615                 for (field = sc->tp_format->format.fields->next; field;
616                      field = field->next, ++i, bit <<= 1) {
617                         if (mask & bit)
618                                 continue;
619
620                         printed += scnprintf(bf + printed, size - printed,
621                                              "%s%s: ", printed ? ", " : "", field->name);
622
623                         if (sc->arg_scnprintf && sc->arg_scnprintf[i]) {
624                                 printed += sc->arg_scnprintf[i](bf + printed, size - printed,
625                                                                 args[i], i, &mask);
626                         } else {
627                                 printed += scnprintf(bf + printed, size - printed,
628                                                      "%ld", args[i]);
629                         }
630                 }
631         } else {
632                 while (i < 6) {
633                         printed += scnprintf(bf + printed, size - printed,
634                                              "%sarg%d: %ld",
635                                              printed ? ", " : "", i, args[i]);
636                         ++i;
637                 }
638         }
639
640         return printed;
641 }
642
643 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
644                                   struct perf_sample *sample);
645
646 static struct syscall *trace__syscall_info(struct trace *trace,
647                                            struct perf_evsel *evsel,
648                                            struct perf_sample *sample)
649 {
650         int id = perf_evsel__intval(evsel, sample, "id");
651
652         if (id < 0) {
653
654                 /*
655                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
656                  * before that, leaving at a higher verbosity level till that is
657                  * explained. Reproduced with plain ftrace with:
658                  *
659                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
660                  * grep "NR -1 " /t/trace_pipe
661                  *
662                  * After generating some load on the machine.
663                  */
664                 if (verbose > 1) {
665                         static u64 n;
666                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
667                                 id, perf_evsel__name(evsel), ++n);
668                 }
669                 return NULL;
670         }
671
672         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
673             trace__read_syscall_info(trace, id))
674                 goto out_cant_read;
675
676         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
677                 goto out_cant_read;
678
679         return &trace->syscalls.table[id];
680
681 out_cant_read:
682         if (verbose) {
683                 fprintf(trace->output, "Problems reading syscall %d", id);
684                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
685                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
686                 fputs(" information\n", trace->output);
687         }
688         return NULL;
689 }
690
691 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
692                             struct perf_sample *sample)
693 {
694         char *msg;
695         void *args;
696         size_t printed = 0;
697         struct thread *thread;
698         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
699         struct thread_trace *ttrace;
700
701         if (sc == NULL)
702                 return -1;
703
704         if (sc->filtered)
705                 return 0;
706
707         thread = machine__findnew_thread(&trace->host, sample->pid,
708                                          sample->tid);
709         ttrace = thread__trace(thread, trace->output);
710         if (ttrace == NULL)
711                 return -1;
712
713         args = perf_evsel__rawptr(evsel, sample, "args");
714         if (args == NULL) {
715                 fprintf(trace->output, "Problems reading syscall arguments\n");
716                 return -1;
717         }
718
719         ttrace = thread->priv;
720
721         if (ttrace->entry_str == NULL) {
722                 ttrace->entry_str = malloc(1024);
723                 if (!ttrace->entry_str)
724                         return -1;
725         }
726
727         ttrace->entry_time = sample->time;
728         msg = ttrace->entry_str;
729         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
730
731         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
732
733         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
734                 if (!trace->duration_filter) {
735                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
736                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
737                 }
738         } else
739                 ttrace->entry_pending = true;
740
741         return 0;
742 }
743
744 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
745                            struct perf_sample *sample)
746 {
747         int ret;
748         u64 duration = 0;
749         struct thread *thread;
750         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
751         struct thread_trace *ttrace;
752
753         if (sc == NULL)
754                 return -1;
755
756         if (sc->filtered)
757                 return 0;
758
759         thread = machine__findnew_thread(&trace->host, sample->pid,
760                                          sample->tid);
761         ttrace = thread__trace(thread, trace->output);
762         if (ttrace == NULL)
763                 return -1;
764
765         ret = perf_evsel__intval(evsel, sample, "ret");
766
767         ttrace = thread->priv;
768
769         ttrace->exit_time = sample->time;
770
771         if (ttrace->entry_time) {
772                 duration = sample->time - ttrace->entry_time;
773                 if (trace__filter_duration(trace, duration))
774                         goto out;
775         } else if (trace->duration_filter)
776                 goto out;
777
778         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
779
780         if (ttrace->entry_pending) {
781                 fprintf(trace->output, "%-70s", ttrace->entry_str);
782         } else {
783                 fprintf(trace->output, " ... [");
784                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
785                 fprintf(trace->output, "]: %s()", sc->name);
786         }
787
788         if (sc->fmt == NULL) {
789 signed_print:
790                 fprintf(trace->output, ") = %d", ret);
791         } else if (ret < 0 && sc->fmt->errmsg) {
792                 char bf[256];
793                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
794                            *e = audit_errno_to_name(-ret);
795
796                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
797         } else if (ret == 0 && sc->fmt->timeout)
798                 fprintf(trace->output, ") = 0 Timeout");
799         else if (sc->fmt->hexret)
800                 fprintf(trace->output, ") = %#x", ret);
801         else
802                 goto signed_print;
803
804         fputc('\n', trace->output);
805 out:
806         ttrace->entry_pending = false;
807
808         return 0;
809 }
810
811 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
812                                      struct perf_sample *sample)
813 {
814         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
815         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
816         struct thread *thread = machine__findnew_thread(&trace->host,
817                                                         sample->pid,
818                                                         sample->tid);
819         struct thread_trace *ttrace = thread__trace(thread, trace->output);
820
821         if (ttrace == NULL)
822                 goto out_dump;
823
824         ttrace->runtime_ms += runtime_ms;
825         trace->runtime_ms += runtime_ms;
826         return 0;
827
828 out_dump:
829         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
830                evsel->name,
831                perf_evsel__strval(evsel, sample, "comm"),
832                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
833                runtime,
834                perf_evsel__intval(evsel, sample, "vruntime"));
835         return 0;
836 }
837
838 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
839 {
840         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
841             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
842                 return false;
843
844         if (trace->pid_list || trace->tid_list)
845                 return true;
846
847         return false;
848 }
849
850 static int trace__process_sample(struct perf_tool *tool,
851                                  union perf_event *event __maybe_unused,
852                                  struct perf_sample *sample,
853                                  struct perf_evsel *evsel,
854                                  struct machine *machine __maybe_unused)
855 {
856         struct trace *trace = container_of(tool, struct trace, tool);
857         int err = 0;
858
859         tracepoint_handler handler = evsel->handler.func;
860
861         if (skip_sample(trace, sample))
862                 return 0;
863
864         if (trace->base_time == 0)
865                 trace->base_time = sample->time;
866
867         if (handler)
868                 handler(trace, evsel, sample);
869
870         return err;
871 }
872
873 static bool
874 perf_session__has_tp(struct perf_session *session, const char *name)
875 {
876         struct perf_evsel *evsel;
877
878         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
879
880         return evsel != NULL;
881 }
882
883 static int parse_target_str(struct trace *trace)
884 {
885         if (trace->opts.target.pid) {
886                 trace->pid_list = intlist__new(trace->opts.target.pid);
887                 if (trace->pid_list == NULL) {
888                         pr_err("Error parsing process id string\n");
889                         return -EINVAL;
890                 }
891         }
892
893         if (trace->opts.target.tid) {
894                 trace->tid_list = intlist__new(trace->opts.target.tid);
895                 if (trace->tid_list == NULL) {
896                         pr_err("Error parsing thread id string\n");
897                         return -EINVAL;
898                 }
899         }
900
901         return 0;
902 }
903
904 static int trace__run(struct trace *trace, int argc, const char **argv)
905 {
906         struct perf_evlist *evlist = perf_evlist__new();
907         struct perf_evsel *evsel;
908         int err = -1, i;
909         unsigned long before;
910         const bool forks = argc > 0;
911
912         if (evlist == NULL) {
913                 fprintf(trace->output, "Not enough memory to run!\n");
914                 goto out;
915         }
916
917         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
918             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
919                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
920                 goto out_delete_evlist;
921         }
922
923         if (trace->sched &&
924             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
925                                    trace__sched_stat_runtime)) {
926                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
927                 goto out_delete_evlist;
928         }
929
930         err = perf_evlist__create_maps(evlist, &trace->opts.target);
931         if (err < 0) {
932                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
933                 goto out_delete_evlist;
934         }
935
936         err = trace__symbols_init(trace, evlist);
937         if (err < 0) {
938                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
939                 goto out_delete_maps;
940         }
941
942         perf_evlist__config(evlist, &trace->opts);
943
944         signal(SIGCHLD, sig_handler);
945         signal(SIGINT, sig_handler);
946
947         if (forks) {
948                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
949                                                     argv, false, false);
950                 if (err < 0) {
951                         fprintf(trace->output, "Couldn't run the workload!\n");
952                         goto out_delete_maps;
953                 }
954         }
955
956         err = perf_evlist__open(evlist);
957         if (err < 0) {
958                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
959                 goto out_delete_maps;
960         }
961
962         err = perf_evlist__mmap(evlist, UINT_MAX, false);
963         if (err < 0) {
964                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
965                 goto out_close_evlist;
966         }
967
968         perf_evlist__enable(evlist);
969
970         if (forks)
971                 perf_evlist__start_workload(evlist);
972
973         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
974 again:
975         before = trace->nr_events;
976
977         for (i = 0; i < evlist->nr_mmaps; i++) {
978                 union perf_event *event;
979
980                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
981                         const u32 type = event->header.type;
982                         tracepoint_handler handler;
983                         struct perf_sample sample;
984
985                         ++trace->nr_events;
986
987                         err = perf_evlist__parse_sample(evlist, event, &sample);
988                         if (err) {
989                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
990                                 continue;
991                         }
992
993                         if (trace->base_time == 0)
994                                 trace->base_time = sample.time;
995
996                         if (type != PERF_RECORD_SAMPLE) {
997                                 trace__process_event(trace, &trace->host, event);
998                                 continue;
999                         }
1000
1001                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1002                         if (evsel == NULL) {
1003                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1004                                 continue;
1005                         }
1006
1007                         if (sample.raw_data == NULL) {
1008                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1009                                        perf_evsel__name(evsel), sample.tid,
1010                                        sample.cpu, sample.raw_size);
1011                                 continue;
1012                         }
1013
1014                         handler = evsel->handler.func;
1015                         handler(trace, evsel, &sample);
1016
1017                         if (done)
1018                                 goto out_unmap_evlist;
1019                 }
1020         }
1021
1022         if (trace->nr_events == before) {
1023                 if (done)
1024                         goto out_unmap_evlist;
1025
1026                 poll(evlist->pollfd, evlist->nr_fds, -1);
1027         }
1028
1029         if (done)
1030                 perf_evlist__disable(evlist);
1031
1032         goto again;
1033
1034 out_unmap_evlist:
1035         perf_evlist__munmap(evlist);
1036 out_close_evlist:
1037         perf_evlist__close(evlist);
1038 out_delete_maps:
1039         perf_evlist__delete_maps(evlist);
1040 out_delete_evlist:
1041         perf_evlist__delete(evlist);
1042 out:
1043         return err;
1044 }
1045
1046 static int trace__replay(struct trace *trace)
1047 {
1048         const struct perf_evsel_str_handler handlers[] = {
1049                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1050                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1051         };
1052
1053         struct perf_session *session;
1054         int err = -1;
1055
1056         trace->tool.sample        = trace__process_sample;
1057         trace->tool.mmap          = perf_event__process_mmap;
1058         trace->tool.comm          = perf_event__process_comm;
1059         trace->tool.exit          = perf_event__process_exit;
1060         trace->tool.fork          = perf_event__process_fork;
1061         trace->tool.attr          = perf_event__process_attr;
1062         trace->tool.tracing_data = perf_event__process_tracing_data;
1063         trace->tool.build_id      = perf_event__process_build_id;
1064
1065         trace->tool.ordered_samples = true;
1066         trace->tool.ordering_requires_timestamps = true;
1067
1068         /* add tid to output */
1069         trace->multiple_threads = true;
1070
1071         if (symbol__init() < 0)
1072                 return -1;
1073
1074         session = perf_session__new(input_name, O_RDONLY, 0, false,
1075                                     &trace->tool);
1076         if (session == NULL)
1077                 return -ENOMEM;
1078
1079         err = perf_session__set_tracepoints_handlers(session, handlers);
1080         if (err)
1081                 goto out;
1082
1083         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1084                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1085                 goto out;
1086         }
1087
1088         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1089                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1090                 goto out;
1091         }
1092
1093         err = parse_target_str(trace);
1094         if (err != 0)
1095                 goto out;
1096
1097         setup_pager();
1098
1099         err = perf_session__process_events(session, &trace->tool);
1100         if (err)
1101                 pr_err("Failed to process events, error %d", err);
1102
1103 out:
1104         perf_session__delete(session);
1105
1106         return err;
1107 }
1108
1109 static size_t trace__fprintf_threads_header(FILE *fp)
1110 {
1111         size_t printed;
1112
1113         printed  = fprintf(fp, "\n _____________________________________________________________________\n");
1114         printed += fprintf(fp," __)    Summary of events    (__\n\n");
1115         printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
1116         printed += fprintf(fp," _____________________________________________________________________\n\n");
1117
1118         return printed;
1119 }
1120
1121 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1122 {
1123         size_t printed = trace__fprintf_threads_header(fp);
1124         struct rb_node *nd;
1125
1126         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1127                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1128                 struct thread_trace *ttrace = thread->priv;
1129                 const char *color;
1130                 double ratio;
1131
1132                 if (ttrace == NULL)
1133                         continue;
1134
1135                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1136
1137                 color = PERF_COLOR_NORMAL;
1138                 if (ratio > 50.0)
1139                         color = PERF_COLOR_RED;
1140                 else if (ratio > 25.0)
1141                         color = PERF_COLOR_GREEN;
1142                 else if (ratio > 5.0)
1143                         color = PERF_COLOR_YELLOW;
1144
1145                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1146                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1147                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1148                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1149         }
1150
1151         return printed;
1152 }
1153
1154 static int trace__set_duration(const struct option *opt, const char *str,
1155                                int unset __maybe_unused)
1156 {
1157         struct trace *trace = opt->value;
1158
1159         trace->duration_filter = atof(str);
1160         return 0;
1161 }
1162
1163 static int trace__open_output(struct trace *trace, const char *filename)
1164 {
1165         struct stat st;
1166
1167         if (!stat(filename, &st) && st.st_size) {
1168                 char oldname[PATH_MAX];
1169
1170                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1171                 unlink(oldname);
1172                 rename(filename, oldname);
1173         }
1174
1175         trace->output = fopen(filename, "w");
1176
1177         return trace->output == NULL ? -errno : 0;
1178 }
1179
1180 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1181 {
1182         const char * const trace_usage[] = {
1183                 "perf trace [<options>] [<command>]",
1184                 "perf trace [<options>] -- <command> [<options>]",
1185                 NULL
1186         };
1187         struct trace trace = {
1188                 .audit_machine = audit_detect_machine(),
1189                 .syscalls = {
1190                         . max = -1,
1191                 },
1192                 .opts = {
1193                         .target = {
1194                                 .uid       = UINT_MAX,
1195                                 .uses_mmap = true,
1196                         },
1197                         .user_freq     = UINT_MAX,
1198                         .user_interval = ULLONG_MAX,
1199                         .no_delay      = true,
1200                         .mmap_pages    = 1024,
1201                 },
1202                 .output = stdout,
1203         };
1204         const char *output_name = NULL;
1205         const char *ev_qualifier_str = NULL;
1206         const struct option trace_options[] = {
1207         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1208                     "list of events to trace"),
1209         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1210         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1211         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1212                     "trace events on existing process id"),
1213         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1214                     "trace events on existing thread id"),
1215         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1216                     "system-wide collection from all CPUs"),
1217         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1218                     "list of cpus to monitor"),
1219         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1220                     "child tasks do not inherit counters"),
1221         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1222                      "number of mmap data pages"),
1223         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1224                    "user to profile"),
1225         OPT_CALLBACK(0, "duration", &trace, "float",
1226                      "show only events with duration > N.M ms",
1227                      trace__set_duration),
1228         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1229         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1230         OPT_END()
1231         };
1232         int err;
1233         char bf[BUFSIZ];
1234
1235         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1236
1237         if (output_name != NULL) {
1238                 err = trace__open_output(&trace, output_name);
1239                 if (err < 0) {
1240                         perror("failed to create output file");
1241                         goto out;
1242                 }
1243         }
1244
1245         if (ev_qualifier_str != NULL) {
1246                 const char *s = ev_qualifier_str;
1247
1248                 trace.not_ev_qualifier = *s == '!';
1249                 if (trace.not_ev_qualifier)
1250                         ++s;
1251                 trace.ev_qualifier = strlist__new(true, s);
1252                 if (trace.ev_qualifier == NULL) {
1253                         fputs("Not enough memory to parse event qualifier",
1254                               trace.output);
1255                         err = -ENOMEM;
1256                         goto out_close;
1257                 }
1258         }
1259
1260         err = perf_target__validate(&trace.opts.target);
1261         if (err) {
1262                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1263                 fprintf(trace.output, "%s", bf);
1264                 goto out_close;
1265         }
1266
1267         err = perf_target__parse_uid(&trace.opts.target);
1268         if (err) {
1269                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1270                 fprintf(trace.output, "%s", bf);
1271                 goto out_close;
1272         }
1273
1274         if (!argc && perf_target__none(&trace.opts.target))
1275                 trace.opts.target.system_wide = true;
1276
1277         if (input_name)
1278                 err = trace__replay(&trace);
1279         else
1280                 err = trace__run(&trace, argc, argv);
1281
1282         if (trace.sched && !err)
1283                 trace__fprintf_thread_summary(&trace, trace.output);
1284
1285 out_close:
1286         if (output_name != NULL)
1287                 fclose(trace.output);
1288 out:
1289         return err;
1290 }