perf trace: Beautify socket 'type' arg
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"

#include <libaudit.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <linux/futex.h>
18
/*
 * Fallback values for constants that the system headers of older distros
 * do not provide. Each one is only defined when the headers left it out.
 */
#if !defined(MAP_STACK)
# define MAP_STACK              0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON          100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE         12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE       13
#endif
35
36 struct syscall_arg {
37         unsigned long val;
38         void          *parm;
39         u8            idx;
40         u8            mask;
41 };
42
/* A table of strings plus its length, looked up by a small integer index. */
struct strarray {
        int nr_entries;
        const char **entries;
};

/*
 * Define "strarray__<array>" describing the string table <array>. The
 * entry count is taken with ARRAY_SIZE(), so <array> has to be a real
 * array, not a pointer.
 */
#define DEFINE_STRARRAY(array)                          \
        struct strarray strarray__##array = {           \
                .nr_entries = ARRAY_SIZE(array),        \
                .entries    = array,                    \
        }
52
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54                                               struct syscall_arg *arg)
55 {
56         int idx = arg->val;
57         struct strarray *sa = arg->parm;
58
59         if (idx < 0 || idx >= sa->nr_entries)
60                 return scnprintf(bf, size, "%d", idx);
61
62         return scnprintf(bf, size, "%s", sa->entries[idx]);
63 }
64
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
66
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68                                          struct syscall_arg *arg)
69 {
70         return scnprintf(bf, size, "%#lx", arg->val);
71 }
72
73 #define SCA_HEX syscall_arg__scnprintf_hex
74
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76                                                struct syscall_arg *arg)
77 {
78         int printed = 0, prot = arg->val;
79
80         if (prot == PROT_NONE)
81                 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83         if (prot & PROT_##n) { \
84                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
85                 prot &= ~PROT_##n; \
86         }
87
88         P_MMAP_PROT(EXEC);
89         P_MMAP_PROT(READ);
90         P_MMAP_PROT(WRITE);
91 #ifdef PROT_SEM
92         P_MMAP_PROT(SEM);
93 #endif
94         P_MMAP_PROT(GROWSDOWN);
95         P_MMAP_PROT(GROWSUP);
96 #undef P_MMAP_PROT
97
98         if (prot)
99                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
100
101         return printed;
102 }
103
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
105
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107                                                 struct syscall_arg *arg)
108 {
109         int printed = 0, flags = arg->val;
110
111 #define P_MMAP_FLAG(n) \
112         if (flags & MAP_##n) { \
113                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
114                 flags &= ~MAP_##n; \
115         }
116
117         P_MMAP_FLAG(SHARED);
118         P_MMAP_FLAG(PRIVATE);
119 #ifdef MAP_32BIT
120         P_MMAP_FLAG(32BIT);
121 #endif
122         P_MMAP_FLAG(ANONYMOUS);
123         P_MMAP_FLAG(DENYWRITE);
124         P_MMAP_FLAG(EXECUTABLE);
125         P_MMAP_FLAG(FILE);
126         P_MMAP_FLAG(FIXED);
127         P_MMAP_FLAG(GROWSDOWN);
128 #ifdef MAP_HUGETLB
129         P_MMAP_FLAG(HUGETLB);
130 #endif
131         P_MMAP_FLAG(LOCKED);
132         P_MMAP_FLAG(NONBLOCK);
133         P_MMAP_FLAG(NORESERVE);
134         P_MMAP_FLAG(POPULATE);
135         P_MMAP_FLAG(STACK);
136 #ifdef MAP_UNINITIALIZED
137         P_MMAP_FLAG(UNINITIALIZED);
138 #endif
139 #undef P_MMAP_FLAG
140
141         if (flags)
142                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
143
144         return printed;
145 }
146
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
148
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150                                                       struct syscall_arg *arg)
151 {
152         int behavior = arg->val;
153
154         switch (behavior) {
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
156         P_MADV_BHV(NORMAL);
157         P_MADV_BHV(RANDOM);
158         P_MADV_BHV(SEQUENTIAL);
159         P_MADV_BHV(WILLNEED);
160         P_MADV_BHV(DONTNEED);
161         P_MADV_BHV(REMOVE);
162         P_MADV_BHV(DONTFORK);
163         P_MADV_BHV(DOFORK);
164         P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166         P_MADV_BHV(SOFT_OFFLINE);
167 #endif
168         P_MADV_BHV(MERGEABLE);
169         P_MADV_BHV(UNMERGEABLE);
170 #ifdef MADV_HUGEPAGE
171         P_MADV_BHV(HUGEPAGE);
172 #endif
173 #ifdef MADV_NOHUGEPAGE
174         P_MADV_BHV(NOHUGEPAGE);
175 #endif
176 #ifdef MADV_DONTDUMP
177         P_MADV_BHV(DONTDUMP);
178 #endif
179 #ifdef MADV_DODUMP
180         P_MADV_BHV(DODUMP);
181 #endif
182 #undef P_MADV_PHV
183         default: break;
184         }
185
186         return scnprintf(bf, size, "%#x", behavior);
187 }
188
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
190
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
192 {
193         enum syscall_futex_args {
194                 SCF_UADDR   = (1 << 0),
195                 SCF_OP      = (1 << 1),
196                 SCF_VAL     = (1 << 2),
197                 SCF_TIMEOUT = (1 << 3),
198                 SCF_UADDR2  = (1 << 4),
199                 SCF_VAL3    = (1 << 5),
200         };
201         int op = arg->val;
202         int cmd = op & FUTEX_CMD_MASK;
203         size_t printed = 0;
204
205         switch (cmd) {
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
208         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
211         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
212         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
213         P_FUTEX_OP(WAKE_OP);                                                      break;
214         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
217         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
218         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
219         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
220         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
221         }
222
223         if (op & FUTEX_PRIVATE_FLAG)
224                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
225
226         if (op & FUTEX_CLOCK_REALTIME)
227                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
228
229         return printed;
230 }
231
232 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
233
/* Names for the 'which' argument of getitimer/setitimer, indexed by value. */
static const char *itimers[] = {
        "REAL",
        "VIRTUAL",
        "PROF",
};
static DEFINE_STRARRAY(itimers);
236
/*
 * Names for the lseek 'whence' argument, indexed by value. DATA and HOLE
 * are only listed when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
        "SET",
        "CUR",
        "END",
#ifdef SEEK_DATA
        "DATA",
#endif
#ifdef SEEK_HOLE
        "HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
246
/*
 * Names for the fcntl 'cmd' argument, indexed by value. All entries are
 * spelled without the "F_" prefix so the output is uniform.
 */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
        "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
        "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
254
/* Names for the 'how' argument of rt_sigprocmask, indexed by value. */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);
257
/*
 * Names for the socket 'family' argument, indexed by the AF_* value.
 *
 * The table is positional, so every family number needs a slot. Number 28
 * sits between AF_IB (27) and AF_CAN (29); it is AF_MPLS in current kernel
 * headers. Without that slot, CAN and every later family print the name of
 * their successor.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
267
268 #ifndef SOCK_TYPE_MASK
269 #define SOCK_TYPE_MASK 0xf
270 #endif
271
272 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
273                                                       struct syscall_arg *arg)
274 {
275         size_t printed;
276         int type = arg->val,
277             flags = type & ~SOCK_TYPE_MASK;
278
279         type &= SOCK_TYPE_MASK;
280         /*
281          * Can't use a strarray, MIPS may override for ABI reasons.
282          */
283         switch (type) {
284 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
285         P_SK_TYPE(STREAM);
286         P_SK_TYPE(DGRAM);
287         P_SK_TYPE(RAW);
288         P_SK_TYPE(RDM);
289         P_SK_TYPE(SEQPACKET);
290         P_SK_TYPE(DCCP);
291         P_SK_TYPE(PACKET);
292 #undef P_SK_TYPE
293         default:
294                 printed = scnprintf(bf, size, "%#x", type);
295         }
296
297 #define P_SK_FLAG(n) \
298         if (flags & SOCK_##n) { \
299                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
300                 flags &= ~SOCK_##n; \
301         }
302
303         P_SK_FLAG(CLOEXEC);
304         P_SK_FLAG(NONBLOCK);
305 #undef P_SK_FLAG
306
307         if (flags)
308                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
309
310         return printed;
311 }
312
313 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
314
315 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
316                                                struct syscall_arg *arg)
317 {
318         int printed = 0, flags = arg->val;
319
320         if (!(flags & O_CREAT))
321                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
322
323         if (flags == 0)
324                 return scnprintf(bf, size, "RDONLY");
325 #define P_FLAG(n) \
326         if (flags & O_##n) { \
327                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
328                 flags &= ~O_##n; \
329         }
330
331         P_FLAG(APPEND);
332         P_FLAG(ASYNC);
333         P_FLAG(CLOEXEC);
334         P_FLAG(CREAT);
335         P_FLAG(DIRECT);
336         P_FLAG(DIRECTORY);
337         P_FLAG(EXCL);
338         P_FLAG(LARGEFILE);
339         P_FLAG(NOATIME);
340         P_FLAG(NOCTTY);
341 #ifdef O_NONBLOCK
342         P_FLAG(NONBLOCK);
343 #elif O_NDELAY
344         P_FLAG(NDELAY);
345 #endif
346 #ifdef O_PATH
347         P_FLAG(PATH);
348 #endif
349         P_FLAG(RDWR);
350 #ifdef O_DSYNC
351         if ((flags & O_SYNC) == O_SYNC)
352                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
353         else {
354                 P_FLAG(DSYNC);
355         }
356 #else
357         P_FLAG(SYNC);
358 #endif
359         P_FLAG(TRUNC);
360         P_FLAG(WRONLY);
361 #undef P_FLAG
362
363         if (flags)
364                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
365
366         return printed;
367 }
368
369 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
370
371 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
372 {
373         int sig = arg->val;
374
375         switch (sig) {
376 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
377         P_SIGNUM(HUP);
378         P_SIGNUM(INT);
379         P_SIGNUM(QUIT);
380         P_SIGNUM(ILL);
381         P_SIGNUM(TRAP);
382         P_SIGNUM(ABRT);
383         P_SIGNUM(BUS);
384         P_SIGNUM(FPE);
385         P_SIGNUM(KILL);
386         P_SIGNUM(USR1);
387         P_SIGNUM(SEGV);
388         P_SIGNUM(USR2);
389         P_SIGNUM(PIPE);
390         P_SIGNUM(ALRM);
391         P_SIGNUM(TERM);
392         P_SIGNUM(STKFLT);
393         P_SIGNUM(CHLD);
394         P_SIGNUM(CONT);
395         P_SIGNUM(STOP);
396         P_SIGNUM(TSTP);
397         P_SIGNUM(TTIN);
398         P_SIGNUM(TTOU);
399         P_SIGNUM(URG);
400         P_SIGNUM(XCPU);
401         P_SIGNUM(XFSZ);
402         P_SIGNUM(VTALRM);
403         P_SIGNUM(PROF);
404         P_SIGNUM(WINCH);
405         P_SIGNUM(IO);
406         P_SIGNUM(PWR);
407         P_SIGNUM(SYS);
408         default: break;
409         }
410
411         return scnprintf(bf, size, "%#x", sig);
412 }
413
414 #define SCA_SIGNUM syscall_arg__scnprintf_signum
415
416 static struct syscall_fmt {
417         const char *name;
418         const char *alias;
419         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
420         void       *arg_parm[6];
421         bool       errmsg;
422         bool       timeout;
423         bool       hexret;
424 } syscall_fmts[] = {
425         { .name     = "access",     .errmsg = true, },
426         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
427         { .name     = "brk",        .hexret = true,
428           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
429         { .name     = "mmap",       .hexret = true, },
430         { .name     = "connect",    .errmsg = true, },
431         { .name     = "fcntl",      .errmsg = true,
432           .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
433           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
434         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
435         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
436         { .name     = "futex",      .errmsg = true,
437           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
438         { .name     = "getitimer",  .errmsg = true,
439           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
440           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
441         { .name     = "ioctl",      .errmsg = true,
442           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
443         { .name     = "kill",       .errmsg = true,
444           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
445         { .name     = "lseek",      .errmsg = true,
446           .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
447           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
448         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
449         { .name     = "madvise",    .errmsg = true,
450           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
451                              [2] = SCA_MADV_BHV, /* behavior */ }, },
452         { .name     = "mmap",       .hexret = true,
453           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
454                              [2] = SCA_MMAP_PROT, /* prot */
455                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
456         { .name     = "mprotect",   .errmsg = true,
457           .arg_scnprintf = { [0] = SCA_HEX, /* start */
458                              [2] = SCA_MMAP_PROT, /* prot */ }, },
459         { .name     = "mremap",     .hexret = true,
460           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
461                              [4] = SCA_HEX, /* new_addr */ }, },
462         { .name     = "munmap",     .errmsg = true,
463           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
464         { .name     = "open",       .errmsg = true,
465           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
466         { .name     = "open_by_handle_at", .errmsg = true,
467           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
468         { .name     = "openat",     .errmsg = true,
469           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
470         { .name     = "poll",       .errmsg = true, .timeout = true, },
471         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
472         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
473         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
474         { .name     = "read",       .errmsg = true, },
475         { .name     = "recvfrom",   .errmsg = true, },
476         { .name     = "rt_sigaction", .errmsg = true,
477           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
478         { .name     = "rt_sigprocmask", .errmsg = true,
479           .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
480           .arg_parm      = { [0] = &strarray__sighow, /* how */ }, },
481         { .name     = "rt_sigqueueinfo", .errmsg = true,
482           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
483         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
484           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
485         { .name     = "select",     .errmsg = true, .timeout = true, },
486         { .name     = "setitimer",  .errmsg = true,
487           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
488           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
489         { .name     = "socket",     .errmsg = true,
490           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
491                              [1] = SCA_SK_TYPE, /* type */ },
492           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
493         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
494         { .name     = "tgkill",     .errmsg = true,
495           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
496         { .name     = "tkill",      .errmsg = true,
497           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
498         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
499 };
500
501 static int syscall_fmt__cmp(const void *name, const void *fmtp)
502 {
503         const struct syscall_fmt *fmt = fmtp;
504         return strcmp(name, fmt->name);
505 }
506
507 static struct syscall_fmt *syscall_fmt__find(const char *name)
508 {
509         const int nmemb = ARRAY_SIZE(syscall_fmts);
510         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
511 }
512
/* What is known about one syscall; kept in a table indexed by syscall id. */
struct syscall {
        struct event_format *tp_format;   /* "sys_enter_<name>" tracepoint format, may be NULL */
        const char *name;                 /* syscall name; NULL while the slot is unread */
        bool filtered;                    /* excluded by the event qualifier */
        struct syscall_fmt *fmt;          /* matching syscall_fmts[] entry, or NULL */
        /* per-argument formatter, one slot per argument field */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void **arg_parm;                  /* per-argument formatter parameter, taken from fmt */
};
521
522 static size_t fprintf_duration(unsigned long t, FILE *fp)
523 {
524         double duration = (double)t / NSEC_PER_MSEC;
525         size_t printed = fprintf(fp, "(");
526
527         if (duration >= 1.0)
528                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
529         else if (duration >= 0.01)
530                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
531         else
532                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
533         return printed + fprintf(fp, "): ");
534 }
535
536 struct thread_trace {
537         u64               entry_time;
538         u64               exit_time;
539         bool              entry_pending;
540         unsigned long     nr_events;
541         char              *entry_str;
542         double            runtime_ms;
543 };
544
545 static struct thread_trace *thread_trace__new(void)
546 {
547         return zalloc(sizeof(struct thread_trace));
548 }
549
550 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
551 {
552         struct thread_trace *ttrace;
553
554         if (thread == NULL)
555                 goto fail;
556
557         if (thread->priv == NULL)
558                 thread->priv = thread_trace__new();
559                 
560         if (thread->priv == NULL)
561                 goto fail;
562
563         ttrace = thread->priv;
564         ++ttrace->nr_events;
565
566         return ttrace;
567 fail:
568         color_fprintf(fp, PERF_COLOR_RED,
569                       "WARNING: not enough memory, dropping samples!\n");
570         return NULL;
571 }
572
573 struct trace {
574         struct perf_tool        tool;
575         int                     audit_machine;
576         struct {
577                 int             max;
578                 struct syscall  *table;
579         } syscalls;
580         struct perf_record_opts opts;
581         struct machine          host;
582         u64                     base_time;
583         FILE                    *output;
584         unsigned long           nr_events;
585         struct strlist          *ev_qualifier;
586         bool                    not_ev_qualifier;
587         struct intlist          *tid_list;
588         struct intlist          *pid_list;
589         bool                    sched;
590         bool                    multiple_threads;
591         double                  duration_filter;
592         double                  runtime_ms;
593 };
594
595 static bool trace__filter_duration(struct trace *trace, double t)
596 {
597         return t < (trace->duration_filter * NSEC_PER_MSEC);
598 }
599
600 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
601 {
602         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
603
604         return fprintf(fp, "%10.3f ", ts);
605 }
606
607 static bool done = false;
608
609 static void sig_handler(int sig __maybe_unused)
610 {
611         done = true;
612 }
613
614 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
615                                         u64 duration, u64 tstamp, FILE *fp)
616 {
617         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
618         printed += fprintf_duration(duration, fp);
619
620         if (trace->multiple_threads)
621                 printed += fprintf(fp, "%d ", thread->tid);
622
623         return printed;
624 }
625
626 static int trace__process_event(struct trace *trace, struct machine *machine,
627                                 union perf_event *event)
628 {
629         int ret = 0;
630
631         switch (event->header.type) {
632         case PERF_RECORD_LOST:
633                 color_fprintf(trace->output, PERF_COLOR_RED,
634                               "LOST %" PRIu64 " events!\n", event->lost.lost);
635                 ret = machine__process_lost_event(machine, event);
636         default:
637                 ret = machine__process_event(machine, event);
638                 break;
639         }
640
641         return ret;
642 }
643
644 static int trace__tool_process(struct perf_tool *tool,
645                                union perf_event *event,
646                                struct perf_sample *sample __maybe_unused,
647                                struct machine *machine)
648 {
649         struct trace *trace = container_of(tool, struct trace, tool);
650         return trace__process_event(trace, machine, event);
651 }
652
653 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
654 {
655         int err = symbol__init();
656
657         if (err)
658                 return err;
659
660         machine__init(&trace->host, "", HOST_KERNEL_ID);
661         machine__create_kernel_maps(&trace->host);
662
663         if (perf_target__has_task(&trace->opts.target)) {
664                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
665                                                         trace__tool_process,
666                                                         &trace->host);
667         } else {
668                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
669                                                      &trace->host);
670         }
671
672         if (err)
673                 symbol__exit();
674
675         return err;
676 }
677
678 static int syscall__set_arg_fmts(struct syscall *sc)
679 {
680         struct format_field *field;
681         int idx = 0;
682
683         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
684         if (sc->arg_scnprintf == NULL)
685                 return -1;
686
687         if (sc->fmt)
688                 sc->arg_parm = sc->fmt->arg_parm;
689
690         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
691                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
692                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
693                 else if (field->flags & FIELD_IS_POINTER)
694                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
695                 ++idx;
696         }
697
698         return 0;
699 }
700
701 static int trace__read_syscall_info(struct trace *trace, int id)
702 {
703         char tp_name[128];
704         struct syscall *sc;
705         const char *name = audit_syscall_to_name(id, trace->audit_machine);
706
707         if (name == NULL)
708                 return -1;
709
710         if (id > trace->syscalls.max) {
711                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
712
713                 if (nsyscalls == NULL)
714                         return -1;
715
716                 if (trace->syscalls.max != -1) {
717                         memset(nsyscalls + trace->syscalls.max + 1, 0,
718                                (id - trace->syscalls.max) * sizeof(*sc));
719                 } else {
720                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
721                 }
722
723                 trace->syscalls.table = nsyscalls;
724                 trace->syscalls.max   = id;
725         }
726
727         sc = trace->syscalls.table + id;
728         sc->name = name;
729
730         if (trace->ev_qualifier) {
731                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
732
733                 if (!(in ^ trace->not_ev_qualifier)) {
734                         sc->filtered = true;
735                         /*
736                          * No need to do read tracepoint information since this will be
737                          * filtered out.
738                          */
739                         return 0;
740                 }
741         }
742
743         sc->fmt  = syscall_fmt__find(sc->name);
744
745         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
746         sc->tp_format = event_format__new("syscalls", tp_name);
747
748         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
749                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
750                 sc->tp_format = event_format__new("syscalls", tp_name);
751         }
752
753         if (sc->tp_format == NULL)
754                 return -1;
755
756         return syscall__set_arg_fmts(sc);
757 }
758
759 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
760                                       unsigned long *args)
761 {
762         size_t printed = 0;
763
764         if (sc->tp_format != NULL) {
765                 struct format_field *field;
766                 u8 bit = 1;
767                 struct syscall_arg arg = {
768                         .idx  = 0,
769                         .mask = 0,
770                 };
771
772                 for (field = sc->tp_format->format.fields->next; field;
773                      field = field->next, ++arg.idx, bit <<= 1) {
774                         if (arg.mask & bit)
775                                 continue;
776
777                         printed += scnprintf(bf + printed, size - printed,
778                                              "%s%s: ", printed ? ", " : "", field->name);
779                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
780                                 arg.val = args[arg.idx];
781                                 if (sc->arg_parm)
782                                         arg.parm = sc->arg_parm[arg.idx];
783                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
784                                                                       size - printed, &arg);
785                         } else {
786                                 printed += scnprintf(bf + printed, size - printed,
787                                                      "%ld", args[arg.idx]);
788                         }
789                 }
790         } else {
791                 int i = 0;
792
793                 while (i < 6) {
794                         printed += scnprintf(bf + printed, size - printed,
795                                              "%sarg%d: %ld",
796                                              printed ? ", " : "", i, args[i]);
797                         ++i;
798                 }
799         }
800
801         return printed;
802 }
803
/* Signature shared by the per-tracepoint sample handlers. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
806
807 static struct syscall *trace__syscall_info(struct trace *trace,
808                                            struct perf_evsel *evsel,
809                                            struct perf_sample *sample)
810 {
811         int id = perf_evsel__intval(evsel, sample, "id");
812
813         if (id < 0) {
814
815                 /*
816                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
817                  * before that, leaving at a higher verbosity level till that is
818                  * explained. Reproduced with plain ftrace with:
819                  *
820                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
821                  * grep "NR -1 " /t/trace_pipe
822                  *
823                  * After generating some load on the machine.
824                  */
825                 if (verbose > 1) {
826                         static u64 n;
827                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
828                                 id, perf_evsel__name(evsel), ++n);
829                 }
830                 return NULL;
831         }
832
833         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
834             trace__read_syscall_info(trace, id))
835                 goto out_cant_read;
836
837         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
838                 goto out_cant_read;
839
840         return &trace->syscalls.table[id];
841
842 out_cant_read:
843         if (verbose) {
844                 fprintf(trace->output, "Problems reading syscall %d", id);
845                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
846                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
847                 fputs(" information\n", trace->output);
848         }
849         return NULL;
850 }
851
852 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
853                             struct perf_sample *sample)
854 {
855         char *msg;
856         void *args;
857         size_t printed = 0;
858         struct thread *thread;
859         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
860         struct thread_trace *ttrace;
861
862         if (sc == NULL)
863                 return -1;
864
865         if (sc->filtered)
866                 return 0;
867
868         thread = machine__findnew_thread(&trace->host, sample->pid,
869                                          sample->tid);
870         ttrace = thread__trace(thread, trace->output);
871         if (ttrace == NULL)
872                 return -1;
873
874         args = perf_evsel__rawptr(evsel, sample, "args");
875         if (args == NULL) {
876                 fprintf(trace->output, "Problems reading syscall arguments\n");
877                 return -1;
878         }
879
880         ttrace = thread->priv;
881
882         if (ttrace->entry_str == NULL) {
883                 ttrace->entry_str = malloc(1024);
884                 if (!ttrace->entry_str)
885                         return -1;
886         }
887
888         ttrace->entry_time = sample->time;
889         msg = ttrace->entry_str;
890         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
891
892         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
893
894         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
895                 if (!trace->duration_filter) {
896                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
897                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
898                 }
899         } else
900                 ttrace->entry_pending = true;
901
902         return 0;
903 }
904
905 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
906                            struct perf_sample *sample)
907 {
908         int ret;
909         u64 duration = 0;
910         struct thread *thread;
911         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
912         struct thread_trace *ttrace;
913
914         if (sc == NULL)
915                 return -1;
916
917         if (sc->filtered)
918                 return 0;
919
920         thread = machine__findnew_thread(&trace->host, sample->pid,
921                                          sample->tid);
922         ttrace = thread__trace(thread, trace->output);
923         if (ttrace == NULL)
924                 return -1;
925
926         ret = perf_evsel__intval(evsel, sample, "ret");
927
928         ttrace = thread->priv;
929
930         ttrace->exit_time = sample->time;
931
932         if (ttrace->entry_time) {
933                 duration = sample->time - ttrace->entry_time;
934                 if (trace__filter_duration(trace, duration))
935                         goto out;
936         } else if (trace->duration_filter)
937                 goto out;
938
939         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
940
941         if (ttrace->entry_pending) {
942                 fprintf(trace->output, "%-70s", ttrace->entry_str);
943         } else {
944                 fprintf(trace->output, " ... [");
945                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
946                 fprintf(trace->output, "]: %s()", sc->name);
947         }
948
949         if (sc->fmt == NULL) {
950 signed_print:
951                 fprintf(trace->output, ") = %d", ret);
952         } else if (ret < 0 && sc->fmt->errmsg) {
953                 char bf[256];
954                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
955                            *e = audit_errno_to_name(-ret);
956
957                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
958         } else if (ret == 0 && sc->fmt->timeout)
959                 fprintf(trace->output, ") = 0 Timeout");
960         else if (sc->fmt->hexret)
961                 fprintf(trace->output, ") = %#x", ret);
962         else
963                 goto signed_print;
964
965         fputc('\n', trace->output);
966 out:
967         ttrace->entry_pending = false;
968
969         return 0;
970 }
971
972 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
973                                      struct perf_sample *sample)
974 {
975         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
976         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
977         struct thread *thread = machine__findnew_thread(&trace->host,
978                                                         sample->pid,
979                                                         sample->tid);
980         struct thread_trace *ttrace = thread__trace(thread, trace->output);
981
982         if (ttrace == NULL)
983                 goto out_dump;
984
985         ttrace->runtime_ms += runtime_ms;
986         trace->runtime_ms += runtime_ms;
987         return 0;
988
989 out_dump:
990         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
991                evsel->name,
992                perf_evsel__strval(evsel, sample, "comm"),
993                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
994                runtime,
995                perf_evsel__intval(evsel, sample, "vruntime"));
996         return 0;
997 }
998
999 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1000 {
1001         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1002             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1003                 return false;
1004
1005         if (trace->pid_list || trace->tid_list)
1006                 return true;
1007
1008         return false;
1009 }
1010
1011 static int trace__process_sample(struct perf_tool *tool,
1012                                  union perf_event *event __maybe_unused,
1013                                  struct perf_sample *sample,
1014                                  struct perf_evsel *evsel,
1015                                  struct machine *machine __maybe_unused)
1016 {
1017         struct trace *trace = container_of(tool, struct trace, tool);
1018         int err = 0;
1019
1020         tracepoint_handler handler = evsel->handler.func;
1021
1022         if (skip_sample(trace, sample))
1023                 return 0;
1024
1025         if (trace->base_time == 0)
1026                 trace->base_time = sample->time;
1027
1028         if (handler)
1029                 handler(trace, evsel, sample);
1030
1031         return err;
1032 }
1033
1034 static bool
1035 perf_session__has_tp(struct perf_session *session, const char *name)
1036 {
1037         struct perf_evsel *evsel;
1038
1039         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1040
1041         return evsel != NULL;
1042 }
1043
1044 static int parse_target_str(struct trace *trace)
1045 {
1046         if (trace->opts.target.pid) {
1047                 trace->pid_list = intlist__new(trace->opts.target.pid);
1048                 if (trace->pid_list == NULL) {
1049                         pr_err("Error parsing process id string\n");
1050                         return -EINVAL;
1051                 }
1052         }
1053
1054         if (trace->opts.target.tid) {
1055                 trace->tid_list = intlist__new(trace->opts.target.tid);
1056                 if (trace->tid_list == NULL) {
1057                         pr_err("Error parsing thread id string\n");
1058                         return -EINVAL;
1059                 }
1060         }
1061
1062         return 0;
1063 }
1064
/*
 * Live tracing: create an event list with the raw_syscalls tracepoints
 * (plus sched_stat_runtime when trace->sched is set), optionally prepare
 * and start the workload given in argv, then read the mmap'ed events and
 * dispatch every sample to its evsel handler until 'done' is set.
 *
 * A workload is forked only when argc > 0.
 *
 * Returns the last status stored in 'err': -1 or a negative value from
 * the failing setup step, otherwise the result of the most recent
 * perf_evlist__parse_sample() call.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
        struct perf_evlist *evlist = perf_evlist__new();
        struct perf_evsel *evsel;
        int err = -1, i;
        unsigned long before;
        const bool forks = argc > 0;

        if (evlist == NULL) {
                fprintf(trace->output, "Not enough memory to run!\n");
                goto out;
        }

        if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
            perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
                fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
                goto out_delete_evlist;
        }

        if (trace->sched &&
            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
                                   trace__sched_stat_runtime)) {
                fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
                goto out_delete_evlist;
        }

        err = perf_evlist__create_maps(evlist, &trace->opts.target);
        if (err < 0) {
                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
                goto out_delete_evlist;
        }

        err = trace__symbols_init(trace, evlist);
        if (err < 0) {
                fprintf(trace->output, "Problems initializing symbol libraries!\n");
                goto out_delete_maps;
        }

        perf_evlist__config(evlist, &trace->opts);

        /* NOTE(review): sig_handler is presumably what sets 'done' - confirm. */
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);

        if (forks) {
                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
                                                    argv, false, false);
                if (err < 0) {
                        fprintf(trace->output, "Couldn't run the workload!\n");
                        goto out_delete_maps;
                }
        }

        err = perf_evlist__open(evlist);
        if (err < 0) {
                fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
                goto out_delete_maps;
        }

        err = perf_evlist__mmap(evlist, UINT_MAX, false);
        if (err < 0) {
                fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
                goto out_close_evlist;
        }

        perf_evlist__enable(evlist);

        /* The workload is started only after the events have been enabled. */
        if (forks)
                perf_evlist__start_workload(evlist);

        trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
again:
        /* Snapshot the event count to detect a pass that consumed nothing. */
        before = trace->nr_events;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                union perf_event *event;

                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
                        const u32 type = event->header.type;
                        tracepoint_handler handler;
                        struct perf_sample sample;

                        ++trace->nr_events;

                        err = perf_evlist__parse_sample(evlist, event, &sample);
                        if (err) {
                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
                                continue;
                        }

                        /* The first parsed timestamp becomes the base time. */
                        if (trace->base_time == 0)
                                trace->base_time = sample.time;

                        /* Non-sample records go to the generic event processing. */
                        if (type != PERF_RECORD_SAMPLE) {
                                trace__process_event(trace, &trace->host, event);
                                continue;
                        }

                        evsel = perf_evlist__id2evsel(evlist, sample.id);
                        if (evsel == NULL) {
                                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
                                continue;
                        }

                        if (sample.raw_data == NULL) {
                                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
                                       perf_evsel__name(evsel), sample.tid,
                                       sample.cpu, sample.raw_size);
                                continue;
                        }

                        /* The handler's return value is not checked here. */
                        handler = evsel->handler.func;
                        handler(trace, evsel, &sample);

                        if (done)
                                goto out_unmap_evlist;
                }
        }

        /* Nothing was read in this pass: stop if done, otherwise block in poll(). */
        if (trace->nr_events == before) {
                if (done)
                        goto out_unmap_evlist;

                poll(evlist->pollfd, evlist->nr_fds, -1);
        }

        /*
         * Once done, disable the events and go around again; the loop is
         * left through the 'done' checks above.
         */
        if (done)
                perf_evlist__disable(evlist);

        goto again;

        /* Cleanup ladder: each label undoes one setup step, in reverse order. */
out_unmap_evlist:
        perf_evlist__munmap(evlist);
out_close_evlist:
        perf_evlist__close(evlist);
out_delete_maps:
        perf_evlist__delete_maps(evlist);
out_delete_evlist:
        perf_evlist__delete(evlist);
out:
        return err;
}
1206
1207 static int trace__replay(struct trace *trace)
1208 {
1209         const struct perf_evsel_str_handler handlers[] = {
1210                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1211                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1212         };
1213
1214         struct perf_session *session;
1215         int err = -1;
1216
1217         trace->tool.sample        = trace__process_sample;
1218         trace->tool.mmap          = perf_event__process_mmap;
1219         trace->tool.mmap2         = perf_event__process_mmap2;
1220         trace->tool.comm          = perf_event__process_comm;
1221         trace->tool.exit          = perf_event__process_exit;
1222         trace->tool.fork          = perf_event__process_fork;
1223         trace->tool.attr          = perf_event__process_attr;
1224         trace->tool.tracing_data = perf_event__process_tracing_data;
1225         trace->tool.build_id      = perf_event__process_build_id;
1226
1227         trace->tool.ordered_samples = true;
1228         trace->tool.ordering_requires_timestamps = true;
1229
1230         /* add tid to output */
1231         trace->multiple_threads = true;
1232
1233         if (symbol__init() < 0)
1234                 return -1;
1235
1236         session = perf_session__new(input_name, O_RDONLY, 0, false,
1237                                     &trace->tool);
1238         if (session == NULL)
1239                 return -ENOMEM;
1240
1241         err = perf_session__set_tracepoints_handlers(session, handlers);
1242         if (err)
1243                 goto out;
1244
1245         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1246                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1247                 goto out;
1248         }
1249
1250         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1251                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1252                 goto out;
1253         }
1254
1255         err = parse_target_str(trace);
1256         if (err != 0)
1257                 goto out;
1258
1259         setup_pager();
1260
1261         err = perf_session__process_events(session, &trace->tool);
1262         if (err)
1263                 pr_err("Failed to process events, error %d", err);
1264
1265 out:
1266         perf_session__delete(session);
1267
1268         return err;
1269 }
1270
/*
 * Write the banner and column titles of the per-thread summary to 'fp'.
 *
 * Returns the sum of the fprintf() return values.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char * const banner[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
                NULL,
        };
        const char * const *line;
        size_t total = 0;

        for (line = banner; *line != NULL; line++)
                total += fprintf(fp, "%s", *line);

        return total;
}
1282
1283 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1284 {
1285         size_t printed = trace__fprintf_threads_header(fp);
1286         struct rb_node *nd;
1287
1288         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1289                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1290                 struct thread_trace *ttrace = thread->priv;
1291                 const char *color;
1292                 double ratio;
1293
1294                 if (ttrace == NULL)
1295                         continue;
1296
1297                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1298
1299                 color = PERF_COLOR_NORMAL;
1300                 if (ratio > 50.0)
1301                         color = PERF_COLOR_RED;
1302                 else if (ratio > 25.0)
1303                         color = PERF_COLOR_GREEN;
1304                 else if (ratio > 5.0)
1305                         color = PERF_COLOR_YELLOW;
1306
1307                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1308                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1309                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1310                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1311         }
1312
1313         return printed;
1314 }
1315
1316 static int trace__set_duration(const struct option *opt, const char *str,
1317                                int unset __maybe_unused)
1318 {
1319         struct trace *trace = opt->value;
1320
1321         trace->duration_filter = atof(str);
1322         return 0;
1323 }
1324
1325 static int trace__open_output(struct trace *trace, const char *filename)
1326 {
1327         struct stat st;
1328
1329         if (!stat(filename, &st) && st.st_size) {
1330                 char oldname[PATH_MAX];
1331
1332                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1333                 unlink(oldname);
1334                 rename(filename, oldname);
1335         }
1336
1337         trace->output = fopen(filename, "w");
1338
1339         return trace->output == NULL ? -errno : 0;
1340 }
1341
/*
 * Entry point of 'perf trace': parse the command line, set up the
 * output stream, the event qualifier and the target, then either replay
 * a recorded file (when input_name is set) or trace live.
 *
 * With --sched a per-thread summary is printed after a successful run.
 *
 * Returns the status of the step that failed, otherwise the result of
 * trace__replay()/trace__run().
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
        const char * const trace_usage[] = {
                "perf trace [<options>] [<command>]",
                "perf trace [<options>] -- <command> [<options>]",
                NULL
        };
        /* Defaults; several of them can be overridden by the options below. */
        struct trace trace = {
                .audit_machine = audit_detect_machine(),
                .syscalls = {
                        /* max starts at -1: no syscall information has been read yet. */
                        . max = -1,
                },
                .opts = {
                        .target = {
                                .uid       = UINT_MAX,
                                .uses_mmap = true,
                        },
                        .user_freq     = UINT_MAX,
                        .user_interval = ULLONG_MAX,
                        .no_delay      = true,
                        .mmap_pages    = 1024,
                },
                .output = stdout,
        };
        const char *output_name = NULL;
        const char *ev_qualifier_str = NULL;
        const struct option trace_options[] = {
        OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
                    "list of events to trace"),
        OPT_STRING('o', "output", &output_name, "file", "output file name"),
        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
                    "trace events on existing process id"),
        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
                    "trace events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
                     "number of mmap data pages"),
        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
                   "user to profile"),
        OPT_CALLBACK(0, "duration", &trace, "float",
                     "show only events with duration > N.M ms",
                     trace__set_duration),
        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
        OPT_END()
        };
        int err;
        char bf[BUFSIZ];

        /* What is left in argc/argv afterwards is treated as the workload command. */
        argc = parse_options(argc, argv, trace_options, trace_usage, 0);

        if (output_name != NULL) {
                err = trace__open_output(&trace, output_name);
                if (err < 0) {
                        perror("failed to create output file");
                        goto out;
                }
        }

        if (ev_qualifier_str != NULL) {
                const char *s = ev_qualifier_str;

                /* A leading '!' negates the qualifier; it is not part of the list. */
                trace.not_ev_qualifier = *s == '!';
                if (trace.not_ev_qualifier)
                        ++s;
                trace.ev_qualifier = strlist__new(true, s);
                if (trace.ev_qualifier == NULL) {
                        fputs("Not enough memory to parse event qualifier",
                              trace.output);
                        err = -ENOMEM;
                        goto out_close;
                }
        }

        err = perf_target__validate(&trace.opts.target);
        if (err) {
                perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
                fprintf(trace.output, "%s", bf);
                goto out_close;
        }

        err = perf_target__parse_uid(&trace.opts.target);
        if (err) {
                perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
                fprintf(trace.output, "%s", bf);
                goto out_close;
        }

        /* No workload and no explicit target: trace the whole system. */
        if (!argc && perf_target__none(&trace.opts.target))
                trace.opts.target.system_wide = true;

        if (input_name)
                err = trace__replay(&trace);
        else
                err = trace__run(&trace, argc, argv);

        if (trace.sched && !err)
                trace__fprintf_thread_summary(&trace, trace.output);

out_close:
        /* Only close the stream when it was opened here; stdout is left alone. */
        if (output_name != NULL)
                fclose(trace.output);
out:
        return err;
}