perf trace: Add option to show process COMM
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
#include <libaudit.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
19
/*
 * Fallback values for constants that the system headers of older distros
 * may not define.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
36
37 struct syscall_arg {
38         unsigned long val;
39         void          *parm;
40         u8            idx;
41         u8            mask;
42 };
43
/* A table of strings indexed by a small integer argument value. */
struct strarray {
	int	    nr_entries;
	const char  **entries;
};

/* Define strarray__<array>, describing the string table called <array>. */
#define DEFINE_STRARRAY(array) \
	struct strarray strarray__##array = { \
		.entries    = array, \
		.nr_entries = ARRAY_SIZE(array), \
	}
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 {
194         enum syscall_futex_args {
195                 SCF_UADDR   = (1 << 0),
196                 SCF_OP      = (1 << 1),
197                 SCF_VAL     = (1 << 2),
198                 SCF_TIMEOUT = (1 << 3),
199                 SCF_UADDR2  = (1 << 4),
200                 SCF_VAL3    = (1 << 5),
201         };
202         int op = arg->val;
203         int cmd = op & FUTEX_CMD_MASK;
204         size_t printed = 0;
205
206         switch (cmd) {
207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
209         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
212         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
213         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
214         P_FUTEX_OP(WAKE_OP);                                                      break;
215         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
218         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
219         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
220         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
221         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
222         }
223
224         if (op & FUTEX_PRIVATE_FLAG)
225                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226
227         if (op & FUTEX_CLOCK_REALTIME)
228                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
229
230         return printed;
231 }
232
233 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
234
/*
 * String tables used with SCA_STRARRAY: the argument value is the index
 * into the table, so the order of the entries matters.
 */

static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);

static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"F_GETLK64",
	"F_SETLK64",
	"F_SETLKW64",
	"F_SETOWN_EX",
	"F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

static const char *socket_families[] = {
	"UNSPEC",    "LOCAL",	"INET",	   "AX25",	 "IPX",
	"APPLETALK", "NETROM",	"BRIDGE",  "ATMPVC",	 "X25",
	"INET6",     "ROSE",	"DECnet",  "NETBEUI",	 "SECURITY",
	"KEY",	     "NETLINK",	"PACKET",  "ASH",	 "ECONET",
	"ATMSVC",    "RDS",	"SNA",	   "IRDA",	 "PPPOX",
	"WANPIPE",   "LLC",	"IB",	   "CAN",	 "TIPC",
	"BLUETOOTH", "IUCV",	"RXRPC",   "ISDN",	 "PHONET",
	"IEEE802154", "CAIF",	"ALG",	   "NFC",	 "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
275
276 #ifndef SOCK_TYPE_MASK
277 #define SOCK_TYPE_MASK 0xf
278 #endif
279
280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
281                                                       struct syscall_arg *arg)
282 {
283         size_t printed;
284         int type = arg->val,
285             flags = type & ~SOCK_TYPE_MASK;
286
287         type &= SOCK_TYPE_MASK;
288         /*
289          * Can't use a strarray, MIPS may override for ABI reasons.
290          */
291         switch (type) {
292 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
293         P_SK_TYPE(STREAM);
294         P_SK_TYPE(DGRAM);
295         P_SK_TYPE(RAW);
296         P_SK_TYPE(RDM);
297         P_SK_TYPE(SEQPACKET);
298         P_SK_TYPE(DCCP);
299         P_SK_TYPE(PACKET);
300 #undef P_SK_TYPE
301         default:
302                 printed = scnprintf(bf, size, "%#x", type);
303         }
304
305 #define P_SK_FLAG(n) \
306         if (flags & SOCK_##n) { \
307                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
308                 flags &= ~SOCK_##n; \
309         }
310
311         P_SK_FLAG(CLOEXEC);
312         P_SK_FLAG(NONBLOCK);
313 #undef P_SK_FLAG
314
315         if (flags)
316                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
317
318         return printed;
319 }
320
321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322
323 #ifndef MSG_PROBE
324 #define MSG_PROBE            0x10
325 #endif
326 #ifndef MSG_SENDPAGE_NOTLAST
327 #define MSG_SENDPAGE_NOTLAST 0x20000
328 #endif
329 #ifndef MSG_FASTOPEN
330 #define MSG_FASTOPEN         0x20000000
331 #endif
332
333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
334                                                struct syscall_arg *arg)
335 {
336         int printed = 0, flags = arg->val;
337
338         if (flags == 0)
339                 return scnprintf(bf, size, "NONE");
340 #define P_MSG_FLAG(n) \
341         if (flags & MSG_##n) { \
342                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
343                 flags &= ~MSG_##n; \
344         }
345
346         P_MSG_FLAG(OOB);
347         P_MSG_FLAG(PEEK);
348         P_MSG_FLAG(DONTROUTE);
349         P_MSG_FLAG(TRYHARD);
350         P_MSG_FLAG(CTRUNC);
351         P_MSG_FLAG(PROBE);
352         P_MSG_FLAG(TRUNC);
353         P_MSG_FLAG(DONTWAIT);
354         P_MSG_FLAG(EOR);
355         P_MSG_FLAG(WAITALL);
356         P_MSG_FLAG(FIN);
357         P_MSG_FLAG(SYN);
358         P_MSG_FLAG(CONFIRM);
359         P_MSG_FLAG(RST);
360         P_MSG_FLAG(ERRQUEUE);
361         P_MSG_FLAG(NOSIGNAL);
362         P_MSG_FLAG(MORE);
363         P_MSG_FLAG(WAITFORONE);
364         P_MSG_FLAG(SENDPAGE_NOTLAST);
365         P_MSG_FLAG(FASTOPEN);
366         P_MSG_FLAG(CMSG_CLOEXEC);
367 #undef P_MSG_FLAG
368
369         if (flags)
370                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
371
372         return printed;
373 }
374
375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
376
377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
378                                                  struct syscall_arg *arg)
379 {
380         size_t printed = 0;
381         int mode = arg->val;
382
383         if (mode == F_OK) /* 0 */
384                 return scnprintf(bf, size, "F");
385 #define P_MODE(n) \
386         if (mode & n##_OK) { \
387                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
388                 mode &= ~n##_OK; \
389         }
390
391         P_MODE(R);
392         P_MODE(W);
393         P_MODE(X);
394 #undef P_MODE
395
396         if (mode)
397                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
398
399         return printed;
400 }
401
402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
403
404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
405                                                struct syscall_arg *arg)
406 {
407         int printed = 0, flags = arg->val;
408
409         if (!(flags & O_CREAT))
410                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
411
412         if (flags == 0)
413                 return scnprintf(bf, size, "RDONLY");
414 #define P_FLAG(n) \
415         if (flags & O_##n) { \
416                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
417                 flags &= ~O_##n; \
418         }
419
420         P_FLAG(APPEND);
421         P_FLAG(ASYNC);
422         P_FLAG(CLOEXEC);
423         P_FLAG(CREAT);
424         P_FLAG(DIRECT);
425         P_FLAG(DIRECTORY);
426         P_FLAG(EXCL);
427         P_FLAG(LARGEFILE);
428         P_FLAG(NOATIME);
429         P_FLAG(NOCTTY);
430 #ifdef O_NONBLOCK
431         P_FLAG(NONBLOCK);
432 #elif O_NDELAY
433         P_FLAG(NDELAY);
434 #endif
435 #ifdef O_PATH
436         P_FLAG(PATH);
437 #endif
438         P_FLAG(RDWR);
439 #ifdef O_DSYNC
440         if ((flags & O_SYNC) == O_SYNC)
441                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
442         else {
443                 P_FLAG(DSYNC);
444         }
445 #else
446         P_FLAG(SYNC);
447 #endif
448         P_FLAG(TRUNC);
449         P_FLAG(WRONLY);
450 #undef P_FLAG
451
452         if (flags)
453                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
454
455         return printed;
456 }
457
458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
459
460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
461                                                    struct syscall_arg *arg)
462 {
463         int printed = 0, flags = arg->val;
464
465         if (flags == 0)
466                 return scnprintf(bf, size, "NONE");
467 #define P_FLAG(n) \
468         if (flags & EFD_##n) { \
469                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
470                 flags &= ~EFD_##n; \
471         }
472
473         P_FLAG(SEMAPHORE);
474         P_FLAG(CLOEXEC);
475         P_FLAG(NONBLOCK);
476 #undef P_FLAG
477
478         if (flags)
479                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
480
481         return printed;
482 }
483
484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
485
486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
487 {
488         int sig = arg->val;
489
490         switch (sig) {
491 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
492         P_SIGNUM(HUP);
493         P_SIGNUM(INT);
494         P_SIGNUM(QUIT);
495         P_SIGNUM(ILL);
496         P_SIGNUM(TRAP);
497         P_SIGNUM(ABRT);
498         P_SIGNUM(BUS);
499         P_SIGNUM(FPE);
500         P_SIGNUM(KILL);
501         P_SIGNUM(USR1);
502         P_SIGNUM(SEGV);
503         P_SIGNUM(USR2);
504         P_SIGNUM(PIPE);
505         P_SIGNUM(ALRM);
506         P_SIGNUM(TERM);
507         P_SIGNUM(STKFLT);
508         P_SIGNUM(CHLD);
509         P_SIGNUM(CONT);
510         P_SIGNUM(STOP);
511         P_SIGNUM(TSTP);
512         P_SIGNUM(TTIN);
513         P_SIGNUM(TTOU);
514         P_SIGNUM(URG);
515         P_SIGNUM(XCPU);
516         P_SIGNUM(XFSZ);
517         P_SIGNUM(VTALRM);
518         P_SIGNUM(PROF);
519         P_SIGNUM(WINCH);
520         P_SIGNUM(IO);
521         P_SIGNUM(PWR);
522         P_SIGNUM(SYS);
523         default: break;
524         }
525
526         return scnprintf(bf, size, "%#x", sig);
527 }
528
529 #define SCA_SIGNUM syscall_arg__scnprintf_signum
530
531 static struct syscall_fmt {
532         const char *name;
533         const char *alias;
534         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
535         void       *arg_parm[6];
536         bool       errmsg;
537         bool       timeout;
538         bool       hexret;
539 } syscall_fmts[] = {
540         { .name     = "access",     .errmsg = true,
541           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
542         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
543         { .name     = "brk",        .hexret = true,
544           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
545         { .name     = "connect",    .errmsg = true, },
546         { .name     = "eventfd2",   .errmsg = true,
547           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
548         { .name     = "fcntl",      .errmsg = true,
549           .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
550           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
551         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
552         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
553         { .name     = "futex",      .errmsg = true,
554           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
555         { .name     = "getitimer",  .errmsg = true,
556           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
557           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
558         { .name     = "getrlimit",  .errmsg = true,
559           .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
560           .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
561         { .name     = "ioctl",      .errmsg = true,
562           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
563         { .name     = "kill",       .errmsg = true,
564           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
565         { .name     = "lseek",      .errmsg = true,
566           .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
567           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
568         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
569         { .name     = "madvise",    .errmsg = true,
570           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
571                              [2] = SCA_MADV_BHV, /* behavior */ }, },
572         { .name     = "mmap",       .hexret = true,
573           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
574                              [2] = SCA_MMAP_PROT, /* prot */
575                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
576         { .name     = "mprotect",   .errmsg = true,
577           .arg_scnprintf = { [0] = SCA_HEX, /* start */
578                              [2] = SCA_MMAP_PROT, /* prot */ }, },
579         { .name     = "mremap",     .hexret = true,
580           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
581                              [4] = SCA_HEX, /* new_addr */ }, },
582         { .name     = "munmap",     .errmsg = true,
583           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
584         { .name     = "open",       .errmsg = true,
585           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
586         { .name     = "open_by_handle_at", .errmsg = true,
587           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
588         { .name     = "openat",     .errmsg = true,
589           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
590         { .name     = "poll",       .errmsg = true, .timeout = true, },
591         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
592         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
593         { .name     = "prlimit64",  .errmsg = true,
594           .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
595           .arg_parm      = { [1] = &strarray__rlimit_resources, /* resource */ }, },
596         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
597         { .name     = "read",       .errmsg = true, },
598         { .name     = "recvfrom",   .errmsg = true,
599           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
600         { .name     = "recvmmsg",   .errmsg = true,
601           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
602         { .name     = "recvmsg",    .errmsg = true,
603           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
604         { .name     = "rt_sigaction", .errmsg = true,
605           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
606         { .name     = "rt_sigprocmask", .errmsg = true,
607           .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
608           .arg_parm      = { [0] = &strarray__sighow, /* how */ }, },
609         { .name     = "rt_sigqueueinfo", .errmsg = true,
610           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
611         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
612           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
613         { .name     = "select",     .errmsg = true, .timeout = true, },
614         { .name     = "sendmmsg",    .errmsg = true,
615           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
616         { .name     = "sendmsg",    .errmsg = true,
617           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
618         { .name     = "sendto",     .errmsg = true,
619           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
620         { .name     = "setitimer",  .errmsg = true,
621           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
622           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
623         { .name     = "setrlimit",  .errmsg = true,
624           .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
625           .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
626         { .name     = "socket",     .errmsg = true,
627           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
628                              [1] = SCA_SK_TYPE, /* type */ },
629           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
630         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
631         { .name     = "tgkill",     .errmsg = true,
632           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
633         { .name     = "tkill",      .errmsg = true,
634           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
635         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
636 };
637
638 static int syscall_fmt__cmp(const void *name, const void *fmtp)
639 {
640         const struct syscall_fmt *fmt = fmtp;
641         return strcmp(name, fmt->name);
642 }
643
644 static struct syscall_fmt *syscall_fmt__find(const char *name)
645 {
646         const int nmemb = ARRAY_SIZE(syscall_fmts);
647         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
648 }
649
/* Per syscall id state, one slot of trace->syscalls.table. */
struct syscall {
	/* format of the syscalls:sys_enter_<name> tracepoint */
	struct event_format *tp_format;
	/* name obtained from audit_syscall_to_name() */
	const char	    *name;
	/* set when the event qualifier list excludes this syscall */
	bool		    filtered;
	/* matching syscall_fmts entry, NULL when there is none */
	struct syscall_fmt  *fmt;
	/* one formatter per argument, entries may be NULL */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* fmt->arg_parm when fmt is set */
	void		    **arg_parm;
};
658
659 static size_t fprintf_duration(unsigned long t, FILE *fp)
660 {
661         double duration = (double)t / NSEC_PER_MSEC;
662         size_t printed = fprintf(fp, "(");
663
664         if (duration >= 1.0)
665                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
666         else if (duration >= 0.01)
667                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
668         else
669                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
670         return printed + fprintf(fp, "): ");
671 }
672
673 struct thread_trace {
674         u64               entry_time;
675         u64               exit_time;
676         bool              entry_pending;
677         unsigned long     nr_events;
678         char              *entry_str;
679         double            runtime_ms;
680 };
681
682 static struct thread_trace *thread_trace__new(void)
683 {
684         return zalloc(sizeof(struct thread_trace));
685 }
686
687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
688 {
689         struct thread_trace *ttrace;
690
691         if (thread == NULL)
692                 goto fail;
693
694         if (thread->priv == NULL)
695                 thread->priv = thread_trace__new();
696                 
697         if (thread->priv == NULL)
698                 goto fail;
699
700         ttrace = thread->priv;
701         ++ttrace->nr_events;
702
703         return ttrace;
704 fail:
705         color_fprintf(fp, PERF_COLOR_RED,
706                       "WARNING: not enough memory, dropping samples!\n");
707         return NULL;
708 }
709
710 struct trace {
711         struct perf_tool        tool;
712         int                     audit_machine;
713         struct {
714                 int             max;
715                 struct syscall  *table;
716         } syscalls;
717         struct perf_record_opts opts;
718         struct machine          host;
719         u64                     base_time;
720         bool                    full_time;
721         FILE                    *output;
722         unsigned long           nr_events;
723         struct strlist          *ev_qualifier;
724         bool                    not_ev_qualifier;
725         struct intlist          *tid_list;
726         struct intlist          *pid_list;
727         bool                    sched;
728         bool                    multiple_threads;
729         bool                    show_comm;
730         double                  duration_filter;
731         double                  runtime_ms;
732 };
733
734 static bool trace__filter_duration(struct trace *trace, double t)
735 {
736         return t < (trace->duration_filter * NSEC_PER_MSEC);
737 }
738
739 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
740 {
741         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
742
743         return fprintf(fp, "%10.3f ", ts);
744 }
745
746 static bool done = false;
747
748 static void sig_handler(int sig __maybe_unused)
749 {
750         done = true;
751 }
752
753 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
754                                         u64 duration, u64 tstamp, FILE *fp)
755 {
756         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
757         printed += fprintf_duration(duration, fp);
758
759         if (trace->multiple_threads) {
760                 if (trace->show_comm)
761                         printed += fprintf(fp, "%.14s/", thread->comm);
762                 printed += fprintf(fp, "%d ", thread->tid);
763         }
764
765         return printed;
766 }
767
768 static int trace__process_event(struct trace *trace, struct machine *machine,
769                                 union perf_event *event)
770 {
771         int ret = 0;
772
773         switch (event->header.type) {
774         case PERF_RECORD_LOST:
775                 color_fprintf(trace->output, PERF_COLOR_RED,
776                               "LOST %" PRIu64 " events!\n", event->lost.lost);
777                 ret = machine__process_lost_event(machine, event);
778         default:
779                 ret = machine__process_event(machine, event);
780                 break;
781         }
782
783         return ret;
784 }
785
786 static int trace__tool_process(struct perf_tool *tool,
787                                union perf_event *event,
788                                struct perf_sample *sample __maybe_unused,
789                                struct machine *machine)
790 {
791         struct trace *trace = container_of(tool, struct trace, tool);
792         return trace__process_event(trace, machine, event);
793 }
794
795 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
796 {
797         int err = symbol__init();
798
799         if (err)
800                 return err;
801
802         machine__init(&trace->host, "", HOST_KERNEL_ID);
803         machine__create_kernel_maps(&trace->host);
804
805         if (perf_target__has_task(&trace->opts.target)) {
806                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
807                                                         trace__tool_process,
808                                                         &trace->host);
809         } else {
810                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
811                                                      &trace->host);
812         }
813
814         if (err)
815                 symbol__exit();
816
817         return err;
818 }
819
820 static int syscall__set_arg_fmts(struct syscall *sc)
821 {
822         struct format_field *field;
823         int idx = 0;
824
825         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
826         if (sc->arg_scnprintf == NULL)
827                 return -1;
828
829         if (sc->fmt)
830                 sc->arg_parm = sc->fmt->arg_parm;
831
832         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
833                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
834                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
835                 else if (field->flags & FIELD_IS_POINTER)
836                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
837                 ++idx;
838         }
839
840         return 0;
841 }
842
843 static int trace__read_syscall_info(struct trace *trace, int id)
844 {
845         char tp_name[128];
846         struct syscall *sc;
847         const char *name = audit_syscall_to_name(id, trace->audit_machine);
848
849         if (name == NULL)
850                 return -1;
851
852         if (id > trace->syscalls.max) {
853                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
854
855                 if (nsyscalls == NULL)
856                         return -1;
857
858                 if (trace->syscalls.max != -1) {
859                         memset(nsyscalls + trace->syscalls.max + 1, 0,
860                                (id - trace->syscalls.max) * sizeof(*sc));
861                 } else {
862                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
863                 }
864
865                 trace->syscalls.table = nsyscalls;
866                 trace->syscalls.max   = id;
867         }
868
869         sc = trace->syscalls.table + id;
870         sc->name = name;
871
872         if (trace->ev_qualifier) {
873                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
874
875                 if (!(in ^ trace->not_ev_qualifier)) {
876                         sc->filtered = true;
877                         /*
878                          * No need to do read tracepoint information since this will be
879                          * filtered out.
880                          */
881                         return 0;
882                 }
883         }
884
885         sc->fmt  = syscall_fmt__find(sc->name);
886
887         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
888         sc->tp_format = event_format__new("syscalls", tp_name);
889
890         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
891                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
892                 sc->tp_format = event_format__new("syscalls", tp_name);
893         }
894
895         if (sc->tp_format == NULL)
896                 return -1;
897
898         return syscall__set_arg_fmts(sc);
899 }
900
901 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
902                                       unsigned long *args)
903 {
904         size_t printed = 0;
905
906         if (sc->tp_format != NULL) {
907                 struct format_field *field;
908                 u8 bit = 1;
909                 struct syscall_arg arg = {
910                         .idx  = 0,
911                         .mask = 0,
912                 };
913
914                 for (field = sc->tp_format->format.fields->next; field;
915                      field = field->next, ++arg.idx, bit <<= 1) {
916                         if (arg.mask & bit)
917                                 continue;
918
919                         if (args[arg.idx] == 0)
920                                 continue;
921
922                         printed += scnprintf(bf + printed, size - printed,
923                                              "%s%s: ", printed ? ", " : "", field->name);
924                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
925                                 arg.val = args[arg.idx];
926                                 if (sc->arg_parm)
927                                         arg.parm = sc->arg_parm[arg.idx];
928                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
929                                                                       size - printed, &arg);
930                         } else {
931                                 printed += scnprintf(bf + printed, size - printed,
932                                                      "%ld", args[arg.idx]);
933                         }
934                 }
935         } else {
936                 int i = 0;
937
938                 while (i < 6) {
939                         printed += scnprintf(bf + printed, size - printed,
940                                              "%sarg%d: %ld",
941                                              printed ? ", " : "", i, args[i]);
942                         ++i;
943                 }
944         }
945
946         return printed;
947 }
948
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (trace__sys_enter, trace__sys_exit, trace__sched_stat_runtime).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
951
952 static struct syscall *trace__syscall_info(struct trace *trace,
953                                            struct perf_evsel *evsel,
954                                            struct perf_sample *sample)
955 {
956         int id = perf_evsel__intval(evsel, sample, "id");
957
958         if (id < 0) {
959
960                 /*
961                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
962                  * before that, leaving at a higher verbosity level till that is
963                  * explained. Reproduced with plain ftrace with:
964                  *
965                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
966                  * grep "NR -1 " /t/trace_pipe
967                  *
968                  * After generating some load on the machine.
969                  */
970                 if (verbose > 1) {
971                         static u64 n;
972                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
973                                 id, perf_evsel__name(evsel), ++n);
974                 }
975                 return NULL;
976         }
977
978         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
979             trace__read_syscall_info(trace, id))
980                 goto out_cant_read;
981
982         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
983                 goto out_cant_read;
984
985         return &trace->syscalls.table[id];
986
987 out_cant_read:
988         if (verbose) {
989                 fprintf(trace->output, "Problems reading syscall %d", id);
990                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
991                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
992                 fputs(" information\n", trace->output);
993         }
994         return NULL;
995 }
996
997 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
998                             struct perf_sample *sample)
999 {
1000         char *msg;
1001         void *args;
1002         size_t printed = 0;
1003         struct thread *thread;
1004         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1005         struct thread_trace *ttrace;
1006
1007         if (sc == NULL)
1008                 return -1;
1009
1010         if (sc->filtered)
1011                 return 0;
1012
1013         thread = machine__findnew_thread(&trace->host, sample->pid,
1014                                          sample->tid);
1015         ttrace = thread__trace(thread, trace->output);
1016         if (ttrace == NULL)
1017                 return -1;
1018
1019         args = perf_evsel__rawptr(evsel, sample, "args");
1020         if (args == NULL) {
1021                 fprintf(trace->output, "Problems reading syscall arguments\n");
1022                 return -1;
1023         }
1024
1025         ttrace = thread->priv;
1026
1027         if (ttrace->entry_str == NULL) {
1028                 ttrace->entry_str = malloc(1024);
1029                 if (!ttrace->entry_str)
1030                         return -1;
1031         }
1032
1033         ttrace->entry_time = sample->time;
1034         msg = ttrace->entry_str;
1035         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1036
1037         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1038
1039         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1040                 if (!trace->duration_filter) {
1041                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1042                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1043                 }
1044         } else
1045                 ttrace->entry_pending = true;
1046
1047         return 0;
1048 }
1049
1050 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1051                            struct perf_sample *sample)
1052 {
1053         int ret;
1054         u64 duration = 0;
1055         struct thread *thread;
1056         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1057         struct thread_trace *ttrace;
1058
1059         if (sc == NULL)
1060                 return -1;
1061
1062         if (sc->filtered)
1063                 return 0;
1064
1065         thread = machine__findnew_thread(&trace->host, sample->pid,
1066                                          sample->tid);
1067         ttrace = thread__trace(thread, trace->output);
1068         if (ttrace == NULL)
1069                 return -1;
1070
1071         ret = perf_evsel__intval(evsel, sample, "ret");
1072
1073         ttrace = thread->priv;
1074
1075         ttrace->exit_time = sample->time;
1076
1077         if (ttrace->entry_time) {
1078                 duration = sample->time - ttrace->entry_time;
1079                 if (trace__filter_duration(trace, duration))
1080                         goto out;
1081         } else if (trace->duration_filter)
1082                 goto out;
1083
1084         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1085
1086         if (ttrace->entry_pending) {
1087                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1088         } else {
1089                 fprintf(trace->output, " ... [");
1090                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1091                 fprintf(trace->output, "]: %s()", sc->name);
1092         }
1093
1094         if (sc->fmt == NULL) {
1095 signed_print:
1096                 fprintf(trace->output, ") = %d", ret);
1097         } else if (ret < 0 && sc->fmt->errmsg) {
1098                 char bf[256];
1099                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1100                            *e = audit_errno_to_name(-ret);
1101
1102                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1103         } else if (ret == 0 && sc->fmt->timeout)
1104                 fprintf(trace->output, ") = 0 Timeout");
1105         else if (sc->fmt->hexret)
1106                 fprintf(trace->output, ") = %#x", ret);
1107         else
1108                 goto signed_print;
1109
1110         fputc('\n', trace->output);
1111 out:
1112         ttrace->entry_pending = false;
1113
1114         return 0;
1115 }
1116
1117 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1118                                      struct perf_sample *sample)
1119 {
1120         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1121         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1122         struct thread *thread = machine__findnew_thread(&trace->host,
1123                                                         sample->pid,
1124                                                         sample->tid);
1125         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1126
1127         if (ttrace == NULL)
1128                 goto out_dump;
1129
1130         ttrace->runtime_ms += runtime_ms;
1131         trace->runtime_ms += runtime_ms;
1132         return 0;
1133
1134 out_dump:
1135         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1136                evsel->name,
1137                perf_evsel__strval(evsel, sample, "comm"),
1138                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1139                runtime,
1140                perf_evsel__intval(evsel, sample, "vruntime"));
1141         return 0;
1142 }
1143
1144 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1145 {
1146         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1147             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1148                 return false;
1149
1150         if (trace->pid_list || trace->tid_list)
1151                 return true;
1152
1153         return false;
1154 }
1155
1156 static int trace__process_sample(struct perf_tool *tool,
1157                                  union perf_event *event __maybe_unused,
1158                                  struct perf_sample *sample,
1159                                  struct perf_evsel *evsel,
1160                                  struct machine *machine __maybe_unused)
1161 {
1162         struct trace *trace = container_of(tool, struct trace, tool);
1163         int err = 0;
1164
1165         tracepoint_handler handler = evsel->handler.func;
1166
1167         if (skip_sample(trace, sample))
1168                 return 0;
1169
1170         if (!trace->full_time && trace->base_time == 0)
1171                 trace->base_time = sample->time;
1172
1173         if (handler)
1174                 handler(trace, evsel, sample);
1175
1176         return err;
1177 }
1178
1179 static bool
1180 perf_session__has_tp(struct perf_session *session, const char *name)
1181 {
1182         struct perf_evsel *evsel;
1183
1184         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1185
1186         return evsel != NULL;
1187 }
1188
1189 static int parse_target_str(struct trace *trace)
1190 {
1191         if (trace->opts.target.pid) {
1192                 trace->pid_list = intlist__new(trace->opts.target.pid);
1193                 if (trace->pid_list == NULL) {
1194                         pr_err("Error parsing process id string\n");
1195                         return -EINVAL;
1196                 }
1197         }
1198
1199         if (trace->opts.target.tid) {
1200                 trace->tid_list = intlist__new(trace->opts.target.tid);
1201                 if (trace->tid_list == NULL) {
1202                         pr_err("Error parsing thread id string\n");
1203                         return -EINVAL;
1204                 }
1205         }
1206
1207         return 0;
1208 }
1209
1210 static int trace__run(struct trace *trace, int argc, const char **argv)
1211 {
1212         struct perf_evlist *evlist = perf_evlist__new();
1213         struct perf_evsel *evsel;
1214         int err = -1, i;
1215         unsigned long before;
1216         const bool forks = argc > 0;
1217
1218         if (evlist == NULL) {
1219                 fprintf(trace->output, "Not enough memory to run!\n");
1220                 goto out;
1221         }
1222
1223         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1224             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1225                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1226                 goto out_delete_evlist;
1227         }
1228
1229         if (trace->sched &&
1230             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1231                                    trace__sched_stat_runtime)) {
1232                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1233                 goto out_delete_evlist;
1234         }
1235
1236         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1237         if (err < 0) {
1238                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1239                 goto out_delete_evlist;
1240         }
1241
1242         err = trace__symbols_init(trace, evlist);
1243         if (err < 0) {
1244                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1245                 goto out_delete_maps;
1246         }
1247
1248         perf_evlist__config(evlist, &trace->opts);
1249
1250         signal(SIGCHLD, sig_handler);
1251         signal(SIGINT, sig_handler);
1252
1253         if (forks) {
1254                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1255                                                     argv, false, false);
1256                 if (err < 0) {
1257                         fprintf(trace->output, "Couldn't run the workload!\n");
1258                         goto out_delete_maps;
1259                 }
1260         }
1261
1262         err = perf_evlist__open(evlist);
1263         if (err < 0) {
1264                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1265                 goto out_delete_maps;
1266         }
1267
1268         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1269         if (err < 0) {
1270                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1271                 goto out_close_evlist;
1272         }
1273
1274         perf_evlist__enable(evlist);
1275
1276         if (forks)
1277                 perf_evlist__start_workload(evlist);
1278
1279         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1280 again:
1281         before = trace->nr_events;
1282
1283         for (i = 0; i < evlist->nr_mmaps; i++) {
1284                 union perf_event *event;
1285
1286                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1287                         const u32 type = event->header.type;
1288                         tracepoint_handler handler;
1289                         struct perf_sample sample;
1290
1291                         ++trace->nr_events;
1292
1293                         err = perf_evlist__parse_sample(evlist, event, &sample);
1294                         if (err) {
1295                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1296                                 continue;
1297                         }
1298
1299                         if (!trace->full_time && trace->base_time == 0)
1300                                 trace->base_time = sample.time;
1301
1302                         if (type != PERF_RECORD_SAMPLE) {
1303                                 trace__process_event(trace, &trace->host, event);
1304                                 continue;
1305                         }
1306
1307                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1308                         if (evsel == NULL) {
1309                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1310                                 continue;
1311                         }
1312
1313                         if (sample.raw_data == NULL) {
1314                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1315                                        perf_evsel__name(evsel), sample.tid,
1316                                        sample.cpu, sample.raw_size);
1317                                 continue;
1318                         }
1319
1320                         handler = evsel->handler.func;
1321                         handler(trace, evsel, &sample);
1322
1323                         if (done)
1324                                 goto out_unmap_evlist;
1325                 }
1326         }
1327
1328         if (trace->nr_events == before) {
1329                 if (done)
1330                         goto out_unmap_evlist;
1331
1332                 poll(evlist->pollfd, evlist->nr_fds, -1);
1333         }
1334
1335         if (done)
1336                 perf_evlist__disable(evlist);
1337
1338         goto again;
1339
1340 out_unmap_evlist:
1341         perf_evlist__munmap(evlist);
1342 out_close_evlist:
1343         perf_evlist__close(evlist);
1344 out_delete_maps:
1345         perf_evlist__delete_maps(evlist);
1346 out_delete_evlist:
1347         perf_evlist__delete(evlist);
1348 out:
1349         return err;
1350 }
1351
1352 static int trace__replay(struct trace *trace)
1353 {
1354         const struct perf_evsel_str_handler handlers[] = {
1355                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1356                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1357         };
1358
1359         struct perf_session *session;
1360         int err = -1;
1361
1362         trace->tool.sample        = trace__process_sample;
1363         trace->tool.mmap          = perf_event__process_mmap;
1364         trace->tool.mmap2         = perf_event__process_mmap2;
1365         trace->tool.comm          = perf_event__process_comm;
1366         trace->tool.exit          = perf_event__process_exit;
1367         trace->tool.fork          = perf_event__process_fork;
1368         trace->tool.attr          = perf_event__process_attr;
1369         trace->tool.tracing_data = perf_event__process_tracing_data;
1370         trace->tool.build_id      = perf_event__process_build_id;
1371
1372         trace->tool.ordered_samples = true;
1373         trace->tool.ordering_requires_timestamps = true;
1374
1375         /* add tid to output */
1376         trace->multiple_threads = true;
1377
1378         if (symbol__init() < 0)
1379                 return -1;
1380
1381         session = perf_session__new(input_name, O_RDONLY, 0, false,
1382                                     &trace->tool);
1383         if (session == NULL)
1384                 return -ENOMEM;
1385
1386         err = perf_session__set_tracepoints_handlers(session, handlers);
1387         if (err)
1388                 goto out;
1389
1390         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1391                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1392                 goto out;
1393         }
1394
1395         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1396                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1397                 goto out;
1398         }
1399
1400         err = parse_target_str(trace);
1401         if (err != 0)
1402                 goto out;
1403
1404         setup_pager();
1405
1406         err = perf_session__process_events(session, &trace->tool);
1407         if (err)
1408                 pr_err("Failed to process events, error %d", err);
1409
1410 out:
1411         perf_session__delete(session);
1412
1413         return err;
1414 }
1415
/*
 * Print the banner and column headings of the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() return values.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const header_lines[] = {
		"\n _____________________________________________________________________\n",
		" __)    Summary of events    (__\n\n",
		"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t i;

	for (i = 0; i < sizeof(header_lines) / sizeof(header_lines[0]); i++)
		printed += fprintf(fp, "%s", header_lines[i]);

	return printed;
}
1427
1428 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1429 {
1430         size_t printed = trace__fprintf_threads_header(fp);
1431         struct rb_node *nd;
1432
1433         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1434                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1435                 struct thread_trace *ttrace = thread->priv;
1436                 const char *color;
1437                 double ratio;
1438
1439                 if (ttrace == NULL)
1440                         continue;
1441
1442                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1443
1444                 color = PERF_COLOR_NORMAL;
1445                 if (ratio > 50.0)
1446                         color = PERF_COLOR_RED;
1447                 else if (ratio > 25.0)
1448                         color = PERF_COLOR_GREEN;
1449                 else if (ratio > 5.0)
1450                         color = PERF_COLOR_YELLOW;
1451
1452                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1453                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1454                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1455                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1456         }
1457
1458         return printed;
1459 }
1460
1461 static int trace__set_duration(const struct option *opt, const char *str,
1462                                int unset __maybe_unused)
1463 {
1464         struct trace *trace = opt->value;
1465
1466         trace->duration_filter = atof(str);
1467         return 0;
1468 }
1469
1470 static int trace__open_output(struct trace *trace, const char *filename)
1471 {
1472         struct stat st;
1473
1474         if (!stat(filename, &st) && st.st_size) {
1475                 char oldname[PATH_MAX];
1476
1477                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1478                 unlink(oldname);
1479                 rename(filename, oldname);
1480         }
1481
1482         trace->output = fopen(filename, "w");
1483
1484         return trace->output == NULL ? -errno : 0;
1485 }
1486
1487 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1488 {
1489         const char * const trace_usage[] = {
1490                 "perf trace [<options>] [<command>]",
1491                 "perf trace [<options>] -- <command> [<options>]",
1492                 NULL
1493         };
1494         struct trace trace = {
1495                 .audit_machine = audit_detect_machine(),
1496                 .syscalls = {
1497                         . max = -1,
1498                 },
1499                 .opts = {
1500                         .target = {
1501                                 .uid       = UINT_MAX,
1502                                 .uses_mmap = true,
1503                         },
1504                         .user_freq     = UINT_MAX,
1505                         .user_interval = ULLONG_MAX,
1506                         .no_delay      = true,
1507                         .mmap_pages    = 1024,
1508                 },
1509                 .output = stdout,
1510                 .show_comm = true,
1511         };
1512         const char *output_name = NULL;
1513         const char *ev_qualifier_str = NULL;
1514         const struct option trace_options[] = {
1515         OPT_BOOLEAN(0, "comm", &trace.show_comm,
1516                     "show the thread COMM next to its id"),
1517         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1518                     "list of events to trace"),
1519         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1520         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1521         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1522                     "trace events on existing process id"),
1523         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1524                     "trace events on existing thread id"),
1525         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1526                     "system-wide collection from all CPUs"),
1527         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1528                     "list of cpus to monitor"),
1529         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1530                     "child tasks do not inherit counters"),
1531         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1532                      "number of mmap data pages"),
1533         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1534                    "user to profile"),
1535         OPT_CALLBACK(0, "duration", &trace, "float",
1536                      "show only events with duration > N.M ms",
1537                      trace__set_duration),
1538         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1539         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1540         OPT_BOOLEAN('T', "time", &trace.full_time,
1541                     "Show full timestamp, not time relative to first start"),
1542         OPT_END()
1543         };
1544         int err;
1545         char bf[BUFSIZ];
1546
1547         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1548
1549         if (output_name != NULL) {
1550                 err = trace__open_output(&trace, output_name);
1551                 if (err < 0) {
1552                         perror("failed to create output file");
1553                         goto out;
1554                 }
1555         }
1556
1557         if (ev_qualifier_str != NULL) {
1558                 const char *s = ev_qualifier_str;
1559
1560                 trace.not_ev_qualifier = *s == '!';
1561                 if (trace.not_ev_qualifier)
1562                         ++s;
1563                 trace.ev_qualifier = strlist__new(true, s);
1564                 if (trace.ev_qualifier == NULL) {
1565                         fputs("Not enough memory to parse event qualifier",
1566                               trace.output);
1567                         err = -ENOMEM;
1568                         goto out_close;
1569                 }
1570         }
1571
1572         err = perf_target__validate(&trace.opts.target);
1573         if (err) {
1574                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1575                 fprintf(trace.output, "%s", bf);
1576                 goto out_close;
1577         }
1578
1579         err = perf_target__parse_uid(&trace.opts.target);
1580         if (err) {
1581                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1582                 fprintf(trace.output, "%s", bf);
1583                 goto out_close;
1584         }
1585
1586         if (!argc && perf_target__none(&trace.opts.target))
1587                 trace.opts.target.system_wide = true;
1588
1589         if (input_name)
1590                 err = trace__replay(&trace);
1591         else
1592                 err = trace__run(&trace, argc, argv);
1593
1594         if (trace.sched && !err)
1595                 trace__fprintf_thread_summary(&trace, trace.output);
1596
1597 out_close:
1598         if (output_name != NULL)
1599                 fclose(trace.output);
1600 out:
1601         return err;
1602 }