8a09ba3dcd97dd5c6619e5ac31166e1335811fd4
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/eventfd.h>
17 #include <sys/mman.h>
18 #include <linux/futex.h>
19
/*
 * For older distros: fallback values for constants that their libc headers
 * may not define, so that the flag decoders below, which reference these
 * names unconditionally, still compile.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif
36
/*
 * One syscall argument as handed to the syscall_arg__scnprintf_*() formatters.
 */
struct syscall_arg {
        unsigned long val;   /* raw argument value to be formatted */
        void          *parm; /* formatter specific data, e.g. a struct strarray */
        u8            idx;   /* position of this argument in the syscall */
        u8            mask;  /* one bit per argument index; formatters set bits
                              * to mask other arguments (see open_flags/futex_op) */
};
43
/*
 * Table of strings indexed by a small integer, used to turn a numeric syscall
 * argument into a symbolic name.
 */
struct strarray {
        int         nr_entries; /* number of elements in entries[] */
        const char **entries;   /* the names, indexed by argument value */
};

/*
 * Define "strarray__<array>" describing the string array "array"; the entry
 * count is taken from the array itself via ARRAY_SIZE().
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 {
194         enum syscall_futex_args {
195                 SCF_UADDR   = (1 << 0),
196                 SCF_OP      = (1 << 1),
197                 SCF_VAL     = (1 << 2),
198                 SCF_TIMEOUT = (1 << 3),
199                 SCF_UADDR2  = (1 << 4),
200                 SCF_VAL3    = (1 << 5),
201         };
202         int op = arg->val;
203         int cmd = op & FUTEX_CMD_MASK;
204         size_t printed = 0;
205
206         switch (cmd) {
207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
209         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
212         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
213         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
214         P_FUTEX_OP(WAKE_OP);                                                      break;
215         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
218         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
219         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
220         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
221         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
222         }
223
224         if (op & FUTEX_PRIVATE_FLAG)
225                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226
227         if (op & FUTEX_CLOCK_REALTIME)
228                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
229
230         return printed;
231 }
232
233 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
234
/*
 * Name tables used together with SCA_STRARRAY: the numeric argument value is
 * used directly as the index, so the position of each name must match the
 * value of the constant it stands for.
 */

/* "which" argument of getitimer/setitimer. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* "whence" argument of lseek; DATA/HOLE only when the headers know them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* "cmd" argument of fcntl. */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* "resource" argument of getrlimit/setrlimit/prlimit64. */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* "how" argument of rt_sigprocmask. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* "family" argument of socket. */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
        "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
275
276 #ifndef SOCK_TYPE_MASK
277 #define SOCK_TYPE_MASK 0xf
278 #endif
279
280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
281                                                       struct syscall_arg *arg)
282 {
283         size_t printed;
284         int type = arg->val,
285             flags = type & ~SOCK_TYPE_MASK;
286
287         type &= SOCK_TYPE_MASK;
288         /*
289          * Can't use a strarray, MIPS may override for ABI reasons.
290          */
291         switch (type) {
292 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
293         P_SK_TYPE(STREAM);
294         P_SK_TYPE(DGRAM);
295         P_SK_TYPE(RAW);
296         P_SK_TYPE(RDM);
297         P_SK_TYPE(SEQPACKET);
298         P_SK_TYPE(DCCP);
299         P_SK_TYPE(PACKET);
300 #undef P_SK_TYPE
301         default:
302                 printed = scnprintf(bf, size, "%#x", type);
303         }
304
305 #define P_SK_FLAG(n) \
306         if (flags & SOCK_##n) { \
307                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
308                 flags &= ~SOCK_##n; \
309         }
310
311         P_SK_FLAG(CLOEXEC);
312         P_SK_FLAG(NONBLOCK);
313 #undef P_SK_FLAG
314
315         if (flags)
316                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
317
318         return printed;
319 }
320
321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322
323 #ifndef MSG_PROBE
324 #define MSG_PROBE            0x10
325 #endif
326 #ifndef MSG_SENDPAGE_NOTLAST
327 #define MSG_SENDPAGE_NOTLAST 0x20000
328 #endif
329 #ifndef MSG_FASTOPEN
330 #define MSG_FASTOPEN         0x20000000
331 #endif
332
333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
334                                                struct syscall_arg *arg)
335 {
336         int printed = 0, flags = arg->val;
337
338         if (flags == 0)
339                 return scnprintf(bf, size, "NONE");
340 #define P_MSG_FLAG(n) \
341         if (flags & MSG_##n) { \
342                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
343                 flags &= ~MSG_##n; \
344         }
345
346         P_MSG_FLAG(OOB);
347         P_MSG_FLAG(PEEK);
348         P_MSG_FLAG(DONTROUTE);
349         P_MSG_FLAG(TRYHARD);
350         P_MSG_FLAG(CTRUNC);
351         P_MSG_FLAG(PROBE);
352         P_MSG_FLAG(TRUNC);
353         P_MSG_FLAG(DONTWAIT);
354         P_MSG_FLAG(EOR);
355         P_MSG_FLAG(WAITALL);
356         P_MSG_FLAG(FIN);
357         P_MSG_FLAG(SYN);
358         P_MSG_FLAG(CONFIRM);
359         P_MSG_FLAG(RST);
360         P_MSG_FLAG(ERRQUEUE);
361         P_MSG_FLAG(NOSIGNAL);
362         P_MSG_FLAG(MORE);
363         P_MSG_FLAG(WAITFORONE);
364         P_MSG_FLAG(SENDPAGE_NOTLAST);
365         P_MSG_FLAG(FASTOPEN);
366         P_MSG_FLAG(CMSG_CLOEXEC);
367 #undef P_MSG_FLAG
368
369         if (flags)
370                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
371
372         return printed;
373 }
374
375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
376
377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
378                                                  struct syscall_arg *arg)
379 {
380         size_t printed = 0;
381         int mode = arg->val;
382
383         if (mode == F_OK) /* 0 */
384                 return scnprintf(bf, size, "F");
385 #define P_MODE(n) \
386         if (mode & n##_OK) { \
387                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
388                 mode &= ~n##_OK; \
389         }
390
391         P_MODE(R);
392         P_MODE(W);
393         P_MODE(X);
394 #undef P_MODE
395
396         if (mode)
397                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
398
399         return printed;
400 }
401
402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
403
404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
405                                                struct syscall_arg *arg)
406 {
407         int printed = 0, flags = arg->val;
408
409         if (!(flags & O_CREAT))
410                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
411
412         if (flags == 0)
413                 return scnprintf(bf, size, "RDONLY");
414 #define P_FLAG(n) \
415         if (flags & O_##n) { \
416                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
417                 flags &= ~O_##n; \
418         }
419
420         P_FLAG(APPEND);
421         P_FLAG(ASYNC);
422         P_FLAG(CLOEXEC);
423         P_FLAG(CREAT);
424         P_FLAG(DIRECT);
425         P_FLAG(DIRECTORY);
426         P_FLAG(EXCL);
427         P_FLAG(LARGEFILE);
428         P_FLAG(NOATIME);
429         P_FLAG(NOCTTY);
430 #ifdef O_NONBLOCK
431         P_FLAG(NONBLOCK);
432 #elif O_NDELAY
433         P_FLAG(NDELAY);
434 #endif
435 #ifdef O_PATH
436         P_FLAG(PATH);
437 #endif
438         P_FLAG(RDWR);
439 #ifdef O_DSYNC
440         if ((flags & O_SYNC) == O_SYNC)
441                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
442         else {
443                 P_FLAG(DSYNC);
444         }
445 #else
446         P_FLAG(SYNC);
447 #endif
448         P_FLAG(TRUNC);
449         P_FLAG(WRONLY);
450 #undef P_FLAG
451
452         if (flags)
453                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
454
455         return printed;
456 }
457
458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
459
460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
461                                                    struct syscall_arg *arg)
462 {
463         int printed = 0, flags = arg->val;
464
465         if (flags == 0)
466                 return scnprintf(bf, size, "NONE");
467 #define P_FLAG(n) \
468         if (flags & EFD_##n) { \
469                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
470                 flags &= ~EFD_##n; \
471         }
472
473         P_FLAG(SEMAPHORE);
474         P_FLAG(CLOEXEC);
475         P_FLAG(NONBLOCK);
476 #undef P_FLAG
477
478         if (flags)
479                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
480
481         return printed;
482 }
483
484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
485
486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
487 {
488         int sig = arg->val;
489
490         switch (sig) {
491 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
492         P_SIGNUM(HUP);
493         P_SIGNUM(INT);
494         P_SIGNUM(QUIT);
495         P_SIGNUM(ILL);
496         P_SIGNUM(TRAP);
497         P_SIGNUM(ABRT);
498         P_SIGNUM(BUS);
499         P_SIGNUM(FPE);
500         P_SIGNUM(KILL);
501         P_SIGNUM(USR1);
502         P_SIGNUM(SEGV);
503         P_SIGNUM(USR2);
504         P_SIGNUM(PIPE);
505         P_SIGNUM(ALRM);
506         P_SIGNUM(TERM);
507         P_SIGNUM(STKFLT);
508         P_SIGNUM(CHLD);
509         P_SIGNUM(CONT);
510         P_SIGNUM(STOP);
511         P_SIGNUM(TSTP);
512         P_SIGNUM(TTIN);
513         P_SIGNUM(TTOU);
514         P_SIGNUM(URG);
515         P_SIGNUM(XCPU);
516         P_SIGNUM(XFSZ);
517         P_SIGNUM(VTALRM);
518         P_SIGNUM(PROF);
519         P_SIGNUM(WINCH);
520         P_SIGNUM(IO);
521         P_SIGNUM(PWR);
522         P_SIGNUM(SYS);
523         default: break;
524         }
525
526         return scnprintf(bf, size, "%#x", sig);
527 }
528
529 #define SCA_SIGNUM syscall_arg__scnprintf_signum
530
/*
 * Per-syscall formatting hints, looked up by name via syscall_fmt__find().
 *
 * NOTE: syscall_fmt__find() uses bsearch() with strcmp() on ->name, so the
 * entries of syscall_fmts[] must be kept sorted by name in strcmp() order.
 */
static struct syscall_fmt {
        const char *name;  /* syscall name, the lookup key */
        const char *alias; /* alternative name tried for the sys_enter_<name>
                            * tracepoint when the primary one is not found */
        /* optional formatter for each argument position */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        /* optional formatter data for each argument position (e.g. a strarray) */
        void       *arg_parm[6];
        /* NOTE(review): the three flags below are not consumed in this part of
         * the file; their names suggest they control how the return value is
         * printed -- confirm against the exit handler. */
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "getitimer",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
          .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
        { .name     = "getrlimit",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
          .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64", },
        { .name     = "prlimit64",  .errmsg = true,
          .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
          .arg_parm      = { [1] = &strarray__rlimit_resources, /* resource */ }, },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
        { .name     = "read",       .errmsg = true, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
          .arg_parm      = { [0] = &strarray__sighow, /* how */ }, },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
          .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
        { .name     = "setrlimit",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
          .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
};
637
638 static int syscall_fmt__cmp(const void *name, const void *fmtp)
639 {
640         const struct syscall_fmt *fmt = fmtp;
641         return strcmp(name, fmt->name);
642 }
643
644 static struct syscall_fmt *syscall_fmt__find(const char *name)
645 {
646         const int nmemb = ARRAY_SIZE(syscall_fmts);
647         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
648 }
649
/*
 * Per-syscall state, one slot per syscall id in trace->syscalls.table; filled
 * in by trace__read_syscall_info().
 */
struct syscall {
        struct event_format *tp_format; /* format of the sys_enter_<name> tracepoint */
        const char          *name;      /* name returned by audit_syscall_to_name() */
        bool                filtered;   /* excluded by the event qualifier list */
        struct syscall_fmt  *fmt;       /* formatting hints, NULL when there are none */
        /* formatter chosen for each argument, see syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm; /* fmt->arg_parm when fmt is set */
};
658
659 static size_t fprintf_duration(unsigned long t, FILE *fp)
660 {
661         double duration = (double)t / NSEC_PER_MSEC;
662         size_t printed = fprintf(fp, "(");
663
664         if (duration >= 1.0)
665                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
666         else if (duration >= 0.01)
667                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
668         else
669                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
670         return printed + fprintf(fp, "): ");
671 }
672
/*
 * Per-thread tracing state, stored in thread->priv and created on first use
 * by thread__trace().
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;   /* incremented on every thread__trace() call */
        char              *entry_str;
        double            runtime_ms;
};
681
682 static struct thread_trace *thread_trace__new(void)
683 {
684         return zalloc(sizeof(struct thread_trace));
685 }
686
687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
688 {
689         struct thread_trace *ttrace;
690
691         if (thread == NULL)
692                 goto fail;
693
694         if (thread->priv == NULL)
695                 thread->priv = thread_trace__new();
696                 
697         if (thread->priv == NULL)
698                 goto fail;
699
700         ttrace = thread->priv;
701         ++ttrace->nr_events;
702
703         return ttrace;
704 fail:
705         color_fprintf(fp, PERF_COLOR_RED,
706                       "WARNING: not enough memory, dropping samples!\n");
707         return NULL;
708 }
709
/*
 * State of one "perf trace" session.
 */
struct trace {
        struct perf_tool        tool;           /* embedded; callbacks recover the trace via container_of() */
        int                     audit_machine;  /* machine id passed to audit_syscall_to_name() */
        struct {
                int             max;            /* highest syscall id the table has room for */
                struct syscall  *table;         /* indexed by syscall id, grown on demand */
        } syscalls;
        struct perf_record_opts opts;           /* includes the target (opts.target) */
        struct machine          host;           /* set up in trace__symbols_init() */
        u64                     base_time;      /* subtracted from timestamps before printing */
        bool                    full_time;
        FILE                    *output;        /* where messages such as "LOST events" go */
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;  /* syscall names to select, or NULL for no filter */
        bool                    not_ev_qualifier; /* invert the meaning of ev_qualifier */
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        bool                    sched;
        bool                    multiple_threads; /* print the tid in front of each entry */
        double                  duration_filter;  /* in ms, see trace__filter_duration() */
        double                  runtime_ms;
};
732
733 static bool trace__filter_duration(struct trace *trace, double t)
734 {
735         return t < (trace->duration_filter * NSEC_PER_MSEC);
736 }
737
738 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
739 {
740         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
741
742         return fprintf(fp, "%10.3f ", ts);
743 }
744
745 static bool done = false;
746
747 static void sig_handler(int sig __maybe_unused)
748 {
749         done = true;
750 }
751
752 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
753                                         u64 duration, u64 tstamp, FILE *fp)
754 {
755         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
756         printed += fprintf_duration(duration, fp);
757
758         if (trace->multiple_threads)
759                 printed += fprintf(fp, "%d ", thread->tid);
760
761         return printed;
762 }
763
764 static int trace__process_event(struct trace *trace, struct machine *machine,
765                                 union perf_event *event)
766 {
767         int ret = 0;
768
769         switch (event->header.type) {
770         case PERF_RECORD_LOST:
771                 color_fprintf(trace->output, PERF_COLOR_RED,
772                               "LOST %" PRIu64 " events!\n", event->lost.lost);
773                 ret = machine__process_lost_event(machine, event);
774         default:
775                 ret = machine__process_event(machine, event);
776                 break;
777         }
778
779         return ret;
780 }
781
782 static int trace__tool_process(struct perf_tool *tool,
783                                union perf_event *event,
784                                struct perf_sample *sample __maybe_unused,
785                                struct machine *machine)
786 {
787         struct trace *trace = container_of(tool, struct trace, tool);
788         return trace__process_event(trace, machine, event);
789 }
790
791 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
792 {
793         int err = symbol__init();
794
795         if (err)
796                 return err;
797
798         machine__init(&trace->host, "", HOST_KERNEL_ID);
799         machine__create_kernel_maps(&trace->host);
800
801         if (perf_target__has_task(&trace->opts.target)) {
802                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
803                                                         trace__tool_process,
804                                                         &trace->host);
805         } else {
806                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
807                                                      &trace->host);
808         }
809
810         if (err)
811                 symbol__exit();
812
813         return err;
814 }
815
816 static int syscall__set_arg_fmts(struct syscall *sc)
817 {
818         struct format_field *field;
819         int idx = 0;
820
821         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
822         if (sc->arg_scnprintf == NULL)
823                 return -1;
824
825         if (sc->fmt)
826                 sc->arg_parm = sc->fmt->arg_parm;
827
828         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
829                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
830                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
831                 else if (field->flags & FIELD_IS_POINTER)
832                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
833                 ++idx;
834         }
835
836         return 0;
837 }
838
839 static int trace__read_syscall_info(struct trace *trace, int id)
840 {
841         char tp_name[128];
842         struct syscall *sc;
843         const char *name = audit_syscall_to_name(id, trace->audit_machine);
844
845         if (name == NULL)
846                 return -1;
847
848         if (id > trace->syscalls.max) {
849                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
850
851                 if (nsyscalls == NULL)
852                         return -1;
853
854                 if (trace->syscalls.max != -1) {
855                         memset(nsyscalls + trace->syscalls.max + 1, 0,
856                                (id - trace->syscalls.max) * sizeof(*sc));
857                 } else {
858                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
859                 }
860
861                 trace->syscalls.table = nsyscalls;
862                 trace->syscalls.max   = id;
863         }
864
865         sc = trace->syscalls.table + id;
866         sc->name = name;
867
868         if (trace->ev_qualifier) {
869                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
870
871                 if (!(in ^ trace->not_ev_qualifier)) {
872                         sc->filtered = true;
873                         /*
874                          * No need to do read tracepoint information since this will be
875                          * filtered out.
876                          */
877                         return 0;
878                 }
879         }
880
881         sc->fmt  = syscall_fmt__find(sc->name);
882
883         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
884         sc->tp_format = event_format__new("syscalls", tp_name);
885
886         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
887                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
888                 sc->tp_format = event_format__new("syscalls", tp_name);
889         }
890
891         if (sc->tp_format == NULL)
892                 return -1;
893
894         return syscall__set_arg_fmts(sc);
895 }
896
897 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
898                                       unsigned long *args)
899 {
900         size_t printed = 0;
901
902         if (sc->tp_format != NULL) {
903                 struct format_field *field;
904                 u8 bit = 1;
905                 struct syscall_arg arg = {
906                         .idx  = 0,
907                         .mask = 0,
908                 };
909
910                 for (field = sc->tp_format->format.fields->next; field;
911                      field = field->next, ++arg.idx, bit <<= 1) {
912                         if (arg.mask & bit)
913                                 continue;
914
915                         if (args[arg.idx] == 0)
916                                 continue;
917
918                         printed += scnprintf(bf + printed, size - printed,
919                                              "%s%s: ", printed ? ", " : "", field->name);
920                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
921                                 arg.val = args[arg.idx];
922                                 if (sc->arg_parm)
923                                         arg.parm = sc->arg_parm[arg.idx];
924                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
925                                                                       size - printed, &arg);
926                         } else {
927                                 printed += scnprintf(bf + printed, size - printed,
928                                                      "%ld", args[arg.idx]);
929                         }
930                 }
931         } else {
932                 int i = 0;
933
934                 while (i < 6) {
935                         printed += scnprintf(bf + printed, size - printed,
936                                              "%sarg%d: %ld",
937                                              printed ? ", " : "", i, args[i]);
938                         ++i;
939                 }
940         }
941
942         return printed;
943 }
944
/* Signature shared by the per-tracepoint sample handlers in this file. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
947
948 static struct syscall *trace__syscall_info(struct trace *trace,
949                                            struct perf_evsel *evsel,
950                                            struct perf_sample *sample)
951 {
952         int id = perf_evsel__intval(evsel, sample, "id");
953
954         if (id < 0) {
955
956                 /*
957                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
958                  * before that, leaving at a higher verbosity level till that is
959                  * explained. Reproduced with plain ftrace with:
960                  *
961                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
962                  * grep "NR -1 " /t/trace_pipe
963                  *
964                  * After generating some load on the machine.
965                  */
966                 if (verbose > 1) {
967                         static u64 n;
968                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
969                                 id, perf_evsel__name(evsel), ++n);
970                 }
971                 return NULL;
972         }
973
974         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
975             trace__read_syscall_info(trace, id))
976                 goto out_cant_read;
977
978         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
979                 goto out_cant_read;
980
981         return &trace->syscalls.table[id];
982
983 out_cant_read:
984         if (verbose) {
985                 fprintf(trace->output, "Problems reading syscall %d", id);
986                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
987                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
988                 fputs(" information\n", trace->output);
989         }
990         return NULL;
991 }
992
993 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
994                             struct perf_sample *sample)
995 {
996         char *msg;
997         void *args;
998         size_t printed = 0;
999         struct thread *thread;
1000         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1001         struct thread_trace *ttrace;
1002
1003         if (sc == NULL)
1004                 return -1;
1005
1006         if (sc->filtered)
1007                 return 0;
1008
1009         thread = machine__findnew_thread(&trace->host, sample->pid,
1010                                          sample->tid);
1011         ttrace = thread__trace(thread, trace->output);
1012         if (ttrace == NULL)
1013                 return -1;
1014
1015         args = perf_evsel__rawptr(evsel, sample, "args");
1016         if (args == NULL) {
1017                 fprintf(trace->output, "Problems reading syscall arguments\n");
1018                 return -1;
1019         }
1020
1021         ttrace = thread->priv;
1022
1023         if (ttrace->entry_str == NULL) {
1024                 ttrace->entry_str = malloc(1024);
1025                 if (!ttrace->entry_str)
1026                         return -1;
1027         }
1028
1029         ttrace->entry_time = sample->time;
1030         msg = ttrace->entry_str;
1031         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1032
1033         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1034
1035         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1036                 if (!trace->duration_filter) {
1037                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1038                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1039                 }
1040         } else
1041                 ttrace->entry_pending = true;
1042
1043         return 0;
1044 }
1045
1046 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1047                            struct perf_sample *sample)
1048 {
1049         int ret;
1050         u64 duration = 0;
1051         struct thread *thread;
1052         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1053         struct thread_trace *ttrace;
1054
1055         if (sc == NULL)
1056                 return -1;
1057
1058         if (sc->filtered)
1059                 return 0;
1060
1061         thread = machine__findnew_thread(&trace->host, sample->pid,
1062                                          sample->tid);
1063         ttrace = thread__trace(thread, trace->output);
1064         if (ttrace == NULL)
1065                 return -1;
1066
1067         ret = perf_evsel__intval(evsel, sample, "ret");
1068
1069         ttrace = thread->priv;
1070
1071         ttrace->exit_time = sample->time;
1072
1073         if (ttrace->entry_time) {
1074                 duration = sample->time - ttrace->entry_time;
1075                 if (trace__filter_duration(trace, duration))
1076                         goto out;
1077         } else if (trace->duration_filter)
1078                 goto out;
1079
1080         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1081
1082         if (ttrace->entry_pending) {
1083                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1084         } else {
1085                 fprintf(trace->output, " ... [");
1086                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1087                 fprintf(trace->output, "]: %s()", sc->name);
1088         }
1089
1090         if (sc->fmt == NULL) {
1091 signed_print:
1092                 fprintf(trace->output, ") = %d", ret);
1093         } else if (ret < 0 && sc->fmt->errmsg) {
1094                 char bf[256];
1095                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1096                            *e = audit_errno_to_name(-ret);
1097
1098                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1099         } else if (ret == 0 && sc->fmt->timeout)
1100                 fprintf(trace->output, ") = 0 Timeout");
1101         else if (sc->fmt->hexret)
1102                 fprintf(trace->output, ") = %#x", ret);
1103         else
1104                 goto signed_print;
1105
1106         fputc('\n', trace->output);
1107 out:
1108         ttrace->entry_pending = false;
1109
1110         return 0;
1111 }
1112
1113 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1114                                      struct perf_sample *sample)
1115 {
1116         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1117         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1118         struct thread *thread = machine__findnew_thread(&trace->host,
1119                                                         sample->pid,
1120                                                         sample->tid);
1121         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1122
1123         if (ttrace == NULL)
1124                 goto out_dump;
1125
1126         ttrace->runtime_ms += runtime_ms;
1127         trace->runtime_ms += runtime_ms;
1128         return 0;
1129
1130 out_dump:
1131         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1132                evsel->name,
1133                perf_evsel__strval(evsel, sample, "comm"),
1134                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1135                runtime,
1136                perf_evsel__intval(evsel, sample, "vruntime"));
1137         return 0;
1138 }
1139
1140 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1141 {
1142         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1143             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1144                 return false;
1145
1146         if (trace->pid_list || trace->tid_list)
1147                 return true;
1148
1149         return false;
1150 }
1151
1152 static int trace__process_sample(struct perf_tool *tool,
1153                                  union perf_event *event __maybe_unused,
1154                                  struct perf_sample *sample,
1155                                  struct perf_evsel *evsel,
1156                                  struct machine *machine __maybe_unused)
1157 {
1158         struct trace *trace = container_of(tool, struct trace, tool);
1159         int err = 0;
1160
1161         tracepoint_handler handler = evsel->handler.func;
1162
1163         if (skip_sample(trace, sample))
1164                 return 0;
1165
1166         if (!trace->full_time && trace->base_time == 0)
1167                 trace->base_time = sample->time;
1168
1169         if (handler)
1170                 handler(trace, evsel, sample);
1171
1172         return err;
1173 }
1174
1175 static bool
1176 perf_session__has_tp(struct perf_session *session, const char *name)
1177 {
1178         struct perf_evsel *evsel;
1179
1180         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1181
1182         return evsel != NULL;
1183 }
1184
1185 static int parse_target_str(struct trace *trace)
1186 {
1187         if (trace->opts.target.pid) {
1188                 trace->pid_list = intlist__new(trace->opts.target.pid);
1189                 if (trace->pid_list == NULL) {
1190                         pr_err("Error parsing process id string\n");
1191                         return -EINVAL;
1192                 }
1193         }
1194
1195         if (trace->opts.target.tid) {
1196                 trace->tid_list = intlist__new(trace->opts.target.tid);
1197                 if (trace->tid_list == NULL) {
1198                         pr_err("Error parsing thread id string\n");
1199                         return -EINVAL;
1200                 }
1201         }
1202
1203         return 0;
1204 }
1205
1206 static int trace__run(struct trace *trace, int argc, const char **argv)
1207 {
1208         struct perf_evlist *evlist = perf_evlist__new();
1209         struct perf_evsel *evsel;
1210         int err = -1, i;
1211         unsigned long before;
1212         const bool forks = argc > 0;
1213
1214         if (evlist == NULL) {
1215                 fprintf(trace->output, "Not enough memory to run!\n");
1216                 goto out;
1217         }
1218
1219         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1220             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1221                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1222                 goto out_delete_evlist;
1223         }
1224
1225         if (trace->sched &&
1226             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1227                                    trace__sched_stat_runtime)) {
1228                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1229                 goto out_delete_evlist;
1230         }
1231
1232         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1233         if (err < 0) {
1234                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1235                 goto out_delete_evlist;
1236         }
1237
1238         err = trace__symbols_init(trace, evlist);
1239         if (err < 0) {
1240                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1241                 goto out_delete_maps;
1242         }
1243
1244         perf_evlist__config(evlist, &trace->opts);
1245
1246         signal(SIGCHLD, sig_handler);
1247         signal(SIGINT, sig_handler);
1248
1249         if (forks) {
1250                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1251                                                     argv, false, false);
1252                 if (err < 0) {
1253                         fprintf(trace->output, "Couldn't run the workload!\n");
1254                         goto out_delete_maps;
1255                 }
1256         }
1257
1258         err = perf_evlist__open(evlist);
1259         if (err < 0) {
1260                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1261                 goto out_delete_maps;
1262         }
1263
1264         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1265         if (err < 0) {
1266                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1267                 goto out_close_evlist;
1268         }
1269
1270         perf_evlist__enable(evlist);
1271
1272         if (forks)
1273                 perf_evlist__start_workload(evlist);
1274
1275         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1276 again:
1277         before = trace->nr_events;
1278
1279         for (i = 0; i < evlist->nr_mmaps; i++) {
1280                 union perf_event *event;
1281
1282                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1283                         const u32 type = event->header.type;
1284                         tracepoint_handler handler;
1285                         struct perf_sample sample;
1286
1287                         ++trace->nr_events;
1288
1289                         err = perf_evlist__parse_sample(evlist, event, &sample);
1290                         if (err) {
1291                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1292                                 continue;
1293                         }
1294
1295                         if (!trace->full_time && trace->base_time == 0)
1296                                 trace->base_time = sample.time;
1297
1298                         if (type != PERF_RECORD_SAMPLE) {
1299                                 trace__process_event(trace, &trace->host, event);
1300                                 continue;
1301                         }
1302
1303                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1304                         if (evsel == NULL) {
1305                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1306                                 continue;
1307                         }
1308
1309                         if (sample.raw_data == NULL) {
1310                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1311                                        perf_evsel__name(evsel), sample.tid,
1312                                        sample.cpu, sample.raw_size);
1313                                 continue;
1314                         }
1315
1316                         handler = evsel->handler.func;
1317                         handler(trace, evsel, &sample);
1318
1319                         if (done)
1320                                 goto out_unmap_evlist;
1321                 }
1322         }
1323
1324         if (trace->nr_events == before) {
1325                 if (done)
1326                         goto out_unmap_evlist;
1327
1328                 poll(evlist->pollfd, evlist->nr_fds, -1);
1329         }
1330
1331         if (done)
1332                 perf_evlist__disable(evlist);
1333
1334         goto again;
1335
1336 out_unmap_evlist:
1337         perf_evlist__munmap(evlist);
1338 out_close_evlist:
1339         perf_evlist__close(evlist);
1340 out_delete_maps:
1341         perf_evlist__delete_maps(evlist);
1342 out_delete_evlist:
1343         perf_evlist__delete(evlist);
1344 out:
1345         return err;
1346 }
1347
1348 static int trace__replay(struct trace *trace)
1349 {
1350         const struct perf_evsel_str_handler handlers[] = {
1351                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1352                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1353         };
1354
1355         struct perf_session *session;
1356         int err = -1;
1357
1358         trace->tool.sample        = trace__process_sample;
1359         trace->tool.mmap          = perf_event__process_mmap;
1360         trace->tool.mmap2         = perf_event__process_mmap2;
1361         trace->tool.comm          = perf_event__process_comm;
1362         trace->tool.exit          = perf_event__process_exit;
1363         trace->tool.fork          = perf_event__process_fork;
1364         trace->tool.attr          = perf_event__process_attr;
1365         trace->tool.tracing_data = perf_event__process_tracing_data;
1366         trace->tool.build_id      = perf_event__process_build_id;
1367
1368         trace->tool.ordered_samples = true;
1369         trace->tool.ordering_requires_timestamps = true;
1370
1371         /* add tid to output */
1372         trace->multiple_threads = true;
1373
1374         if (symbol__init() < 0)
1375                 return -1;
1376
1377         session = perf_session__new(input_name, O_RDONLY, 0, false,
1378                                     &trace->tool);
1379         if (session == NULL)
1380                 return -ENOMEM;
1381
1382         err = perf_session__set_tracepoints_handlers(session, handlers);
1383         if (err)
1384                 goto out;
1385
1386         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1387                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1388                 goto out;
1389         }
1390
1391         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1392                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1393                 goto out;
1394         }
1395
1396         err = parse_target_str(trace);
1397         if (err != 0)
1398                 goto out;
1399
1400         setup_pager();
1401
1402         err = perf_session__process_events(session, &trace->tool);
1403         if (err)
1404                 pr_err("Failed to process events, error %d", err);
1405
1406 out:
1407         perf_session__delete(session);
1408
1409         return err;
1410 }
1411
/*
 * Write the banner and column titles of the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() results for the lines written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const lines[] = {
		"\n _____________________________________________________________________\n",
		" __)    Summary of events    (__\n\n",
		"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		printed += fprintf(fp, "%s", lines[i]);

	return printed;
}
1423
1424 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1425 {
1426         size_t printed = trace__fprintf_threads_header(fp);
1427         struct rb_node *nd;
1428
1429         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1430                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1431                 struct thread_trace *ttrace = thread->priv;
1432                 const char *color;
1433                 double ratio;
1434
1435                 if (ttrace == NULL)
1436                         continue;
1437
1438                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1439
1440                 color = PERF_COLOR_NORMAL;
1441                 if (ratio > 50.0)
1442                         color = PERF_COLOR_RED;
1443                 else if (ratio > 25.0)
1444                         color = PERF_COLOR_GREEN;
1445                 else if (ratio > 5.0)
1446                         color = PERF_COLOR_YELLOW;
1447
1448                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1449                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1450                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1451                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1452         }
1453
1454         return printed;
1455 }
1456
1457 static int trace__set_duration(const struct option *opt, const char *str,
1458                                int unset __maybe_unused)
1459 {
1460         struct trace *trace = opt->value;
1461
1462         trace->duration_filter = atof(str);
1463         return 0;
1464 }
1465
1466 static int trace__open_output(struct trace *trace, const char *filename)
1467 {
1468         struct stat st;
1469
1470         if (!stat(filename, &st) && st.st_size) {
1471                 char oldname[PATH_MAX];
1472
1473                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1474                 unlink(oldname);
1475                 rename(filename, oldname);
1476         }
1477
1478         trace->output = fopen(filename, "w");
1479
1480         return trace->output == NULL ? -errno : 0;
1481 }
1482
1483 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1484 {
1485         const char * const trace_usage[] = {
1486                 "perf trace [<options>] [<command>]",
1487                 "perf trace [<options>] -- <command> [<options>]",
1488                 NULL
1489         };
1490         struct trace trace = {
1491                 .audit_machine = audit_detect_machine(),
1492                 .syscalls = {
1493                         . max = -1,
1494                 },
1495                 .opts = {
1496                         .target = {
1497                                 .uid       = UINT_MAX,
1498                                 .uses_mmap = true,
1499                         },
1500                         .user_freq     = UINT_MAX,
1501                         .user_interval = ULLONG_MAX,
1502                         .no_delay      = true,
1503                         .mmap_pages    = 1024,
1504                 },
1505                 .output = stdout,
1506         };
1507         const char *output_name = NULL;
1508         const char *ev_qualifier_str = NULL;
1509         const struct option trace_options[] = {
1510         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1511                     "list of events to trace"),
1512         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1513         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1514         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1515                     "trace events on existing process id"),
1516         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1517                     "trace events on existing thread id"),
1518         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1519                     "system-wide collection from all CPUs"),
1520         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1521                     "list of cpus to monitor"),
1522         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1523                     "child tasks do not inherit counters"),
1524         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1525                      "number of mmap data pages"),
1526         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1527                    "user to profile"),
1528         OPT_CALLBACK(0, "duration", &trace, "float",
1529                      "show only events with duration > N.M ms",
1530                      trace__set_duration),
1531         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1532         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1533         OPT_BOOLEAN('T', "time", &trace.full_time,
1534                     "Show full timestamp, not time relative to first start"),
1535         OPT_END()
1536         };
1537         int err;
1538         char bf[BUFSIZ];
1539
1540         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1541
1542         if (output_name != NULL) {
1543                 err = trace__open_output(&trace, output_name);
1544                 if (err < 0) {
1545                         perror("failed to create output file");
1546                         goto out;
1547                 }
1548         }
1549
1550         if (ev_qualifier_str != NULL) {
1551                 const char *s = ev_qualifier_str;
1552
1553                 trace.not_ev_qualifier = *s == '!';
1554                 if (trace.not_ev_qualifier)
1555                         ++s;
1556                 trace.ev_qualifier = strlist__new(true, s);
1557                 if (trace.ev_qualifier == NULL) {
1558                         fputs("Not enough memory to parse event qualifier",
1559                               trace.output);
1560                         err = -ENOMEM;
1561                         goto out_close;
1562                 }
1563         }
1564
1565         err = perf_target__validate(&trace.opts.target);
1566         if (err) {
1567                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1568                 fprintf(trace.output, "%s", bf);
1569                 goto out_close;
1570         }
1571
1572         err = perf_target__parse_uid(&trace.opts.target);
1573         if (err) {
1574                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1575                 fprintf(trace.output, "%s", bf);
1576                 goto out_close;
1577         }
1578
1579         if (!argc && perf_target__none(&trace.opts.target))
1580                 trace.opts.target.system_wide = true;
1581
1582         if (input_name)
1583                 err = trace__replay(&trace);
1584         else
1585                 err = trace__run(&trace, argc, argv);
1586
1587         if (trace.sched && !err)
1588                 trace__fprintf_thread_summary(&trace, trace.output);
1589
1590 out_close:
1591         if (output_name != NULL)
1592                 fclose(trace.output);
1593 out:
1594         return err;
1595 }