perf trace: Beautify send/recv syscall 'flags' arg
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/mman.h>
17 #include <linux/futex.h>
18
/* Fallback values for distros whose headers do not provide these: */
#if !defined(MAP_STACK)
#define MAP_STACK               0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON           100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE          12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE        13
#endif
35
36 struct syscall_arg {
37         unsigned long val;
38         void          *parm;
39         u8            idx;
40         u8            mask;
41 };
42
/* A table of names that is indexed directly by an integer argument value. */
struct strarray {
        int nr_entries;         /* number of strings in entries[] */
        const char **entries;   /* the names, entries[value] */
};
47
/*
 * Define 'strarray__<array>', a struct strarray wrapping the string table
 * named 'array'; the entry count is taken from the table itself.
 */
#define DEFINE_STRARRAY(array) \
        struct strarray strarray__##array = { \
                .entries    = array, \
                .nr_entries = ARRAY_SIZE(array), \
        }
52
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54                                               struct syscall_arg *arg)
55 {
56         int idx = arg->val;
57         struct strarray *sa = arg->parm;
58
59         if (idx < 0 || idx >= sa->nr_entries)
60                 return scnprintf(bf, size, "%d", idx);
61
62         return scnprintf(bf, size, "%s", sa->entries[idx]);
63 }
64
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
66
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68                                          struct syscall_arg *arg)
69 {
70         return scnprintf(bf, size, "%#lx", arg->val);
71 }
72
73 #define SCA_HEX syscall_arg__scnprintf_hex
74
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76                                                struct syscall_arg *arg)
77 {
78         int printed = 0, prot = arg->val;
79
80         if (prot == PROT_NONE)
81                 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83         if (prot & PROT_##n) { \
84                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
85                 prot &= ~PROT_##n; \
86         }
87
88         P_MMAP_PROT(EXEC);
89         P_MMAP_PROT(READ);
90         P_MMAP_PROT(WRITE);
91 #ifdef PROT_SEM
92         P_MMAP_PROT(SEM);
93 #endif
94         P_MMAP_PROT(GROWSDOWN);
95         P_MMAP_PROT(GROWSUP);
96 #undef P_MMAP_PROT
97
98         if (prot)
99                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
100
101         return printed;
102 }
103
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
105
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107                                                 struct syscall_arg *arg)
108 {
109         int printed = 0, flags = arg->val;
110
111 #define P_MMAP_FLAG(n) \
112         if (flags & MAP_##n) { \
113                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
114                 flags &= ~MAP_##n; \
115         }
116
117         P_MMAP_FLAG(SHARED);
118         P_MMAP_FLAG(PRIVATE);
119 #ifdef MAP_32BIT
120         P_MMAP_FLAG(32BIT);
121 #endif
122         P_MMAP_FLAG(ANONYMOUS);
123         P_MMAP_FLAG(DENYWRITE);
124         P_MMAP_FLAG(EXECUTABLE);
125         P_MMAP_FLAG(FILE);
126         P_MMAP_FLAG(FIXED);
127         P_MMAP_FLAG(GROWSDOWN);
128 #ifdef MAP_HUGETLB
129         P_MMAP_FLAG(HUGETLB);
130 #endif
131         P_MMAP_FLAG(LOCKED);
132         P_MMAP_FLAG(NONBLOCK);
133         P_MMAP_FLAG(NORESERVE);
134         P_MMAP_FLAG(POPULATE);
135         P_MMAP_FLAG(STACK);
136 #ifdef MAP_UNINITIALIZED
137         P_MMAP_FLAG(UNINITIALIZED);
138 #endif
139 #undef P_MMAP_FLAG
140
141         if (flags)
142                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
143
144         return printed;
145 }
146
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
148
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150                                                       struct syscall_arg *arg)
151 {
152         int behavior = arg->val;
153
154         switch (behavior) {
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
156         P_MADV_BHV(NORMAL);
157         P_MADV_BHV(RANDOM);
158         P_MADV_BHV(SEQUENTIAL);
159         P_MADV_BHV(WILLNEED);
160         P_MADV_BHV(DONTNEED);
161         P_MADV_BHV(REMOVE);
162         P_MADV_BHV(DONTFORK);
163         P_MADV_BHV(DOFORK);
164         P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166         P_MADV_BHV(SOFT_OFFLINE);
167 #endif
168         P_MADV_BHV(MERGEABLE);
169         P_MADV_BHV(UNMERGEABLE);
170 #ifdef MADV_HUGEPAGE
171         P_MADV_BHV(HUGEPAGE);
172 #endif
173 #ifdef MADV_NOHUGEPAGE
174         P_MADV_BHV(NOHUGEPAGE);
175 #endif
176 #ifdef MADV_DONTDUMP
177         P_MADV_BHV(DONTDUMP);
178 #endif
179 #ifdef MADV_DODUMP
180         P_MADV_BHV(DODUMP);
181 #endif
182 #undef P_MADV_PHV
183         default: break;
184         }
185
186         return scnprintf(bf, size, "%#x", behavior);
187 }
188
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
190
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
192 {
193         enum syscall_futex_args {
194                 SCF_UADDR   = (1 << 0),
195                 SCF_OP      = (1 << 1),
196                 SCF_VAL     = (1 << 2),
197                 SCF_TIMEOUT = (1 << 3),
198                 SCF_UADDR2  = (1 << 4),
199                 SCF_VAL3    = (1 << 5),
200         };
201         int op = arg->val;
202         int cmd = op & FUTEX_CMD_MASK;
203         size_t printed = 0;
204
205         switch (cmd) {
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
208         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
211         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
212         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
213         P_FUTEX_OP(WAKE_OP);                                                      break;
214         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
217         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
218         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
219         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
220         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
221         }
222
223         if (op & FUTEX_PRIVATE_FLAG)
224                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
225
226         if (op & FUTEX_CLOCK_REALTIME)
227                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
228
229         return printed;
230 }
231
232 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
233
/* Names for the 'which' argument of getitimer/setitimer, indexed by value. */
static const char *itimers[] = {
        "REAL",
        "VIRTUAL",
        "PROF",
};
static DEFINE_STRARRAY(itimers);
236
/* Names for the lseek 'whence' argument, indexed by value. */
static const char *whences[] = {
        "SET",
        "CUR",
        "END",
#ifdef SEEK_DATA
        "DATA",
#endif
#ifdef SEEK_HOLE
        "HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
246
/*
 * Names for the fcntl 'cmd' argument, indexed by value.  All names are
 * given without the "F_" prefix; the tail of the table used to carry it
 * while the head did not, producing inconsistent output.
 */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
        "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
        "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
254
/*
 * Names for the 'resource' argument of getrlimit/setrlimit/prlimit64,
 * indexed by value.
 */
static const char *rlimit_resources[] = {
        "CPU",     "FSIZE", "DATA",  "STACK",      "CORE",     "RSS",
        "NPROC",   "NOFILE",
        "MEMLOCK", "AS",    "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE",
        "RTPRIO",  "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
261
/* Names for the rt_sigprocmask 'how' argument, indexed by value. */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);
264
/*
 * Names for the socket 'family' argument, indexed by AF_* value.
 *
 * The table is positional, so every value up to the last one needs a slot:
 * 28 (AF_MPLS) was missing, which shifted CAN (29) through VSOCK (40) one
 * position down and made each of them print the next family's name.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray__socket_families: wraps the table above for use with SCA_STRARRAY. */
static DEFINE_STRARRAY(socket_families);
274
275 #ifndef SOCK_TYPE_MASK
276 #define SOCK_TYPE_MASK 0xf
277 #endif
278
279 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
280                                                       struct syscall_arg *arg)
281 {
282         size_t printed;
283         int type = arg->val,
284             flags = type & ~SOCK_TYPE_MASK;
285
286         type &= SOCK_TYPE_MASK;
287         /*
288          * Can't use a strarray, MIPS may override for ABI reasons.
289          */
290         switch (type) {
291 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
292         P_SK_TYPE(STREAM);
293         P_SK_TYPE(DGRAM);
294         P_SK_TYPE(RAW);
295         P_SK_TYPE(RDM);
296         P_SK_TYPE(SEQPACKET);
297         P_SK_TYPE(DCCP);
298         P_SK_TYPE(PACKET);
299 #undef P_SK_TYPE
300         default:
301                 printed = scnprintf(bf, size, "%#x", type);
302         }
303
304 #define P_SK_FLAG(n) \
305         if (flags & SOCK_##n) { \
306                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
307                 flags &= ~SOCK_##n; \
308         }
309
310         P_SK_FLAG(CLOEXEC);
311         P_SK_FLAG(NONBLOCK);
312 #undef P_SK_FLAG
313
314         if (flags)
315                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
316
317         return printed;
318 }
319
320 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
321
322 #ifndef MSG_PROBE
323 #define MSG_PROBE            0x10
324 #endif
325 #ifndef MSG_SENDPAGE_NOTLAST
326 #define MSG_SENDPAGE_NOTLAST 0x20000
327 #endif
328 #ifndef MSG_FASTOPEN
329 #define MSG_FASTOPEN         0x20000000
330 #endif
331
332 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
333                                                struct syscall_arg *arg)
334 {
335         int printed = 0, flags = arg->val;
336
337         if (flags == 0)
338                 return scnprintf(bf, size, "NONE");
339 #define P_MSG_FLAG(n) \
340         if (flags & MSG_##n) { \
341                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
342                 flags &= ~MSG_##n; \
343         }
344
345         P_MSG_FLAG(OOB);
346         P_MSG_FLAG(PEEK);
347         P_MSG_FLAG(DONTROUTE);
348         P_MSG_FLAG(TRYHARD);
349         P_MSG_FLAG(CTRUNC);
350         P_MSG_FLAG(PROBE);
351         P_MSG_FLAG(TRUNC);
352         P_MSG_FLAG(DONTWAIT);
353         P_MSG_FLAG(EOR);
354         P_MSG_FLAG(WAITALL);
355         P_MSG_FLAG(FIN);
356         P_MSG_FLAG(SYN);
357         P_MSG_FLAG(CONFIRM);
358         P_MSG_FLAG(RST);
359         P_MSG_FLAG(ERRQUEUE);
360         P_MSG_FLAG(NOSIGNAL);
361         P_MSG_FLAG(MORE);
362         P_MSG_FLAG(WAITFORONE);
363         P_MSG_FLAG(SENDPAGE_NOTLAST);
364         P_MSG_FLAG(FASTOPEN);
365         P_MSG_FLAG(CMSG_CLOEXEC);
366 #undef P_MSG_FLAG
367
368         if (flags)
369                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
370
371         return printed;
372 }
373
374 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
375
376 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
377                                                  struct syscall_arg *arg)
378 {
379         size_t printed = 0;
380         int mode = arg->val;
381
382         if (mode == F_OK) /* 0 */
383                 return scnprintf(bf, size, "F");
384 #define P_MODE(n) \
385         if (mode & n##_OK) { \
386                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
387                 mode &= ~n##_OK; \
388         }
389
390         P_MODE(R);
391         P_MODE(W);
392         P_MODE(X);
393 #undef P_MODE
394
395         if (mode)
396                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
397
398         return printed;
399 }
400
401 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
402
403 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
404                                                struct syscall_arg *arg)
405 {
406         int printed = 0, flags = arg->val;
407
408         if (!(flags & O_CREAT))
409                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
410
411         if (flags == 0)
412                 return scnprintf(bf, size, "RDONLY");
413 #define P_FLAG(n) \
414         if (flags & O_##n) { \
415                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416                 flags &= ~O_##n; \
417         }
418
419         P_FLAG(APPEND);
420         P_FLAG(ASYNC);
421         P_FLAG(CLOEXEC);
422         P_FLAG(CREAT);
423         P_FLAG(DIRECT);
424         P_FLAG(DIRECTORY);
425         P_FLAG(EXCL);
426         P_FLAG(LARGEFILE);
427         P_FLAG(NOATIME);
428         P_FLAG(NOCTTY);
429 #ifdef O_NONBLOCK
430         P_FLAG(NONBLOCK);
431 #elif O_NDELAY
432         P_FLAG(NDELAY);
433 #endif
434 #ifdef O_PATH
435         P_FLAG(PATH);
436 #endif
437         P_FLAG(RDWR);
438 #ifdef O_DSYNC
439         if ((flags & O_SYNC) == O_SYNC)
440                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
441         else {
442                 P_FLAG(DSYNC);
443         }
444 #else
445         P_FLAG(SYNC);
446 #endif
447         P_FLAG(TRUNC);
448         P_FLAG(WRONLY);
449 #undef P_FLAG
450
451         if (flags)
452                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
453
454         return printed;
455 }
456
457 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
458
459 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
460 {
461         int sig = arg->val;
462
463         switch (sig) {
464 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
465         P_SIGNUM(HUP);
466         P_SIGNUM(INT);
467         P_SIGNUM(QUIT);
468         P_SIGNUM(ILL);
469         P_SIGNUM(TRAP);
470         P_SIGNUM(ABRT);
471         P_SIGNUM(BUS);
472         P_SIGNUM(FPE);
473         P_SIGNUM(KILL);
474         P_SIGNUM(USR1);
475         P_SIGNUM(SEGV);
476         P_SIGNUM(USR2);
477         P_SIGNUM(PIPE);
478         P_SIGNUM(ALRM);
479         P_SIGNUM(TERM);
480         P_SIGNUM(STKFLT);
481         P_SIGNUM(CHLD);
482         P_SIGNUM(CONT);
483         P_SIGNUM(STOP);
484         P_SIGNUM(TSTP);
485         P_SIGNUM(TTIN);
486         P_SIGNUM(TTOU);
487         P_SIGNUM(URG);
488         P_SIGNUM(XCPU);
489         P_SIGNUM(XFSZ);
490         P_SIGNUM(VTALRM);
491         P_SIGNUM(PROF);
492         P_SIGNUM(WINCH);
493         P_SIGNUM(IO);
494         P_SIGNUM(PWR);
495         P_SIGNUM(SYS);
496         default: break;
497         }
498
499         return scnprintf(bf, size, "%#x", sig);
500 }
501
502 #define SCA_SIGNUM syscall_arg__scnprintf_signum
503
504 static struct syscall_fmt {
505         const char *name;
506         const char *alias;
507         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
508         void       *arg_parm[6];
509         bool       errmsg;
510         bool       timeout;
511         bool       hexret;
512 } syscall_fmts[] = {
513         { .name     = "access",     .errmsg = true,
514           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
515         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
516         { .name     = "brk",        .hexret = true,
517           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
518         { .name     = "connect",    .errmsg = true, },
519         { .name     = "fcntl",      .errmsg = true,
520           .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
521           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
522         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
523         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
524         { .name     = "futex",      .errmsg = true,
525           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
526         { .name     = "getitimer",  .errmsg = true,
527           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
528           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
529         { .name     = "getrlimit",  .errmsg = true,
530           .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
531           .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
532         { .name     = "ioctl",      .errmsg = true,
533           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
534         { .name     = "kill",       .errmsg = true,
535           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
536         { .name     = "lseek",      .errmsg = true,
537           .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
538           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
539         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
540         { .name     = "madvise",    .errmsg = true,
541           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
542                              [2] = SCA_MADV_BHV, /* behavior */ }, },
543         { .name     = "mmap",       .hexret = true,
544           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
545                              [2] = SCA_MMAP_PROT, /* prot */
546                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
547         { .name     = "mprotect",   .errmsg = true,
548           .arg_scnprintf = { [0] = SCA_HEX, /* start */
549                              [2] = SCA_MMAP_PROT, /* prot */ }, },
550         { .name     = "mremap",     .hexret = true,
551           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
552                              [4] = SCA_HEX, /* new_addr */ }, },
553         { .name     = "munmap",     .errmsg = true,
554           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
555         { .name     = "open",       .errmsg = true,
556           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
557         { .name     = "open_by_handle_at", .errmsg = true,
558           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
559         { .name     = "openat",     .errmsg = true,
560           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
561         { .name     = "poll",       .errmsg = true, .timeout = true, },
562         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
563         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
564         { .name     = "prlimit64",  .errmsg = true,
565           .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
566           .arg_parm      = { [1] = &strarray__rlimit_resources, /* resource */ }, },
567         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
568         { .name     = "read",       .errmsg = true, },
569         { .name     = "recvfrom",   .errmsg = true,
570           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
571         { .name     = "recvmmsg",   .errmsg = true,
572           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
573         { .name     = "recvmsg",    .errmsg = true,
574           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
575         { .name     = "rt_sigaction", .errmsg = true,
576           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
577         { .name     = "rt_sigprocmask", .errmsg = true,
578           .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
579           .arg_parm      = { [0] = &strarray__sighow, /* how */ }, },
580         { .name     = "rt_sigqueueinfo", .errmsg = true,
581           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
582         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
583           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
584         { .name     = "select",     .errmsg = true, .timeout = true, },
585         { .name     = "sendmmsg",    .errmsg = true,
586           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
587         { .name     = "sendmsg",    .errmsg = true,
588           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
589         { .name     = "sendto",     .errmsg = true,
590           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
591         { .name     = "setitimer",  .errmsg = true,
592           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
593           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
594         { .name     = "setrlimit",  .errmsg = true,
595           .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
596           .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
597         { .name     = "socket",     .errmsg = true,
598           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
599                              [1] = SCA_SK_TYPE, /* type */ },
600           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
601         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
602         { .name     = "tgkill",     .errmsg = true,
603           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
604         { .name     = "tkill",      .errmsg = true,
605           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
606         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
607 };
608
609 static int syscall_fmt__cmp(const void *name, const void *fmtp)
610 {
611         const struct syscall_fmt *fmt = fmtp;
612         return strcmp(name, fmt->name);
613 }
614
615 static struct syscall_fmt *syscall_fmt__find(const char *name)
616 {
617         const int nmemb = ARRAY_SIZE(syscall_fmts);
618         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
619 }
620
/* What is known about one syscall, stored in a table indexed by syscall id. */
struct syscall {
        struct event_format *tp_format; /* format of its sys_enter_* tracepoint */
        const char *name;               /* syscall name */
        bool filtered;                  /* set when the event qualifier excludes it */
        struct syscall_fmt *fmt;        /* formatting hints, NULL when there are none */
        /* beautifier to use for each argument, NULL entries use the default */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void **arg_parm;                /* per-argument data for the beautifiers */
};
629
630 static size_t fprintf_duration(unsigned long t, FILE *fp)
631 {
632         double duration = (double)t / NSEC_PER_MSEC;
633         size_t printed = fprintf(fp, "(");
634
635         if (duration >= 1.0)
636                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
637         else if (duration >= 0.01)
638                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
639         else
640                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
641         return printed + fprintf(fp, "): ");
642 }
643
644 struct thread_trace {
645         u64               entry_time;
646         u64               exit_time;
647         bool              entry_pending;
648         unsigned long     nr_events;
649         char              *entry_str;
650         double            runtime_ms;
651 };
652
653 static struct thread_trace *thread_trace__new(void)
654 {
655         return zalloc(sizeof(struct thread_trace));
656 }
657
658 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
659 {
660         struct thread_trace *ttrace;
661
662         if (thread == NULL)
663                 goto fail;
664
665         if (thread->priv == NULL)
666                 thread->priv = thread_trace__new();
667                 
668         if (thread->priv == NULL)
669                 goto fail;
670
671         ttrace = thread->priv;
672         ++ttrace->nr_events;
673
674         return ttrace;
675 fail:
676         color_fprintf(fp, PERF_COLOR_RED,
677                       "WARNING: not enough memory, dropping samples!\n");
678         return NULL;
679 }
680
681 struct trace {
682         struct perf_tool        tool;
683         int                     audit_machine;
684         struct {
685                 int             max;
686                 struct syscall  *table;
687         } syscalls;
688         struct perf_record_opts opts;
689         struct machine          host;
690         u64                     base_time;
691         bool                    full_time;
692         FILE                    *output;
693         unsigned long           nr_events;
694         struct strlist          *ev_qualifier;
695         bool                    not_ev_qualifier;
696         struct intlist          *tid_list;
697         struct intlist          *pid_list;
698         bool                    sched;
699         bool                    multiple_threads;
700         double                  duration_filter;
701         double                  runtime_ms;
702 };
703
704 static bool trace__filter_duration(struct trace *trace, double t)
705 {
706         return t < (trace->duration_filter * NSEC_PER_MSEC);
707 }
708
709 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
710 {
711         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
712
713         return fprintf(fp, "%10.3f ", ts);
714 }
715
716 static bool done = false;
717
718 static void sig_handler(int sig __maybe_unused)
719 {
720         done = true;
721 }
722
723 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
724                                         u64 duration, u64 tstamp, FILE *fp)
725 {
726         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
727         printed += fprintf_duration(duration, fp);
728
729         if (trace->multiple_threads)
730                 printed += fprintf(fp, "%d ", thread->tid);
731
732         return printed;
733 }
734
735 static int trace__process_event(struct trace *trace, struct machine *machine,
736                                 union perf_event *event)
737 {
738         int ret = 0;
739
740         switch (event->header.type) {
741         case PERF_RECORD_LOST:
742                 color_fprintf(trace->output, PERF_COLOR_RED,
743                               "LOST %" PRIu64 " events!\n", event->lost.lost);
744                 ret = machine__process_lost_event(machine, event);
745         default:
746                 ret = machine__process_event(machine, event);
747                 break;
748         }
749
750         return ret;
751 }
752
753 static int trace__tool_process(struct perf_tool *tool,
754                                union perf_event *event,
755                                struct perf_sample *sample __maybe_unused,
756                                struct machine *machine)
757 {
758         struct trace *trace = container_of(tool, struct trace, tool);
759         return trace__process_event(trace, machine, event);
760 }
761
762 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
763 {
764         int err = symbol__init();
765
766         if (err)
767                 return err;
768
769         machine__init(&trace->host, "", HOST_KERNEL_ID);
770         machine__create_kernel_maps(&trace->host);
771
772         if (perf_target__has_task(&trace->opts.target)) {
773                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
774                                                         trace__tool_process,
775                                                         &trace->host);
776         } else {
777                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
778                                                      &trace->host);
779         }
780
781         if (err)
782                 symbol__exit();
783
784         return err;
785 }
786
787 static int syscall__set_arg_fmts(struct syscall *sc)
788 {
789         struct format_field *field;
790         int idx = 0;
791
792         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
793         if (sc->arg_scnprintf == NULL)
794                 return -1;
795
796         if (sc->fmt)
797                 sc->arg_parm = sc->fmt->arg_parm;
798
799         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
800                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
801                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
802                 else if (field->flags & FIELD_IS_POINTER)
803                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
804                 ++idx;
805         }
806
807         return 0;
808 }
809
810 static int trace__read_syscall_info(struct trace *trace, int id)
811 {
812         char tp_name[128];
813         struct syscall *sc;
814         const char *name = audit_syscall_to_name(id, trace->audit_machine);
815
816         if (name == NULL)
817                 return -1;
818
819         if (id > trace->syscalls.max) {
820                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
821
822                 if (nsyscalls == NULL)
823                         return -1;
824
825                 if (trace->syscalls.max != -1) {
826                         memset(nsyscalls + trace->syscalls.max + 1, 0,
827                                (id - trace->syscalls.max) * sizeof(*sc));
828                 } else {
829                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
830                 }
831
832                 trace->syscalls.table = nsyscalls;
833                 trace->syscalls.max   = id;
834         }
835
836         sc = trace->syscalls.table + id;
837         sc->name = name;
838
839         if (trace->ev_qualifier) {
840                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
841
842                 if (!(in ^ trace->not_ev_qualifier)) {
843                         sc->filtered = true;
844                         /*
845                          * No need to do read tracepoint information since this will be
846                          * filtered out.
847                          */
848                         return 0;
849                 }
850         }
851
852         sc->fmt  = syscall_fmt__find(sc->name);
853
854         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
855         sc->tp_format = event_format__new("syscalls", tp_name);
856
857         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
858                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
859                 sc->tp_format = event_format__new("syscalls", tp_name);
860         }
861
862         if (sc->tp_format == NULL)
863                 return -1;
864
865         return syscall__set_arg_fmts(sc);
866 }
867
868 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
869                                       unsigned long *args)
870 {
871         size_t printed = 0;
872
873         if (sc->tp_format != NULL) {
874                 struct format_field *field;
875                 u8 bit = 1;
876                 struct syscall_arg arg = {
877                         .idx  = 0,
878                         .mask = 0,
879                 };
880
881                 for (field = sc->tp_format->format.fields->next; field;
882                      field = field->next, ++arg.idx, bit <<= 1) {
883                         if (arg.mask & bit)
884                                 continue;
885
886                         if (args[arg.idx] == 0)
887                                 continue;
888
889                         printed += scnprintf(bf + printed, size - printed,
890                                              "%s%s: ", printed ? ", " : "", field->name);
891                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
892                                 arg.val = args[arg.idx];
893                                 if (sc->arg_parm)
894                                         arg.parm = sc->arg_parm[arg.idx];
895                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
896                                                                       size - printed, &arg);
897                         } else {
898                                 printed += scnprintf(bf + printed, size - printed,
899                                                      "%ld", args[arg.idx]);
900                         }
901                 }
902         } else {
903                 int i = 0;
904
905                 while (i < 6) {
906                         printed += scnprintf(bf + printed, size - printed,
907                                              "%sarg%d: %ld",
908                                              printed ? ", " : "", i, args[i]);
909                         ++i;
910                 }
911         }
912
913         return printed;
914 }
915
/* Signature of the per-tracepoint sample handlers attached to an evsel. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
918
919 static struct syscall *trace__syscall_info(struct trace *trace,
920                                            struct perf_evsel *evsel,
921                                            struct perf_sample *sample)
922 {
923         int id = perf_evsel__intval(evsel, sample, "id");
924
925         if (id < 0) {
926
927                 /*
928                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
929                  * before that, leaving at a higher verbosity level till that is
930                  * explained. Reproduced with plain ftrace with:
931                  *
932                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
933                  * grep "NR -1 " /t/trace_pipe
934                  *
935                  * After generating some load on the machine.
936                  */
937                 if (verbose > 1) {
938                         static u64 n;
939                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
940                                 id, perf_evsel__name(evsel), ++n);
941                 }
942                 return NULL;
943         }
944
945         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
946             trace__read_syscall_info(trace, id))
947                 goto out_cant_read;
948
949         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
950                 goto out_cant_read;
951
952         return &trace->syscalls.table[id];
953
954 out_cant_read:
955         if (verbose) {
956                 fprintf(trace->output, "Problems reading syscall %d", id);
957                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
958                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
959                 fputs(" information\n", trace->output);
960         }
961         return NULL;
962 }
963
964 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
965                             struct perf_sample *sample)
966 {
967         char *msg;
968         void *args;
969         size_t printed = 0;
970         struct thread *thread;
971         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
972         struct thread_trace *ttrace;
973
974         if (sc == NULL)
975                 return -1;
976
977         if (sc->filtered)
978                 return 0;
979
980         thread = machine__findnew_thread(&trace->host, sample->pid,
981                                          sample->tid);
982         ttrace = thread__trace(thread, trace->output);
983         if (ttrace == NULL)
984                 return -1;
985
986         args = perf_evsel__rawptr(evsel, sample, "args");
987         if (args == NULL) {
988                 fprintf(trace->output, "Problems reading syscall arguments\n");
989                 return -1;
990         }
991
992         ttrace = thread->priv;
993
994         if (ttrace->entry_str == NULL) {
995                 ttrace->entry_str = malloc(1024);
996                 if (!ttrace->entry_str)
997                         return -1;
998         }
999
1000         ttrace->entry_time = sample->time;
1001         msg = ttrace->entry_str;
1002         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1003
1004         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1005
1006         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1007                 if (!trace->duration_filter) {
1008                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1009                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1010                 }
1011         } else
1012                 ttrace->entry_pending = true;
1013
1014         return 0;
1015 }
1016
1017 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1018                            struct perf_sample *sample)
1019 {
1020         int ret;
1021         u64 duration = 0;
1022         struct thread *thread;
1023         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1024         struct thread_trace *ttrace;
1025
1026         if (sc == NULL)
1027                 return -1;
1028
1029         if (sc->filtered)
1030                 return 0;
1031
1032         thread = machine__findnew_thread(&trace->host, sample->pid,
1033                                          sample->tid);
1034         ttrace = thread__trace(thread, trace->output);
1035         if (ttrace == NULL)
1036                 return -1;
1037
1038         ret = perf_evsel__intval(evsel, sample, "ret");
1039
1040         ttrace = thread->priv;
1041
1042         ttrace->exit_time = sample->time;
1043
1044         if (ttrace->entry_time) {
1045                 duration = sample->time - ttrace->entry_time;
1046                 if (trace__filter_duration(trace, duration))
1047                         goto out;
1048         } else if (trace->duration_filter)
1049                 goto out;
1050
1051         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1052
1053         if (ttrace->entry_pending) {
1054                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1055         } else {
1056                 fprintf(trace->output, " ... [");
1057                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1058                 fprintf(trace->output, "]: %s()", sc->name);
1059         }
1060
1061         if (sc->fmt == NULL) {
1062 signed_print:
1063                 fprintf(trace->output, ") = %d", ret);
1064         } else if (ret < 0 && sc->fmt->errmsg) {
1065                 char bf[256];
1066                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1067                            *e = audit_errno_to_name(-ret);
1068
1069                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1070         } else if (ret == 0 && sc->fmt->timeout)
1071                 fprintf(trace->output, ") = 0 Timeout");
1072         else if (sc->fmt->hexret)
1073                 fprintf(trace->output, ") = %#x", ret);
1074         else
1075                 goto signed_print;
1076
1077         fputc('\n', trace->output);
1078 out:
1079         ttrace->entry_pending = false;
1080
1081         return 0;
1082 }
1083
1084 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1085                                      struct perf_sample *sample)
1086 {
1087         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1088         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1089         struct thread *thread = machine__findnew_thread(&trace->host,
1090                                                         sample->pid,
1091                                                         sample->tid);
1092         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1093
1094         if (ttrace == NULL)
1095                 goto out_dump;
1096
1097         ttrace->runtime_ms += runtime_ms;
1098         trace->runtime_ms += runtime_ms;
1099         return 0;
1100
1101 out_dump:
1102         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1103                evsel->name,
1104                perf_evsel__strval(evsel, sample, "comm"),
1105                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1106                runtime,
1107                perf_evsel__intval(evsel, sample, "vruntime"));
1108         return 0;
1109 }
1110
1111 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1112 {
1113         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1114             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1115                 return false;
1116
1117         if (trace->pid_list || trace->tid_list)
1118                 return true;
1119
1120         return false;
1121 }
1122
1123 static int trace__process_sample(struct perf_tool *tool,
1124                                  union perf_event *event __maybe_unused,
1125                                  struct perf_sample *sample,
1126                                  struct perf_evsel *evsel,
1127                                  struct machine *machine __maybe_unused)
1128 {
1129         struct trace *trace = container_of(tool, struct trace, tool);
1130         int err = 0;
1131
1132         tracepoint_handler handler = evsel->handler.func;
1133
1134         if (skip_sample(trace, sample))
1135                 return 0;
1136
1137         if (!trace->full_time && trace->base_time == 0)
1138                 trace->base_time = sample->time;
1139
1140         if (handler)
1141                 handler(trace, evsel, sample);
1142
1143         return err;
1144 }
1145
1146 static bool
1147 perf_session__has_tp(struct perf_session *session, const char *name)
1148 {
1149         struct perf_evsel *evsel;
1150
1151         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1152
1153         return evsel != NULL;
1154 }
1155
1156 static int parse_target_str(struct trace *trace)
1157 {
1158         if (trace->opts.target.pid) {
1159                 trace->pid_list = intlist__new(trace->opts.target.pid);
1160                 if (trace->pid_list == NULL) {
1161                         pr_err("Error parsing process id string\n");
1162                         return -EINVAL;
1163                 }
1164         }
1165
1166         if (trace->opts.target.tid) {
1167                 trace->tid_list = intlist__new(trace->opts.target.tid);
1168                 if (trace->tid_list == NULL) {
1169                         pr_err("Error parsing thread id string\n");
1170                         return -EINVAL;
1171                 }
1172         }
1173
1174         return 0;
1175 }
1176
1177 static int trace__run(struct trace *trace, int argc, const char **argv)
1178 {
1179         struct perf_evlist *evlist = perf_evlist__new();
1180         struct perf_evsel *evsel;
1181         int err = -1, i;
1182         unsigned long before;
1183         const bool forks = argc > 0;
1184
1185         if (evlist == NULL) {
1186                 fprintf(trace->output, "Not enough memory to run!\n");
1187                 goto out;
1188         }
1189
1190         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1191             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1192                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1193                 goto out_delete_evlist;
1194         }
1195
1196         if (trace->sched &&
1197             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1198                                    trace__sched_stat_runtime)) {
1199                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1200                 goto out_delete_evlist;
1201         }
1202
1203         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1204         if (err < 0) {
1205                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1206                 goto out_delete_evlist;
1207         }
1208
1209         err = trace__symbols_init(trace, evlist);
1210         if (err < 0) {
1211                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1212                 goto out_delete_maps;
1213         }
1214
1215         perf_evlist__config(evlist, &trace->opts);
1216
1217         signal(SIGCHLD, sig_handler);
1218         signal(SIGINT, sig_handler);
1219
1220         if (forks) {
1221                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1222                                                     argv, false, false);
1223                 if (err < 0) {
1224                         fprintf(trace->output, "Couldn't run the workload!\n");
1225                         goto out_delete_maps;
1226                 }
1227         }
1228
1229         err = perf_evlist__open(evlist);
1230         if (err < 0) {
1231                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1232                 goto out_delete_maps;
1233         }
1234
1235         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1236         if (err < 0) {
1237                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1238                 goto out_close_evlist;
1239         }
1240
1241         perf_evlist__enable(evlist);
1242
1243         if (forks)
1244                 perf_evlist__start_workload(evlist);
1245
1246         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1247 again:
1248         before = trace->nr_events;
1249
1250         for (i = 0; i < evlist->nr_mmaps; i++) {
1251                 union perf_event *event;
1252
1253                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1254                         const u32 type = event->header.type;
1255                         tracepoint_handler handler;
1256                         struct perf_sample sample;
1257
1258                         ++trace->nr_events;
1259
1260                         err = perf_evlist__parse_sample(evlist, event, &sample);
1261                         if (err) {
1262                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1263                                 continue;
1264                         }
1265
1266                         if (!trace->full_time && trace->base_time == 0)
1267                                 trace->base_time = sample.time;
1268
1269                         if (type != PERF_RECORD_SAMPLE) {
1270                                 trace__process_event(trace, &trace->host, event);
1271                                 continue;
1272                         }
1273
1274                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1275                         if (evsel == NULL) {
1276                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1277                                 continue;
1278                         }
1279
1280                         if (sample.raw_data == NULL) {
1281                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1282                                        perf_evsel__name(evsel), sample.tid,
1283                                        sample.cpu, sample.raw_size);
1284                                 continue;
1285                         }
1286
1287                         handler = evsel->handler.func;
1288                         handler(trace, evsel, &sample);
1289
1290                         if (done)
1291                                 goto out_unmap_evlist;
1292                 }
1293         }
1294
1295         if (trace->nr_events == before) {
1296                 if (done)
1297                         goto out_unmap_evlist;
1298
1299                 poll(evlist->pollfd, evlist->nr_fds, -1);
1300         }
1301
1302         if (done)
1303                 perf_evlist__disable(evlist);
1304
1305         goto again;
1306
1307 out_unmap_evlist:
1308         perf_evlist__munmap(evlist);
1309 out_close_evlist:
1310         perf_evlist__close(evlist);
1311 out_delete_maps:
1312         perf_evlist__delete_maps(evlist);
1313 out_delete_evlist:
1314         perf_evlist__delete(evlist);
1315 out:
1316         return err;
1317 }
1318
1319 static int trace__replay(struct trace *trace)
1320 {
1321         const struct perf_evsel_str_handler handlers[] = {
1322                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1323                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1324         };
1325
1326         struct perf_session *session;
1327         int err = -1;
1328
1329         trace->tool.sample        = trace__process_sample;
1330         trace->tool.mmap          = perf_event__process_mmap;
1331         trace->tool.mmap2         = perf_event__process_mmap2;
1332         trace->tool.comm          = perf_event__process_comm;
1333         trace->tool.exit          = perf_event__process_exit;
1334         trace->tool.fork          = perf_event__process_fork;
1335         trace->tool.attr          = perf_event__process_attr;
1336         trace->tool.tracing_data = perf_event__process_tracing_data;
1337         trace->tool.build_id      = perf_event__process_build_id;
1338
1339         trace->tool.ordered_samples = true;
1340         trace->tool.ordering_requires_timestamps = true;
1341
1342         /* add tid to output */
1343         trace->multiple_threads = true;
1344
1345         if (symbol__init() < 0)
1346                 return -1;
1347
1348         session = perf_session__new(input_name, O_RDONLY, 0, false,
1349                                     &trace->tool);
1350         if (session == NULL)
1351                 return -ENOMEM;
1352
1353         err = perf_session__set_tracepoints_handlers(session, handlers);
1354         if (err)
1355                 goto out;
1356
1357         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1358                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1359                 goto out;
1360         }
1361
1362         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1363                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1364                 goto out;
1365         }
1366
1367         err = parse_target_str(trace);
1368         if (err != 0)
1369                 goto out;
1370
1371         setup_pager();
1372
1373         err = perf_session__process_events(session, &trace->tool);
1374         if (err)
1375                 pr_err("Failed to process events, error %d", err);
1376
1377 out:
1378         perf_session__delete(session);
1379
1380         return err;
1381 }
1382
/*
 * Print the banner and column headings of the per-thread summary to 'fp'.
 *
 * Returns the sum of the fprintf() return values for the four lines.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char * const header_lines[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
        };
        size_t printed = 0;
        size_t i;

        for (i = 0; i < sizeof(header_lines) / sizeof(header_lines[0]); i++)
                printed += fprintf(fp, "%s", header_lines[i]);

        return printed;
}
1394
1395 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1396 {
1397         size_t printed = trace__fprintf_threads_header(fp);
1398         struct rb_node *nd;
1399
1400         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1401                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1402                 struct thread_trace *ttrace = thread->priv;
1403                 const char *color;
1404                 double ratio;
1405
1406                 if (ttrace == NULL)
1407                         continue;
1408
1409                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1410
1411                 color = PERF_COLOR_NORMAL;
1412                 if (ratio > 50.0)
1413                         color = PERF_COLOR_RED;
1414                 else if (ratio > 25.0)
1415                         color = PERF_COLOR_GREEN;
1416                 else if (ratio > 5.0)
1417                         color = PERF_COLOR_YELLOW;
1418
1419                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1420                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1421                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1422                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1423         }
1424
1425         return printed;
1426 }
1427
1428 static int trace__set_duration(const struct option *opt, const char *str,
1429                                int unset __maybe_unused)
1430 {
1431         struct trace *trace = opt->value;
1432
1433         trace->duration_filter = atof(str);
1434         return 0;
1435 }
1436
1437 static int trace__open_output(struct trace *trace, const char *filename)
1438 {
1439         struct stat st;
1440
1441         if (!stat(filename, &st) && st.st_size) {
1442                 char oldname[PATH_MAX];
1443
1444                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1445                 unlink(oldname);
1446                 rename(filename, oldname);
1447         }
1448
1449         trace->output = fopen(filename, "w");
1450
1451         return trace->output == NULL ? -errno : 0;
1452 }
1453
1454 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1455 {
1456         const char * const trace_usage[] = {
1457                 "perf trace [<options>] [<command>]",
1458                 "perf trace [<options>] -- <command> [<options>]",
1459                 NULL
1460         };
1461         struct trace trace = {
1462                 .audit_machine = audit_detect_machine(),
1463                 .syscalls = {
1464                         . max = -1,
1465                 },
1466                 .opts = {
1467                         .target = {
1468                                 .uid       = UINT_MAX,
1469                                 .uses_mmap = true,
1470                         },
1471                         .user_freq     = UINT_MAX,
1472                         .user_interval = ULLONG_MAX,
1473                         .no_delay      = true,
1474                         .mmap_pages    = 1024,
1475                 },
1476                 .output = stdout,
1477         };
1478         const char *output_name = NULL;
1479         const char *ev_qualifier_str = NULL;
1480         const struct option trace_options[] = {
1481         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1482                     "list of events to trace"),
1483         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1484         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1485         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1486                     "trace events on existing process id"),
1487         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1488                     "trace events on existing thread id"),
1489         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1490                     "system-wide collection from all CPUs"),
1491         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1492                     "list of cpus to monitor"),
1493         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1494                     "child tasks do not inherit counters"),
1495         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1496                      "number of mmap data pages"),
1497         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1498                    "user to profile"),
1499         OPT_CALLBACK(0, "duration", &trace, "float",
1500                      "show only events with duration > N.M ms",
1501                      trace__set_duration),
1502         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1503         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1504         OPT_BOOLEAN('T', "time", &trace.full_time,
1505                     "Show full timestamp, not time relative to first start"),
1506         OPT_END()
1507         };
1508         int err;
1509         char bf[BUFSIZ];
1510
1511         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1512
1513         if (output_name != NULL) {
1514                 err = trace__open_output(&trace, output_name);
1515                 if (err < 0) {
1516                         perror("failed to create output file");
1517                         goto out;
1518                 }
1519         }
1520
1521         if (ev_qualifier_str != NULL) {
1522                 const char *s = ev_qualifier_str;
1523
1524                 trace.not_ev_qualifier = *s == '!';
1525                 if (trace.not_ev_qualifier)
1526                         ++s;
1527                 trace.ev_qualifier = strlist__new(true, s);
1528                 if (trace.ev_qualifier == NULL) {
1529                         fputs("Not enough memory to parse event qualifier",
1530                               trace.output);
1531                         err = -ENOMEM;
1532                         goto out_close;
1533                 }
1534         }
1535
1536         err = perf_target__validate(&trace.opts.target);
1537         if (err) {
1538                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1539                 fprintf(trace.output, "%s", bf);
1540                 goto out_close;
1541         }
1542
1543         err = perf_target__parse_uid(&trace.opts.target);
1544         if (err) {
1545                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1546                 fprintf(trace.output, "%s", bf);
1547                 goto out_close;
1548         }
1549
1550         if (!argc && perf_target__none(&trace.opts.target))
1551                 trace.opts.target.system_wide = true;
1552
1553         if (input_name)
1554                 err = trace__replay(&trace);
1555         else
1556                 err = trace__run(&trace, argc, argv);
1557
1558         if (trace.sched && !err)
1559                 trace__fprintf_thread_summary(&trace, trace.output);
1560
1561 out_close:
1562         if (output_name != NULL)
1563                 fclose(trace.output);
1564 out:
1565         return err;
1566 }