1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
16 #include <sys/eventfd.h>
18 #include <linux/futex.h>
/*
 * Fallback values for mmap()/madvise() constants that the rest of this file
 * refers to by name, for build environments whose headers lack them.
 * NOTE(review): only some of the surrounding #ifndef lines and none of the
 * #endif lines are visible in this excerpt.
 */
20 /* For older distros: */
22 # define MAP_STACK 0x20000
26 # define MADV_HWPOISON 100
29 #ifndef MADV_MERGEABLE
30 # define MADV_MERGEABLE 12
33 #ifndef MADV_UNMERGEABLE
34 # define MADV_UNMERGEABLE 13
/*
 * DEFINE_STRARRAY(array): defines a struct strarray named strarray__<array>
 * whose nr_entries is the element count of <array>.  The rest of the
 * initializer is not visible in this excerpt.
 */
49 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
50 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print a syscall argument as the string found at position idx of the
 * struct strarray passed through arg->parm.  When idx falls outside
 * [0, nr_entries) the raw index is printed in decimal instead.
 * Returns whatever scnprintf() returns for the chosen format.
 * NOTE(review): the declaration of idx is not visible here; presumably it is
 * taken from arg->val - confirm.
 */
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 struct syscall_arg *arg)
58 struct strarray *sa = arg->parm;
60 if (idx < 0 || idx >= sa->nr_entries)
61 return scnprintf(bf, size, "%d", idx);
63 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* Short alias for this formatter. */
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Print arg->val in hexadecimal with a leading 0x ("%#lx") into bf.
 * Returns the scnprintf() result.
 */
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 struct syscall_arg *arg)
71 return scnprintf(bf, size, "%#lx", arg->val);
/* Short alias for this formatter. */
74 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Render a memory protection bitmask (arg->val) as '|'-separated PROT_*
 * names with the PROT_ prefix dropped.  A value equal to PROT_NONE prints
 * "NONE".  P_MMAP_PROT(n) appends name n when PROT_n is set; the trailing
 * scnprintf prints a remaining value in hex.
 * NOTE(review): the lines that clear handled bits and guard the final hex
 * print are not visible in this excerpt.
 */
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 struct syscall_arg *arg)
79 int printed = 0, prot = arg->val;
81 if (prot == PROT_NONE)
82 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84 if (prot & PROT_##n) { \
85 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
95 P_MMAP_PROT(GROWSDOWN);
100 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Render mmap() flags (arg->val) as '|'-separated MAP_* names with the MAP_
 * prefix dropped.  P_MMAP_FLAG(n) appends name n when MAP_n is set;
 * UNINITIALIZED is only tested when the headers define MAP_UNINITIALIZED.
 * The trailing scnprintf prints a remaining value in hex.
 * NOTE(review): the lines that clear handled bits and guard the final hex
 * print are not visible in this excerpt.
 */
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 struct syscall_arg *arg)
110 int printed = 0, flags = arg->val;
112 #define P_MMAP_FLAG(n) \
113 if (flags & MAP_##n) { \
114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
119 P_MMAP_FLAG(PRIVATE);
123 P_MMAP_FLAG(ANONYMOUS);
124 P_MMAP_FLAG(DENYWRITE);
125 P_MMAP_FLAG(EXECUTABLE);
128 P_MMAP_FLAG(GROWSDOWN);
130 P_MMAP_FLAG(HUGETLB);
133 P_MMAP_FLAG(NONBLOCK);
134 P_MMAP_FLAG(NORESERVE);
135 P_MMAP_FLAG(POPULATE);
137 #ifdef MAP_UNINITIALIZED
138 P_MMAP_FLAG(UNINITIALIZED);
143 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Print the madvise() behavior argument (arg->val) by name.
 * P_MADV_BHV(n) expands to a case label for MADV_n that returns immediately
 * after printing n; SOFT_OFFLINE and NOHUGEPAGE are only handled when the
 * headers define them.  A value with no matching case is printed in hex.
 */
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 struct syscall_arg *arg)
153 int behavior = arg->val;
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
159 P_MADV_BHV(SEQUENTIAL);
160 P_MADV_BHV(WILLNEED);
161 P_MADV_BHV(DONTNEED);
163 P_MADV_BHV(DONTFORK);
165 P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 P_MADV_BHV(SOFT_OFFLINE);
169 P_MADV_BHV(MERGEABLE);
170 P_MADV_BHV(UNMERGEABLE);
172 P_MADV_BHV(HUGEPAGE);
174 #ifdef MADV_NOHUGEPAGE
175 P_MADV_BHV(NOHUGEPAGE);
178 P_MADV_BHV(DONTDUMP);
187 return scnprintf(bf, size, "%#x", behavior);
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Decode the futex() op argument.  The command (op & FUTEX_CMD_MASK) is
 * printed by name, or in hex when it matches no case, and "|PRIV" / "|CLKRT"
 * are appended when FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME are set in op.
 *
 * P_FUTEX_OP(n) opens the case label and prints the name but does not break;
 * each use site adds its own mask update and break.  The SCF_* bits OR-ed
 * into arg->mask presumably mark the futex() arguments that the command does
 * not use so they are left out of the output - confirm against the mask
 * handling in syscall__scnprintf_args.
 */
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
194 enum syscall_futex_args {
195 SCF_UADDR = (1 << 0),
198 SCF_TIMEOUT = (1 << 3),
199 SCF_UADDR2 = (1 << 4),
203 int cmd = op & FUTEX_CMD_MASK;
207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
209 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
214 P_FUTEX_OP(WAKE_OP); break;
215 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
218 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
220 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
221 default: printed = scnprintf(bf, size, "%#x", cmd); break;
224 if (op & FUTEX_PRIVATE_FLAG)
225 printed += scnprintf(bf + printed, size - printed, "|PRIV");
227 if (op & FUTEX_CLOCK_REALTIME)
228 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
233 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables consumed by syscall_arg__scnprintf_strarray: the argument value
 * is used as the index into the table.  Each table is wrapped in a
 * struct strarray by DEFINE_STRARRAY so the syscall_fmts table can point at it.
 * NOTE(review): several tables have entries on lines not visible in this
 * excerpt.
 */
/* "which" argument of getitimer/setitimer. */
235 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
236 static DEFINE_STRARRAY(itimers);
/* "whence" argument of lseek. */
238 static const char *whences[] = { "SET", "CUR", "END",
246 static DEFINE_STRARRAY(whences);
/* "cmd" argument of fcntl. */
248 static const char *fcntl_cmds[] = {
249 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
250 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
251 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
254 static DEFINE_STRARRAY(fcntl_cmds);
/* "resource" argument of getrlimit/setrlimit/prlimit64. */
256 static const char *rlimit_resources[] = {
257 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
258 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
261 static DEFINE_STRARRAY(rlimit_resources);
/* "how" argument of rt_sigprocmask. */
263 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
264 static DEFINE_STRARRAY(sighow);
/* "family" argument of socket. */
266 static const char *socket_families[] = {
267 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
268 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
269 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
270 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
271 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
272 "ALG", "NFC", "VSOCK",
274 static DEFINE_STRARRAY(socket_families);
/*
 * Print the socket() type argument.  The value is split with SOCK_TYPE_MASK
 * (defaulted to 0xf when the headers do not define it): the masked part is
 * the socket type, printed by name through P_SK_TYPE or in hex when unknown;
 * the remaining bits are flags.  P_SK_FLAG(n) appends "|n" and clears SOCK_n
 * from flags, and the last scnprintf appends what is left in hex.
 * NOTE(review): the declarations, the switch header and the guard around the
 * final hex print are not visible in this excerpt.
 */
276 #ifndef SOCK_TYPE_MASK
277 #define SOCK_TYPE_MASK 0xf
280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
281 struct syscall_arg *arg)
285 flags = type & ~SOCK_TYPE_MASK;
287 type &= SOCK_TYPE_MASK;
289 * Can't use a strarray, MIPS may override for ABI reasons.
292 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
297 P_SK_TYPE(SEQPACKET);
302 printed = scnprintf(bf, size, "%#x", type);
305 #define P_SK_FLAG(n) \
306 if (flags & SOCK_##n) { \
307 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
308 flags &= ~SOCK_##n; \
316 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_* values followed by the formatter for send/recv style flags.
 * The formatter renders arg->val as '|'-separated MSG_* names with the MSG_
 * prefix dropped, prints "NONE" on an early-return path, and ends by printing
 * a remaining value in hex.
 * NOTE(review): the condition guarding the "NONE" return, the #ifndef guards
 * for MSG_PROBE/MSG_FASTOPEN and the bit-clearing part of P_MSG_FLAG are not
 * visible in this excerpt.
 */
324 #define MSG_PROBE 0x10
326 #ifndef MSG_SENDPAGE_NOTLAST
327 #define MSG_SENDPAGE_NOTLAST 0x20000
330 #define MSG_FASTOPEN 0x20000000
333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
334 struct syscall_arg *arg)
336 int printed = 0, flags = arg->val;
339 return scnprintf(bf, size, "NONE");
340 #define P_MSG_FLAG(n) \
341 if (flags & MSG_##n) { \
342 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
348 P_MSG_FLAG(DONTROUTE);
353 P_MSG_FLAG(DONTWAIT);
360 P_MSG_FLAG(ERRQUEUE);
361 P_MSG_FLAG(NOSIGNAL);
363 P_MSG_FLAG(WAITFORONE);
364 P_MSG_FLAG(SENDPAGE_NOTLAST);
365 P_MSG_FLAG(FASTOPEN);
366 P_MSG_FLAG(CMSG_CLOEXEC);
370 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Print the access() mode argument.  F_OK (0) prints "F"; otherwise a helper
 * macro appends the letter n for every n_OK bit that is set, with no
 * separator between letters, and the last scnprintf appends "|<hex>" for a
 * remaining value.
 * NOTE(review): the macro header, its uses and the guard around the final
 * print are not visible in this excerpt.
 */
377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
378 struct syscall_arg *arg)
383 if (mode == F_OK) /* 0 */
384 return scnprintf(bf, size, "F");
386 if (mode & n##_OK) { \
387 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
397 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Print open()-style flags (arg->val) as '|'-separated O_* names with the O_
 * prefix dropped.  When O_CREAT is absent, the bit for the argument following
 * this one is set in arg->mask so that the mode parameter is masked.
 * "RDONLY" is printed on an early-return path.  SYNC is only printed when
 * every bit of O_SYNC is set, and the last scnprintf prints a remaining value
 * in hex.
 * NOTE(review): the condition guarding the "RDONLY" return, the flag macro
 * header and its uses are not visible in this excerpt.
 */
404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
405 struct syscall_arg *arg)
407 int printed = 0, flags = arg->val;
409 if (!(flags & O_CREAT))
410 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
413 return scnprintf(bf, size, "RDONLY");
415 if (flags & O_##n) { \
416 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
440 if ((flags & O_SYNC) == O_SYNC)
441 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
453 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Print eventfd flags (arg->val) as '|'-separated EFD_* names with the EFD_
 * prefix dropped; "NONE" is printed on an early-return path and a remaining
 * value is printed in hex.
 * NOTE(review): the condition guarding "NONE", the macro header and its uses
 * are not visible in this excerpt.
 */
460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
461 struct syscall_arg *arg)
463 int printed = 0, flags = arg->val;
466 return scnprintf(bf, size, "NONE");
468 if (flags & EFD_##n) { \
469 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
479 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Print a signal number by name.  P_SIGNUM(n) expands to a case label for
 * SIGn that returns after printing n (without the SIG prefix); a value with
 * no matching case is printed in hex.
 * NOTE(review): the switch and the individual P_SIGNUM uses are not visible
 * in this excerpt.
 */
486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
491 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
526 return scnprintf(bf, size, "%#x", sig);
529 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Per-syscall formatting overrides, looked up by name in syscall_fmt__find.
 *
 *   name          - syscall name used as the lookup key
 *   alias         - alternate name tried for the sys_enter_<name> tracepoint
 *                   when the primary name has no tracepoint format
 *   errmsg        - on a negative return, print the errno name and message
 *   timeout       - a return value of 0 is printed as "Timeout"
 *   hexret        - print the return value in hex
 *   arg_scnprintf - per-argument formatter, indexed by argument position
 *   arg_parm      - per-argument extra parameter handed to the formatter
 *                   (the strarray tables for SCA_STRARRAY)
 *
 * The entries are kept sorted by name in strcmp() order; syscall_fmt__find
 * depends on that because it uses bsearch().  Keep new entries in order.
 */
531 static struct syscall_fmt {
534 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
540 { .name = "access", .errmsg = true,
541 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
542 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
543 { .name = "brk", .hexret = true,
544 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
545 { .name = "connect", .errmsg = true, },
546 { .name = "eventfd2", .errmsg = true,
547 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
548 { .name = "fcntl", .errmsg = true,
549 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
550 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
551 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
552 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
553 { .name = "futex", .errmsg = true,
554 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
555 { .name = "getitimer", .errmsg = true,
556 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
557 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
558 { .name = "getrlimit", .errmsg = true,
559 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
560 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
561 { .name = "ioctl", .errmsg = true,
562 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
563 { .name = "kill", .errmsg = true,
564 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
565 { .name = "lseek", .errmsg = true,
566 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
567 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
568 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
569 { .name = "madvise", .errmsg = true,
570 .arg_scnprintf = { [0] = SCA_HEX, /* start */
571 [2] = SCA_MADV_BHV, /* behavior */ }, },
572 { .name = "mmap", .hexret = true,
573 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
574 [2] = SCA_MMAP_PROT, /* prot */
575 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
576 { .name = "mprotect", .errmsg = true,
577 .arg_scnprintf = { [0] = SCA_HEX, /* start */
578 [2] = SCA_MMAP_PROT, /* prot */ }, },
579 { .name = "mremap", .hexret = true,
580 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
581 [4] = SCA_HEX, /* new_addr */ }, },
582 { .name = "munmap", .errmsg = true,
583 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
584 { .name = "open", .errmsg = true,
585 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
586 { .name = "open_by_handle_at", .errmsg = true,
587 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
588 { .name = "openat", .errmsg = true,
589 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
590 { .name = "poll", .errmsg = true, .timeout = true, },
591 { .name = "ppoll", .errmsg = true, .timeout = true, },
592 { .name = "pread", .errmsg = true, .alias = "pread64", },
593 { .name = "prlimit64", .errmsg = true,
594 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
595 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
596 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
597 { .name = "read", .errmsg = true, },
598 { .name = "recvfrom", .errmsg = true,
599 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
600 { .name = "recvmmsg", .errmsg = true,
601 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
602 { .name = "recvmsg", .errmsg = true,
603 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
604 { .name = "rt_sigaction", .errmsg = true,
605 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
606 { .name = "rt_sigprocmask", .errmsg = true,
607 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
608 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
609 { .name = "rt_sigqueueinfo", .errmsg = true,
610 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
611 { .name = "rt_tgsigqueueinfo", .errmsg = true,
612 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
613 { .name = "select", .errmsg = true, .timeout = true, },
614 { .name = "sendmmsg", .errmsg = true,
615 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
616 { .name = "sendmsg", .errmsg = true,
617 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
618 { .name = "sendto", .errmsg = true,
619 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
620 { .name = "setitimer", .errmsg = true,
621 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
622 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
623 { .name = "setrlimit", .errmsg = true,
624 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
625 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
626 { .name = "socket", .errmsg = true,
627 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
628 [1] = SCA_SK_TYPE, /* type */ },
629 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
630 { .name = "stat", .errmsg = true, .alias = "newstat", },
631 { .name = "tgkill", .errmsg = true,
632 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
633 { .name = "tkill", .errmsg = true,
634 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
635 { .name = "uname", .errmsg = true, .alias = "newuname", },
/*
 * bsearch() comparator: compares the search key (a syscall name string) with
 * the name member of a struct syscall_fmt element using strcmp().
 */
638 static int syscall_fmt__cmp(const void *name, const void *fmtp)
640 const struct syscall_fmt *fmt = fmtp;
641 return strcmp(name, fmt->name);
/*
 * Look up the formatting entry for syscall `name` in syscall_fmts with a
 * binary search.  Returns the bsearch() result, i.e. NULL when there is no
 * entry for that name.  Requires syscall_fmts to be sorted by name.
 */
644 static struct syscall_fmt *syscall_fmt__find(const char *name)
646 const int nmemb = ARRAY_SIZE(syscall_fmts);
647 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Members of the per-syscall descriptor (used elsewhere in this file through
 * struct syscall pointers): the sys_enter tracepoint format, the optional
 * syscall_fmts entry, and the per-argument formatter array built by
 * syscall__set_arg_fmts.
 * NOTE(review): the struct header and other members are not visible in this
 * excerpt.
 */
651 struct event_format *tp_format;
654 struct syscall_fmt *fmt;
655 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration to fp as "(<value> ms): ", where the value is t divided by
 * NSEC_PER_MSEC and formatted with "%6.3f".  The number is colored: red on
 * the first branch, yellow when the duration is at least 0.01, normal
 * otherwise.  Returns the sum of the fprintf/color_fprintf results.
 * NOTE(review): the threshold for the red branch is not visible in this
 * excerpt.
 */
659 static size_t fprintf_duration(unsigned long t, FILE *fp)
661 double duration = (double)t / NSEC_PER_MSEC;
662 size_t printed = fprintf(fp, "(");
665 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
666 else if (duration >= 0.01)
667 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
669 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
670 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, stored in thread->priv by thread__trace.
 * NOTE(review): only nr_events is visible here; other members used in this
 * file (entry_time, exit_time, entry_pending, entry_str, runtime_ms) are
 * declared on lines not shown.
 */
673 struct thread_trace {
677 unsigned long nr_events;
/* Allocate a struct thread_trace with zalloc(); returns its result. */
682 static struct thread_trace *thread_trace__new(void)
684 return zalloc(sizeof(struct thread_trace));
/*
 * Return the struct thread_trace attached to `thread`, creating it with
 * thread_trace__new() on first use and caching it in thread->priv.
 * A warning about dropped samples is printed in red to fp on the failure
 * path.
 * NOTE(review): the NULL checks' bodies, the counter update and the return
 * statements are not visible in this excerpt.
 */
687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
689 struct thread_trace *ttrace;
694 if (thread->priv == NULL)
695 thread->priv = thread_trace__new();
697 if (thread->priv == NULL)
700 ttrace = thread->priv;
705 color_fprintf(fp, PERF_COLOR_RED,
706 "WARNING: not enough memory, dropping samples!\n");
/*
 * Members of the tool-wide state (used elsewhere in this file through
 * struct trace pointers).  `tool` must stay embedded by value because
 * container_of(tool, struct trace, tool) is used to recover the enclosing
 * struct.  pid_list/tid_list hold the sample filters built by
 * parse_target_str, and duration_filter is compared against durations after
 * scaling by NSEC_PER_MSEC.
 * NOTE(review): the struct header and several members are not visible in
 * this excerpt.
 */
711 struct perf_tool tool;
715 struct syscall *table;
717 struct perf_record_opts opts;
722 unsigned long nr_events;
723 struct strlist *ev_qualifier;
724 bool not_ev_qualifier;
725 struct intlist *tid_list;
726 struct intlist *pid_list;
728 bool multiple_threads;
729 double duration_filter;
/*
 * Return true when t is below trace->duration_filter scaled by
 * NSEC_PER_MSEC, i.e. when the event is shorter than the configured
 * threshold.  With a duration_filter of 0 this is never true for
 * non-negative t.
 */
733 static bool trace__filter_duration(struct trace *trace, double t)
735 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print tstamp relative to trace->base_time, divided by NSEC_PER_MSEC, as
 * "%10.3f " to fp.  Returns the fprintf() result.
 */
738 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
740 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
742 return fprintf(fp, "%10.3f ", ts);
/*
 * File-scope flag plus the signal handler installed for SIGCHLD and SIGINT in
 * trace__run.  The handler ignores its signal number argument.
 * NOTE(review): the handler body is not visible in this excerpt; presumably
 * it sets `done` - confirm.  A plain bool written from a signal handler is
 * not guaranteed safe by the C standard (volatile sig_atomic_t is).
 */
745 static bool done = false;
747 static void sig_handler(int sig __maybe_unused)
/*
 * Print the common line prefix for a syscall event: the relative timestamp,
 * then the duration, and - only when trace->multiple_threads is set - the
 * thread id followed by a space.  Accumulates the printed character count.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
752 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
753 u64 duration, u64 tstamp, FILE *fp)
755 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
756 printed += fprintf_duration(duration, fp);
758 if (trace->multiple_threads)
759 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event.  PERF_RECORD_LOST is reported in red on
 * trace->output with the number of lost events and then passed to
 * machine__process_lost_event(); the other visible path hands the event to
 * machine__process_event().
 * NOTE(review): the default label, breaks and return are not visible in this
 * excerpt.
 */
764 static int trace__process_event(struct trace *trace, struct machine *machine,
765 union perf_event *event)
769 switch (event->header.type) {
770 case PERF_RECORD_LOST:
771 color_fprintf(trace->output, PERF_COLOR_RED,
772 "LOST %" PRIu64 " events!\n", event->lost.lost);
773 ret = machine__process_lost_event(machine, event);
775 ret = machine__process_event(machine, event);
/*
 * perf_tool callback adapter: recovers the enclosing struct trace from the
 * embedded tool member and forwards the event to trace__process_event().
 * The sample argument is unused.
 */
782 static int trace__tool_process(struct perf_tool *tool,
783 union perf_event *event,
784 struct perf_sample *sample __maybe_unused,
785 struct machine *machine)
787 struct trace *trace = container_of(tool, struct trace, tool);
788 return trace__process_event(trace, machine, event);
/*
 * Initialize symbol handling and the host machine (with its kernel maps),
 * then synthesize thread events: from evlist->threads when the target names
 * specific tasks, via perf_event__synthesize_threads() on the other visible
 * path.
 * NOTE(review): the error checks, the remaining call arguments and the
 * return are not visible in this excerpt.
 */
791 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
793 int err = symbol__init();
798 machine__init(&trace->host, "", HOST_KERNEL_ID);
799 machine__create_kernel_maps(&trace->host);
801 if (perf_target__has_task(&trace->opts.target)) {
802 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
806 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Build sc->arg_scnprintf, the per-argument formatter array for a syscall.
 * The array has nr_fields - 1 slots and the loop starts at the second
 * tracepoint field (fields->next), so the first field is not treated as an
 * argument.  For each argument, a formatter from sc->fmt wins; otherwise
 * pointer-typed fields get the hex formatter.  arg_parm is taken from
 * sc->fmt.
 * NOTE(review): the idx handling, the guard on sc->fmt before the arg_parm
 * assignment and the return values are not visible in this excerpt.
 */
816 static int syscall__set_arg_fmts(struct syscall *sc)
818 struct format_field *field;
821 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
822 if (sc->arg_scnprintf == NULL)
826 sc->arg_parm = sc->fmt->arg_parm;
828 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
829 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
830 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
831 else if (field->flags & FIELD_IS_POINTER)
832 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate trace->syscalls.table[id] for syscall number `id`.
 *
 * The name comes from audit_syscall_to_name().  When id is beyond the current
 * table, the table is grown with realloc() to id + 1 entries and only the
 * newly added entries are zeroed (the whole table when max was -1, i.e. the
 * table was empty).  The realloc result is kept in a temporary until checked,
 * so the old table is not lost on failure.
 *
 * With an event qualifier list active, the syscall is tested against it
 * (inverted when not_ev_qualifier is set).  Otherwise the syscall_fmts entry
 * is looked up and the "syscalls:sys_enter_<name>" tracepoint format is
 * loaded, retrying with the entry's alias when the primary name has no
 * tracepoint.  Finishes by building the argument formatters.
 * NOTE(review): the early returns and the assignment of sc->name are not
 * visible in this excerpt.
 */
839 static int trace__read_syscall_info(struct trace *trace, int id)
843 const char *name = audit_syscall_to_name(id, trace->audit_machine);
848 if (id > trace->syscalls.max) {
849 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
851 if (nsyscalls == NULL)
854 if (trace->syscalls.max != -1) {
855 memset(nsyscalls + trace->syscalls.max + 1, 0,
856 (id - trace->syscalls.max) * sizeof(*sc));
858 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
861 trace->syscalls.table = nsyscalls;
862 trace->syscalls.max = id;
865 sc = trace->syscalls.table + id;
868 if (trace->ev_qualifier) {
869 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
871 if (!(in ^ trace->not_ev_qualifier)) {
874 * No need to do read tracepoint information since this will be
881 sc->fmt = syscall_fmt__find(sc->name);
883 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
884 sc->tp_format = event_format__new("syscalls", tp_name);
886 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
887 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
888 sc->tp_format = event_format__new("syscalls", tp_name);
891 if (sc->tp_format == NULL)
894 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of one syscall invocation into bf.
 *
 * When the tracepoint format is known, the fields after the first one are
 * walked in step with arg.idx and a one-bit mask (`bit`) that is shifted left
 * per argument.  Each printed argument appears as "<field name>: <value>",
 * separated by ", ", and the value goes through the per-argument formatter
 * when one is set or is printed with "%ld" otherwise.  A test for a zero
 * argument value is visible before the print.
 * Without a tracepoint format a fallback loop prints the raw values by
 * index.
 * NOTE(review): the check of arg.mask against `bit`, the action taken for
 * zero-valued arguments, the fallback loop header/format and the return are
 * not visible in this excerpt.
 */
897 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
902 if (sc->tp_format != NULL) {
903 struct format_field *field;
905 struct syscall_arg arg = {
910 for (field = sc->tp_format->format.fields->next; field;
911 field = field->next, ++arg.idx, bit <<= 1) {
915 if (args[arg.idx] == 0)
918 printed += scnprintf(bf + printed, size - printed,
919 "%s%s: ", printed ? ", " : "", field->name);
920 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
921 arg.val = args[arg.idx];
923 arg.parm = sc->arg_parm[arg.idx];
924 printed += sc->arg_scnprintf[arg.idx](bf + printed,
925 size - printed, &arg);
927 printed += scnprintf(bf + printed, size - printed,
928 "%ld", args[arg.idx]);
935 printed += scnprintf(bf + printed, size - printed,
937 printed ? ", " : "", i, args[i]);
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (stored in evsel->handler.func and invoked with the trace state, the
 * originating evsel and the parsed sample).
 */
945 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
946 struct perf_sample *sample);
/*
 * Return the table entry for the syscall id carried in the sample's "id"
 * field, loading it on demand through trace__read_syscall_info() when the id
 * is beyond the table or its entry has no name yet.  An invalid id is
 * reported on trace->output together with a running counter.  On the failure
 * path a "Problems reading syscall <id>[(<name>)] information" line is
 * printed.
 * NOTE(review): the condition that classifies an id as invalid, the goto
 * targets and the failure return value are not visible in this excerpt.
 */
948 static struct syscall *trace__syscall_info(struct trace *trace,
949 struct perf_evsel *evsel,
950 struct perf_sample *sample)
952 int id = perf_evsel__intval(evsel, sample, "id");
957 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
958 * before that, leaving at a higher verbosity level till that is
959 * explained. Reproduced with plain ftrace with:
961 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
962 * grep "NR -1 " /t/trace_pipe
964 * After generating some load on the machine.
968 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
969 id, perf_evsel__name(evsel), ++n);
974 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
975 trace__read_syscall_info(trace, id))
978 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
981 return &trace->syscalls.table[id];
985 fprintf(trace->output, "Problems reading syscall %d", id);
986 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
987 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
988 fputs(" information\n", trace->output);
/*
 * Handler for raw_syscalls:sys_enter samples.
 *
 * Resolves the syscall descriptor and the thread, reads the raw "args"
 * payload and formats "<name>(<args>" into the thread's entry_str buffer,
 * which is allocated once (1024 bytes) and reused.  entry_time records the
 * sample time so the exit handler can compute a duration.
 * For exit_group and exit the line is printed right away (unless a duration
 * filter is active); the other visible path sets entry_pending so that the
 * exit handler prints the line.
 * NOTE(review): the buffer size 1024 is repeated as a literal in three
 * places; the error returns and the else keyword before entry_pending are
 * not visible in this excerpt.
 */
993 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
994 struct perf_sample *sample)
999 struct thread *thread;
1000 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1001 struct thread_trace *ttrace;
1009 thread = machine__findnew_thread(&trace->host, sample->pid,
1011 ttrace = thread__trace(thread, trace->output);
1015 args = perf_evsel__rawptr(evsel, sample, "args");
1017 fprintf(trace->output, "Problems reading syscall arguments\n");
1021 ttrace = thread->priv;
1023 if (ttrace->entry_str == NULL) {
1024 ttrace->entry_str = malloc(1024);
1025 if (!ttrace->entry_str)
1029 ttrace->entry_time = sample->time;
1030 msg = ttrace->entry_str;
1031 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1033 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
1035 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1036 if (!trace->duration_filter) {
1037 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1038 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1041 ttrace->entry_pending = true;
/*
 * Handler for raw_syscalls:sys_exit samples.
 *
 * Computes the duration from the thread's recorded entry_time, applies the
 * duration filter, prints the line prefix, then either the pending entry
 * string (padded to 70 columns) or a "... [continued]: <name>()" marker.
 * The return value is printed according to the syscall's format entry:
 *   - no entry: ") = <ret>" in decimal;
 *   - errmsg and ret < 0: ") = -1 <errno name> <message>";
 *   - timeout and ret == 0: ") = 0 Timeout";
 *   - hexret: ") = <ret>" in hex.
 * entry_pending is cleared at the end.
 * NOTE(review): the strerror_r() result is assigned to a const char *, so
 * this presumably relies on the GNU variant that returns char * - confirm.
 * The filter early-outs, goto labels and the final default print are not
 * visible in this excerpt.
 */
1046 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1047 struct perf_sample *sample)
1051 struct thread *thread;
1052 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1053 struct thread_trace *ttrace;
1061 thread = machine__findnew_thread(&trace->host, sample->pid,
1063 ttrace = thread__trace(thread, trace->output);
1067 ret = perf_evsel__intval(evsel, sample, "ret");
1069 ttrace = thread->priv;
1071 ttrace->exit_time = sample->time;
1073 if (ttrace->entry_time) {
1074 duration = sample->time - ttrace->entry_time;
1075 if (trace__filter_duration(trace, duration))
1077 } else if (trace->duration_filter)
1080 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1082 if (ttrace->entry_pending) {
1083 fprintf(trace->output, "%-70s", ttrace->entry_str);
1085 fprintf(trace->output, " ... [");
1086 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1087 fprintf(trace->output, "]: %s()", sc->name);
1090 if (sc->fmt == NULL) {
1092 fprintf(trace->output, ") = %d", ret);
1093 } else if (ret < 0 && sc->fmt->errmsg) {
1095 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1096 *e = audit_errno_to_name(-ret);
1098 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1099 } else if (ret == 0 && sc->fmt->timeout)
1100 fprintf(trace->output, ") = 0 Timeout");
1101 else if (sc->fmt->hexret)
1102 fprintf(trace->output, ") = %#x", ret);
1106 fputc('\n', trace->output);
1108 ttrace->entry_pending = false;
/*
 * Handler for sched:sched_stat_runtime samples.  Reads the "runtime" field,
 * divides it by NSEC_PER_MSEC and adds the result to both the per-thread and
 * the global runtime_ms totals.  A diagnostic line with comm, pid, runtime
 * and vruntime is printed on another path.
 * NOTE(review): the NULL check on ttrace and the label leading to the
 * diagnostic print are not visible in this excerpt.
 */
1113 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1114 struct perf_sample *sample)
1116 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1117 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1118 struct thread *thread = machine__findnew_thread(&trace->host,
1121 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1126 ttrace->runtime_ms += runtime_ms;
1127 trace->runtime_ms += runtime_ms;
1131 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1133 perf_evsel__strval(evsel, sample, "comm"),
1134 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1136 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be dropped based on the pid/tid filter
 * lists.  The first test matches samples whose pid or tid is in a configured
 * list; the second test covers the case where at least one list is
 * configured but the sample matched neither.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a listed sample is kept and an unlisted one is skipped when any
 * list exists - confirm.
 */
1140 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1142 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1143 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1146 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback.  Drops samples rejected by skip_sample(),
 * latches the first sample time as base_time unless full_time is requested,
 * and dispatches to the tracepoint handler stored in evsel->handler.func.
 * The event and machine arguments are unused.
 * NOTE(review): the guard around the handler call and the return value are
 * not visible in this excerpt.
 */
1152 static int trace__process_sample(struct perf_tool *tool,
1153 union perf_event *event __maybe_unused,
1154 struct perf_sample *sample,
1155 struct perf_evsel *evsel,
1156 struct machine *machine __maybe_unused)
1158 struct trace *trace = container_of(tool, struct trace, tool);
1161 tracepoint_handler handler = evsel->handler.func;
1163 if (skip_sample(trace, sample))
1166 if (!trace->full_time && trace->base_time == 0)
1167 trace->base_time = sample->time;
1170 handler(trace, evsel, sample);
/*
 * Return true when the session's evlist contains a tracepoint event with the
 * given name.
 */
1176 perf_session__has_tp(struct perf_session *session, const char *name)
1178 struct perf_evsel *evsel;
1180 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1182 return evsel != NULL;
/*
 * Turn the pid and tid option strings in trace->opts.target into the
 * pid_list / tid_list integer lists consulted by skip_sample().  Each list
 * is built only when its option string is set; a failed intlist__new() is
 * reported with pr_err().
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1185 static int parse_target_str(struct trace *trace)
1187 if (trace->opts.target.pid) {
1188 trace->pid_list = intlist__new(trace->opts.target.pid);
1189 if (trace->pid_list == NULL) {
1190 pr_err("Error parsing process id string\n");
1195 if (trace->opts.target.tid) {
1196 trace->tid_list = intlist__new(trace->opts.target.tid);
1197 if (trace->tid_list == NULL) {
1198 pr_err("Error parsing thread id string\n");
1206 static int trace__run(struct trace *trace, int argc, const char **argv)
1208 struct perf_evlist *evlist = perf_evlist__new();
1209 struct perf_evsel *evsel;
1211 unsigned long before;
1212 const bool forks = argc > 0;
1214 if (evlist == NULL) {
1215 fprintf(trace->output, "Not enough memory to run!\n");
1219 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1220 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1221 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1222 goto out_delete_evlist;
1226 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1227 trace__sched_stat_runtime)) {
1228 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1229 goto out_delete_evlist;
1232 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1234 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1235 goto out_delete_evlist;
1238 err = trace__symbols_init(trace, evlist);
1240 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1241 goto out_delete_maps;
1244 perf_evlist__config(evlist, &trace->opts);
1246 signal(SIGCHLD, sig_handler);
1247 signal(SIGINT, sig_handler);
1250 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1251 argv, false, false);
1253 fprintf(trace->output, "Couldn't run the workload!\n");
1254 goto out_delete_maps;
1258 err = perf_evlist__open(evlist);
1260 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1261 goto out_delete_maps;
1264 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1266 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1267 goto out_close_evlist;
1270 perf_evlist__enable(evlist);
1273 perf_evlist__start_workload(evlist);
1275 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1277 before = trace->nr_events;
1279 for (i = 0; i < evlist->nr_mmaps; i++) {
1280 union perf_event *event;
1282 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1283 const u32 type = event->header.type;
1284 tracepoint_handler handler;
1285 struct perf_sample sample;
1289 err = perf_evlist__parse_sample(evlist, event, &sample);
1291 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1295 if (!trace->full_time && trace->base_time == 0)
1296 trace->base_time = sample.time;
1298 if (type != PERF_RECORD_SAMPLE) {
1299 trace__process_event(trace, &trace->host, event);
1303 evsel = perf_evlist__id2evsel(evlist, sample.id);
1304 if (evsel == NULL) {
1305 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1309 if (sample.raw_data == NULL) {
1310 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1311 perf_evsel__name(evsel), sample.tid,
1312 sample.cpu, sample.raw_size);
1316 handler = evsel->handler.func;
1317 handler(trace, evsel, &sample);
1320 goto out_unmap_evlist;
1324 if (trace->nr_events == before) {
1326 goto out_unmap_evlist;
1328 poll(evlist->pollfd, evlist->nr_fds, -1);
1332 perf_evlist__disable(evlist);
1337 perf_evlist__munmap(evlist);
1339 perf_evlist__close(evlist);
1341 perf_evlist__delete_maps(evlist);
1343 perf_evlist__delete(evlist);
1206 static int trace__run(struct trace *trace, int argc, const char **argv)
1208 struct perf_evlist *evlist = perf_evlist__new();
1209 struct perf_evsel *evsel;
1211 unsigned long before;
1212 const bool forks = argc > 0;
1214 if (evlist == NULL) {
1215 fprintf(trace->output, "Not enough memory to run!\n");
1219 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1220 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1221 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1222 goto out_delete_evlist;
1226 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1227 trace__sched_stat_runtime)) {
1228 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1229 goto out_delete_evlist;
1232 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1234 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1235 goto out_delete_evlist;
1238 err = trace__symbols_init(trace, evlist);
1240 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1241 goto out_delete_maps;
1244 perf_evlist__config(evlist, &trace->opts);
1246 signal(SIGCHLD, sig_handler);
1247 signal(SIGINT, sig_handler);
1250 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1251 argv, false, false);
1253 fprintf(trace->output, "Couldn't run the workload!\n");
1254 goto out_delete_maps;
1258 err = perf_evlist__open(evlist);
1260 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1261 goto out_delete_maps;
1264 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1266 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1267 goto out_close_evlist;
1270 perf_evlist__enable(evlist);
1273 perf_evlist__start_workload(evlist);
1275 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1277 before = trace->nr_events;
1279 for (i = 0; i < evlist->nr_mmaps; i++) {
1280 union perf_event *event;
1282 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1283 const u32 type = event->header.type;
1284 tracepoint_handler handler;
1285 struct perf_sample sample;
1289 err = perf_evlist__parse_sample(evlist, event, &sample);
1291 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1295 if (!trace->full_time && trace->base_time == 0)
1296 trace->base_time = sample.time;
1298 if (type != PERF_RECORD_SAMPLE) {
1299 trace__process_event(trace, &trace->host, event);
1303 evsel = perf_evlist__id2evsel(evlist, sample.id);
1304 if (evsel == NULL) {
1305 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1309 if (sample.raw_data == NULL) {
1310 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1311 perf_evsel__name(evsel), sample.tid,
1312 sample.cpu, sample.raw_size);
1316 handler = evsel->handler.func;
1317 handler(trace, evsel, &sample);
1320 goto out_unmap_evlist;
1324 if (trace->nr_events == before) {
1326 goto out_unmap_evlist;
1328 poll(evlist->pollfd, evlist->nr_fds, -1);
1332 perf_evlist__disable(evlist);
1337 perf_evlist__munmap(evlist);
1339 perf_evlist__close(evlist);
1341 perf_evlist__delete_maps(evlist);
1343 perf_evlist__delete(evlist);
/*
 * Replay mode: process syscall events from a recorded perf data file instead
 * of tracing live.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample, the
 * rest to the generic perf_event__process_* helpers), requests time-ordered
 * sample delivery, and forces multiple_threads so the tid is printed on every
 * line.  After opening the session for input_name it binds the sys_enter and
 * sys_exit handlers, verifies that both tracepoints exist in the file, builds
 * the pid/tid filter lists and processes all events.  The session is deleted
 * at the end.
 * NOTE(review): error returns and goto labels are not visible in this
 * excerpt.
 */
1348 static int trace__replay(struct trace *trace)
1350 const struct perf_evsel_str_handler handlers[] = {
1351 { "raw_syscalls:sys_enter", trace__sys_enter, },
1352 { "raw_syscalls:sys_exit", trace__sys_exit, },
1355 struct perf_session *session;
1358 trace->tool.sample = trace__process_sample;
1359 trace->tool.mmap = perf_event__process_mmap;
1360 trace->tool.mmap2 = perf_event__process_mmap2;
1361 trace->tool.comm = perf_event__process_comm;
1362 trace->tool.exit = perf_event__process_exit;
1363 trace->tool.fork = perf_event__process_fork;
1364 trace->tool.attr = perf_event__process_attr;
1365 trace->tool.tracing_data = perf_event__process_tracing_data;
1366 trace->tool.build_id = perf_event__process_build_id;
1368 trace->tool.ordered_samples = true;
1369 trace->tool.ordering_requires_timestamps = true;
1371 /* add tid to output */
1372 trace->multiple_threads = true;
1374 if (symbol__init() < 0)
1377 session = perf_session__new(input_name, O_RDONLY, 0, false,
1379 if (session == NULL)
1382 err = perf_session__set_tracepoints_handlers(session, handlers);
1386 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1387 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1391 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1392 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1396 err = parse_target_str(trace);
1402 err = perf_session__process_events(session, &trace->tool);
/* NOTE(review): unlike the other pr_err() calls in this function, this message has no trailing newline. */
1404 pr_err("Failed to process events, error %d", err);
1407 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to fp,
 * accumulating the number of characters written.
 * NOTE(review): the declaration of `printed` and the return are not visible
 * in this excerpt.
 */
1412 static size_t trace__fprintf_threads_header(FILE *fp)
1416 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1417 printed += fprintf(fp," __) Summary of events (__\n\n");
1418 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1419 printed += fprintf(fp," _____________________________________________________________________\n\n");
/*
 * Print one summary row per thread known to the host machine: command name,
 * tid, event count, the thread's share of all events as a percentage, and its
 * accumulated runtime in ms.  The name and percentage are colored by ratio:
 * red on the highest branch, green above 25%, yellow above 5%, normal
 * otherwise.
 * NOTE(review): the threshold for red, the handling of threads without a
 * thread_trace and the return are not visible in this excerpt.  The ratio
 * divides by trace->nr_events without a visible zero check.
 */
1424 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1426 size_t printed = trace__fprintf_threads_header(fp);
1429 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1430 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1431 struct thread_trace *ttrace = thread->priv;
1438 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1440 color = PERF_COLOR_NORMAL;
1442 color = PERF_COLOR_RED;
1443 else if (ratio > 25.0)
1444 color = PERF_COLOR_GREEN;
1445 else if (ratio > 5.0)
1446 color = PERF_COLOR_YELLOW;
1448 printed += color_fprintf(fp, color, "%20s", thread->comm);
1449 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1450 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1451 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
/*
 * Option callback that stores the value of @str in the duration filter.
 *
 * @opt:   option being parsed; opt->value is used as the struct trace.
 * @str:   option argument, converted with atof().
 * @unset: unused.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1457 static int trace__set_duration(const struct option *opt, const char *str,
1458 int unset __maybe_unused)
1460 struct trace *trace = opt->value;
/*
 * NOTE(review): atof() cannot report conversion errors, so a non-numeric
 * argument is stored as 0.0 without any diagnostic.
 */
1462 trace->duration_filter = atof(str);
/*
 * Open @filename for writing and store the stream in trace->output.
 *
 * If @filename already exists with a non-zero size, it is first renamed to
 * "<filename>.old".
 *
 * Returns 0 on success, or -errno if fopen() failed.
 */
1466 static int trace__open_output(struct trace *trace, const char *filename)
/* Only keep a backup when stat() succeeds and the file is not empty. */
1470 if (!stat(filename, &st) && st.st_size) {
1471 char oldname[PATH_MAX];
/* Build the backup name; scnprintf() is bounded by sizeof(oldname). */
1473 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* NOTE(review): the result of rename() is not checked. */
1475 rename(filename, oldname);
/* Mode "w" creates the file or truncates an existing one. */
1478 trace->output = fopen(filename, "w");
1480 return trace->output == NULL ? -errno : 0;
1483 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1485 const char * const trace_usage[] = {
1486 "perf trace [<options>] [<command>]",
1487 "perf trace [<options>] -- <command> [<options>]",
1490 struct trace trace = {
1491 .audit_machine = audit_detect_machine(),
1500 .user_freq = UINT_MAX,
1501 .user_interval = ULLONG_MAX,
1507 const char *output_name = NULL;
1508 const char *ev_qualifier_str = NULL;
1509 const struct option trace_options[] = {
1510 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1511 "list of events to trace"),
1512 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1513 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1514 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1515 "trace events on existing process id"),
1516 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1517 "trace events on existing thread id"),
1518 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1519 "system-wide collection from all CPUs"),
1520 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1521 "list of cpus to monitor"),
1522 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1523 "child tasks do not inherit counters"),
1524 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1525 "number of mmap data pages"),
1526 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1528 OPT_CALLBACK(0, "duration", &trace, "float",
1529 "show only events with duration > N.M ms",
1530 trace__set_duration),
1531 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1532 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1533 OPT_BOOLEAN('T', "time", &trace.full_time,
1534 "Show full timestamp, not time relative to first start"),
1540 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1542 if (output_name != NULL) {
1543 err = trace__open_output(&trace, output_name);
1545 perror("failed to create output file");
1550 if (ev_qualifier_str != NULL) {
1551 const char *s = ev_qualifier_str;
1553 trace.not_ev_qualifier = *s == '!';
1554 if (trace.not_ev_qualifier)
1556 trace.ev_qualifier = strlist__new(true, s);
1557 if (trace.ev_qualifier == NULL) {
1558 fputs("Not enough memory to parse event qualifier",
1565 err = perf_target__validate(&trace.opts.target);
1567 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1568 fprintf(trace.output, "%s", bf);
1572 err = perf_target__parse_uid(&trace.opts.target);
1574 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1575 fprintf(trace.output, "%s", bf);
1579 if (!argc && perf_target__none(&trace.opts.target))
1580 trace.opts.target.system_wide = true;
1583 err = trace__replay(&trace);
1585 err = trace__run(&trace, argc, argv);
1587 if (trace.sched && !err)
1588 trace__fprintf_thread_summary(&trace, trace.output);
1591 if (output_name != NULL)
1592 fclose(trace.output);