1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
/*
 * Fallback values for mmap and madvise constants that the system headers
 * may not provide.
 * NOTE(review): the guards around MAP_STACK and MADV_HWPOISON and the
 * closing #endif lines are not visible in this view.
 */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
/*
 * DEFINE_STRARRAY(array) - define a struct strarray named strarray__<array>
 * describing the string table <array>. nr_entries is taken from
 * ARRAY_SIZE(array), so <array> must be a real array, not a pointer.
 * NOTE(review): the remaining initializer fields are not visible in this view.
 */
48 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
49 .nr_entries = ARRAY_SIZE(array), \
/*
 * Format a syscall argument as a symbolic name taken from a strarray.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->parm points to the struct strarray to use
 *
 * An index outside [0, nr_entries) is printed as a plain decimal number
 * instead of a name. Returns whatever scnprintf returns.
 * NOTE(review): the declaration of idx is not visible in this view;
 * presumably it is initialized from arg->val - confirm.
 */
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54 struct syscall_arg *arg)
57 struct strarray *sa = arg->parm;
59 if (idx < 0 || idx >= sa->nr_entries)
60 return scnprintf(bf, size, "%d", idx);
62 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* Short alias used when filling in the syscall format table. */
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Format a syscall argument as a hexadecimal number with a 0x prefix
 * (format %#lx applied to arg->val).
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; only arg->val is read
 *
 * Returns whatever scnprintf returns.
 */
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68 struct syscall_arg *arg)
70 return scnprintf(bf, size, "%#lx", arg->val);
/* Short alias used when filling in the syscall format table. */
73 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Format the prot argument of mmap/mprotect as a list of PROT_* names
 * joined with a pipe character.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->val holds the protection bits
 *
 * PROT_NONE is printed as NONE. Each P_MMAP_PROT(n) use appends the name n
 * when PROT_n is set in prot. The final scnprintf appends a hexadecimal
 * value of prot.
 * NOTE(review): the rest of the macro body, most of its uses, the condition
 * guarding the final hex print and the return statement are not visible in
 * this view; presumably the hex value covers only bits left unnamed.
 */
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76 struct syscall_arg *arg)
78 int printed = 0, prot = arg->val;
80 if (prot == PROT_NONE)
81 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN);
99 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
/* Short alias used when filling in the syscall format table. */
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Format the flags argument of mmap as a list of MAP_* names joined with a
 * pipe character.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->val holds the flag bits
 *
 * Each P_MMAP_FLAG(n) use appends the name n when MAP_n is set in flags.
 * MAP_UNINITIALIZED is only tested when the headers define it. The final
 * scnprintf appends a hexadecimal value of flags.
 * NOTE(review): the rest of the macro body, some of its uses, the condition
 * guarding the final hex print and the return statement are not visible in
 * this view.
 */
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107 struct syscall_arg *arg)
109 int printed = 0, flags = arg->val;
111 #define P_MMAP_FLAG(n) \
112 if (flags & MAP_##n) { \
113 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
118 P_MMAP_FLAG(PRIVATE);
122 P_MMAP_FLAG(ANONYMOUS);
123 P_MMAP_FLAG(DENYWRITE);
124 P_MMAP_FLAG(EXECUTABLE);
127 P_MMAP_FLAG(GROWSDOWN);
129 P_MMAP_FLAG(HUGETLB);
132 P_MMAP_FLAG(NONBLOCK);
133 P_MMAP_FLAG(NORESERVE);
134 P_MMAP_FLAG(POPULATE);
136 #ifdef MAP_UNINITIALIZED
137 P_MMAP_FLAG(UNINITIALIZED);
142 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
/* Short alias used when filling in the syscall format table. */
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Format the behavior argument of madvise as the matching MADV_* name.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->val holds the advice value
 *
 * Each P_MADV_BHV(n) expands to a case label for MADV_n that returns the
 * name n. SOFT_OFFLINE and NOHUGEPAGE are only handled when the headers
 * define them. A value with no matching case is printed in hexadecimal.
 * NOTE(review): the switch statement itself and several case lines are not
 * visible in this view.
 */
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150 struct syscall_arg *arg)
152 int behavior = arg->val;
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158 P_MADV_BHV(SEQUENTIAL);
159 P_MADV_BHV(WILLNEED);
160 P_MADV_BHV(DONTNEED);
162 P_MADV_BHV(DONTFORK);
164 P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166 P_MADV_BHV(SOFT_OFFLINE);
168 P_MADV_BHV(MERGEABLE);
169 P_MADV_BHV(UNMERGEABLE);
171 P_MADV_BHV(HUGEPAGE);
173 #ifdef MADV_NOHUGEPAGE
174 P_MADV_BHV(NOHUGEPAGE);
177 P_MADV_BHV(DONTDUMP);
186 return scnprintf(bf, size, "%#x", behavior);
/* Short alias used when filling in the syscall format table. */
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Format the op argument of futex as the command name, optionally followed
 * by the PRIV and CLKRT suffixes.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->mask is updated as a side effect
 *
 * The command is op masked with FUTEX_CMD_MASK. Each P_FUTEX_OP(n) expands
 * to a case label for FUTEX_n that prints the name n; the statements after
 * it on the same line OR SCF_* bits into arg->mask. An unknown command is
 * printed in hexadecimal. FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME add
 * the PRIV and CLKRT suffixes.
 * NOTE(review): presumably a bit set in arg->mask marks a futex argument
 * that the given command does not use, so that it is left out of the
 * printed argument list - confirm against syscall__scnprintf_args. The
 * declaration of op, the switch statement and the return are not visible in
 * this view.
 */
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
/* One bit per futex argument position; some enumerators are not visible here. */
193 enum syscall_futex_args {
194 SCF_UADDR = (1 << 0),
197 SCF_TIMEOUT = (1 << 3),
198 SCF_UADDR2 = (1 << 4),
202 int cmd = op & FUTEX_CMD_MASK;
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
208 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
211 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(WAKE_OP); break;
214 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
217 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
218 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
220 default: printed = scnprintf(bf, size, "%#x", cmd); break;
/* Modifier bits that are carried in op next to the command. */
223 if (op & FUTEX_PRIVATE_FLAG)
224 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 if (op & FUTEX_CLOCK_REALTIME)
227 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
/* Short alias used when filling in the syscall format table. */
232 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Names for the which argument of getitimer and setitimer, indexed by the
 * argument value and wrapped in strarray__itimers for SCA_STRARRAY.
 */
234 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
235 static DEFINE_STRARRAY(itimers);
/*
 * Names for the whence argument of lseek, indexed by the argument value and
 * wrapped in strarray__whences for SCA_STRARRAY.
 * NOTE(review): the tail of the initializer is not visible in this view.
 */
237 static const char *whences[] = { "SET", "CUR", "END",
245 static DEFINE_STRARRAY(whences);
/*
 * Names for the cmd argument of fcntl, indexed by the argument value and
 * wrapped in strarray__fcntl_cmds for SCA_STRARRAY.
 * NOTE(review): the tail of the initializer is not visible in this view.
 */
247 static const char *fcntl_cmds[] = {
248 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
249 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
250 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
253 static DEFINE_STRARRAY(fcntl_cmds);
/*
 * Names for the resource argument of getrlimit, setrlimit and prlimit64,
 * indexed by the argument value and wrapped in strarray__rlimit_resources
 * for SCA_STRARRAY.
 * NOTE(review): the tail of the initializer is not visible in this view.
 */
255 static const char *rlimit_resources[] = {
256 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
257 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
260 static DEFINE_STRARRAY(rlimit_resources);
/*
 * Names for the how argument of rt_sigprocmask, indexed by the argument
 * value and wrapped in strarray__sighow for SCA_STRARRAY.
 */
262 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
263 static DEFINE_STRARRAY(sighow);
/*
 * Names for the family argument of socket, indexed by the argument value
 * and wrapped in strarray__socket_families for SCA_STRARRAY.
 * NOTE(review): the closing line of the initializer is not visible in this
 * view.
 */
265 static const char *socket_families[] = {
266 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
267 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
268 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
269 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
270 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
271 "ALG", "NFC", "VSOCK",
273 static DEFINE_STRARRAY(socket_families);
/*
 * Fallback for headers that lack SOCK_TYPE_MASK: the low four bits of the
 * type argument select the socket type, the remaining bits are flags.
 */
275 #ifndef SOCK_TYPE_MASK
276 #define SOCK_TYPE_MASK 0xf
/*
 * Format the type argument of socket as the SOCK_* type name followed by
 * any SOCK_* flag names, each flag prefixed with a pipe character.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor
 *
 * The value is split with SOCK_TYPE_MASK into a type part and a flags part.
 * P_SK_TYPE(n) expands to a case label that prints the name n; a type with
 * no matching case is printed in hexadecimal. P_SK_FLAG(n) prints the name
 * n and clears SOCK_n from flags, and the final scnprintf appends a
 * hexadecimal value of flags.
 * NOTE(review): the declarations, the switch statement, most macro uses,
 * the condition guarding the final hex print and the return are not visible
 * in this view.
 */
279 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
280 struct syscall_arg *arg)
284 flags = type & ~SOCK_TYPE_MASK;
286 type &= SOCK_TYPE_MASK;
288 * Can't use a strarray, MIPS may override for ABI reasons.
291 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
296 P_SK_TYPE(SEQPACKET);
301 printed = scnprintf(bf, size, "%#x", type);
304 #define P_SK_FLAG(n) \
305 if (flags & SOCK_##n) { \
306 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
307 flags &= ~SOCK_##n; \
315 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
320 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback values for MSG_* flags that the system headers may not provide.
 * NOTE(review): only the guard for MSG_SENDPAGE_NOTLAST is visible in this
 * view.
 */
323 #define MSG_PROBE 0x10
325 #ifndef MSG_SENDPAGE_NOTLAST
326 #define MSG_SENDPAGE_NOTLAST 0x20000
329 #define MSG_FASTOPEN 0x20000000
/*
 * Format the flags argument of the send and receive syscalls as a list of
 * MSG_* names joined with a pipe character.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->val holds the flag bits
 *
 * One early return prints NONE. Each P_MSG_FLAG(n) use appends the name n
 * when MSG_n is set in flags. The final scnprintf appends a hexadecimal
 * value of flags.
 * NOTE(review): the condition for the NONE case (presumably flags equal to
 * zero), the rest of the macro body, several macro uses, the condition
 * guarding the final hex print and the return are not visible in this view.
 */
332 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
333 struct syscall_arg *arg)
335 int printed = 0, flags = arg->val;
338 return scnprintf(bf, size, "NONE");
339 #define P_MSG_FLAG(n) \
340 if (flags & MSG_##n) { \
341 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
347 P_MSG_FLAG(DONTROUTE);
352 P_MSG_FLAG(DONTWAIT);
359 P_MSG_FLAG(ERRQUEUE);
360 P_MSG_FLAG(NOSIGNAL);
362 P_MSG_FLAG(WAITFORONE);
363 P_MSG_FLAG(SENDPAGE_NOTLAST);
364 P_MSG_FLAG(FASTOPEN);
365 P_MSG_FLAG(CMSG_CLOEXEC);
369 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
/* Short alias used when filling in the syscall format table. */
374 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Format the mode argument of access as letters taken from the *_OK
 * constants.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor
 *
 * F_OK is printed as F. Otherwise a helper macro appends the letter n for
 * every n_OK bit set in mode, with no separator between letters, and the
 * final scnprintf appends a pipe character plus a hexadecimal value of
 * mode.
 * NOTE(review): the declarations, the macro name and its uses, the
 * condition guarding the final hex print and the return are not visible in
 * this view.
 */
376 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
377 struct syscall_arg *arg)
382 if (mode == F_OK) /* 0 */
383 return scnprintf(bf, size, "F");
385 if (mode & n##_OK) { \
386 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
396 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
401 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Format the flags argument of the open family as a list of O_* names
 * joined with a pipe character.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor; arg->val holds the flags, arg->idx the
 *        position of this argument, arg->mask is updated as a side effect
 *
 * When O_CREAT is absent the bit for the following argument (the mode) is
 * set in arg->mask. One early return prints RDONLY. A helper macro appends
 * the name n for every O_n bit set. SYNC is only printed when all bits of
 * O_SYNC are set, which is why it uses an equality test rather than a plain
 * mask test. The final scnprintf appends a hexadecimal value of flags.
 * NOTE(review): the condition for the RDONLY case, the macro name and its
 * uses, the condition guarding the final hex print and the return are not
 * visible in this view.
 */
403 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
404 struct syscall_arg *arg)
406 int printed = 0, flags = arg->val;
408 if (!(flags & O_CREAT))
409 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
412 return scnprintf(bf, size, "RDONLY");
414 if (flags & O_##n) { \
415 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
439 if ((flags & O_SYNC) == O_SYNC)
440 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
452 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
457 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Format a signal number argument as the signal name without the SIG
 * prefix.
 *
 * @bf:   output buffer
 * @size: room available in @bf
 * @arg:  argument descriptor
 *
 * P_SIGNUM(n) expands to a case label for SIGn that returns the name n. A
 * value with no matching case is printed in hexadecimal.
 * NOTE(review): the declaration of sig, the switch statement and the
 * P_SIGNUM uses are not visible in this view.
 */
459 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
464 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
499 return scnprintf(bf, size, "%#x", sig);
/* Short alias used when filling in the syscall format table. */
502 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Per-syscall formatting overrides.
 *
 * Fields used by the entries below:
 *   .name           syscall name, the lookup key
 *   .alias          alternative name tried for the sys_enter_* tracepoint
 *                   when the one built from .name is not found
 *   .arg_scnprintf  optional formatter for each of up to six arguments,
 *                   indexed by argument position
 *   .arg_parm       optional per-argument parameter handed to the formatter
 *                   (the strarray for SCA_STRARRAY)
 *   .errmsg         negative return values are printed as an errno name and
 *                   message
 *   .timeout        a return value of zero is printed as Timeout
 *   .hexret         the return value is printed in hexadecimal
 *
 * syscall_fmt__find looks entries up with bsearch and strcmp, so the table
 * MUST stay sorted by .name in strcmp order (note that open_by_handle_at
 * sorts before openat because the underscore compares lower than a).
 * NOTE(review): the remaining struct members and the line that names the
 * array are not visible in this view.
 */
504 static struct syscall_fmt {
507 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
513 { .name = "access", .errmsg = true,
514 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
515 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
516 { .name = "brk", .hexret = true,
517 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
518 { .name = "connect", .errmsg = true, },
519 { .name = "fcntl", .errmsg = true,
520 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
521 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
522 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
523 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
524 { .name = "futex", .errmsg = true,
525 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
526 { .name = "getitimer", .errmsg = true,
527 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
528 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
529 { .name = "getrlimit", .errmsg = true,
530 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
531 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
532 { .name = "ioctl", .errmsg = true,
533 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
534 { .name = "kill", .errmsg = true,
535 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
536 { .name = "lseek", .errmsg = true,
537 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
538 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
539 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
540 { .name = "madvise", .errmsg = true,
541 .arg_scnprintf = { [0] = SCA_HEX, /* start */
542 [2] = SCA_MADV_BHV, /* behavior */ }, },
543 { .name = "mmap", .hexret = true,
544 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
545 [2] = SCA_MMAP_PROT, /* prot */
546 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
547 { .name = "mprotect", .errmsg = true,
548 .arg_scnprintf = { [0] = SCA_HEX, /* start */
549 [2] = SCA_MMAP_PROT, /* prot */ }, },
550 { .name = "mremap", .hexret = true,
551 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
552 [4] = SCA_HEX, /* new_addr */ }, },
553 { .name = "munmap", .errmsg = true,
554 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
555 { .name = "open", .errmsg = true,
556 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
557 { .name = "open_by_handle_at", .errmsg = true,
558 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
559 { .name = "openat", .errmsg = true,
560 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
561 { .name = "poll", .errmsg = true, .timeout = true, },
562 { .name = "ppoll", .errmsg = true, .timeout = true, },
563 { .name = "pread", .errmsg = true, .alias = "pread64", },
564 { .name = "prlimit64", .errmsg = true,
565 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
566 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
567 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
568 { .name = "read", .errmsg = true, },
569 { .name = "recvfrom", .errmsg = true,
570 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
571 { .name = "recvmmsg", .errmsg = true,
572 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
573 { .name = "recvmsg", .errmsg = true,
574 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
575 { .name = "rt_sigaction", .errmsg = true,
576 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
577 { .name = "rt_sigprocmask", .errmsg = true,
578 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
579 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
580 { .name = "rt_sigqueueinfo", .errmsg = true,
581 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
582 { .name = "rt_tgsigqueueinfo", .errmsg = true,
583 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
584 { .name = "select", .errmsg = true, .timeout = true, },
585 { .name = "sendmmsg", .errmsg = true,
586 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
587 { .name = "sendmsg", .errmsg = true,
588 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
589 { .name = "sendto", .errmsg = true,
590 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
591 { .name = "setitimer", .errmsg = true,
592 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
593 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
594 { .name = "setrlimit", .errmsg = true,
595 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
596 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
597 { .name = "socket", .errmsg = true,
598 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
599 [1] = SCA_SK_TYPE, /* type */ },
600 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
601 { .name = "stat", .errmsg = true, .alias = "newstat", },
602 { .name = "tgkill", .errmsg = true,
603 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
604 { .name = "tkill", .errmsg = true,
605 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
606 { .name = "uname", .errmsg = true, .alias = "newuname", },
/*
 * bsearch comparison callback for the syscall format table.
 *
 * @name: the key, a NUL terminated syscall name
 * @fmtp: the table element being compared, a struct syscall_fmt
 *
 * Returns the strcmp result of the key against the element name.
 */
609 static int syscall_fmt__cmp(const void *name, const void *fmtp)
611 const struct syscall_fmt *fmt = fmtp;
612 return strcmp(name, fmt->name);
/*
 * Look up the formatting overrides for a syscall.
 *
 * @name: syscall name to search for
 *
 * Binary searches syscall_fmts, which therefore has to be sorted by name.
 * Returns the matching entry, or NULL when bsearch finds none.
 */
615 static struct syscall_fmt *syscall_fmt__find(const char *name)
617 const int nmemb = ARRAY_SIZE(syscall_fmts);
618 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Members of the per-syscall descriptor that trace__read_syscall_info fills
 * in. NOTE(review): the struct header and other members are not visible in
 * this view.
 */
/* Format of the sys_enter_<name> tracepoint, NULL when it was not found. */
622 struct event_format *tp_format;
/* Formatting overrides from the syscall format table, NULL when none. */
625 struct syscall_fmt *fmt;
/* Per-argument formatters, allocated by syscall__set_arg_fmts. */
626 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration wrapped in parentheses and followed by a colon and a
 * space.
 *
 * @t:  duration; it is divided by NSEC_PER_MSEC and shown in milliseconds
 * @fp: output stream
 *
 * The value is colored red for the first threshold, yellow from 0.01 ms
 * upwards and uncolored otherwise. Returns the sum of the counts reported
 * by the print calls.
 * NOTE(review): the condition selecting the red color is not visible in
 * this view.
 */
630 static size_t fprintf_duration(unsigned long t, FILE *fp)
632 double duration = (double)t / NSEC_PER_MSEC;
633 size_t printed = fprintf(fp, "(");
636 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
637 else if (duration >= 0.01)
638 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
640 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
641 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, stored in thread->priv by thread__trace.
 * NOTE(review): only one member is visible in this view; other code in this
 * file also reads and writes entry_time, exit_time, entry_pending,
 * entry_str and runtime_ms.
 */
644 struct thread_trace {
/* Event counter; shown per thread by trace__fprintf_thread_summary. */
648 unsigned long nr_events;
/*
 * Allocate a struct thread_trace with zalloc.
 * Returns the new object, or whatever zalloc returns on failure.
 */
653 static struct thread_trace *thread_trace__new(void)
655 return zalloc(sizeof(struct thread_trace));
/*
 * Get the tracing state attached to a thread, creating it on first use.
 *
 * @thread: thread whose priv pointer holds the state
 * @fp:     stream that receives the out of memory warning
 *
 * thread->priv is allocated lazily with thread_trace__new. A red warning
 * about dropped samples is printed to @fp on the failure path.
 * NOTE(review): the statements between the visible lines, the label of the
 * failure path and both return statements are not visible in this view;
 * presumably NULL is returned on failure.
 */
658 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
660 struct thread_trace *ttrace;
665 if (thread->priv == NULL)
666 thread->priv = thread_trace__new();
668 if (thread->priv == NULL)
671 ttrace = thread->priv;
676 color_fprintf(fp, PERF_COLOR_RED,
677 "WARNING: not enough memory, dropping samples!\n");
/*
 * Members of the state object of one perf trace run.
 * NOTE(review): the struct header and several members are not visible in
 * this view.
 */
/* Embedded tool; container_of recovers the struct trace from it. */
682 struct perf_tool tool;
/* Syscall descriptors indexed by syscall id, grown on demand. */
686 struct syscall *table;
688 struct perf_record_opts opts;
/* Total event count; the denominator of the summary ratio column. */
693 unsigned long nr_events;
/* Syscall names given with -e, and whether that list was negated with !. */
694 struct strlist *ev_qualifier;
695 bool not_ev_qualifier;
/* Thread and process ids parsed from the target options, see skip_sample. */
696 struct intlist *tid_list;
697 struct intlist *pid_list;
/* When set, every output line also shows the thread id. */
699 bool multiple_threads;
/* Threshold in milliseconds set by the duration option. */
700 double duration_filter;
/*
 * Tell whether a duration is below the configured threshold.
 *
 * @trace: supplies duration_filter, expressed in milliseconds
 * @t:     duration to test, in the unit that NSEC_PER_MSEC converts from
 *
 * Returns true when @t is smaller than duration_filter scaled by
 * NSEC_PER_MSEC. With a filter of zero this is false for every non-negative
 * @t.
 */
704 static bool trace__filter_duration(struct trace *trace, double t)
706 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print a timestamp relative to trace->base_time, in milliseconds.
 *
 * @trace:  supplies base_time
 * @tstamp: absolute timestamp of the event
 * @fp:     output stream
 *
 * The difference is divided by NSEC_PER_MSEC and printed with three
 * decimals in a ten character field followed by a space. Returns the
 * fprintf result.
 */
709 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
711 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
713 return fprintf(fp, "%10.3f ", ts);
/*
 * File scope flag, initially false.
 * NOTE(review): its readers and writers are not visible in this view;
 * presumably sig_handler sets it and the event loop in trace__run polls it.
 * If it is written from a signal handler, volatile sig_atomic_t is the type
 * the C standard guarantees for that use - confirm and consider changing.
 */
716 static bool done = false;
/*
 * Signal handler that trace__run installs for SIGCHLD and SIGINT. The
 * signal number is unused.
 * NOTE(review): the body is not visible in this view.
 */
718 static void sig_handler(int sig __maybe_unused)
/*
 * Print the common prefix of an output line: timestamp, duration and, when
 * more than one thread is traced, the thread id.
 *
 * @trace:    supplies base_time and multiple_threads
 * @thread:   thread the event belongs to
 * @duration: syscall duration handed to fprintf_duration
 * @tstamp:   event timestamp handed to trace__fprintf_tstamp
 * @fp:       output stream
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably the accumulated printed count is returned.
 */
723 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
724 u64 duration, u64 tstamp, FILE *fp)
726 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
727 printed += fprintf_duration(duration, fp);
729 if (trace->multiple_threads)
730 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event.
 *
 * @trace:   supplies the output stream
 * @machine: machine the event is applied to
 * @event:   event to process
 *
 * PERF_RECORD_LOST events are reported in red on trace->output with the
 * number of lost events and then passed to machine__process_lost_event.
 * The other visible path passes the event to machine__process_event.
 * NOTE(review): the declaration of ret, the default label and the return
 * are not visible in this view.
 */
735 static int trace__process_event(struct trace *trace, struct machine *machine,
736 union perf_event *event)
740 switch (event->header.type) {
741 case PERF_RECORD_LOST:
742 color_fprintf(trace->output, PERF_COLOR_RED,
743 "LOST %" PRIu64 " events!\n", event->lost.lost);
744 ret = machine__process_lost_event(machine, event);
746 ret = machine__process_event(machine, event);
/*
 * perf_tool callback adapter: recover the struct trace that embeds @tool
 * and forward the event to trace__process_event. The sample is unused.
 * Returns the result of trace__process_event.
 */
753 static int trace__tool_process(struct perf_tool *tool,
754 union perf_event *event,
755 struct perf_sample *sample __maybe_unused,
756 struct machine *machine)
758 struct trace *trace = container_of(tool, struct trace, tool);
759 return trace__process_event(trace, machine, event);
/*
 * Initialize symbol handling and the host machine, then synthesize events
 * for the threads that already exist.
 *
 * @trace:  supplies the host machine, the tool and the target options
 * @evlist: supplies the thread map used when the target names tasks
 *
 * When the target has tasks only those threads are synthesized from
 * evlist->threads; otherwise perf_event__synthesize_threads is used.
 * NOTE(review): the error check after symbol__init, the remaining call
 * arguments and the return are not visible in this view.
 */
762 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
764 int err = symbol__init();
769 machine__init(&trace->host, "", HOST_KERNEL_ID);
770 machine__create_kernel_maps(&trace->host);
772 if (perf_target__has_task(&trace->opts.target)) {
773 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
777 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Choose a formatter for every argument of a syscall.
 *
 * @sc: syscall descriptor with tp_format already set
 *
 * Allocates one formatter slot per tracepoint field except the first one,
 * which the loop skips by starting at fields->next. For each remaining
 * field the formatter from the format table entry wins; otherwise pointer
 * fields get the hexadecimal formatter and other fields keep the NULL that
 * calloc left in place. arg_parm is taken over from the format table entry.
 * NOTE(review): the declaration and increment of idx, the guard around the
 * arg_parm assignment and the return values are not visible in this view.
 */
787 static int syscall__set_arg_fmts(struct syscall *sc)
789 struct format_field *field;
792 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
793 if (sc->arg_scnprintf == NULL)
797 sc->arg_parm = sc->fmt->arg_parm;
799 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
800 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
801 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
802 else if (field->flags & FIELD_IS_POINTER)
803 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the descriptor for syscall @id in trace->syscalls.table.
 *
 * @trace: supplies the table, the audit machine type and the qualifier list
 * @id:    syscall number
 *
 * Steps visible here:
 *  - translate @id to a name with audit_syscall_to_name;
 *  - when @id is beyond syscalls.max, grow the table with realloc to
 *    id + 1 entries and zero the new entries (the whole table when max is
 *    -1), then record the new max;
 *  - when a qualifier list exists, test whether the name is on it, taking
 *    the negation flag into account;
 *  - look up the format table entry, then load the sys_enter_<name>
 *    tracepoint format, retrying with the alias from the format entry;
 *  - finish with syscall__set_arg_fmts and return its result.
 * NOTE(review): the declarations, the checks and returns on the failure
 * paths, the assignment of sc->name and the body of the qualifier branch
 * are not visible in this view.
 */
810 static int trace__read_syscall_info(struct trace *trace, int id)
814 const char *name = audit_syscall_to_name(id, trace->audit_machine);
819 if (id > trace->syscalls.max) {
820 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
822 if (nsyscalls == NULL)
825 if (trace->syscalls.max != -1) {
826 memset(nsyscalls + trace->syscalls.max + 1, 0,
827 (id - trace->syscalls.max) * sizeof(*sc));
829 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
832 trace->syscalls.table = nsyscalls;
833 trace->syscalls.max = id;
836 sc = trace->syscalls.table + id;
839 if (trace->ev_qualifier) {
840 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
842 if (!(in ^ trace->not_ev_qualifier)) {
845 * No need to do read tracepoint information since this will be
852 sc->fmt = syscall_fmt__find(sc->name);
854 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
855 sc->tp_format = event_format__new("syscalls", tp_name);
857 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
858 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
859 sc->tp_format = event_format__new("syscalls", tp_name);
862 if (sc->tp_format == NULL)
865 return syscall__set_arg_fmts(sc);
/*
 * Format the argument list of a syscall into a buffer.
 *
 * @sc:   syscall descriptor
 * @bf:   output buffer
 * @size: room available in @bf
 *
 * When the tracepoint format is known, the fields are walked starting at
 * the second one; arg.idx counts arguments and bit is shifted left once per
 * argument. Each printed argument is written as its field name, a colon and
 * the value, separated from the previous one by a comma. The value comes
 * from the per-argument formatter when one is set, otherwise it is printed
 * as a signed decimal. Without a tracepoint format a fallback loop prints
 * the raw values by index.
 * NOTE(review): the remaining parameters, the initializer of arg, what
 * happens for zero valued arguments, the use of bit, the fallback format
 * string and the return are not visible in this view.
 */
868 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
873 if (sc->tp_format != NULL) {
874 struct format_field *field;
876 struct syscall_arg arg = {
881 for (field = sc->tp_format->format.fields->next; field;
882 field = field->next, ++arg.idx, bit <<= 1) {
886 if (args[arg.idx] == 0)
889 printed += scnprintf(bf + printed, size - printed,
890 "%s%s: ", printed ? ", " : "", field->name);
891 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
892 arg.val = args[arg.idx];
894 arg.parm = sc->arg_parm[arg.idx];
895 printed += sc->arg_scnprintf[arg.idx](bf + printed,
896 size - printed, &arg);
898 printed += scnprintf(bf + printed, size - printed,
899 "%ld", args[arg.idx]);
906 printed += scnprintf(bf + printed, size - printed,
908 printed ? ", " : "", i, args[i]);
/*
 * Signature shared by the tracepoint handlers in this file
 * (trace__sys_enter, trace__sys_exit, trace__sched_stat_runtime). The
 * handler is stored in evsel->handler.func and called once per sample.
 */
916 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
917 struct perf_sample *sample);
/*
 * Find, and load on first use, the descriptor of the syscall a sample
 * refers to.
 *
 * @trace:  supplies the syscall table and the output stream
 * @evsel:  event selector the sample came from
 * @sample: sample whose id field holds the syscall number
 *
 * An invalid id is reported on trace->output together with a running
 * counter. An id that is beyond the table, or whose entry has no name yet,
 * triggers trace__read_syscall_info. On success a pointer into
 * trace->syscalls.table is returned. The failure path prints a message
 * naming the syscall number and, when known, its name.
 * NOTE(review): the test that classifies an id as invalid, the verbosity
 * check mentioned by the comment below, the labels and the failure return
 * value are not visible in this view; presumably NULL is returned on
 * failure.
 */
919 static struct syscall *trace__syscall_info(struct trace *trace,
920 struct perf_evsel *evsel,
921 struct perf_sample *sample)
923 int id = perf_evsel__intval(evsel, sample, "id");
928 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
929 * before that, leaving at a higher verbosity level till that is
930 * explained. Reproduced with plain ftrace with:
932 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
933 * grep "NR -1 " /t/trace_pipe
935 * After generating some load on the machine.
939 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
940 id, perf_evsel__name(evsel), ++n);
945 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
946 trace__read_syscall_info(trace, id))
949 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
952 return &trace->syscalls.table[id];
956 fprintf(trace->output, "Problems reading syscall %d", id);
957 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
958 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
959 fputs(" information\n", trace->output);
/*
 * Handler for raw_syscalls:sys_enter samples.
 *
 * @trace:  run state
 * @evsel:  event selector the sample came from
 * @sample: the sys_enter sample
 *
 * Formats the syscall name and its arguments into the per-thread entry_str
 * buffer (1024 bytes, allocated on first use) and records the entry time.
 * exit and exit_group are printed right away, with a duration of 1, unless
 * a duration filter is set. For the other visible path entry_pending is
 * set, so that trace__sys_exit prints the stored text together with the
 * return value.
 * NOTE(review): the declarations, the NULL checks on sc, ttrace and args,
 * the else keyword and the return values are not visible in this view.
 */
964 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
965 struct perf_sample *sample)
970 struct thread *thread;
971 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
972 struct thread_trace *ttrace;
980 thread = machine__findnew_thread(&trace->host, sample->pid,
982 ttrace = thread__trace(thread, trace->output);
986 args = perf_evsel__rawptr(evsel, sample, "args");
988 fprintf(trace->output, "Problems reading syscall arguments\n");
992 ttrace = thread->priv;
/* The formatted entry text is kept until the matching sys_exit arrives. */
994 if (ttrace->entry_str == NULL) {
995 ttrace->entry_str = malloc(1024);
996 if (!ttrace->entry_str)
1000 ttrace->entry_time = sample->time;
1001 msg = ttrace->entry_str;
1002 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1004 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
1006 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1007 if (!trace->duration_filter) {
1008 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1009 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1012 ttrace->entry_pending = true;
/*
 * Handler for raw_syscalls:sys_exit samples.
 *
 * @trace:  run state
 * @evsel:  event selector the sample came from
 * @sample: the sys_exit sample
 *
 * Computes the duration from the stored entry time when there is one and
 * applies the duration filter. Prints the line prefix, then either the
 * entry text stored by trace__sys_enter or a continued marker with the
 * syscall name, and finally the return value:
 *  - plain decimal when the syscall has no format table entry;
 *  - errno name and message for negative values when errmsg is set;
 *  - Timeout for zero when timeout is set;
 *  - hexadecimal when hexret is set.
 * entry_pending is cleared afterwards.
 * NOTE(review): the declarations, the actions taken when the filter
 * matches, one further return value branch, the labels and the return
 * values are not visible in this view.
 */
1017 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1018 struct perf_sample *sample)
1022 struct thread *thread;
1023 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1024 struct thread_trace *ttrace;
1032 thread = machine__findnew_thread(&trace->host, sample->pid,
1034 ttrace = thread__trace(thread, trace->output);
1038 ret = perf_evsel__intval(evsel, sample, "ret");
1040 ttrace = thread->priv;
1042 ttrace->exit_time = sample->time;
1044 if (ttrace->entry_time) {
1045 duration = sample->time - ttrace->entry_time;
1046 if (trace__filter_duration(trace, duration))
1048 } else if (trace->duration_filter)
1051 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1053 if (ttrace->entry_pending) {
1054 fprintf(trace->output, "%-70s", ttrace->entry_str);
1056 fprintf(trace->output, " ... [");
1057 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1058 fprintf(trace->output, "]: %s()", sc->name);
1061 if (sc->fmt == NULL) {
1063 fprintf(trace->output, ") = %d", ret);
1064 } else if (ret < 0 && sc->fmt->errmsg) {
1066 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1067 *e = audit_errno_to_name(-ret);
1069 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1070 } else if (ret == 0 && sc->fmt->timeout)
1071 fprintf(trace->output, ") = 0 Timeout");
1072 else if (sc->fmt->hexret)
1073 fprintf(trace->output, ") = %#x", ret);
1077 fputc('\n', trace->output);
1079 ttrace->entry_pending = false;
/*
 * Handler for sched:sched_stat_runtime samples.
 *
 * @trace:  run state; its runtime_ms total is increased
 * @evsel:  event selector the sample came from
 * @sample: the sched_stat_runtime sample
 *
 * Converts the runtime field to milliseconds and adds it to both the
 * per-thread and the global runtime totals. The fprintf below dumps the
 * comm, pid, runtime and vruntime fields of the sample.
 * NOTE(review): the NULL check on ttrace, the label that leads to the dump
 * (presumably the path taken when the per-thread state is missing), two of
 * the fprintf arguments and the return values are not visible in this view.
 */
1084 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1085 struct perf_sample *sample)
1087 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1088 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1089 struct thread *thread = machine__findnew_thread(&trace->host,
1092 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1097 ttrace->runtime_ms += runtime_ms;
1098 trace->runtime_ms += runtime_ms;
1102 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1104 perf_evsel__strval(evsel, sample, "comm"),
1105 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1107 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample falls outside the requested pid and tid lists.
 *
 * @trace:  supplies pid_list and tid_list, either of which may be NULL
 * @sample: sample whose pid and tid are tested
 *
 * The first test matches when the sample pid is on pid_list or its tid is
 * on tid_list; the second test matches when at least one list exists.
 * NOTE(review): none of the return statements are visible in this view;
 * presumably a match on a list returns false, an unmatched sample with a
 * list present returns true and no lists at all returns false.
 */
1111 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1113 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1114 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1117 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback used when replaying a recorded file.
 *
 * @tool:   embedded in struct trace, recovered with container_of
 * @sample: sample to dispatch
 * @evsel:  event selector; its handler.func is the tracepoint handler
 *
 * Samples rejected by skip_sample are not dispatched. Unless full
 * timestamps were requested, the first sample time seen becomes the base
 * time. The handler is then called with the trace, evsel and sample.
 * NOTE(review): the declaration of the result variable, the guard around
 * the handler call and the return are not visible in this view.
 */
1123 static int trace__process_sample(struct perf_tool *tool,
1124 union perf_event *event __maybe_unused,
1125 struct perf_sample *sample,
1126 struct perf_evsel *evsel,
1127 struct machine *machine __maybe_unused)
1129 struct trace *trace = container_of(tool, struct trace, tool);
1132 tracepoint_handler handler = evsel->handler.func;
1134 if (skip_sample(trace, sample))
1137 if (!trace->full_time && trace->base_time == 0)
1138 trace->base_time = sample->time;
1141 handler(trace, evsel, sample);
/*
 * Tell whether the event list of a session contains the named tracepoint.
 *
 * @session: session whose evlist is searched
 * @name:    tracepoint name, for example raw_syscalls:sys_enter
 *
 * Returns true when perf_evlist__find_tracepoint_by_name finds it.
 * NOTE(review): the line with the return type is not visible in this view.
 */
1147 perf_session__has_tp(struct perf_session *session, const char *name)
1149 struct perf_evsel *evsel;
1151 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1153 return evsel != NULL;
/*
 * Turn the pid and tid option strings into integer lists.
 *
 * @trace: supplies opts.target.pid and opts.target.tid; receives pid_list
 *         and tid_list
 *
 * Each string that is set is parsed with intlist__new; a NULL result is
 * reported with pr_err.
 * NOTE(review): the return values are not visible in this view.
 */
1156 static int parse_target_str(struct trace *trace)
1158 if (trace->opts.target.pid) {
1159 trace->pid_list = intlist__new(trace->opts.target.pid);
1160 if (trace->pid_list == NULL) {
1161 pr_err("Error parsing process id string\n");
1166 if (trace->opts.target.tid) {
1167 trace->tid_list = intlist__new(trace->opts.target.tid);
1168 if (trace->tid_list == NULL) {
1169 pr_err("Error parsing thread id string\n");
/*
 * Live tracing: set up the tracepoints, optionally start a workload, and
 * print events until the run ends.
 *
 * @trace: run state and options
 * @argc:  number of workload arguments; a workload is forked when positive
 * @argv:  workload command line
 *
 * Visible sequence:
 *  1. create the event list and add the raw_syscalls sys_enter and sys_exit
 *     tracepoints, plus sched_stat_runtime (added conditionally);
 *  2. create the cpu and thread maps for the target, initialize symbols,
 *     apply the record options and install sig_handler for SIGCHLD and
 *     SIGINT;
 *  3. prepare the workload, open and mmap the events, enable them and start
 *     the workload;
 *  4. loop over all mmaps: parse each event, pass non-sample events to
 *     trace__process_event, and dispatch samples to the handler stored in
 *     their evsel, skipping unknown ids and samples without raw data;
 *  5. when a pass over the mmaps saw no new events, poll on the event file
 *     descriptors with no timeout;
 *  6. tear down in reverse order: disable, munmap, close, delete maps,
 *     delete the event list.
 * Failures are reported on trace->output and jump to the matching cleanup
 * step.
 * NOTE(review): the declarations of err and i, most error checks, the loop
 * labels, the event counter update, the checks of the done flag and the
 * return are not visible in this view.
 */
1177 static int trace__run(struct trace *trace, int argc, const char **argv)
1179 struct perf_evlist *evlist = perf_evlist__new();
1180 struct perf_evsel *evsel;
1182 unsigned long before;
1183 const bool forks = argc > 0;
1185 if (evlist == NULL) {
1186 fprintf(trace->output, "Not enough memory to run!\n");
1190 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1191 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1192 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1193 goto out_delete_evlist;
1197 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1198 trace__sched_stat_runtime)) {
1199 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1200 goto out_delete_evlist;
1203 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1205 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1206 goto out_delete_evlist;
1209 err = trace__symbols_init(trace, evlist);
1211 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1212 goto out_delete_maps;
1215 perf_evlist__config(evlist, &trace->opts);
1217 signal(SIGCHLD, sig_handler);
1218 signal(SIGINT, sig_handler);
1221 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1222 argv, false, false);
1224 fprintf(trace->output, "Couldn't run the workload!\n");
1225 goto out_delete_maps;
1229 err = perf_evlist__open(evlist);
1231 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1232 goto out_delete_maps;
1235 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1237 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1238 goto out_close_evlist;
1241 perf_evlist__enable(evlist);
1244 perf_evlist__start_workload(evlist);
/* Show thread ids when the map covers every thread or more than one. */
1246 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Remember the event count so an idle pass over the mmaps can be detected. */
1248 before = trace->nr_events;
1250 for (i = 0; i < evlist->nr_mmaps; i++) {
1251 union perf_event *event;
1253 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1254 const u32 type = event->header.type;
1255 tracepoint_handler handler;
1256 struct perf_sample sample;
1260 err = perf_evlist__parse_sample(evlist, event, &sample);
1262 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1266 if (!trace->full_time && trace->base_time == 0)
1267 trace->base_time = sample.time;
1269 if (type != PERF_RECORD_SAMPLE) {
1270 trace__process_event(trace, &trace->host, event);
1274 evsel = perf_evlist__id2evsel(evlist, sample.id);
1275 if (evsel == NULL) {
1276 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1280 if (sample.raw_data == NULL) {
1281 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1282 perf_evsel__name(evsel), sample.tid,
1283 sample.cpu, sample.raw_size);
1287 handler = evsel->handler.func;
1288 handler(trace, evsel, &sample);
1291 goto out_unmap_evlist;
1295 if (trace->nr_events == before) {
1297 goto out_unmap_evlist;
1299 poll(evlist->pollfd, evlist->nr_fds, -1);
1303 perf_evlist__disable(evlist);
1308 perf_evlist__munmap(evlist);
1310 perf_evlist__close(evlist);
1312 perf_evlist__delete_maps(evlist);
1314 perf_evlist__delete(evlist);
/*
 * Replay mode: process the events of a previously recorded perf data file.
 *
 * @trace: run state; its tool callbacks are filled in here
 *
 * Installs the perf_tool callbacks, with trace__process_sample for samples
 * and the generic perf_event__process_* helpers for the other record types,
 * and requests timestamp ordered delivery. The thread id is always shown.
 * After opening the session on input_name it registers the sys_enter and
 * sys_exit handlers, verifies that the file contains both tracepoints,
 * parses the pid and tid filters and processes all events. The session is
 * deleted before returning.
 * NOTE(review): the declaration of err, the error returns, the labels, the
 * remaining perf_session__new arguments and the return are not visible in
 * this view.
 */
1319 static int trace__replay(struct trace *trace)
1321 const struct perf_evsel_str_handler handlers[] = {
1322 { "raw_syscalls:sys_enter", trace__sys_enter, },
1323 { "raw_syscalls:sys_exit", trace__sys_exit, },
1326 struct perf_session *session;
1329 trace->tool.sample = trace__process_sample;
1330 trace->tool.mmap = perf_event__process_mmap;
1331 trace->tool.mmap2 = perf_event__process_mmap2;
1332 trace->tool.comm = perf_event__process_comm;
1333 trace->tool.exit = perf_event__process_exit;
1334 trace->tool.fork = perf_event__process_fork;
1335 trace->tool.attr = perf_event__process_attr;
1336 trace->tool.tracing_data = perf_event__process_tracing_data;
1337 trace->tool.build_id = perf_event__process_build_id;
1339 trace->tool.ordered_samples = true;
1340 trace->tool.ordering_requires_timestamps = true;
1342 /* add tid to output */
1343 trace->multiple_threads = true;
1345 if (symbol__init() < 0)
1348 session = perf_session__new(input_name, O_RDONLY, 0, false,
1350 if (session == NULL)
1353 err = perf_session__set_tracepoints_handlers(session, handlers);
1357 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1358 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1362 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1363 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1367 err = parse_target_str(trace);
1373 err = perf_session__process_events(session, &trace->tool);
1375 pr_err("Failed to process events, error %d", err);
1378 perf_session__delete(session);
/*
 * Print the banner and the column titles of the per-thread summary.
 *
 * @fp: output stream
 *
 * NOTE(review): the declaration of printed and the return are not visible
 * in this view; presumably the accumulated fprintf count is returned.
 */
1383 static size_t trace__fprintf_threads_header(FILE *fp)
1387 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1388 printed += fprintf(fp," __) Summary of events (__\n\n");
1389 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1390 printed += fprintf(fp," _____________________________________________________________________\n\n");
/*
 * Print one summary line per thread known to the host machine.
 *
 * @trace: supplies the thread tree and the global event count
 * @fp:    output stream
 *
 * After the header, the red-black tree of threads is walked in order. For
 * each thread the share of all events is computed as a percentage and the
 * line shows the command name, thread id, event count, percentage and
 * accumulated runtime in milliseconds. The color follows the percentage:
 * green above 25, yellow above 5, red for a higher threshold and normal
 * otherwise.
 * NOTE(review): the declarations, the handling of threads without tracing
 * state, the condition selecting red and the return are not visible in this
 * view.
 */
1395 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1397 size_t printed = trace__fprintf_threads_header(fp);
1400 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1401 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1402 struct thread_trace *ttrace = thread->priv;
1409 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1411 color = PERF_COLOR_NORMAL;
1413 color = PERF_COLOR_RED;
1414 else if (ratio > 25.0)
1415 color = PERF_COLOR_GREEN;
1416 else if (ratio > 5.0)
1417 color = PERF_COLOR_YELLOW;
1419 printed += color_fprintf(fp, color, "%20s", thread->comm);
1420 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1421 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1422 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
/*
 * Option callback for the duration option: store the threshold, in
 * milliseconds, in trace->duration_filter.
 *
 * @opt: option descriptor; opt->value points to the struct trace
 * @str: option argument as typed by the user
 *
 * The string is converted with atof, which reports no errors: text that is
 * not a number yields 0 and thereby disables the filter.
 * NOTE(review): the return is not visible in this view.
 */
1428 static int trace__set_duration(const struct option *opt, const char *str,
1429 int unset __maybe_unused)
1431 struct trace *trace = opt->value;
1433 trace->duration_filter = atof(str);
/*
 * Open the output file, keeping a previous non-empty file as a backup.
 *
 * @trace:    receives the stream in trace->output
 * @filename: path of the output file
 *
 * When @filename exists with a non-zero size it is renamed to the same name
 * with an added .old suffix; the result of rename is not checked, so a
 * failed rename means the old contents are truncated by the fopen below.
 * Returns 0 on success or the negated errno when fopen fails.
 * NOTE(review): the declaration of st and possibly one statement before the
 * rename are not visible in this view.
 */
1437 static int trace__open_output(struct trace *trace, const char *filename)
1441 if (!stat(filename, &st) && st.st_size) {
1442 char oldname[PATH_MAX];
1444 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1446 rename(filename, oldname);
1449 trace->output = fopen(filename, "w");
1451 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of the perf trace command.
 *
 * @argc:   argument count
 * @argv:   argument vector; what is left after option parsing is the
 *          workload command line
 * @prefix: unused
 *
 * Visible sequence:
 *  1. set up the default run state and the option table, then parse the
 *     command line;
 *  2. open the output file when one was requested with -o;
 *  3. build the syscall qualifier list from -e; a leading exclamation mark
 *     negates the list;
 *  4. validate the target and resolve the user id, printing the error text
 *     on trace.output when either step reports a problem;
 *  5. default to system wide tracing when there is neither a workload nor a
 *     target;
 *  6. replay a recorded file or run a live trace, then print the per-thread
 *     summary when --sched was given and no error occurred;
 *  7. close the output file when one was opened here.
 * NOTE(review): the remaining initializers, the declarations of err and bf,
 * the error checks and their exits, the condition choosing between replay
 * and live tracing and the return are not visible in this view.
 */
1454 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1456 const char * const trace_usage[] = {
1457 "perf trace [<options>] [<command>]",
1458 "perf trace [<options>] -- <command> [<options>]",
1461 struct trace trace = {
1462 .audit_machine = audit_detect_machine(),
1471 .user_freq = UINT_MAX,
1472 .user_interval = ULLONG_MAX,
1478 const char *output_name = NULL;
1479 const char *ev_qualifier_str = NULL;
1480 const struct option trace_options[] = {
1481 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1482 "list of events to trace"),
1483 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1484 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1485 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1486 "trace events on existing process id"),
1487 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1488 "trace events on existing thread id"),
1489 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1490 "system-wide collection from all CPUs"),
1491 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1492 "list of cpus to monitor"),
1493 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1494 "child tasks do not inherit counters"),
1495 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1496 "number of mmap data pages"),
1497 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1499 OPT_CALLBACK(0, "duration", &trace, "float",
1500 "show only events with duration > N.M ms",
1501 trace__set_duration),
1502 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1503 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1504 OPT_BOOLEAN('T', "time", &trace.full_time,
1505 "Show full timestamp, not time relative to first start"),
1511 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1513 if (output_name != NULL) {
1514 err = trace__open_output(&trace, output_name);
1516 perror("failed to create output file");
1521 if (ev_qualifier_str != NULL) {
1522 const char *s = ev_qualifier_str;
1524 trace.not_ev_qualifier = *s == '!';
1525 if (trace.not_ev_qualifier)
1527 trace.ev_qualifier = strlist__new(true, s);
1528 if (trace.ev_qualifier == NULL) {
1529 fputs("Not enough memory to parse event qualifier",
1536 err = perf_target__validate(&trace.opts.target);
1538 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1539 fprintf(trace.output, "%s", bf);
1543 err = perf_target__parse_uid(&trace.opts.target);
1545 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1546 fprintf(trace.output, "%s", bf);
1550 if (!argc && perf_target__none(&trace.opts.target))
1551 trace.opts.target.system_wide = true;
1554 err = trace__replay(&trace);
1556 err = trace__run(&trace, argc, argv);
1558 if (trace.sched && !err)
1559 trace__fprintf_thread_summary(&trace, trace.output);
1562 if (output_name != NULL)
1563 fclose(trace.output);