1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
48 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
49 .nr_entries = ARRAY_SIZE(array), \
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54 struct syscall_arg *arg)
57 struct strarray *sa = arg->parm;
59 if (idx < 0 || idx >= sa->nr_entries)
60 return scnprintf(bf, size, "%d", idx);
62 return scnprintf(bf, size, "%s", sa->entries[idx]);
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68 struct syscall_arg *arg)
70 return scnprintf(bf, size, "%#lx", arg->val);
73 #define SCA_HEX syscall_arg__scnprintf_hex
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76 struct syscall_arg *arg)
78 int printed = 0, prot = arg->val;
80 if (prot == PROT_NONE)
81 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN);
99 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107 struct syscall_arg *arg)
109 int printed = 0, flags = arg->val;
111 #define P_MMAP_FLAG(n) \
112 if (flags & MAP_##n) { \
113 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
118 P_MMAP_FLAG(PRIVATE);
122 P_MMAP_FLAG(ANONYMOUS);
123 P_MMAP_FLAG(DENYWRITE);
124 P_MMAP_FLAG(EXECUTABLE);
127 P_MMAP_FLAG(GROWSDOWN);
129 P_MMAP_FLAG(HUGETLB);
132 P_MMAP_FLAG(NONBLOCK);
133 P_MMAP_FLAG(NORESERVE);
134 P_MMAP_FLAG(POPULATE);
136 #ifdef MAP_UNINITIALIZED
137 P_MMAP_FLAG(UNINITIALIZED);
142 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150 struct syscall_arg *arg)
152 int behavior = arg->val;
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158 P_MADV_BHV(SEQUENTIAL);
159 P_MADV_BHV(WILLNEED);
160 P_MADV_BHV(DONTNEED);
162 P_MADV_BHV(DONTFORK);
164 P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166 P_MADV_BHV(SOFT_OFFLINE);
168 P_MADV_BHV(MERGEABLE);
169 P_MADV_BHV(UNMERGEABLE);
171 P_MADV_BHV(HUGEPAGE);
173 #ifdef MADV_NOHUGEPAGE
174 P_MADV_BHV(NOHUGEPAGE);
177 P_MADV_BHV(DONTDUMP);
186 return scnprintf(bf, size, "%#x", behavior);
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 enum syscall_futex_args {
194 SCF_UADDR = (1 << 0),
197 SCF_TIMEOUT = (1 << 3),
198 SCF_UADDR2 = (1 << 4),
202 int cmd = op & FUTEX_CMD_MASK;
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
208 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
211 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(WAKE_OP); break;
214 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
217 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
218 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
220 default: printed = scnprintf(bf, size, "%#x", cmd); break;
223 if (op & FUTEX_PRIVATE_FLAG)
224 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 if (op & FUTEX_CLOCK_REALTIME)
227 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
232 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
234 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
235 static DEFINE_STRARRAY(itimers);
237 static const char *whences[] = { "SET", "CUR", "END",
245 static DEFINE_STRARRAY(whences);
247 static const char *fcntl_cmds[] = {
248 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
249 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
250 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
253 static DEFINE_STRARRAY(fcntl_cmds);
255 static const char *rlimit_resources[] = {
256 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
257 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
260 static DEFINE_STRARRAY(rlimit_resources);
262 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
263 static DEFINE_STRARRAY(sighow);
265 static const char *socket_families[] = {
266 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
267 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
268 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
269 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
270 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
271 "ALG", "NFC", "VSOCK",
273 static DEFINE_STRARRAY(socket_families);
275 #ifndef SOCK_TYPE_MASK
276 #define SOCK_TYPE_MASK 0xf
279 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
280 struct syscall_arg *arg)
284 flags = type & ~SOCK_TYPE_MASK;
286 type &= SOCK_TYPE_MASK;
288 * Can't use a strarray, MIPS may override for ABI reasons.
291 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
296 P_SK_TYPE(SEQPACKET);
301 printed = scnprintf(bf, size, "%#x", type);
304 #define P_SK_FLAG(n) \
305 if (flags & SOCK_##n) { \
306 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
307 flags &= ~SOCK_##n; \
315 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
320 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
323 struct syscall_arg *arg)
328 if (mode == F_OK) /* 0 */
329 return scnprintf(bf, size, "F");
331 if (mode & n##_OK) { \
332 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
342 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
347 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
349 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
350 struct syscall_arg *arg)
352 int printed = 0, flags = arg->val;
354 if (!(flags & O_CREAT))
355 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
358 return scnprintf(bf, size, "RDONLY");
360 if (flags & O_##n) { \
361 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
385 if ((flags & O_SYNC) == O_SYNC)
386 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
405 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
410 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
445 return scnprintf(bf, size, "%#x", sig);
448 #define SCA_SIGNUM syscall_arg__scnprintf_signum
450 static struct syscall_fmt {
453 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
459 { .name = "access", .errmsg = true,
460 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
461 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
462 { .name = "brk", .hexret = true,
463 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
464 { .name = "connect", .errmsg = true, },
465 { .name = "fcntl", .errmsg = true,
466 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
467 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
468 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
469 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
470 { .name = "futex", .errmsg = true,
471 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
472 { .name = "getitimer", .errmsg = true,
473 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
474 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
475 { .name = "getrlimit", .errmsg = true,
476 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
477 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
478 { .name = "ioctl", .errmsg = true,
479 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
480 { .name = "kill", .errmsg = true,
481 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
482 { .name = "lseek", .errmsg = true,
483 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
484 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
485 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
486 { .name = "madvise", .errmsg = true,
487 .arg_scnprintf = { [0] = SCA_HEX, /* start */
488 [2] = SCA_MADV_BHV, /* behavior */ }, },
489 { .name = "mmap", .hexret = true,
490 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
491 [2] = SCA_MMAP_PROT, /* prot */
492 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
493 { .name = "mprotect", .errmsg = true,
494 .arg_scnprintf = { [0] = SCA_HEX, /* start */
495 [2] = SCA_MMAP_PROT, /* prot */ }, },
496 { .name = "mremap", .hexret = true,
497 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
498 [4] = SCA_HEX, /* new_addr */ }, },
499 { .name = "munmap", .errmsg = true,
500 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
501 { .name = "open", .errmsg = true,
502 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
503 { .name = "open_by_handle_at", .errmsg = true,
504 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
505 { .name = "openat", .errmsg = true,
506 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
507 { .name = "poll", .errmsg = true, .timeout = true, },
508 { .name = "ppoll", .errmsg = true, .timeout = true, },
509 { .name = "pread", .errmsg = true, .alias = "pread64", },
510 { .name = "prlimit64", .errmsg = true,
511 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
512 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
513 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
514 { .name = "read", .errmsg = true, },
515 { .name = "recvfrom", .errmsg = true, },
516 { .name = "rt_sigaction", .errmsg = true,
517 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
518 { .name = "rt_sigprocmask", .errmsg = true,
519 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
520 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
521 { .name = "rt_sigqueueinfo", .errmsg = true,
522 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
523 { .name = "rt_tgsigqueueinfo", .errmsg = true,
524 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
525 { .name = "select", .errmsg = true, .timeout = true, },
526 { .name = "setitimer", .errmsg = true,
527 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
528 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
529 { .name = "setrlimit", .errmsg = true,
530 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
531 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
532 { .name = "socket", .errmsg = true,
533 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
534 [1] = SCA_SK_TYPE, /* type */ },
535 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
536 { .name = "stat", .errmsg = true, .alias = "newstat", },
537 { .name = "tgkill", .errmsg = true,
538 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
539 { .name = "tkill", .errmsg = true,
540 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
541 { .name = "uname", .errmsg = true, .alias = "newuname", },
544 static int syscall_fmt__cmp(const void *name, const void *fmtp)
546 const struct syscall_fmt *fmt = fmtp;
547 return strcmp(name, fmt->name);
550 static struct syscall_fmt *syscall_fmt__find(const char *name)
552 const int nmemb = ARRAY_SIZE(syscall_fmts);
553 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
557 struct event_format *tp_format;
560 struct syscall_fmt *fmt;
561 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
565 static size_t fprintf_duration(unsigned long t, FILE *fp)
567 double duration = (double)t / NSEC_PER_MSEC;
568 size_t printed = fprintf(fp, "(");
571 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
572 else if (duration >= 0.01)
573 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
575 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
576 return printed + fprintf(fp, "): ");
579 struct thread_trace {
583 unsigned long nr_events;
588 static struct thread_trace *thread_trace__new(void)
590 return zalloc(sizeof(struct thread_trace));
593 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
595 struct thread_trace *ttrace;
600 if (thread->priv == NULL)
601 thread->priv = thread_trace__new();
603 if (thread->priv == NULL)
606 ttrace = thread->priv;
611 color_fprintf(fp, PERF_COLOR_RED,
612 "WARNING: not enough memory, dropping samples!\n");
617 struct perf_tool tool;
621 struct syscall *table;
623 struct perf_record_opts opts;
628 unsigned long nr_events;
629 struct strlist *ev_qualifier;
630 bool not_ev_qualifier;
631 struct intlist *tid_list;
632 struct intlist *pid_list;
634 bool multiple_threads;
635 double duration_filter;
639 static bool trace__filter_duration(struct trace *trace, double t)
641 return t < (trace->duration_filter * NSEC_PER_MSEC);
644 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
646 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
648 return fprintf(fp, "%10.3f ", ts);
651 static bool done = false;
653 static void sig_handler(int sig __maybe_unused)
658 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
659 u64 duration, u64 tstamp, FILE *fp)
661 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
662 printed += fprintf_duration(duration, fp);
664 if (trace->multiple_threads)
665 printed += fprintf(fp, "%d ", thread->tid);
670 static int trace__process_event(struct trace *trace, struct machine *machine,
671 union perf_event *event)
675 switch (event->header.type) {
676 case PERF_RECORD_LOST:
677 color_fprintf(trace->output, PERF_COLOR_RED,
678 "LOST %" PRIu64 " events!\n", event->lost.lost);
679 ret = machine__process_lost_event(machine, event);
681 ret = machine__process_event(machine, event);
688 static int trace__tool_process(struct perf_tool *tool,
689 union perf_event *event,
690 struct perf_sample *sample __maybe_unused,
691 struct machine *machine)
693 struct trace *trace = container_of(tool, struct trace, tool);
694 return trace__process_event(trace, machine, event);
697 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
699 int err = symbol__init();
704 machine__init(&trace->host, "", HOST_KERNEL_ID);
705 machine__create_kernel_maps(&trace->host);
707 if (perf_target__has_task(&trace->opts.target)) {
708 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
712 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
722 static int syscall__set_arg_fmts(struct syscall *sc)
724 struct format_field *field;
727 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
728 if (sc->arg_scnprintf == NULL)
732 sc->arg_parm = sc->fmt->arg_parm;
734 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
735 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
736 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
737 else if (field->flags & FIELD_IS_POINTER)
738 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
745 static int trace__read_syscall_info(struct trace *trace, int id)
749 const char *name = audit_syscall_to_name(id, trace->audit_machine);
754 if (id > trace->syscalls.max) {
755 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
757 if (nsyscalls == NULL)
760 if (trace->syscalls.max != -1) {
761 memset(nsyscalls + trace->syscalls.max + 1, 0,
762 (id - trace->syscalls.max) * sizeof(*sc));
764 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
767 trace->syscalls.table = nsyscalls;
768 trace->syscalls.max = id;
771 sc = trace->syscalls.table + id;
774 if (trace->ev_qualifier) {
775 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
777 if (!(in ^ trace->not_ev_qualifier)) {
780 * No need to do read tracepoint information since this will be
787 sc->fmt = syscall_fmt__find(sc->name);
789 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
790 sc->tp_format = event_format__new("syscalls", tp_name);
792 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
793 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
794 sc->tp_format = event_format__new("syscalls", tp_name);
797 if (sc->tp_format == NULL)
800 return syscall__set_arg_fmts(sc);
803 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
808 if (sc->tp_format != NULL) {
809 struct format_field *field;
811 struct syscall_arg arg = {
816 for (field = sc->tp_format->format.fields->next; field;
817 field = field->next, ++arg.idx, bit <<= 1) {
821 if (args[arg.idx] == 0)
824 printed += scnprintf(bf + printed, size - printed,
825 "%s%s: ", printed ? ", " : "", field->name);
826 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
827 arg.val = args[arg.idx];
829 arg.parm = sc->arg_parm[arg.idx];
830 printed += sc->arg_scnprintf[arg.idx](bf + printed,
831 size - printed, &arg);
833 printed += scnprintf(bf + printed, size - printed,
834 "%ld", args[arg.idx]);
841 printed += scnprintf(bf + printed, size - printed,
843 printed ? ", " : "", i, args[i]);
851 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
852 struct perf_sample *sample);
854 static struct syscall *trace__syscall_info(struct trace *trace,
855 struct perf_evsel *evsel,
856 struct perf_sample *sample)
858 int id = perf_evsel__intval(evsel, sample, "id");
863 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
864 * before that, leaving at a higher verbosity level till that is
865 * explained. Reproduced with plain ftrace with:
867 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
868 * grep "NR -1 " /t/trace_pipe
870 * After generating some load on the machine.
874 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
875 id, perf_evsel__name(evsel), ++n);
880 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
881 trace__read_syscall_info(trace, id))
884 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
887 return &trace->syscalls.table[id];
891 fprintf(trace->output, "Problems reading syscall %d", id);
892 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
893 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
894 fputs(" information\n", trace->output);
899 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
900 struct perf_sample *sample)
905 struct thread *thread;
906 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
907 struct thread_trace *ttrace;
915 thread = machine__findnew_thread(&trace->host, sample->pid,
917 ttrace = thread__trace(thread, trace->output);
921 args = perf_evsel__rawptr(evsel, sample, "args");
923 fprintf(trace->output, "Problems reading syscall arguments\n");
927 ttrace = thread->priv;
929 if (ttrace->entry_str == NULL) {
930 ttrace->entry_str = malloc(1024);
931 if (!ttrace->entry_str)
935 ttrace->entry_time = sample->time;
936 msg = ttrace->entry_str;
937 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
939 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
941 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
942 if (!trace->duration_filter) {
943 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
944 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
947 ttrace->entry_pending = true;
952 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
953 struct perf_sample *sample)
957 struct thread *thread;
958 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
959 struct thread_trace *ttrace;
967 thread = machine__findnew_thread(&trace->host, sample->pid,
969 ttrace = thread__trace(thread, trace->output);
973 ret = perf_evsel__intval(evsel, sample, "ret");
975 ttrace = thread->priv;
977 ttrace->exit_time = sample->time;
979 if (ttrace->entry_time) {
980 duration = sample->time - ttrace->entry_time;
981 if (trace__filter_duration(trace, duration))
983 } else if (trace->duration_filter)
986 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
988 if (ttrace->entry_pending) {
989 fprintf(trace->output, "%-70s", ttrace->entry_str);
991 fprintf(trace->output, " ... [");
992 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
993 fprintf(trace->output, "]: %s()", sc->name);
996 if (sc->fmt == NULL) {
998 fprintf(trace->output, ") = %d", ret);
999 } else if (ret < 0 && sc->fmt->errmsg) {
1001 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1002 *e = audit_errno_to_name(-ret);
1004 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1005 } else if (ret == 0 && sc->fmt->timeout)
1006 fprintf(trace->output, ") = 0 Timeout");
1007 else if (sc->fmt->hexret)
1008 fprintf(trace->output, ") = %#x", ret);
1012 fputc('\n', trace->output);
1014 ttrace->entry_pending = false;
1019 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1020 struct perf_sample *sample)
1022 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1023 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1024 struct thread *thread = machine__findnew_thread(&trace->host,
1027 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1032 ttrace->runtime_ms += runtime_ms;
1033 trace->runtime_ms += runtime_ms;
1037 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1039 perf_evsel__strval(evsel, sample, "comm"),
1040 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1042 perf_evsel__intval(evsel, sample, "vruntime"));
1046 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1048 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1049 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1052 if (trace->pid_list || trace->tid_list)
1058 static int trace__process_sample(struct perf_tool *tool,
1059 union perf_event *event __maybe_unused,
1060 struct perf_sample *sample,
1061 struct perf_evsel *evsel,
1062 struct machine *machine __maybe_unused)
1064 struct trace *trace = container_of(tool, struct trace, tool);
1067 tracepoint_handler handler = evsel->handler.func;
1069 if (skip_sample(trace, sample))
1072 if (!trace->full_time && trace->base_time == 0)
1073 trace->base_time = sample->time;
1076 handler(trace, evsel, sample);
1082 perf_session__has_tp(struct perf_session *session, const char *name)
1084 struct perf_evsel *evsel;
1086 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1088 return evsel != NULL;
1091 static int parse_target_str(struct trace *trace)
1093 if (trace->opts.target.pid) {
1094 trace->pid_list = intlist__new(trace->opts.target.pid);
1095 if (trace->pid_list == NULL) {
1096 pr_err("Error parsing process id string\n");
1101 if (trace->opts.target.tid) {
1102 trace->tid_list = intlist__new(trace->opts.target.tid);
1103 if (trace->tid_list == NULL) {
1104 pr_err("Error parsing thread id string\n");
1112 static int trace__run(struct trace *trace, int argc, const char **argv)
1114 struct perf_evlist *evlist = perf_evlist__new();
1115 struct perf_evsel *evsel;
1117 unsigned long before;
1118 const bool forks = argc > 0;
1120 if (evlist == NULL) {
1121 fprintf(trace->output, "Not enough memory to run!\n");
1125 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1126 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1127 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1128 goto out_delete_evlist;
1132 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1133 trace__sched_stat_runtime)) {
1134 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1135 goto out_delete_evlist;
1138 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1140 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1141 goto out_delete_evlist;
1144 err = trace__symbols_init(trace, evlist);
1146 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1147 goto out_delete_maps;
1150 perf_evlist__config(evlist, &trace->opts);
1152 signal(SIGCHLD, sig_handler);
1153 signal(SIGINT, sig_handler);
1156 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1157 argv, false, false);
1159 fprintf(trace->output, "Couldn't run the workload!\n");
1160 goto out_delete_maps;
1164 err = perf_evlist__open(evlist);
1166 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1167 goto out_delete_maps;
1170 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1172 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1173 goto out_close_evlist;
1176 perf_evlist__enable(evlist);
1179 perf_evlist__start_workload(evlist);
1181 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1183 before = trace->nr_events;
1185 for (i = 0; i < evlist->nr_mmaps; i++) {
1186 union perf_event *event;
1188 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1189 const u32 type = event->header.type;
1190 tracepoint_handler handler;
1191 struct perf_sample sample;
1195 err = perf_evlist__parse_sample(evlist, event, &sample);
1197 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1201 if (!trace->full_time && trace->base_time == 0)
1202 trace->base_time = sample.time;
1204 if (type != PERF_RECORD_SAMPLE) {
1205 trace__process_event(trace, &trace->host, event);
1209 evsel = perf_evlist__id2evsel(evlist, sample.id);
1210 if (evsel == NULL) {
1211 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1215 if (sample.raw_data == NULL) {
1216 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1217 perf_evsel__name(evsel), sample.tid,
1218 sample.cpu, sample.raw_size);
1222 handler = evsel->handler.func;
1223 handler(trace, evsel, &sample);
1226 goto out_unmap_evlist;
1230 if (trace->nr_events == before) {
1232 goto out_unmap_evlist;
1234 poll(evlist->pollfd, evlist->nr_fds, -1);
1238 perf_evlist__disable(evlist);
1243 perf_evlist__munmap(evlist);
1245 perf_evlist__close(evlist);
1247 perf_evlist__delete_maps(evlist);
1249 perf_evlist__delete(evlist);
1254 static int trace__replay(struct trace *trace)
1256 const struct perf_evsel_str_handler handlers[] = {
1257 { "raw_syscalls:sys_enter", trace__sys_enter, },
1258 { "raw_syscalls:sys_exit", trace__sys_exit, },
1261 struct perf_session *session;
1264 trace->tool.sample = trace__process_sample;
1265 trace->tool.mmap = perf_event__process_mmap;
1266 trace->tool.mmap2 = perf_event__process_mmap2;
1267 trace->tool.comm = perf_event__process_comm;
1268 trace->tool.exit = perf_event__process_exit;
1269 trace->tool.fork = perf_event__process_fork;
1270 trace->tool.attr = perf_event__process_attr;
1271 trace->tool.tracing_data = perf_event__process_tracing_data;
1272 trace->tool.build_id = perf_event__process_build_id;
1274 trace->tool.ordered_samples = true;
1275 trace->tool.ordering_requires_timestamps = true;
1277 /* add tid to output */
1278 trace->multiple_threads = true;
1280 if (symbol__init() < 0)
1283 session = perf_session__new(input_name, O_RDONLY, 0, false,
1285 if (session == NULL)
1288 err = perf_session__set_tracepoints_handlers(session, handlers);
1292 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1293 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1297 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1298 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1302 err = parse_target_str(trace);
1308 err = perf_session__process_events(session, &trace->tool);
1310 pr_err("Failed to process events, error %d", err);
1313 perf_session__delete(session);
1318 static size_t trace__fprintf_threads_header(FILE *fp)
1322 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1323 printed += fprintf(fp," __) Summary of events (__\n\n");
1324 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1325 printed += fprintf(fp," _____________________________________________________________________\n\n");
1330 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1332 size_t printed = trace__fprintf_threads_header(fp);
1335 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1336 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1337 struct thread_trace *ttrace = thread->priv;
1344 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1346 color = PERF_COLOR_NORMAL;
1348 color = PERF_COLOR_RED;
1349 else if (ratio > 25.0)
1350 color = PERF_COLOR_GREEN;
1351 else if (ratio > 5.0)
1352 color = PERF_COLOR_YELLOW;
1354 printed += color_fprintf(fp, color, "%20s", thread->comm);
1355 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1356 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1357 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1363 static int trace__set_duration(const struct option *opt, const char *str,
1364 int unset __maybe_unused)
1366 struct trace *trace = opt->value;
1368 trace->duration_filter = atof(str);
1372 static int trace__open_output(struct trace *trace, const char *filename)
1376 if (!stat(filename, &st) && st.st_size) {
1377 char oldname[PATH_MAX];
1379 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1381 rename(filename, oldname);
1384 trace->output = fopen(filename, "w");
1386 return trace->output == NULL ? -errno : 0;
1389 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1391 const char * const trace_usage[] = {
1392 "perf trace [<options>] [<command>]",
1393 "perf trace [<options>] -- <command> [<options>]",
1396 struct trace trace = {
1397 .audit_machine = audit_detect_machine(),
1406 .user_freq = UINT_MAX,
1407 .user_interval = ULLONG_MAX,
1413 const char *output_name = NULL;
1414 const char *ev_qualifier_str = NULL;
1415 const struct option trace_options[] = {
1416 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1417 "list of events to trace"),
1418 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1419 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1420 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1421 "trace events on existing process id"),
1422 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1423 "trace events on existing thread id"),
1424 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1425 "system-wide collection from all CPUs"),
1426 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1427 "list of cpus to monitor"),
1428 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1429 "child tasks do not inherit counters"),
1430 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1431 "number of mmap data pages"),
1432 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1434 OPT_CALLBACK(0, "duration", &trace, "float",
1435 "show only events with duration > N.M ms",
1436 trace__set_duration),
1437 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1438 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1439 OPT_BOOLEAN('T', "time", &trace.full_time,
1440 "Show full timestamp, not time relative to first start"),
1446 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1448 if (output_name != NULL) {
1449 err = trace__open_output(&trace, output_name);
1451 perror("failed to create output file");
1456 if (ev_qualifier_str != NULL) {
1457 const char *s = ev_qualifier_str;
1459 trace.not_ev_qualifier = *s == '!';
1460 if (trace.not_ev_qualifier)
1462 trace.ev_qualifier = strlist__new(true, s);
1463 if (trace.ev_qualifier == NULL) {
1464 fputs("Not enough memory to parse event qualifier",
1471 err = perf_target__validate(&trace.opts.target);
1473 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1474 fprintf(trace.output, "%s", bf);
1478 err = perf_target__parse_uid(&trace.opts.target);
1480 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1481 fprintf(trace.output, "%s", bf);
1485 if (!argc && perf_target__none(&trace.opts.target))
1486 trace.opts.target.system_wide = true;
1489 err = trace__replay(&trace);
1491 err = trace__run(&trace, argc, argv);
1493 if (trace.sched && !err)
1494 trace__fprintf_thread_summary(&trace, trace.output);
1497 if (output_name != NULL)
1498 fclose(trace.output);