1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
48 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
49 .nr_entries = ARRAY_SIZE(array), \
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54 struct syscall_arg *arg)
57 struct strarray *sa = arg->parm;
59 if (idx < 0 || idx >= sa->nr_entries)
60 return scnprintf(bf, size, "%d", idx);
62 return scnprintf(bf, size, "%s", sa->entries[idx]);
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68 struct syscall_arg *arg)
70 return scnprintf(bf, size, "%#lx", arg->val);
73 #define SCA_HEX syscall_arg__scnprintf_hex
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76 struct syscall_arg *arg)
78 int printed = 0, prot = arg->val;
80 if (prot == PROT_NONE)
81 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN);
99 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107 struct syscall_arg *arg)
109 int printed = 0, flags = arg->val;
111 #define P_MMAP_FLAG(n) \
112 if (flags & MAP_##n) { \
113 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
118 P_MMAP_FLAG(PRIVATE);
122 P_MMAP_FLAG(ANONYMOUS);
123 P_MMAP_FLAG(DENYWRITE);
124 P_MMAP_FLAG(EXECUTABLE);
127 P_MMAP_FLAG(GROWSDOWN);
129 P_MMAP_FLAG(HUGETLB);
132 P_MMAP_FLAG(NONBLOCK);
133 P_MMAP_FLAG(NORESERVE);
134 P_MMAP_FLAG(POPULATE);
136 #ifdef MAP_UNINITIALIZED
137 P_MMAP_FLAG(UNINITIALIZED);
142 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150 struct syscall_arg *arg)
152 int behavior = arg->val;
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158 P_MADV_BHV(SEQUENTIAL);
159 P_MADV_BHV(WILLNEED);
160 P_MADV_BHV(DONTNEED);
162 P_MADV_BHV(DONTFORK);
164 P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166 P_MADV_BHV(SOFT_OFFLINE);
168 P_MADV_BHV(MERGEABLE);
169 P_MADV_BHV(UNMERGEABLE);
171 P_MADV_BHV(HUGEPAGE);
173 #ifdef MADV_NOHUGEPAGE
174 P_MADV_BHV(NOHUGEPAGE);
177 P_MADV_BHV(DONTDUMP);
186 return scnprintf(bf, size, "%#x", behavior);
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 enum syscall_futex_args {
194 SCF_UADDR = (1 << 0),
197 SCF_TIMEOUT = (1 << 3),
198 SCF_UADDR2 = (1 << 4),
202 int cmd = op & FUTEX_CMD_MASK;
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
208 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
211 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(WAKE_OP); break;
214 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
217 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
218 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
220 default: printed = scnprintf(bf, size, "%#x", cmd); break;
223 if (op & FUTEX_PRIVATE_FLAG)
224 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 if (op & FUTEX_CLOCK_REALTIME)
227 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
232 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
234 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
235 static DEFINE_STRARRAY(itimers);
237 static const char *whences[] = { "SET", "CUR", "END",
245 static DEFINE_STRARRAY(whences);
247 static const char *fcntl_cmds[] = {
248 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
249 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
250 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
253 static DEFINE_STRARRAY(fcntl_cmds);
255 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
256 static DEFINE_STRARRAY(sighow);
258 static const char *socket_families[] = {
259 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
260 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
261 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
262 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
263 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
264 "ALG", "NFC", "VSOCK",
266 static DEFINE_STRARRAY(socket_families);
268 #ifndef SOCK_TYPE_MASK
269 #define SOCK_TYPE_MASK 0xf
272 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
273 struct syscall_arg *arg)
277 flags = type & ~SOCK_TYPE_MASK;
279 type &= SOCK_TYPE_MASK;
281 * Can't use a strarray, MIPS may override for ABI reasons.
284 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
289 P_SK_TYPE(SEQPACKET);
294 printed = scnprintf(bf, size, "%#x", type);
297 #define P_SK_FLAG(n) \
298 if (flags & SOCK_##n) { \
299 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
300 flags &= ~SOCK_##n; \
308 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
313 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
315 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
316 struct syscall_arg *arg)
318 int printed = 0, flags = arg->val;
320 if (!(flags & O_CREAT))
321 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
324 return scnprintf(bf, size, "RDONLY");
326 if (flags & O_##n) { \
327 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
351 if ((flags & O_SYNC) == O_SYNC)
352 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
364 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
369 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
371 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
376 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
411 return scnprintf(bf, size, "%#x", sig);
414 #define SCA_SIGNUM syscall_arg__scnprintf_signum
416 static struct syscall_fmt {
419 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
425 { .name = "access", .errmsg = true, },
426 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
427 { .name = "brk", .hexret = true,
428 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
429 { .name = "mmap", .hexret = true, },
430 { .name = "connect", .errmsg = true, },
431 { .name = "fcntl", .errmsg = true,
432 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
433 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
434 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
435 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
436 { .name = "futex", .errmsg = true,
437 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
438 { .name = "getitimer", .errmsg = true,
439 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
440 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
441 { .name = "ioctl", .errmsg = true,
442 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
443 { .name = "kill", .errmsg = true,
444 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
445 { .name = "lseek", .errmsg = true,
446 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
447 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
448 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
449 { .name = "madvise", .errmsg = true,
450 .arg_scnprintf = { [0] = SCA_HEX, /* start */
451 [2] = SCA_MADV_BHV, /* behavior */ }, },
452 { .name = "mmap", .hexret = true,
453 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
454 [2] = SCA_MMAP_PROT, /* prot */
455 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
456 { .name = "mprotect", .errmsg = true,
457 .arg_scnprintf = { [0] = SCA_HEX, /* start */
458 [2] = SCA_MMAP_PROT, /* prot */ }, },
459 { .name = "mremap", .hexret = true,
460 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
461 [4] = SCA_HEX, /* new_addr */ }, },
462 { .name = "munmap", .errmsg = true,
463 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
464 { .name = "open", .errmsg = true,
465 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
466 { .name = "open_by_handle_at", .errmsg = true,
467 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
468 { .name = "openat", .errmsg = true,
469 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
470 { .name = "poll", .errmsg = true, .timeout = true, },
471 { .name = "ppoll", .errmsg = true, .timeout = true, },
472 { .name = "pread", .errmsg = true, .alias = "pread64", },
473 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
474 { .name = "read", .errmsg = true, },
475 { .name = "recvfrom", .errmsg = true, },
476 { .name = "rt_sigaction", .errmsg = true,
477 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
478 { .name = "rt_sigprocmask", .errmsg = true,
479 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
480 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
481 { .name = "rt_sigqueueinfo", .errmsg = true,
482 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
483 { .name = "rt_tgsigqueueinfo", .errmsg = true,
484 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
485 { .name = "select", .errmsg = true, .timeout = true, },
486 { .name = "setitimer", .errmsg = true,
487 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
488 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
489 { .name = "socket", .errmsg = true,
490 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
491 [1] = SCA_SK_TYPE, /* type */ },
492 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
493 { .name = "stat", .errmsg = true, .alias = "newstat", },
494 { .name = "tgkill", .errmsg = true,
495 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
496 { .name = "tkill", .errmsg = true,
497 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
498 { .name = "uname", .errmsg = true, .alias = "newuname", },
501 static int syscall_fmt__cmp(const void *name, const void *fmtp)
503 const struct syscall_fmt *fmt = fmtp;
504 return strcmp(name, fmt->name);
507 static struct syscall_fmt *syscall_fmt__find(const char *name)
509 const int nmemb = ARRAY_SIZE(syscall_fmts);
510 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
514 struct event_format *tp_format;
517 struct syscall_fmt *fmt;
518 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
522 static size_t fprintf_duration(unsigned long t, FILE *fp)
524 double duration = (double)t / NSEC_PER_MSEC;
525 size_t printed = fprintf(fp, "(");
528 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
529 else if (duration >= 0.01)
530 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
532 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
533 return printed + fprintf(fp, "): ");
536 struct thread_trace {
540 unsigned long nr_events;
545 static struct thread_trace *thread_trace__new(void)
547 return zalloc(sizeof(struct thread_trace));
550 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
552 struct thread_trace *ttrace;
557 if (thread->priv == NULL)
558 thread->priv = thread_trace__new();
560 if (thread->priv == NULL)
563 ttrace = thread->priv;
568 color_fprintf(fp, PERF_COLOR_RED,
569 "WARNING: not enough memory, dropping samples!\n");
574 struct perf_tool tool;
578 struct syscall *table;
580 struct perf_record_opts opts;
584 unsigned long nr_events;
585 struct strlist *ev_qualifier;
586 bool not_ev_qualifier;
587 struct intlist *tid_list;
588 struct intlist *pid_list;
590 bool multiple_threads;
591 double duration_filter;
595 static bool trace__filter_duration(struct trace *trace, double t)
597 return t < (trace->duration_filter * NSEC_PER_MSEC);
600 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
602 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
604 return fprintf(fp, "%10.3f ", ts);
607 static bool done = false;
609 static void sig_handler(int sig __maybe_unused)
614 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
615 u64 duration, u64 tstamp, FILE *fp)
617 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
618 printed += fprintf_duration(duration, fp);
620 if (trace->multiple_threads)
621 printed += fprintf(fp, "%d ", thread->tid);
626 static int trace__process_event(struct trace *trace, struct machine *machine,
627 union perf_event *event)
631 switch (event->header.type) {
632 case PERF_RECORD_LOST:
633 color_fprintf(trace->output, PERF_COLOR_RED,
634 "LOST %" PRIu64 " events!\n", event->lost.lost);
635 ret = machine__process_lost_event(machine, event);
637 ret = machine__process_event(machine, event);
644 static int trace__tool_process(struct perf_tool *tool,
645 union perf_event *event,
646 struct perf_sample *sample __maybe_unused,
647 struct machine *machine)
649 struct trace *trace = container_of(tool, struct trace, tool);
650 return trace__process_event(trace, machine, event);
653 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
655 int err = symbol__init();
660 machine__init(&trace->host, "", HOST_KERNEL_ID);
661 machine__create_kernel_maps(&trace->host);
663 if (perf_target__has_task(&trace->opts.target)) {
664 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
668 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
678 static int syscall__set_arg_fmts(struct syscall *sc)
680 struct format_field *field;
683 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
684 if (sc->arg_scnprintf == NULL)
688 sc->arg_parm = sc->fmt->arg_parm;
690 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
691 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
692 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
693 else if (field->flags & FIELD_IS_POINTER)
694 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
701 static int trace__read_syscall_info(struct trace *trace, int id)
705 const char *name = audit_syscall_to_name(id, trace->audit_machine);
710 if (id > trace->syscalls.max) {
711 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
713 if (nsyscalls == NULL)
716 if (trace->syscalls.max != -1) {
717 memset(nsyscalls + trace->syscalls.max + 1, 0,
718 (id - trace->syscalls.max) * sizeof(*sc));
720 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
723 trace->syscalls.table = nsyscalls;
724 trace->syscalls.max = id;
727 sc = trace->syscalls.table + id;
730 if (trace->ev_qualifier) {
731 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
733 if (!(in ^ trace->not_ev_qualifier)) {
736 * No need to do read tracepoint information since this will be
743 sc->fmt = syscall_fmt__find(sc->name);
745 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
746 sc->tp_format = event_format__new("syscalls", tp_name);
748 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
749 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
750 sc->tp_format = event_format__new("syscalls", tp_name);
753 if (sc->tp_format == NULL)
756 return syscall__set_arg_fmts(sc);
759 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
764 if (sc->tp_format != NULL) {
765 struct format_field *field;
767 struct syscall_arg arg = {
772 for (field = sc->tp_format->format.fields->next; field;
773 field = field->next, ++arg.idx, bit <<= 1) {
777 printed += scnprintf(bf + printed, size - printed,
778 "%s%s: ", printed ? ", " : "", field->name);
779 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
780 arg.val = args[arg.idx];
782 arg.parm = sc->arg_parm[arg.idx];
783 printed += sc->arg_scnprintf[arg.idx](bf + printed,
784 size - printed, &arg);
786 printed += scnprintf(bf + printed, size - printed,
787 "%ld", args[arg.idx]);
794 printed += scnprintf(bf + printed, size - printed,
796 printed ? ", " : "", i, args[i]);
804 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
805 struct perf_sample *sample);
807 static struct syscall *trace__syscall_info(struct trace *trace,
808 struct perf_evsel *evsel,
809 struct perf_sample *sample)
811 int id = perf_evsel__intval(evsel, sample, "id");
816 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
817 * before that, leaving at a higher verbosity level till that is
818 * explained. Reproduced with plain ftrace with:
820 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
821 * grep "NR -1 " /t/trace_pipe
823 * After generating some load on the machine.
827 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
828 id, perf_evsel__name(evsel), ++n);
833 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
834 trace__read_syscall_info(trace, id))
837 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
840 return &trace->syscalls.table[id];
844 fprintf(trace->output, "Problems reading syscall %d", id);
845 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
846 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
847 fputs(" information\n", trace->output);
852 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
853 struct perf_sample *sample)
858 struct thread *thread;
859 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
860 struct thread_trace *ttrace;
868 thread = machine__findnew_thread(&trace->host, sample->pid,
870 ttrace = thread__trace(thread, trace->output);
874 args = perf_evsel__rawptr(evsel, sample, "args");
876 fprintf(trace->output, "Problems reading syscall arguments\n");
880 ttrace = thread->priv;
882 if (ttrace->entry_str == NULL) {
883 ttrace->entry_str = malloc(1024);
884 if (!ttrace->entry_str)
888 ttrace->entry_time = sample->time;
889 msg = ttrace->entry_str;
890 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
892 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
894 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
895 if (!trace->duration_filter) {
896 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
897 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
900 ttrace->entry_pending = true;
905 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
906 struct perf_sample *sample)
910 struct thread *thread;
911 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
912 struct thread_trace *ttrace;
920 thread = machine__findnew_thread(&trace->host, sample->pid,
922 ttrace = thread__trace(thread, trace->output);
926 ret = perf_evsel__intval(evsel, sample, "ret");
928 ttrace = thread->priv;
930 ttrace->exit_time = sample->time;
932 if (ttrace->entry_time) {
933 duration = sample->time - ttrace->entry_time;
934 if (trace__filter_duration(trace, duration))
936 } else if (trace->duration_filter)
939 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
941 if (ttrace->entry_pending) {
942 fprintf(trace->output, "%-70s", ttrace->entry_str);
944 fprintf(trace->output, " ... [");
945 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
946 fprintf(trace->output, "]: %s()", sc->name);
949 if (sc->fmt == NULL) {
951 fprintf(trace->output, ") = %d", ret);
952 } else if (ret < 0 && sc->fmt->errmsg) {
954 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
955 *e = audit_errno_to_name(-ret);
957 fprintf(trace->output, ") = -1 %s %s", e, emsg);
958 } else if (ret == 0 && sc->fmt->timeout)
959 fprintf(trace->output, ") = 0 Timeout");
960 else if (sc->fmt->hexret)
961 fprintf(trace->output, ") = %#x", ret);
965 fputc('\n', trace->output);
967 ttrace->entry_pending = false;
972 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
973 struct perf_sample *sample)
975 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
976 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
977 struct thread *thread = machine__findnew_thread(&trace->host,
980 struct thread_trace *ttrace = thread__trace(thread, trace->output);
985 ttrace->runtime_ms += runtime_ms;
986 trace->runtime_ms += runtime_ms;
990 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
992 perf_evsel__strval(evsel, sample, "comm"),
993 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
995 perf_evsel__intval(evsel, sample, "vruntime"));
999 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1001 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1002 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1005 if (trace->pid_list || trace->tid_list)
1011 static int trace__process_sample(struct perf_tool *tool,
1012 union perf_event *event __maybe_unused,
1013 struct perf_sample *sample,
1014 struct perf_evsel *evsel,
1015 struct machine *machine __maybe_unused)
1017 struct trace *trace = container_of(tool, struct trace, tool);
1020 tracepoint_handler handler = evsel->handler.func;
1022 if (skip_sample(trace, sample))
1025 if (trace->base_time == 0)
1026 trace->base_time = sample->time;
1029 handler(trace, evsel, sample);
1035 perf_session__has_tp(struct perf_session *session, const char *name)
1037 struct perf_evsel *evsel;
1039 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1041 return evsel != NULL;
1044 static int parse_target_str(struct trace *trace)
1046 if (trace->opts.target.pid) {
1047 trace->pid_list = intlist__new(trace->opts.target.pid);
1048 if (trace->pid_list == NULL) {
1049 pr_err("Error parsing process id string\n");
1054 if (trace->opts.target.tid) {
1055 trace->tid_list = intlist__new(trace->opts.target.tid);
1056 if (trace->tid_list == NULL) {
1057 pr_err("Error parsing thread id string\n");
1065 static int trace__run(struct trace *trace, int argc, const char **argv)
1067 struct perf_evlist *evlist = perf_evlist__new();
1068 struct perf_evsel *evsel;
1070 unsigned long before;
1071 const bool forks = argc > 0;
1073 if (evlist == NULL) {
1074 fprintf(trace->output, "Not enough memory to run!\n");
1078 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1079 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1080 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1081 goto out_delete_evlist;
1085 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1086 trace__sched_stat_runtime)) {
1087 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1088 goto out_delete_evlist;
1091 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1093 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1094 goto out_delete_evlist;
1097 err = trace__symbols_init(trace, evlist);
1099 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1100 goto out_delete_maps;
1103 perf_evlist__config(evlist, &trace->opts);
1105 signal(SIGCHLD, sig_handler);
1106 signal(SIGINT, sig_handler);
1109 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1110 argv, false, false);
1112 fprintf(trace->output, "Couldn't run the workload!\n");
1113 goto out_delete_maps;
1117 err = perf_evlist__open(evlist);
1119 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1120 goto out_delete_maps;
1123 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1125 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1126 goto out_close_evlist;
1129 perf_evlist__enable(evlist);
1132 perf_evlist__start_workload(evlist);
1134 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1136 before = trace->nr_events;
1138 for (i = 0; i < evlist->nr_mmaps; i++) {
1139 union perf_event *event;
1141 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1142 const u32 type = event->header.type;
1143 tracepoint_handler handler;
1144 struct perf_sample sample;
1148 err = perf_evlist__parse_sample(evlist, event, &sample);
1150 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1154 if (trace->base_time == 0)
1155 trace->base_time = sample.time;
1157 if (type != PERF_RECORD_SAMPLE) {
1158 trace__process_event(trace, &trace->host, event);
1162 evsel = perf_evlist__id2evsel(evlist, sample.id);
1163 if (evsel == NULL) {
1164 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1168 if (sample.raw_data == NULL) {
1169 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1170 perf_evsel__name(evsel), sample.tid,
1171 sample.cpu, sample.raw_size);
1175 handler = evsel->handler.func;
1176 handler(trace, evsel, &sample);
1179 goto out_unmap_evlist;
1183 if (trace->nr_events == before) {
1185 goto out_unmap_evlist;
1187 poll(evlist->pollfd, evlist->nr_fds, -1);
1191 perf_evlist__disable(evlist);
1196 perf_evlist__munmap(evlist);
1198 perf_evlist__close(evlist);
1200 perf_evlist__delete_maps(evlist);
1202 perf_evlist__delete(evlist);
1207 static int trace__replay(struct trace *trace)
1209 const struct perf_evsel_str_handler handlers[] = {
1210 { "raw_syscalls:sys_enter", trace__sys_enter, },
1211 { "raw_syscalls:sys_exit", trace__sys_exit, },
1214 struct perf_session *session;
1217 trace->tool.sample = trace__process_sample;
1218 trace->tool.mmap = perf_event__process_mmap;
1219 trace->tool.mmap2 = perf_event__process_mmap2;
1220 trace->tool.comm = perf_event__process_comm;
1221 trace->tool.exit = perf_event__process_exit;
1222 trace->tool.fork = perf_event__process_fork;
1223 trace->tool.attr = perf_event__process_attr;
1224 trace->tool.tracing_data = perf_event__process_tracing_data;
1225 trace->tool.build_id = perf_event__process_build_id;
1227 trace->tool.ordered_samples = true;
1228 trace->tool.ordering_requires_timestamps = true;
1230 /* add tid to output */
1231 trace->multiple_threads = true;
1233 if (symbol__init() < 0)
1236 session = perf_session__new(input_name, O_RDONLY, 0, false,
1238 if (session == NULL)
1241 err = perf_session__set_tracepoints_handlers(session, handlers);
1245 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1246 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1250 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1251 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1255 err = parse_target_str(trace);
1261 err = perf_session__process_events(session, &trace->tool);
1263 pr_err("Failed to process events, error %d", err);
1266 perf_session__delete(session);
1271 static size_t trace__fprintf_threads_header(FILE *fp)
1275 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1276 printed += fprintf(fp," __) Summary of events (__\n\n");
1277 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1278 printed += fprintf(fp," _____________________________________________________________________\n\n");
1283 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1285 size_t printed = trace__fprintf_threads_header(fp);
1288 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1289 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1290 struct thread_trace *ttrace = thread->priv;
1297 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1299 color = PERF_COLOR_NORMAL;
1301 color = PERF_COLOR_RED;
1302 else if (ratio > 25.0)
1303 color = PERF_COLOR_GREEN;
1304 else if (ratio > 5.0)
1305 color = PERF_COLOR_YELLOW;
1307 printed += color_fprintf(fp, color, "%20s", thread->comm);
1308 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1309 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1310 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1316 static int trace__set_duration(const struct option *opt, const char *str,
1317 int unset __maybe_unused)
1319 struct trace *trace = opt->value;
1321 trace->duration_filter = atof(str);
1325 static int trace__open_output(struct trace *trace, const char *filename)
1329 if (!stat(filename, &st) && st.st_size) {
1330 char oldname[PATH_MAX];
1332 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1334 rename(filename, oldname);
1337 trace->output = fopen(filename, "w");
1339 return trace->output == NULL ? -errno : 0;
1342 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1344 const char * const trace_usage[] = {
1345 "perf trace [<options>] [<command>]",
1346 "perf trace [<options>] -- <command> [<options>]",
1349 struct trace trace = {
1350 .audit_machine = audit_detect_machine(),
1359 .user_freq = UINT_MAX,
1360 .user_interval = ULLONG_MAX,
1366 const char *output_name = NULL;
1367 const char *ev_qualifier_str = NULL;
1368 const struct option trace_options[] = {
1369 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1370 "list of events to trace"),
1371 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1372 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1373 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1374 "trace events on existing process id"),
1375 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1376 "trace events on existing thread id"),
1377 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1378 "system-wide collection from all CPUs"),
1379 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1380 "list of cpus to monitor"),
1381 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1382 "child tasks do not inherit counters"),
1383 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1384 "number of mmap data pages"),
1385 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1387 OPT_CALLBACK(0, "duration", &trace, "float",
1388 "show only events with duration > N.M ms",
1389 trace__set_duration),
1390 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1391 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1397 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1399 if (output_name != NULL) {
1400 err = trace__open_output(&trace, output_name);
1402 perror("failed to create output file");
1407 if (ev_qualifier_str != NULL) {
1408 const char *s = ev_qualifier_str;
1410 trace.not_ev_qualifier = *s == '!';
1411 if (trace.not_ev_qualifier)
1413 trace.ev_qualifier = strlist__new(true, s);
1414 if (trace.ev_qualifier == NULL) {
1415 fputs("Not enough memory to parse event qualifier",
1422 err = perf_target__validate(&trace.opts.target);
1424 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1425 fprintf(trace.output, "%s", bf);
1429 err = perf_target__parse_uid(&trace.opts.target);
1431 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1432 fprintf(trace.output, "%s", bf);
1436 if (!argc && perf_target__none(&trace.opts.target))
1437 trace.opts.target.system_wide = true;
1440 err = trace__replay(&trace);
1442 err = trace__run(&trace, argc, argv);
1444 if (trace.sched && !err)
1445 trace__fprintf_thread_summary(&trace, trace.output);
1448 if (output_name != NULL)
1449 fclose(trace.output);