perf trace: Allow syscall arg formatters to mask args
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/mman.h>
17
18 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
19                                          unsigned long arg, u8 *arg_mask __maybe_unused)
20 {
21         return scnprintf(bf, size, "%#lx", arg);
22 }
23
24 #define SCA_HEX syscall_arg__scnprintf_hex
25
26 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
27                                                unsigned long arg, u8 *arg_mask __maybe_unused)
28 {
29         int printed = 0, prot = arg;
30
31         if (prot == PROT_NONE)
32                 return scnprintf(bf, size, "NONE");
33 #define P_MMAP_PROT(n) \
34         if (prot & PROT_##n) { \
35                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
36                 prot &= ~PROT_##n; \
37         }
38
39         P_MMAP_PROT(EXEC);
40         P_MMAP_PROT(READ);
41         P_MMAP_PROT(WRITE);
42 #ifdef PROT_SEM
43         P_MMAP_PROT(SEM);
44 #endif
45         P_MMAP_PROT(GROWSDOWN);
46         P_MMAP_PROT(GROWSUP);
47 #undef P_MMAP_PROT
48
49         if (prot)
50                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
51
52         return printed;
53 }
54
55 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
56
57 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
58                                                 unsigned long arg, u8 *arg_mask __maybe_unused)
59 {
60         int printed = 0, flags = arg;
61
62 #define P_MMAP_FLAG(n) \
63         if (flags & MAP_##n) { \
64                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
65                 flags &= ~MAP_##n; \
66         }
67
68         P_MMAP_FLAG(SHARED);
69         P_MMAP_FLAG(PRIVATE);
70         P_MMAP_FLAG(32BIT);
71         P_MMAP_FLAG(ANONYMOUS);
72         P_MMAP_FLAG(DENYWRITE);
73         P_MMAP_FLAG(EXECUTABLE);
74         P_MMAP_FLAG(FILE);
75         P_MMAP_FLAG(FIXED);
76         P_MMAP_FLAG(GROWSDOWN);
77 #ifdef MAP_HUGETLB
78         P_MMAP_FLAG(HUGETLB);
79 #endif
80         P_MMAP_FLAG(LOCKED);
81         P_MMAP_FLAG(NONBLOCK);
82         P_MMAP_FLAG(NORESERVE);
83         P_MMAP_FLAG(POPULATE);
84         P_MMAP_FLAG(STACK);
85 #ifdef MAP_UNINITIALIZED
86         P_MMAP_FLAG(UNINITIALIZED);
87 #endif
88 #undef P_MMAP_FLAG
89
90         if (flags)
91                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
92
93         return printed;
94 }
95
96 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
97
98 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
99                                                       unsigned long arg, u8 *arg_mask __maybe_unused)
100 {
101         int behavior = arg;
102
103         switch (behavior) {
104 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
105         P_MADV_BHV(NORMAL);
106         P_MADV_BHV(RANDOM);
107         P_MADV_BHV(SEQUENTIAL);
108         P_MADV_BHV(WILLNEED);
109         P_MADV_BHV(DONTNEED);
110         P_MADV_BHV(REMOVE);
111         P_MADV_BHV(DONTFORK);
112         P_MADV_BHV(DOFORK);
113         P_MADV_BHV(HWPOISON);
114 #ifdef MADV_SOFT_OFFLINE
115         P_MADV_BHV(SOFT_OFFLINE);
116 #endif
117         P_MADV_BHV(MERGEABLE);
118         P_MADV_BHV(UNMERGEABLE);
119 #ifdef MADV_HUGEPAGE
120         P_MADV_BHV(HUGEPAGE);
121 #endif
122 #ifdef MADV_NOHUGEPAGE
123         P_MADV_BHV(NOHUGEPAGE);
124 #endif
125 #ifdef MADV_DONTDUMP
126         P_MADV_BHV(DONTDUMP);
127 #endif
128 #ifdef MADV_DODUMP
129         P_MADV_BHV(DODUMP);
130 #endif
131 #undef P_MADV_PHV
132         default: break;
133         }
134
135         return scnprintf(bf, size, "%#x", behavior);
136 }
137
138 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
139
140 static struct syscall_fmt {
141         const char *name;
142         const char *alias;
143         size_t     (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 *arg_mask);
144         bool       errmsg;
145         bool       timeout;
146         bool       hexret;
147 } syscall_fmts[] = {
148         { .name     = "access",     .errmsg = true, },
149         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
150         { .name     = "brk",        .hexret = true,
151           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
152         { .name     = "mmap",       .hexret = true, },
153         { .name     = "connect",    .errmsg = true, },
154         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
155         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
156         { .name     = "futex",      .errmsg = true, },
157         { .name     = "ioctl",      .errmsg = true,
158           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
159         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
160         { .name     = "madvise",    .errmsg = true,
161           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
162                              [2] = SCA_MADV_BHV, /* behavior */ }, },
163         { .name     = "mmap",       .hexret = true,
164           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
165                              [2] = SCA_MMAP_PROT, /* prot */
166                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
167         { .name     = "mprotect",   .errmsg = true,
168           .arg_scnprintf = { [0] = SCA_HEX, /* start */
169                              [2] = SCA_MMAP_PROT, /* prot */ }, },
170         { .name     = "mremap",     .hexret = true,
171           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
172                              [4] = SCA_HEX, /* new_addr */ }, },
173         { .name     = "munmap",     .errmsg = true,
174           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
175         { .name     = "open",       .errmsg = true, },
176         { .name     = "poll",       .errmsg = true, .timeout = true, },
177         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
178         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
179         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
180         { .name     = "read",       .errmsg = true, },
181         { .name     = "recvfrom",   .errmsg = true, },
182         { .name     = "select",     .errmsg = true, .timeout = true, },
183         { .name     = "socket",     .errmsg = true, },
184         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
185         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
186 };
187
188 static int syscall_fmt__cmp(const void *name, const void *fmtp)
189 {
190         const struct syscall_fmt *fmt = fmtp;
191         return strcmp(name, fmt->name);
192 }
193
194 static struct syscall_fmt *syscall_fmt__find(const char *name)
195 {
196         const int nmemb = ARRAY_SIZE(syscall_fmts);
197         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
198 }
199
200 struct syscall {
201         struct event_format *tp_format;
202         const char          *name;
203         bool                filtered;
204         struct syscall_fmt  *fmt;
205         size_t              (**arg_scnprintf)(char *bf, size_t size,
206                                               unsigned long arg, u8 *args_mask);
207 };
208
209 static size_t fprintf_duration(unsigned long t, FILE *fp)
210 {
211         double duration = (double)t / NSEC_PER_MSEC;
212         size_t printed = fprintf(fp, "(");
213
214         if (duration >= 1.0)
215                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
216         else if (duration >= 0.01)
217                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
218         else
219                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
220         return printed + fprintf(fp, "): ");
221 }
222
223 struct thread_trace {
224         u64               entry_time;
225         u64               exit_time;
226         bool              entry_pending;
227         unsigned long     nr_events;
228         char              *entry_str;
229         double            runtime_ms;
230 };
231
232 static struct thread_trace *thread_trace__new(void)
233 {
234         return zalloc(sizeof(struct thread_trace));
235 }
236
237 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
238 {
239         struct thread_trace *ttrace;
240
241         if (thread == NULL)
242                 goto fail;
243
244         if (thread->priv == NULL)
245                 thread->priv = thread_trace__new();
246                 
247         if (thread->priv == NULL)
248                 goto fail;
249
250         ttrace = thread->priv;
251         ++ttrace->nr_events;
252
253         return ttrace;
254 fail:
255         color_fprintf(fp, PERF_COLOR_RED,
256                       "WARNING: not enough memory, dropping samples!\n");
257         return NULL;
258 }
259
260 struct trace {
261         struct perf_tool        tool;
262         int                     audit_machine;
263         struct {
264                 int             max;
265                 struct syscall  *table;
266         } syscalls;
267         struct perf_record_opts opts;
268         struct machine          host;
269         u64                     base_time;
270         FILE                    *output;
271         unsigned long           nr_events;
272         struct strlist          *ev_qualifier;
273         bool                    not_ev_qualifier;
274         struct intlist          *tid_list;
275         struct intlist          *pid_list;
276         bool                    sched;
277         bool                    multiple_threads;
278         double                  duration_filter;
279         double                  runtime_ms;
280 };
281
282 static bool trace__filter_duration(struct trace *trace, double t)
283 {
284         return t < (trace->duration_filter * NSEC_PER_MSEC);
285 }
286
287 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
288 {
289         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
290
291         return fprintf(fp, "%10.3f ", ts);
292 }
293
294 static bool done = false;
295
296 static void sig_handler(int sig __maybe_unused)
297 {
298         done = true;
299 }
300
301 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
302                                         u64 duration, u64 tstamp, FILE *fp)
303 {
304         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
305         printed += fprintf_duration(duration, fp);
306
307         if (trace->multiple_threads)
308                 printed += fprintf(fp, "%d ", thread->tid);
309
310         return printed;
311 }
312
313 static int trace__process_event(struct trace *trace, struct machine *machine,
314                                 union perf_event *event)
315 {
316         int ret = 0;
317
318         switch (event->header.type) {
319         case PERF_RECORD_LOST:
320                 color_fprintf(trace->output, PERF_COLOR_RED,
321                               "LOST %" PRIu64 " events!\n", event->lost.lost);
322                 ret = machine__process_lost_event(machine, event);
323         default:
324                 ret = machine__process_event(machine, event);
325                 break;
326         }
327
328         return ret;
329 }
330
331 static int trace__tool_process(struct perf_tool *tool,
332                                union perf_event *event,
333                                struct perf_sample *sample __maybe_unused,
334                                struct machine *machine)
335 {
336         struct trace *trace = container_of(tool, struct trace, tool);
337         return trace__process_event(trace, machine, event);
338 }
339
340 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
341 {
342         int err = symbol__init();
343
344         if (err)
345                 return err;
346
347         machine__init(&trace->host, "", HOST_KERNEL_ID);
348         machine__create_kernel_maps(&trace->host);
349
350         if (perf_target__has_task(&trace->opts.target)) {
351                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
352                                                         trace__tool_process,
353                                                         &trace->host);
354         } else {
355                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
356                                                      &trace->host);
357         }
358
359         if (err)
360                 symbol__exit();
361
362         return err;
363 }
364
365 static int syscall__set_arg_fmts(struct syscall *sc)
366 {
367         struct format_field *field;
368         int idx = 0;
369
370         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
371         if (sc->arg_scnprintf == NULL)
372                 return -1;
373
374         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
375                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
376                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
377                 else if (field->flags & FIELD_IS_POINTER)
378                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
379                 ++idx;
380         }
381
382         return 0;
383 }
384
385 static int trace__read_syscall_info(struct trace *trace, int id)
386 {
387         char tp_name[128];
388         struct syscall *sc;
389         const char *name = audit_syscall_to_name(id, trace->audit_machine);
390
391         if (name == NULL)
392                 return -1;
393
394         if (id > trace->syscalls.max) {
395                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
396
397                 if (nsyscalls == NULL)
398                         return -1;
399
400                 if (trace->syscalls.max != -1) {
401                         memset(nsyscalls + trace->syscalls.max + 1, 0,
402                                (id - trace->syscalls.max) * sizeof(*sc));
403                 } else {
404                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
405                 }
406
407                 trace->syscalls.table = nsyscalls;
408                 trace->syscalls.max   = id;
409         }
410
411         sc = trace->syscalls.table + id;
412         sc->name = name;
413
414         if (trace->ev_qualifier) {
415                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
416
417                 if (!(in ^ trace->not_ev_qualifier)) {
418                         sc->filtered = true;
419                         /*
420                          * No need to do read tracepoint information since this will be
421                          * filtered out.
422                          */
423                         return 0;
424                 }
425         }
426
427         sc->fmt  = syscall_fmt__find(sc->name);
428
429         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
430         sc->tp_format = event_format__new("syscalls", tp_name);
431
432         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
433                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
434                 sc->tp_format = event_format__new("syscalls", tp_name);
435         }
436
437         if (sc->tp_format == NULL)
438                 return -1;
439
440         return syscall__set_arg_fmts(sc);
441 }
442
443 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
444                                       unsigned long *args)
445 {
446         int i = 0;
447         size_t printed = 0;
448
449         if (sc->tp_format != NULL) {
450                 struct format_field *field;
451                 u8 mask = 0, bit = 1;
452
453                 for (field = sc->tp_format->format.fields->next; field;
454                      field = field->next, ++i, bit <<= 1) {
455                         if (mask & bit)
456                                 continue;
457
458                         printed += scnprintf(bf + printed, size - printed,
459                                              "%s%s: ", printed ? ", " : "", field->name);
460
461                         if (sc->arg_scnprintf && sc->arg_scnprintf[i]) {
462                                 printed += sc->arg_scnprintf[i](bf + printed, size - printed,
463                                                                 args[i], &mask);
464                         } else {
465                                 printed += scnprintf(bf + printed, size - printed,
466                                                      "%ld", args[i]);
467                         }
468                 }
469         } else {
470                 while (i < 6) {
471                         printed += scnprintf(bf + printed, size - printed,
472                                              "%sarg%d: %ld",
473                                              printed ? ", " : "", i, args[i]);
474                         ++i;
475                 }
476         }
477
478         return printed;
479 }
480
481 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
482                                   struct perf_sample *sample);
483
484 static struct syscall *trace__syscall_info(struct trace *trace,
485                                            struct perf_evsel *evsel,
486                                            struct perf_sample *sample)
487 {
488         int id = perf_evsel__intval(evsel, sample, "id");
489
490         if (id < 0) {
491
492                 /*
493                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
494                  * before that, leaving at a higher verbosity level till that is
495                  * explained. Reproduced with plain ftrace with:
496                  *
497                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
498                  * grep "NR -1 " /t/trace_pipe
499                  *
500                  * After generating some load on the machine.
501                  */
502                 if (verbose > 1) {
503                         static u64 n;
504                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
505                                 id, perf_evsel__name(evsel), ++n);
506                 }
507                 return NULL;
508         }
509
510         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
511             trace__read_syscall_info(trace, id))
512                 goto out_cant_read;
513
514         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
515                 goto out_cant_read;
516
517         return &trace->syscalls.table[id];
518
519 out_cant_read:
520         if (verbose) {
521                 fprintf(trace->output, "Problems reading syscall %d", id);
522                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
523                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
524                 fputs(" information\n", trace->output);
525         }
526         return NULL;
527 }
528
529 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
530                             struct perf_sample *sample)
531 {
532         char *msg;
533         void *args;
534         size_t printed = 0;
535         struct thread *thread;
536         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
537         struct thread_trace *ttrace;
538
539         if (sc == NULL)
540                 return -1;
541
542         if (sc->filtered)
543                 return 0;
544
545         thread = machine__findnew_thread(&trace->host, sample->pid,
546                                          sample->tid);
547         ttrace = thread__trace(thread, trace->output);
548         if (ttrace == NULL)
549                 return -1;
550
551         args = perf_evsel__rawptr(evsel, sample, "args");
552         if (args == NULL) {
553                 fprintf(trace->output, "Problems reading syscall arguments\n");
554                 return -1;
555         }
556
557         ttrace = thread->priv;
558
559         if (ttrace->entry_str == NULL) {
560                 ttrace->entry_str = malloc(1024);
561                 if (!ttrace->entry_str)
562                         return -1;
563         }
564
565         ttrace->entry_time = sample->time;
566         msg = ttrace->entry_str;
567         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
568
569         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
570
571         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
572                 if (!trace->duration_filter) {
573                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
574                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
575                 }
576         } else
577                 ttrace->entry_pending = true;
578
579         return 0;
580 }
581
582 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
583                            struct perf_sample *sample)
584 {
585         int ret;
586         u64 duration = 0;
587         struct thread *thread;
588         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
589         struct thread_trace *ttrace;
590
591         if (sc == NULL)
592                 return -1;
593
594         if (sc->filtered)
595                 return 0;
596
597         thread = machine__findnew_thread(&trace->host, sample->pid,
598                                          sample->tid);
599         ttrace = thread__trace(thread, trace->output);
600         if (ttrace == NULL)
601                 return -1;
602
603         ret = perf_evsel__intval(evsel, sample, "ret");
604
605         ttrace = thread->priv;
606
607         ttrace->exit_time = sample->time;
608
609         if (ttrace->entry_time) {
610                 duration = sample->time - ttrace->entry_time;
611                 if (trace__filter_duration(trace, duration))
612                         goto out;
613         } else if (trace->duration_filter)
614                 goto out;
615
616         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
617
618         if (ttrace->entry_pending) {
619                 fprintf(trace->output, "%-70s", ttrace->entry_str);
620         } else {
621                 fprintf(trace->output, " ... [");
622                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
623                 fprintf(trace->output, "]: %s()", sc->name);
624         }
625
626         if (sc->fmt == NULL) {
627 signed_print:
628                 fprintf(trace->output, ") = %d", ret);
629         } else if (ret < 0 && sc->fmt->errmsg) {
630                 char bf[256];
631                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
632                            *e = audit_errno_to_name(-ret);
633
634                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
635         } else if (ret == 0 && sc->fmt->timeout)
636                 fprintf(trace->output, ") = 0 Timeout");
637         else if (sc->fmt->hexret)
638                 fprintf(trace->output, ") = %#x", ret);
639         else
640                 goto signed_print;
641
642         fputc('\n', trace->output);
643 out:
644         ttrace->entry_pending = false;
645
646         return 0;
647 }
648
649 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
650                                      struct perf_sample *sample)
651 {
652         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
653         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
654         struct thread *thread = machine__findnew_thread(&trace->host,
655                                                         sample->pid,
656                                                         sample->tid);
657         struct thread_trace *ttrace = thread__trace(thread, trace->output);
658
659         if (ttrace == NULL)
660                 goto out_dump;
661
662         ttrace->runtime_ms += runtime_ms;
663         trace->runtime_ms += runtime_ms;
664         return 0;
665
666 out_dump:
667         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
668                evsel->name,
669                perf_evsel__strval(evsel, sample, "comm"),
670                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
671                runtime,
672                perf_evsel__intval(evsel, sample, "vruntime"));
673         return 0;
674 }
675
676 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
677 {
678         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
679             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
680                 return false;
681
682         if (trace->pid_list || trace->tid_list)
683                 return true;
684
685         return false;
686 }
687
688 static int trace__process_sample(struct perf_tool *tool,
689                                  union perf_event *event __maybe_unused,
690                                  struct perf_sample *sample,
691                                  struct perf_evsel *evsel,
692                                  struct machine *machine __maybe_unused)
693 {
694         struct trace *trace = container_of(tool, struct trace, tool);
695         int err = 0;
696
697         tracepoint_handler handler = evsel->handler.func;
698
699         if (skip_sample(trace, sample))
700                 return 0;
701
702         if (trace->base_time == 0)
703                 trace->base_time = sample->time;
704
705         if (handler)
706                 handler(trace, evsel, sample);
707
708         return err;
709 }
710
711 static bool
712 perf_session__has_tp(struct perf_session *session, const char *name)
713 {
714         struct perf_evsel *evsel;
715
716         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
717
718         return evsel != NULL;
719 }
720
721 static int parse_target_str(struct trace *trace)
722 {
723         if (trace->opts.target.pid) {
724                 trace->pid_list = intlist__new(trace->opts.target.pid);
725                 if (trace->pid_list == NULL) {
726                         pr_err("Error parsing process id string\n");
727                         return -EINVAL;
728                 }
729         }
730
731         if (trace->opts.target.tid) {
732                 trace->tid_list = intlist__new(trace->opts.target.tid);
733                 if (trace->tid_list == NULL) {
734                         pr_err("Error parsing thread id string\n");
735                         return -EINVAL;
736                 }
737         }
738
739         return 0;
740 }
741
742 static int trace__run(struct trace *trace, int argc, const char **argv)
743 {
744         struct perf_evlist *evlist = perf_evlist__new();
745         struct perf_evsel *evsel;
746         int err = -1, i;
747         unsigned long before;
748         const bool forks = argc > 0;
749
750         if (evlist == NULL) {
751                 fprintf(trace->output, "Not enough memory to run!\n");
752                 goto out;
753         }
754
755         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
756             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
757                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
758                 goto out_delete_evlist;
759         }
760
761         if (trace->sched &&
762             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
763                                    trace__sched_stat_runtime)) {
764                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
765                 goto out_delete_evlist;
766         }
767
768         err = perf_evlist__create_maps(evlist, &trace->opts.target);
769         if (err < 0) {
770                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
771                 goto out_delete_evlist;
772         }
773
774         err = trace__symbols_init(trace, evlist);
775         if (err < 0) {
776                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
777                 goto out_delete_maps;
778         }
779
780         perf_evlist__config(evlist, &trace->opts);
781
782         signal(SIGCHLD, sig_handler);
783         signal(SIGINT, sig_handler);
784
785         if (forks) {
786                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
787                                                     argv, false, false);
788                 if (err < 0) {
789                         fprintf(trace->output, "Couldn't run the workload!\n");
790                         goto out_delete_maps;
791                 }
792         }
793
794         err = perf_evlist__open(evlist);
795         if (err < 0) {
796                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
797                 goto out_delete_maps;
798         }
799
800         err = perf_evlist__mmap(evlist, UINT_MAX, false);
801         if (err < 0) {
802                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
803                 goto out_close_evlist;
804         }
805
806         perf_evlist__enable(evlist);
807
808         if (forks)
809                 perf_evlist__start_workload(evlist);
810
811         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
812 again:
813         before = trace->nr_events;
814
815         for (i = 0; i < evlist->nr_mmaps; i++) {
816                 union perf_event *event;
817
818                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
819                         const u32 type = event->header.type;
820                         tracepoint_handler handler;
821                         struct perf_sample sample;
822
823                         ++trace->nr_events;
824
825                         err = perf_evlist__parse_sample(evlist, event, &sample);
826                         if (err) {
827                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
828                                 continue;
829                         }
830
831                         if (trace->base_time == 0)
832                                 trace->base_time = sample.time;
833
834                         if (type != PERF_RECORD_SAMPLE) {
835                                 trace__process_event(trace, &trace->host, event);
836                                 continue;
837                         }
838
839                         evsel = perf_evlist__id2evsel(evlist, sample.id);
840                         if (evsel == NULL) {
841                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
842                                 continue;
843                         }
844
845                         if (sample.raw_data == NULL) {
846                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
847                                        perf_evsel__name(evsel), sample.tid,
848                                        sample.cpu, sample.raw_size);
849                                 continue;
850                         }
851
852                         handler = evsel->handler.func;
853                         handler(trace, evsel, &sample);
854                 }
855         }
856
857         if (trace->nr_events == before) {
858                 if (done)
859                         goto out_unmap_evlist;
860
861                 poll(evlist->pollfd, evlist->nr_fds, -1);
862         }
863
864         if (done)
865                 perf_evlist__disable(evlist);
866
867         goto again;
868
869 out_unmap_evlist:
870         perf_evlist__munmap(evlist);
871 out_close_evlist:
872         perf_evlist__close(evlist);
873 out_delete_maps:
874         perf_evlist__delete_maps(evlist);
875 out_delete_evlist:
876         perf_evlist__delete(evlist);
877 out:
878         return err;
879 }
880
881 static int trace__replay(struct trace *trace)
882 {
883         const struct perf_evsel_str_handler handlers[] = {
884                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
885                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
886         };
887
888         struct perf_session *session;
889         int err = -1;
890
891         trace->tool.sample        = trace__process_sample;
892         trace->tool.mmap          = perf_event__process_mmap;
893         trace->tool.comm          = perf_event__process_comm;
894         trace->tool.exit          = perf_event__process_exit;
895         trace->tool.fork          = perf_event__process_fork;
896         trace->tool.attr          = perf_event__process_attr;
897         trace->tool.tracing_data = perf_event__process_tracing_data;
898         trace->tool.build_id      = perf_event__process_build_id;
899
900         trace->tool.ordered_samples = true;
901         trace->tool.ordering_requires_timestamps = true;
902
903         /* add tid to output */
904         trace->multiple_threads = true;
905
906         if (symbol__init() < 0)
907                 return -1;
908
909         session = perf_session__new(input_name, O_RDONLY, 0, false,
910                                     &trace->tool);
911         if (session == NULL)
912                 return -ENOMEM;
913
914         err = perf_session__set_tracepoints_handlers(session, handlers);
915         if (err)
916                 goto out;
917
918         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
919                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
920                 goto out;
921         }
922
923         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
924                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
925                 goto out;
926         }
927
928         err = parse_target_str(trace);
929         if (err != 0)
930                 goto out;
931
932         setup_pager();
933
934         err = perf_session__process_events(session, &trace->tool);
935         if (err)
936                 pr_err("Failed to process events, error %d", err);
937
938 out:
939         perf_session__delete(session);
940
941         return err;
942 }
943
944 static size_t trace__fprintf_threads_header(FILE *fp)
945 {
946         size_t printed;
947
948         printed  = fprintf(fp, "\n _____________________________________________________________________\n");
949         printed += fprintf(fp," __)    Summary of events    (__\n\n");
950         printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
951         printed += fprintf(fp," _____________________________________________________________________\n\n");
952
953         return printed;
954 }
955
956 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
957 {
958         size_t printed = trace__fprintf_threads_header(fp);
959         struct rb_node *nd;
960
961         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
962                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
963                 struct thread_trace *ttrace = thread->priv;
964                 const char *color;
965                 double ratio;
966
967                 if (ttrace == NULL)
968                         continue;
969
970                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
971
972                 color = PERF_COLOR_NORMAL;
973                 if (ratio > 50.0)
974                         color = PERF_COLOR_RED;
975                 else if (ratio > 25.0)
976                         color = PERF_COLOR_GREEN;
977                 else if (ratio > 5.0)
978                         color = PERF_COLOR_YELLOW;
979
980                 printed += color_fprintf(fp, color, "%20s", thread->comm);
981                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
982                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
983                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
984         }
985
986         return printed;
987 }
988
989 static int trace__set_duration(const struct option *opt, const char *str,
990                                int unset __maybe_unused)
991 {
992         struct trace *trace = opt->value;
993
994         trace->duration_filter = atof(str);
995         return 0;
996 }
997
998 static int trace__open_output(struct trace *trace, const char *filename)
999 {
1000         struct stat st;
1001
1002         if (!stat(filename, &st) && st.st_size) {
1003                 char oldname[PATH_MAX];
1004
1005                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1006                 unlink(oldname);
1007                 rename(filename, oldname);
1008         }
1009
1010         trace->output = fopen(filename, "w");
1011
1012         return trace->output == NULL ? -errno : 0;
1013 }
1014
1015 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1016 {
1017         const char * const trace_usage[] = {
1018                 "perf trace [<options>] [<command>]",
1019                 "perf trace [<options>] -- <command> [<options>]",
1020                 NULL
1021         };
1022         struct trace trace = {
1023                 .audit_machine = audit_detect_machine(),
1024                 .syscalls = {
1025                         . max = -1,
1026                 },
1027                 .opts = {
1028                         .target = {
1029                                 .uid       = UINT_MAX,
1030                                 .uses_mmap = true,
1031                         },
1032                         .user_freq     = UINT_MAX,
1033                         .user_interval = ULLONG_MAX,
1034                         .no_delay      = true,
1035                         .mmap_pages    = 1024,
1036                 },
1037                 .output = stdout,
1038         };
1039         const char *output_name = NULL;
1040         const char *ev_qualifier_str = NULL;
1041         const struct option trace_options[] = {
1042         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1043                     "list of events to trace"),
1044         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1045         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1046         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1047                     "trace events on existing process id"),
1048         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1049                     "trace events on existing thread id"),
1050         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1051                     "system-wide collection from all CPUs"),
1052         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1053                     "list of cpus to monitor"),
1054         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1055                     "child tasks do not inherit counters"),
1056         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1057                      "number of mmap data pages"),
1058         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1059                    "user to profile"),
1060         OPT_CALLBACK(0, "duration", &trace, "float",
1061                      "show only events with duration > N.M ms",
1062                      trace__set_duration),
1063         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1064         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1065         OPT_END()
1066         };
1067         int err;
1068         char bf[BUFSIZ];
1069
1070         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1071
1072         if (output_name != NULL) {
1073                 err = trace__open_output(&trace, output_name);
1074                 if (err < 0) {
1075                         perror("failed to create output file");
1076                         goto out;
1077                 }
1078         }
1079
1080         if (ev_qualifier_str != NULL) {
1081                 const char *s = ev_qualifier_str;
1082
1083                 trace.not_ev_qualifier = *s == '!';
1084                 if (trace.not_ev_qualifier)
1085                         ++s;
1086                 trace.ev_qualifier = strlist__new(true, s);
1087                 if (trace.ev_qualifier == NULL) {
1088                         fputs("Not enough memory to parse event qualifier",
1089                               trace.output);
1090                         err = -ENOMEM;
1091                         goto out_close;
1092                 }
1093         }
1094
1095         err = perf_target__validate(&trace.opts.target);
1096         if (err) {
1097                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1098                 fprintf(trace.output, "%s", bf);
1099                 goto out_close;
1100         }
1101
1102         err = perf_target__parse_uid(&trace.opts.target);
1103         if (err) {
1104                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1105                 fprintf(trace.output, "%s", bf);
1106                 goto out_close;
1107         }
1108
1109         if (!argc && perf_target__none(&trace.opts.target))
1110                 trace.opts.target.system_wide = true;
1111
1112         if (input_name)
1113                 err = trace__replay(&trace);
1114         else
1115                 err = trace__run(&trace, argc, argv);
1116
1117         if (trace.sched && !err)
1118                 trace__fprintf_thread_summary(&trace, trace.output);
1119
1120 out_close:
1121         if (output_name != NULL)
1122                 fclose(trace.output);
1123 out:
1124         return err;
1125 }