perf trace: Handle missing HUGEPAGE defines
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"

#include <libaudit.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/mman.h>
17
/*
 * Argument formatter: render @arg into @bf (at most @size bytes) as a
 * hexadecimal number using the "%#lx" conversion.
 *
 * Returns the value reported by scnprintf().
 */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg)
{
	const unsigned long value = arg;

	return scnprintf(bf, size, "%#lx", value);
}

/* Short name for the hex formatter, used in the syscall_fmts table. */
#define SCA_HEX syscall_arg__scnprintf_hex
24
/*
 * Argument formatter for mmap/mprotect protection bits.
 *
 * PROT_NONE is printed as "NONE".  Otherwise every known PROT_* bit set in
 * @arg is printed by name, separated by '|', and any bits left over after
 * the known ones were stripped are appended in hexadecimal.
 *
 * Returns the number of characters accumulated from the scnprintf() calls.
 */
static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned long arg)
{
	int len = 0;
	int remaining = arg;

	if (remaining == PROT_NONE)
		return scnprintf(bf, size, "NONE");

/* Print PROT_<n> if it is set and clear it from the remaining bits. */
#define EMIT_PROT(n) \
	if (remaining & PROT_##n) { \
		len += scnprintf(bf + len, size - len, "%s%s", len ? "|" : "", #n); \
		remaining &= ~PROT_##n; \
	}

	EMIT_PROT(EXEC);
	EMIT_PROT(READ);
	EMIT_PROT(WRITE);
#ifdef PROT_SEM
	EMIT_PROT(SEM);
#endif
	EMIT_PROT(GROWSDOWN);
	EMIT_PROT(GROWSUP);
#undef EMIT_PROT

	/* Whatever is still set has no symbolic name here: dump it raw. */
	if (remaining != 0)
		len += scnprintf(bf + len, size - len, "%s%#x", len ? "|" : "", remaining);

	return len;
}

/* Short name for the protection-bits formatter, used in syscall_fmts. */
#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
54
55 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned long arg)
56 {
57         int printed = 0, flags = arg;
58
59 #define P_MMAP_FLAG(n) \
60         if (flags & MAP_##n) { \
61                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
62                 flags &= ~MAP_##n; \
63         }
64
65         P_MMAP_FLAG(SHARED);
66         P_MMAP_FLAG(PRIVATE);
67         P_MMAP_FLAG(32BIT);
68         P_MMAP_FLAG(ANONYMOUS);
69         P_MMAP_FLAG(DENYWRITE);
70         P_MMAP_FLAG(EXECUTABLE);
71         P_MMAP_FLAG(FILE);
72         P_MMAP_FLAG(FIXED);
73         P_MMAP_FLAG(GROWSDOWN);
74 #ifdef MAP_HUGETLB
75         P_MMAP_FLAG(HUGETLB);
76 #endif
77         P_MMAP_FLAG(LOCKED);
78         P_MMAP_FLAG(NONBLOCK);
79         P_MMAP_FLAG(NORESERVE);
80         P_MMAP_FLAG(POPULATE);
81         P_MMAP_FLAG(STACK);
82 #ifdef MAP_UNINITIALIZED
83         P_MMAP_FLAG(UNINITIALIZED);
84 #endif
85 #undef P_MMAP_FLAG
86
87         if (flags)
88                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
89
90         return printed;
91 }
92
93 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
94
95 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, unsigned long arg)
96 {
97         int behavior = arg;
98
99         switch (behavior) {
100 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
101         P_MADV_BHV(NORMAL);
102         P_MADV_BHV(RANDOM);
103         P_MADV_BHV(SEQUENTIAL);
104         P_MADV_BHV(WILLNEED);
105         P_MADV_BHV(DONTNEED);
106         P_MADV_BHV(REMOVE);
107         P_MADV_BHV(DONTFORK);
108         P_MADV_BHV(DOFORK);
109         P_MADV_BHV(HWPOISON);
110 #ifdef MADV_SOFT_OFFLINE
111         P_MADV_BHV(SOFT_OFFLINE);
112 #endif
113         P_MADV_BHV(MERGEABLE);
114         P_MADV_BHV(UNMERGEABLE);
115 #ifdef MADV_HUGEPAGE
116         P_MADV_BHV(HUGEPAGE);
117 #endif
118 #ifdef MADV_NOHUGEPAGE
119         P_MADV_BHV(NOHUGEPAGE);
120 #endif
121 #ifdef MADV_DONTDUMP
122         P_MADV_BHV(DONTDUMP);
123 #endif
124 #ifdef MADV_DODUMP
125         P_MADV_BHV(DODUMP);
126 #endif
127 #undef P_MADV_PHV
128         default: break;
129         }
130
131         return scnprintf(bf, size, "%#x", behavior);
132 }
133
134 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
135
136 static struct syscall_fmt {
137         const char *name;
138         const char *alias;
139         size_t     (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg);
140         bool       errmsg;
141         bool       timeout;
142         bool       hexret;
143 } syscall_fmts[] = {
144         { .name     = "access",     .errmsg = true, },
145         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
146         { .name     = "brk",        .hexret = true,
147           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
148         { .name     = "mmap",       .hexret = true, },
149         { .name     = "connect",    .errmsg = true, },
150         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
151         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
152         { .name     = "futex",      .errmsg = true, },
153         { .name     = "ioctl",      .errmsg = true,
154           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
155         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
156         { .name     = "madvise",    .errmsg = true,
157           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
158                              [2] = SCA_MADV_BHV, /* behavior */ }, },
159         { .name     = "mmap",       .hexret = true,
160           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
161                              [2] = SCA_MMAP_PROT, /* prot */
162                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
163         { .name     = "mprotect",   .errmsg = true,
164           .arg_scnprintf = { [0] = SCA_HEX, /* start */
165                              [2] = SCA_MMAP_PROT, /* prot */ }, },
166         { .name     = "mremap",     .hexret = true,
167           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
168                              [4] = SCA_HEX, /* new_addr */ }, },
169         { .name     = "munmap",     .errmsg = true,
170           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
171         { .name     = "open",       .errmsg = true, },
172         { .name     = "poll",       .errmsg = true, .timeout = true, },
173         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
174         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
175         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
176         { .name     = "read",       .errmsg = true, },
177         { .name     = "recvfrom",   .errmsg = true, },
178         { .name     = "select",     .errmsg = true, .timeout = true, },
179         { .name     = "socket",     .errmsg = true, },
180         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
181         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
182 };
183
184 static int syscall_fmt__cmp(const void *name, const void *fmtp)
185 {
186         const struct syscall_fmt *fmt = fmtp;
187         return strcmp(name, fmt->name);
188 }
189
190 static struct syscall_fmt *syscall_fmt__find(const char *name)
191 {
192         const int nmemb = ARRAY_SIZE(syscall_fmts);
193         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
194 }
195
/*
 * Per-syscall state, kept in trace->syscalls.table and indexed by syscall id.
 */
struct syscall {
	/* Format of the "sys_enter_<name>" tracepoint; its fields name the arguments. */
	struct event_format *tp_format;
	/* Syscall name as returned by audit_syscall_to_name(). */
	const char *name;
	/* Set when the event qualifier list excludes this syscall. */
	bool filtered;
	/* Matching syscall_fmts entry, or NULL if there is none. */
	struct syscall_fmt *fmt;
	/* Per-argument formatters, allocated by syscall__set_arg_fmts(). */
	size_t (**arg_scnprintf)(char *bf, size_t size, unsigned long arg);
};
203
204 static size_t fprintf_duration(unsigned long t, FILE *fp)
205 {
206         double duration = (double)t / NSEC_PER_MSEC;
207         size_t printed = fprintf(fp, "(");
208
209         if (duration >= 1.0)
210                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
211         else if (duration >= 0.01)
212                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
213         else
214                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
215         return printed + fprintf(fp, "): ");
216 }
217
218 struct thread_trace {
219         u64               entry_time;
220         u64               exit_time;
221         bool              entry_pending;
222         unsigned long     nr_events;
223         char              *entry_str;
224         double            runtime_ms;
225 };
226
227 static struct thread_trace *thread_trace__new(void)
228 {
229         return zalloc(sizeof(struct thread_trace));
230 }
231
232 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
233 {
234         struct thread_trace *ttrace;
235
236         if (thread == NULL)
237                 goto fail;
238
239         if (thread->priv == NULL)
240                 thread->priv = thread_trace__new();
241                 
242         if (thread->priv == NULL)
243                 goto fail;
244
245         ttrace = thread->priv;
246         ++ttrace->nr_events;
247
248         return ttrace;
249 fail:
250         color_fprintf(fp, PERF_COLOR_RED,
251                       "WARNING: not enough memory, dropping samples!\n");
252         return NULL;
253 }
254
255 struct trace {
256         struct perf_tool        tool;
257         int                     audit_machine;
258         struct {
259                 int             max;
260                 struct syscall  *table;
261         } syscalls;
262         struct perf_record_opts opts;
263         struct machine          host;
264         u64                     base_time;
265         FILE                    *output;
266         unsigned long           nr_events;
267         struct strlist          *ev_qualifier;
268         bool                    not_ev_qualifier;
269         struct intlist          *tid_list;
270         struct intlist          *pid_list;
271         bool                    sched;
272         bool                    multiple_threads;
273         double                  duration_filter;
274         double                  runtime_ms;
275 };
276
277 static bool trace__filter_duration(struct trace *trace, double t)
278 {
279         return t < (trace->duration_filter * NSEC_PER_MSEC);
280 }
281
282 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
283 {
284         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
285
286         return fprintf(fp, "%10.3f ", ts);
287 }
288
/*
 * Set from sig_handler() and polled by the event loop.  It is written from
 * a signal handler, so it has to be a volatile sig_atomic_t: that is the
 * only object type a handler may portably store to.
 */
static volatile sig_atomic_t done = 0;

/*
 * Signal handler: only record that the main loop should stop.
 * @sig is the signal number and is not used.
 */
static void sig_handler(int sig)
{
	(void)sig;
	done = 1;
}
295
296 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
297                                         u64 duration, u64 tstamp, FILE *fp)
298 {
299         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
300         printed += fprintf_duration(duration, fp);
301
302         if (trace->multiple_threads)
303                 printed += fprintf(fp, "%d ", thread->tid);
304
305         return printed;
306 }
307
308 static int trace__process_event(struct trace *trace, struct machine *machine,
309                                 union perf_event *event)
310 {
311         int ret = 0;
312
313         switch (event->header.type) {
314         case PERF_RECORD_LOST:
315                 color_fprintf(trace->output, PERF_COLOR_RED,
316                               "LOST %" PRIu64 " events!\n", event->lost.lost);
317                 ret = machine__process_lost_event(machine, event);
318         default:
319                 ret = machine__process_event(machine, event);
320                 break;
321         }
322
323         return ret;
324 }
325
326 static int trace__tool_process(struct perf_tool *tool,
327                                union perf_event *event,
328                                struct perf_sample *sample __maybe_unused,
329                                struct machine *machine)
330 {
331         struct trace *trace = container_of(tool, struct trace, tool);
332         return trace__process_event(trace, machine, event);
333 }
334
335 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
336 {
337         int err = symbol__init();
338
339         if (err)
340                 return err;
341
342         machine__init(&trace->host, "", HOST_KERNEL_ID);
343         machine__create_kernel_maps(&trace->host);
344
345         if (perf_target__has_task(&trace->opts.target)) {
346                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
347                                                         trace__tool_process,
348                                                         &trace->host);
349         } else {
350                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
351                                                      &trace->host);
352         }
353
354         if (err)
355                 symbol__exit();
356
357         return err;
358 }
359
360 static int syscall__set_arg_fmts(struct syscall *sc)
361 {
362         struct format_field *field;
363         int idx = 0;
364
365         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
366         if (sc->arg_scnprintf == NULL)
367                 return -1;
368
369         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
370                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
371                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
372                 else if (field->flags & FIELD_IS_POINTER)
373                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
374                 ++idx;
375         }
376
377         return 0;
378 }
379
380 static int trace__read_syscall_info(struct trace *trace, int id)
381 {
382         char tp_name[128];
383         struct syscall *sc;
384         const char *name = audit_syscall_to_name(id, trace->audit_machine);
385
386         if (name == NULL)
387                 return -1;
388
389         if (id > trace->syscalls.max) {
390                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
391
392                 if (nsyscalls == NULL)
393                         return -1;
394
395                 if (trace->syscalls.max != -1) {
396                         memset(nsyscalls + trace->syscalls.max + 1, 0,
397                                (id - trace->syscalls.max) * sizeof(*sc));
398                 } else {
399                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
400                 }
401
402                 trace->syscalls.table = nsyscalls;
403                 trace->syscalls.max   = id;
404         }
405
406         sc = trace->syscalls.table + id;
407         sc->name = name;
408
409         if (trace->ev_qualifier) {
410                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
411
412                 if (!(in ^ trace->not_ev_qualifier)) {
413                         sc->filtered = true;
414                         /*
415                          * No need to do read tracepoint information since this will be
416                          * filtered out.
417                          */
418                         return 0;
419                 }
420         }
421
422         sc->fmt  = syscall_fmt__find(sc->name);
423
424         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
425         sc->tp_format = event_format__new("syscalls", tp_name);
426
427         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
428                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
429                 sc->tp_format = event_format__new("syscalls", tp_name);
430         }
431
432         if (sc->tp_format == NULL)
433                 return -1;
434
435         return syscall__set_arg_fmts(sc);
436 }
437
438 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
439                                       unsigned long *args)
440 {
441         int i = 0;
442         size_t printed = 0;
443
444         if (sc->tp_format != NULL) {
445                 struct format_field *field;
446
447                 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
448                         printed += scnprintf(bf + printed, size - printed,
449                                              "%s%s: ", printed ? ", " : "", field->name);
450
451                         if (sc->arg_scnprintf && sc->arg_scnprintf[i])
452                                 printed += sc->arg_scnprintf[i](bf + printed, size - printed, args[i]);
453                         else
454                                 printed += scnprintf(bf + printed, size - printed,
455                                                      "%ld", args[i]);
456                        ++i;
457                 }
458         } else {
459                 while (i < 6) {
460                         printed += scnprintf(bf + printed, size - printed,
461                                              "%sarg%d: %ld",
462                                              printed ? ", " : "", i, args[i]);
463                         ++i;
464                 }
465         }
466
467         return printed;
468 }
469
/* Signature of the per-tracepoint sample handlers stored in evsel->handler.func. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
472
473 static struct syscall *trace__syscall_info(struct trace *trace,
474                                            struct perf_evsel *evsel,
475                                            struct perf_sample *sample)
476 {
477         int id = perf_evsel__intval(evsel, sample, "id");
478
479         if (id < 0) {
480
481                 /*
482                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
483                  * before that, leaving at a higher verbosity level till that is
484                  * explained. Reproduced with plain ftrace with:
485                  *
486                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
487                  * grep "NR -1 " /t/trace_pipe
488                  *
489                  * After generating some load on the machine.
490                  */
491                 if (verbose > 1) {
492                         static u64 n;
493                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
494                                 id, perf_evsel__name(evsel), ++n);
495                 }
496                 return NULL;
497         }
498
499         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
500             trace__read_syscall_info(trace, id))
501                 goto out_cant_read;
502
503         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
504                 goto out_cant_read;
505
506         return &trace->syscalls.table[id];
507
508 out_cant_read:
509         if (verbose) {
510                 fprintf(trace->output, "Problems reading syscall %d", id);
511                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
512                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
513                 fputs(" information\n", trace->output);
514         }
515         return NULL;
516 }
517
518 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
519                             struct perf_sample *sample)
520 {
521         char *msg;
522         void *args;
523         size_t printed = 0;
524         struct thread *thread;
525         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
526         struct thread_trace *ttrace;
527
528         if (sc == NULL)
529                 return -1;
530
531         if (sc->filtered)
532                 return 0;
533
534         thread = machine__findnew_thread(&trace->host, sample->pid,
535                                          sample->tid);
536         ttrace = thread__trace(thread, trace->output);
537         if (ttrace == NULL)
538                 return -1;
539
540         args = perf_evsel__rawptr(evsel, sample, "args");
541         if (args == NULL) {
542                 fprintf(trace->output, "Problems reading syscall arguments\n");
543                 return -1;
544         }
545
546         ttrace = thread->priv;
547
548         if (ttrace->entry_str == NULL) {
549                 ttrace->entry_str = malloc(1024);
550                 if (!ttrace->entry_str)
551                         return -1;
552         }
553
554         ttrace->entry_time = sample->time;
555         msg = ttrace->entry_str;
556         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
557
558         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
559
560         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
561                 if (!trace->duration_filter) {
562                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
563                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
564                 }
565         } else
566                 ttrace->entry_pending = true;
567
568         return 0;
569 }
570
571 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
572                            struct perf_sample *sample)
573 {
574         int ret;
575         u64 duration = 0;
576         struct thread *thread;
577         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
578         struct thread_trace *ttrace;
579
580         if (sc == NULL)
581                 return -1;
582
583         if (sc->filtered)
584                 return 0;
585
586         thread = machine__findnew_thread(&trace->host, sample->pid,
587                                          sample->tid);
588         ttrace = thread__trace(thread, trace->output);
589         if (ttrace == NULL)
590                 return -1;
591
592         ret = perf_evsel__intval(evsel, sample, "ret");
593
594         ttrace = thread->priv;
595
596         ttrace->exit_time = sample->time;
597
598         if (ttrace->entry_time) {
599                 duration = sample->time - ttrace->entry_time;
600                 if (trace__filter_duration(trace, duration))
601                         goto out;
602         } else if (trace->duration_filter)
603                 goto out;
604
605         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
606
607         if (ttrace->entry_pending) {
608                 fprintf(trace->output, "%-70s", ttrace->entry_str);
609         } else {
610                 fprintf(trace->output, " ... [");
611                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
612                 fprintf(trace->output, "]: %s()", sc->name);
613         }
614
615         if (sc->fmt == NULL) {
616 signed_print:
617                 fprintf(trace->output, ") = %d", ret);
618         } else if (ret < 0 && sc->fmt->errmsg) {
619                 char bf[256];
620                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
621                            *e = audit_errno_to_name(-ret);
622
623                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
624         } else if (ret == 0 && sc->fmt->timeout)
625                 fprintf(trace->output, ") = 0 Timeout");
626         else if (sc->fmt->hexret)
627                 fprintf(trace->output, ") = %#x", ret);
628         else
629                 goto signed_print;
630
631         fputc('\n', trace->output);
632 out:
633         ttrace->entry_pending = false;
634
635         return 0;
636 }
637
638 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
639                                      struct perf_sample *sample)
640 {
641         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
642         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
643         struct thread *thread = machine__findnew_thread(&trace->host,
644                                                         sample->pid,
645                                                         sample->tid);
646         struct thread_trace *ttrace = thread__trace(thread, trace->output);
647
648         if (ttrace == NULL)
649                 goto out_dump;
650
651         ttrace->runtime_ms += runtime_ms;
652         trace->runtime_ms += runtime_ms;
653         return 0;
654
655 out_dump:
656         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
657                evsel->name,
658                perf_evsel__strval(evsel, sample, "comm"),
659                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
660                runtime,
661                perf_evsel__intval(evsel, sample, "vruntime"));
662         return 0;
663 }
664
665 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
666 {
667         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
668             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
669                 return false;
670
671         if (trace->pid_list || trace->tid_list)
672                 return true;
673
674         return false;
675 }
676
677 static int trace__process_sample(struct perf_tool *tool,
678                                  union perf_event *event __maybe_unused,
679                                  struct perf_sample *sample,
680                                  struct perf_evsel *evsel,
681                                  struct machine *machine __maybe_unused)
682 {
683         struct trace *trace = container_of(tool, struct trace, tool);
684         int err = 0;
685
686         tracepoint_handler handler = evsel->handler.func;
687
688         if (skip_sample(trace, sample))
689                 return 0;
690
691         if (trace->base_time == 0)
692                 trace->base_time = sample->time;
693
694         if (handler)
695                 handler(trace, evsel, sample);
696
697         return err;
698 }
699
700 static bool
701 perf_session__has_tp(struct perf_session *session, const char *name)
702 {
703         struct perf_evsel *evsel;
704
705         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
706
707         return evsel != NULL;
708 }
709
710 static int parse_target_str(struct trace *trace)
711 {
712         if (trace->opts.target.pid) {
713                 trace->pid_list = intlist__new(trace->opts.target.pid);
714                 if (trace->pid_list == NULL) {
715                         pr_err("Error parsing process id string\n");
716                         return -EINVAL;
717                 }
718         }
719
720         if (trace->opts.target.tid) {
721                 trace->tid_list = intlist__new(trace->opts.target.tid);
722                 if (trace->tid_list == NULL) {
723                         pr_err("Error parsing thread id string\n");
724                         return -EINVAL;
725                 }
726         }
727
728         return 0;
729 }
730
731 static int trace__run(struct trace *trace, int argc, const char **argv)
732 {
733         struct perf_evlist *evlist = perf_evlist__new();
734         struct perf_evsel *evsel;
735         int err = -1, i;
736         unsigned long before;
737         const bool forks = argc > 0;
738
739         if (evlist == NULL) {
740                 fprintf(trace->output, "Not enough memory to run!\n");
741                 goto out;
742         }
743
744         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
745             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
746                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
747                 goto out_delete_evlist;
748         }
749
750         if (trace->sched &&
751             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
752                                    trace__sched_stat_runtime)) {
753                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
754                 goto out_delete_evlist;
755         }
756
757         err = perf_evlist__create_maps(evlist, &trace->opts.target);
758         if (err < 0) {
759                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
760                 goto out_delete_evlist;
761         }
762
763         err = trace__symbols_init(trace, evlist);
764         if (err < 0) {
765                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
766                 goto out_delete_maps;
767         }
768
769         perf_evlist__config(evlist, &trace->opts);
770
771         signal(SIGCHLD, sig_handler);
772         signal(SIGINT, sig_handler);
773
774         if (forks) {
775                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
776                                                     argv, false, false);
777                 if (err < 0) {
778                         fprintf(trace->output, "Couldn't run the workload!\n");
779                         goto out_delete_maps;
780                 }
781         }
782
783         err = perf_evlist__open(evlist);
784         if (err < 0) {
785                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
786                 goto out_delete_maps;
787         }
788
789         err = perf_evlist__mmap(evlist, UINT_MAX, false);
790         if (err < 0) {
791                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
792                 goto out_close_evlist;
793         }
794
795         perf_evlist__enable(evlist);
796
797         if (forks)
798                 perf_evlist__start_workload(evlist);
799
800         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
801 again:
802         before = trace->nr_events;
803
804         for (i = 0; i < evlist->nr_mmaps; i++) {
805                 union perf_event *event;
806
807                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
808                         const u32 type = event->header.type;
809                         tracepoint_handler handler;
810                         struct perf_sample sample;
811
812                         ++trace->nr_events;
813
814                         err = perf_evlist__parse_sample(evlist, event, &sample);
815                         if (err) {
816                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
817                                 continue;
818                         }
819
820                         if (trace->base_time == 0)
821                                 trace->base_time = sample.time;
822
823                         if (type != PERF_RECORD_SAMPLE) {
824                                 trace__process_event(trace, &trace->host, event);
825                                 continue;
826                         }
827
828                         evsel = perf_evlist__id2evsel(evlist, sample.id);
829                         if (evsel == NULL) {
830                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
831                                 continue;
832                         }
833
834                         if (sample.raw_data == NULL) {
835                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
836                                        perf_evsel__name(evsel), sample.tid,
837                                        sample.cpu, sample.raw_size);
838                                 continue;
839                         }
840
841                         handler = evsel->handler.func;
842                         handler(trace, evsel, &sample);
843                 }
844         }
845
846         if (trace->nr_events == before) {
847                 if (done)
848                         goto out_unmap_evlist;
849
850                 poll(evlist->pollfd, evlist->nr_fds, -1);
851         }
852
853         if (done)
854                 perf_evlist__disable(evlist);
855
856         goto again;
857
858 out_unmap_evlist:
859         perf_evlist__munmap(evlist);
860 out_close_evlist:
861         perf_evlist__close(evlist);
862 out_delete_maps:
863         perf_evlist__delete_maps(evlist);
864 out_delete_evlist:
865         perf_evlist__delete(evlist);
866 out:
867         return err;
868 }
869
870 static int trace__replay(struct trace *trace)
871 {
872         const struct perf_evsel_str_handler handlers[] = {
873                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
874                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
875         };
876
877         struct perf_session *session;
878         int err = -1;
879
880         trace->tool.sample        = trace__process_sample;
881         trace->tool.mmap          = perf_event__process_mmap;
882         trace->tool.comm          = perf_event__process_comm;
883         trace->tool.exit          = perf_event__process_exit;
884         trace->tool.fork          = perf_event__process_fork;
885         trace->tool.attr          = perf_event__process_attr;
886         trace->tool.tracing_data = perf_event__process_tracing_data;
887         trace->tool.build_id      = perf_event__process_build_id;
888
889         trace->tool.ordered_samples = true;
890         trace->tool.ordering_requires_timestamps = true;
891
892         /* add tid to output */
893         trace->multiple_threads = true;
894
895         if (symbol__init() < 0)
896                 return -1;
897
898         session = perf_session__new(input_name, O_RDONLY, 0, false,
899                                     &trace->tool);
900         if (session == NULL)
901                 return -ENOMEM;
902
903         err = perf_session__set_tracepoints_handlers(session, handlers);
904         if (err)
905                 goto out;
906
907         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
908                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
909                 goto out;
910         }
911
912         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
913                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
914                 goto out;
915         }
916
917         err = parse_target_str(trace);
918         if (err != 0)
919                 goto out;
920
921         setup_pager();
922
923         err = perf_session__process_events(session, &trace->tool);
924         if (err)
925                 pr_err("Failed to process events, error %d", err);
926
927 out:
928         perf_session__delete(session);
929
930         return err;
931 }
932
/*
 * Print the banner and column titles that precede the per-thread summary.
 *
 * Returns the sum of the fprintf() return values, i.e. the number of bytes
 * written to fp when every write succeeds.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char * const banner[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
        };
        size_t total = 0;
        size_t row;

        for (row = 0; row < sizeof(banner) / sizeof(banner[0]); row++)
                total += fprintf(fp, "%s", banner[row]);

        return total;
}
944
945 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
946 {
947         size_t printed = trace__fprintf_threads_header(fp);
948         struct rb_node *nd;
949
950         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
951                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
952                 struct thread_trace *ttrace = thread->priv;
953                 const char *color;
954                 double ratio;
955
956                 if (ttrace == NULL)
957                         continue;
958
959                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
960
961                 color = PERF_COLOR_NORMAL;
962                 if (ratio > 50.0)
963                         color = PERF_COLOR_RED;
964                 else if (ratio > 25.0)
965                         color = PERF_COLOR_GREEN;
966                 else if (ratio > 5.0)
967                         color = PERF_COLOR_YELLOW;
968
969                 printed += color_fprintf(fp, color, "%20s", thread->comm);
970                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
971                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
972                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
973         }
974
975         return printed;
976 }
977
978 static int trace__set_duration(const struct option *opt, const char *str,
979                                int unset __maybe_unused)
980 {
981         struct trace *trace = opt->value;
982
983         trace->duration_filter = atof(str);
984         return 0;
985 }
986
987 static int trace__open_output(struct trace *trace, const char *filename)
988 {
989         struct stat st;
990
991         if (!stat(filename, &st) && st.st_size) {
992                 char oldname[PATH_MAX];
993
994                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
995                 unlink(oldname);
996                 rename(filename, oldname);
997         }
998
999         trace->output = fopen(filename, "w");
1000
1001         return trace->output == NULL ? -errno : 0;
1002 }
1003
1004 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1005 {
1006         const char * const trace_usage[] = {
1007                 "perf trace [<options>] [<command>]",
1008                 "perf trace [<options>] -- <command> [<options>]",
1009                 NULL
1010         };
1011         struct trace trace = {
1012                 .audit_machine = audit_detect_machine(),
1013                 .syscalls = {
1014                         . max = -1,
1015                 },
1016                 .opts = {
1017                         .target = {
1018                                 .uid       = UINT_MAX,
1019                                 .uses_mmap = true,
1020                         },
1021                         .user_freq     = UINT_MAX,
1022                         .user_interval = ULLONG_MAX,
1023                         .no_delay      = true,
1024                         .mmap_pages    = 1024,
1025                 },
1026                 .output = stdout,
1027         };
1028         const char *output_name = NULL;
1029         const char *ev_qualifier_str = NULL;
1030         const struct option trace_options[] = {
1031         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1032                     "list of events to trace"),
1033         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1034         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1035         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1036                     "trace events on existing process id"),
1037         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1038                     "trace events on existing thread id"),
1039         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1040                     "system-wide collection from all CPUs"),
1041         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1042                     "list of cpus to monitor"),
1043         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1044                     "child tasks do not inherit counters"),
1045         OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1046                      "number of mmap data pages"),
1047         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1048                    "user to profile"),
1049         OPT_CALLBACK(0, "duration", &trace, "float",
1050                      "show only events with duration > N.M ms",
1051                      trace__set_duration),
1052         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1053         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1054         OPT_END()
1055         };
1056         int err;
1057         char bf[BUFSIZ];
1058
1059         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1060
1061         if (output_name != NULL) {
1062                 err = trace__open_output(&trace, output_name);
1063                 if (err < 0) {
1064                         perror("failed to create output file");
1065                         goto out;
1066                 }
1067         }
1068
1069         if (ev_qualifier_str != NULL) {
1070                 const char *s = ev_qualifier_str;
1071
1072                 trace.not_ev_qualifier = *s == '!';
1073                 if (trace.not_ev_qualifier)
1074                         ++s;
1075                 trace.ev_qualifier = strlist__new(true, s);
1076                 if (trace.ev_qualifier == NULL) {
1077                         fputs("Not enough memory to parse event qualifier",
1078                               trace.output);
1079                         err = -ENOMEM;
1080                         goto out_close;
1081                 }
1082         }
1083
1084         err = perf_target__validate(&trace.opts.target);
1085         if (err) {
1086                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1087                 fprintf(trace.output, "%s", bf);
1088                 goto out_close;
1089         }
1090
1091         err = perf_target__parse_uid(&trace.opts.target);
1092         if (err) {
1093                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1094                 fprintf(trace.output, "%s", bf);
1095                 goto out_close;
1096         }
1097
1098         if (!argc && perf_target__none(&trace.opts.target))
1099                 trace.opts.target.system_wide = true;
1100
1101         if (input_name)
1102                 err = trace__replay(&trace);
1103         else
1104                 err = trace__run(&trace, argc, argv);
1105
1106         if (trace.sched && !err)
1107                 trace__fprintf_thread_summary(&trace, trace.output);
1108
1109 out_close:
1110         if (output_name != NULL)
1111                 fclose(trace.output);
1112 out:
1113         return err;
1114 }