perf tools: Give user better message if precise is not supported
[platform/adaptation/renesas_rcar/renesas_kernel.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Minimal on_exit(3) replacement for C libraries that lack it.
 * Registered functions receive the process exit status plus a caller
 * supplied argument.  The exit status is captured by shadowing exit()
 * with a macro that stores its argument in __exitcode before invoking
 * the real exit() (hence the parenthesized "(exit)" call below).
 */
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to run at exit with @arg.  Fails with -ENOMEM once
 * all ATEXIT_MAX slots are in use; the atexit() trampoline is installed
 * lazily on the first registration.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
        if (__on_exit_count == ATEXIT_MAX)
                return -ENOMEM;
        else if (__on_exit_count == 0)
                atexit(__handle_on_exit_funcs);
        __on_exit_funcs[__on_exit_count] = function;
        __on_exit_args[__on_exit_count++] = arg;
        return 0;
}

/* atexit() trampoline: run registered handlers in registration order. */
static void __handle_on_exit_funcs(void)
{
        int i;
        for (i = 0; i < __on_exit_count; i++)
                __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
65
/* How to treat a pre-existing output file. */
enum write_mode_t {
        WRITE_FORCE,    /* overwrite (the old file is renamed to *.old) */
        WRITE_APPEND    /* append; requires a compatible existing header */
};
70
/*
 * All state for one 'perf record' run.  The embedded perf_tool is
 * recovered from callback pointers via container_of() (see
 * process_synthesized_event()).
 */
struct perf_record {
        struct perf_tool        tool;           /* synthesized-event callbacks */
        struct perf_record_opts opts;           /* command line recording options */
        u64                     bytes_written;  /* payload bytes written so far */
        const char              *output_name;   /* output path, "perf.data" by default */
        struct perf_evlist      *evlist;        /* events being recorded */
        struct perf_session     *session;       /* on-disk header/session state */
        const char              *progname;      /* argv[0], for psignal() messages */
        int                     output;         /* output file descriptor */
        unsigned int            page_size;      /* sysconf(_SC_PAGE_SIZE) */
        int                     realtime_prio;  /* nonzero: SCHED_FIFO priority to set */
        enum write_mode_t       write_mode;     /* force vs. append semantics */
        bool                    no_buildid;     /* skip the build-id post-processing pass */
        bool                    no_buildid_cache; /* CLI flag; not used in this chunk */
        bool                    force;          /* CLI flag; not used in this chunk */
        bool                    file_new;       /* output file has no prior header */
        bool                    append_file;    /* CLI flag; not used in this chunk */
        long                    samples;        /* mmap buffers drained with data */
        off_t                   post_processing_offset; /* where sample data starts */
};
91
92 static void advance_output(struct perf_record *rec, size_t size)
93 {
94         rec->bytes_written += size;
95 }
96
97 static int write_output(struct perf_record *rec, void *buf, size_t size)
98 {
99         while (size) {
100                 int ret = write(rec->output, buf, size);
101
102                 if (ret < 0) {
103                         pr_err("failed to write\n");
104                         return -1;
105                 }
106
107                 size -= ret;
108                 buf += ret;
109
110                 rec->bytes_written += ret;
111         }
112
113         return 0;
114 }
115
116 static int process_synthesized_event(struct perf_tool *tool,
117                                      union perf_event *event,
118                                      struct perf_sample *sample __maybe_unused,
119                                      struct machine *machine __maybe_unused)
120 {
121         struct perf_record *rec = container_of(tool, struct perf_record, tool);
122         if (write_output(rec, event, event->header.size) < 0)
123                 return -1;
124
125         return 0;
126 }
127
/*
 * Drain one mmap'ed ring buffer into the output file.  When the valid
 * region wraps around the end of the buffer the copy is done in two
 * chunks.  The consumer tail is only advanced after the data has been
 * written out.  Returns 0 on success, -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        /* first page of the mapping is the control page, data follows it */
        unsigned char *data = md->base + rec->page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (old == head)
                return 0;       /* nothing new since last drain */

        rec->samples++;

        size = head - old;

        if ((old & md->mask) + size != (head & md->mask)) {
                /* wrapped: write from 'old' up to the end of the buffer */
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                if (write_output(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        /* write the (remaining) linear chunk */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        if (write_output(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        /* publish the new tail so the kernel can reuse the space */
        md->prev = old;
        perf_mmap__write_tail(md, old);

out:
        return rc;
}
171
/* Flags shared between the signal handlers and the main record loop. */
static volatile int done = 0;           /* ask the record loop to stop */
static volatile int signr = -1;         /* which signal requested the stop */
static volatile int child_finished = 0; /* the forked workload has exited */

/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1: note which signal fired
 * (so the exit path can re-raise it) and request loop termination.
 */
static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}
184
/*
 * on_exit() handler: terminate (if still running) and reap the forked
 * workload, then re-raise the fatal signal with the default disposition
 * so the parent shell sees the true cause of death.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
        struct perf_record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        /* normal exit, or the SIGUSR1 "stop quietly" path: nothing to re-raise */
        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}
205
206 static bool perf_evlist__equal(struct perf_evlist *evlist,
207                                struct perf_evlist *other)
208 {
209         struct perf_evsel *pos, *pair;
210
211         if (evlist->nr_entries != other->nr_entries)
212                 return false;
213
214         pair = perf_evlist__first(other);
215
216         list_for_each_entry(pos, &evlist->entries, node) {
217                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
218                         return false;
219                 pair = perf_evsel__next(pair);
220         }
221
222         return true;
223 }
224
/*
 * Configure attrs, open one counter per event, and mmap the results.
 * Implements several fallbacks for older kernels (clearing
 * exclude_guest/exclude_host, dropping sample_id_all, and replacing the
 * hardware cycles event with the cpu-clock software event), retrying the
 * open via the labels below after each adjustment.  Prints targeted
 * diagnostics for the common failure modes.  Returns 0 on success,
 * negative errno-style value otherwise.
 */
static int perf_record__open(struct perf_record *rec)
{
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct perf_record_opts *opts = &rec->opts;
        int rc = 0;

        perf_evlist__config_attrs(evlist, opts);

        if (opts->group)
                perf_evlist__set_leader(evlist);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
                if (opts->exclude_guest_missing)
                        attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
                attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                rc = -err;
                                goto out;
                        } else if (err ==  ENODEV && opts->target.cpu_list) {
                                pr_err("No such device - did you specify"
                                       " an out-of-range profile CPU?\n");
                                rc = -err;
                                goto out;
                        } else if (err == EINVAL) {
                                /* EINVAL fallbacks: try once without guest/host
                                 * exclusion, then once without sample_id_all. */
                                if (!opts->exclude_guest_missing &&
                                    (attr->exclude_guest || attr->exclude_host)) {
                                        pr_debug("Old kernel, cannot exclude "
                                                 "guest or host samples.\n");
                                        opts->exclude_guest_missing = true;
                                        goto fallback_missing_features;
                                } else if (!opts->sample_id_all_missing) {
                                        /*
                                         * Old kernel, no attr->sample_id_type_all field
                                         */
                                        opts->sample_id_all_missing = true;
                                        if (!opts->sample_time && !opts->raw_samples && !time_needed)
                                                attr->sample_type &= ~PERF_SAMPLE_TIME;

                                        goto retry_sample_id;
                                }
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support.
                         *
                         * PPC returns ENXIO until 2.6.37 (behavior changed
                         * with commit b0a873e).
                         */
                        if ((err == ENOENT || err == ENXIO)
                                        && attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                /* drop the cached name so it is regenerated */
                                if (pos->name) {
                                        free(pos->name);
                                        pos->name = NULL;
                                }
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__error("The %s event is not supported.\n",
                                          perf_evsel__name(pos));
                                rc = -err;
                                goto out;
                        } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
                                ui__error("\'precise\' request may not be supported. "
                                          "Try removing 'p' modifier\n");
                                rc = -err;
                                goto out;
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d "
                              "(%s) for event %s. /bin/dmesg may provide "
                              "additional information.\n",
                              err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE &&
                            err == EOPNOTSUPP) {
                                pr_err("No hardware sampling interrupt available."
                                       " No APIC? If so then you can boot the kernel"
                                       " with the \"lapic\" boot parameter to"
                                       " force-enable it.\n");
                                rc = -err;
                                goto out;
                        }
#endif

                        pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        rc = -err;
                        goto out;
                }
        }

        /* apply any per-event filters requested on the command line */
        if (perf_evlist__apply_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                rc = -1;
                goto out;
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %d)\n", opts->mmap_pages);
                        rc = -errno;
                } else if (!is_power_of_2(opts->mmap_pages)) {
                        pr_err("--mmap_pages/-m value must be a power of two.");
                        rc = -EINVAL;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
                        rc = -errno;
                }
                goto out;
        }

        /* on append, the new evlist must match the one already on disk */
        if (rec->file_new)
                session->evlist = evlist;
        else {
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        rc = -1;
                        goto out;
                }
        }

        perf_session__set_id_hdr_size(session);
out:
        return rc;
}
390
391 static int process_buildids(struct perf_record *rec)
392 {
393         u64 size = lseek(rec->output, 0, SEEK_CUR);
394
395         if (size == 0)
396                 return 0;
397
398         rec->session->fd = rec->output;
399         return __perf_session__process_events(rec->session, rec->post_processing_offset,
400                                               size - rec->post_processing_offset,
401                                               size, &build_id__mark_dso_hit_ops);
402 }
403
/*
 * on_exit() handler run at process exit: on a successful run with a
 * regular output file, account the written bytes in the header, run the
 * build-id pass, rewrite the final header, and tear down the session,
 * evlist and symbol machinery.  Pipe output needs no trailer; error
 * exits (status != 0) leave the file as-is.
 */
static void perf_record__exit(int status, void *arg)
{
        struct perf_record *rec = arg;

        if (status != 0)
                return;

        if (!rec->opts.pipe_output) {
                rec->session->header.data_size += rec->bytes_written;

                if (!rec->no_buildid)
                        process_buildids(rec);
                perf_session__write_header(rec->session, rec->evlist,
                                           rec->output, true);
                perf_session__delete(rec->session);
                perf_evlist__delete(rec->evlist);
                symbol__exit();
        }
}
423
/*
 * perf_session__process_machines() callback: synthesize module and
 * kernel mmap events for every guest machine.  The host machine is
 * skipped; it is synthesized separately in __cmd_record().
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;

        if (machine__is_host(machine))
                return;

        /*
         *As for guest kernel when processing subcommand record&report,
         *we arrange module mmap prior to guest kernel mmap and trigger
         *a preload dso because default guest module symbols are loaded
         *from guest kallsyms instead of /lib/modules/XXX/XXX. This
         *method is used to avoid symbol missing when the first addr is
         *in module instead of in guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}
459
/*
 * Header-only PERF_RECORD_FINISHED_ROUND marker, written after each full
 * pass over the mmap buffers when tracing data is recorded (see
 * perf_record__mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
464
465 static int perf_record__mmap_read_all(struct perf_record *rec)
466 {
467         int i;
468         int rc = 0;
469
470         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
471                 if (rec->evlist->mmap[i].base) {
472                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
473                                 rc = -1;
474                                 goto out;
475                         }
476                 }
477         }
478
479         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
480                 rc = write_output(rec, &finished_round_event,
481                                   sizeof(finished_round_event));
482
483 out:
484         return rc;
485 }
486
/*
 * The record command proper: set up signals and the output file, open
 * the counters, synthesize the pre-existing state (attrs, kernel and
 * module maps, threads), then loop draining the mmap buffers until the
 * workload ends or a signal arrives.  Final header/buildid writing
 * happens in the perf_record__exit() on_exit handler.
 * Returns 0 on success, negative on error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err, output, feat;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct perf_record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        const char *output_name = rec->output_name;
        struct perf_session *session;

        rec->progname = argv[0];

        rec->page_size = sysconf(_SC_PAGE_SIZE);

        on_exit(perf_record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        /* pick the output: stdout pipe, "-", or a regular file (perf.data) */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        opts->pipe_output = true;
                else
                        rec->output_name = output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        opts->pipe_output = true;
                else if (!stat(output_name, &st) && st.st_size) {
                        /* forced overwrite keeps the previous file as *.old */
                        if (rec->write_mode == WRITE_FORCE) {
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (rec->write_mode == WRITE_APPEND) {
                        /* nothing to append to: fall back to a fresh file */
                        rec->write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (rec->write_mode == WRITE_APPEND)
                rec->file_new = 0;
        else
                flags |= O_TRUNC;

        if (opts->pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                return -1;
        }

        rec->output = output;

        session = perf_session__new(output_name, O_WRONLY,
                                    rec->write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        /* enable all header features, then clear the inapplicable ones */
        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&evsel_list->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        if (perf_record__open(rec) != 0) {
                err = -1;
                goto out_delete_session;
        }

        /*
         * perf_session__delete(session) will be called at perf_record__exit()
         */
        on_exit(perf_record__exit, rec);

        if (opts->pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        goto out_delete_session;
        } else if (rec->file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        goto out_delete_session;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_delete_session;
        }

        /* sample data starts here; the buildid pass scans from this offset */
        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                err = -1;
                goto out_delete_session;
        }

        /* pipe readers cannot seek back, so stream the metadata up front */
        if (opts->pipe_output) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out_delete_session;
                }

                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                         machine);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        goto out_delete_session;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out_delete_session;
                        }
                        advance_output(rec, err);
                }
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, tool,
                                               perf_event__synthesize_guest_os);

        if (!opts->target.system_wide)
                err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                  process_synthesized_event,
                                                  machine);
        else
                err = perf_event__synthesize_threads(tool, process_synthesized_event,
                                               machine);

        if (err != 0)
                goto out_delete_session;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_delete_session;
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

        /* main loop: drain buffers, sleep in poll() until woken or done */
        for (;;) {
                int hits = rec->samples;

                if (perf_record__mmap_read_all(rec) < 0) {
                        err = -1;
                        goto out_delete_session;
                }

                if (hits == rec->samples) {
                        if (done)
                                break;
                        /* NOTE(review): poll() result is ignored here */
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /* stop counting but keep draining until buffers are empty */
                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                output_name,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
746
/* Map a -b/--branch-filter token onto its PERF_SAMPLE_BRANCH_* flag. */
#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
        const char *name;       /* user-visible filter token */
        int mode;               /* corresponding sample_branch flag */
};

/* Recognized branch filter names; terminated by a NULL-named sentinel. */
static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_END
};
767
768 static int
769 parse_branch_stack(const struct option *opt, const char *str, int unset)
770 {
771 #define ONLY_PLM \
772         (PERF_SAMPLE_BRANCH_USER        |\
773          PERF_SAMPLE_BRANCH_KERNEL      |\
774          PERF_SAMPLE_BRANCH_HV)
775
776         uint64_t *mode = (uint64_t *)opt->value;
777         const struct branch_mode *br;
778         char *s, *os = NULL, *p;
779         int ret = -1;
780
781         if (unset)
782                 return 0;
783
784         /*
785          * cannot set it twice, -b + --branch-filter for instance
786          */
787         if (*mode)
788                 return -1;
789
790         /* str may be NULL in case no arg is passed to -b */
791         if (str) {
792                 /* because str is read-only */
793                 s = os = strdup(str);
794                 if (!s)
795                         return -1;
796
797                 for (;;) {
798                         p = strchr(s, ',');
799                         if (p)
800                                 *p = '\0';
801
802                         for (br = branch_modes; br->name; br++) {
803                                 if (!strcasecmp(s, br->name))
804                                         break;
805                         }
806                         if (!br->name) {
807                                 ui__warning("unknown branch filter %s,"
808                                             " check man page\n", s);
809                                 goto error;
810                         }
811
812                         *mode |= br->mode;
813
814                         if (!p)
815                                 break;
816
817                         s = p + 1;
818                 }
819         }
820         ret = 0;
821
822         /* default to any branch */
823         if ((*mode & ~ONLY_PLM) == 0) {
824                 *mode = PERF_SAMPLE_BRANCH_ANY;
825         }
826 error:
827         free(os);
828         return ret;
829 }
830
831 #ifdef LIBUNWIND_SUPPORT
832 static int get_stack_size(char *str, unsigned long *_size)
833 {
834         char *endptr;
835         unsigned long size;
836         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
837
838         size = strtoul(str, &endptr, 0);
839
840         do {
841                 if (*endptr)
842                         break;
843
844                 size = round_up(size, sizeof(u64));
845                 if (!size || size > max_size)
846                         break;
847
848                 *_size = size;
849                 return 0;
850
851         } while (0);
852
853         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
854                max_size, str);
855         return -1;
856 }
857 #endif /* LIBUNWIND_SUPPORT */
858
859 static int
860 parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
861                     int unset)
862 {
863         struct perf_record *rec = (struct perf_record *)opt->value;
864         char *tok, *name, *saveptr = NULL;
865         char *buf;
866         int ret = -1;
867
868         /* --no-call-graph */
869         if (unset)
870                 return 0;
871
872         /* We specified default option if none is provided. */
873         BUG_ON(!arg);
874
875         /* We need buffer that we know we can write to. */
876         buf = malloc(strlen(arg) + 1);
877         if (!buf)
878                 return -ENOMEM;
879
880         strcpy(buf, arg);
881
882         tok = strtok_r((char *)buf, ",", &saveptr);
883         name = tok ? : (char *)buf;
884
885         do {
886                 /* Framepointer style */
887                 if (!strncmp(name, "fp", sizeof("fp"))) {
888                         if (!strtok_r(NULL, ",", &saveptr)) {
889                                 rec->opts.call_graph = CALLCHAIN_FP;
890                                 ret = 0;
891                         } else
892                                 pr_err("callchain: No more arguments "
893                                        "needed for -g fp\n");
894                         break;
895
896 #ifdef LIBUNWIND_SUPPORT
897                 /* Dwarf style */
898                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
899                         const unsigned long default_stack_dump_size = 8192;
900
901                         ret = 0;
902                         rec->opts.call_graph = CALLCHAIN_DWARF;
903                         rec->opts.stack_dump_size = default_stack_dump_size;
904
905                         tok = strtok_r(NULL, ",", &saveptr);
906                         if (tok) {
907                                 unsigned long size = 0;
908
909                                 ret = get_stack_size(tok, &size);
910                                 rec->opts.stack_dump_size = size;
911                         }
912
913                         if (!ret)
914                                 pr_debug("callchain: stack dump size %d\n",
915                                          rec->opts.stack_dump_size);
916 #endif /* LIBUNWIND_SUPPORT */
917                 } else {
918                         pr_err("callchain: Unknown -g option "
919                                "value: %s\n", arg);
920                         break;
921                 }
922
923         } while (0);
924
925         free(buf);
926
927         if (!ret)
928                 pr_debug("callchain: type %d\n", rec->opts.call_graph);
929
930         return ret;
931 }
932
/* Usage strings printed by usage_with_options() on bad invocations. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
938
939 /*
940  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
941  * because we need to have access to it in perf_record__exit, that is called
942  * after cmd_record() exits, but since record_options need to be accessible to
943  * builtin-script, leave it here.
944  *
945  * At least we don't ouch it in all the other functions here directly.
946  *
947  * Just say no to tons of global variables, sigh.
948  */
static struct perf_record record = {
	.opts = {
		/*
		 * UINT_MAX / ULLONG_MAX are "not set by the user" sentinels;
		 * cmd_record() checks user_freq/user_interval against them
		 * before applying them over the defaults.
		 */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,	/* default sampling frequency */
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};
962
/* -g help text; "dwarf" mode is only advertised when libunwind is built in. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
970
971 /*
972  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
973  * with it and switch to use the library functions in perf_evlist that came
974  * from builtin-record.c, i.e. use perf_record_opts,
975  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
976  * using pipes, etc.
977  */
const struct option record_options[] = {
	/* event selection and filtering */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	/* target selection: pid/tid/cpu/system-wide/uid */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	/* sampling parameters */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* branch stack sampling: -b records any branch, -j takes a filter */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
1045
1046 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1047 {
1048         int err = -ENOMEM;
1049         struct perf_evsel *pos;
1050         struct perf_evlist *evsel_list;
1051         struct perf_record *rec = &record;
1052         char errbuf[BUFSIZ];
1053
1054         evsel_list = perf_evlist__new(NULL, NULL);
1055         if (evsel_list == NULL)
1056                 return -ENOMEM;
1057
1058         rec->evlist = evsel_list;
1059
1060         argc = parse_options(argc, argv, record_options, record_usage,
1061                             PARSE_OPT_STOP_AT_NON_OPTION);
1062         if (!argc && perf_target__none(&rec->opts.target))
1063                 usage_with_options(record_usage, record_options);
1064
1065         if (rec->force && rec->append_file) {
1066                 ui__error("Can't overwrite and append at the same time."
1067                           " You need to choose between -f and -A");
1068                 usage_with_options(record_usage, record_options);
1069         } else if (rec->append_file) {
1070                 rec->write_mode = WRITE_APPEND;
1071         } else {
1072                 rec->write_mode = WRITE_FORCE;
1073         }
1074
1075         if (nr_cgroups && !rec->opts.target.system_wide) {
1076                 ui__error("cgroup monitoring only available in"
1077                           " system-wide mode\n");
1078                 usage_with_options(record_usage, record_options);
1079         }
1080
1081         symbol__init();
1082
1083         if (symbol_conf.kptr_restrict)
1084                 pr_warning(
1085 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1086 "check /proc/sys/kernel/kptr_restrict.\n\n"
1087 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1088 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1089 "Samples in kernel modules won't be resolved at all.\n\n"
1090 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1091 "even with a suitable vmlinux or kallsyms file.\n\n");
1092
1093         if (rec->no_buildid_cache || rec->no_buildid)
1094                 disable_buildid_cache();
1095
1096         if (evsel_list->nr_entries == 0 &&
1097             perf_evlist__add_default(evsel_list) < 0) {
1098                 pr_err("Not enough memory for event selector list\n");
1099                 goto out_symbol_exit;
1100         }
1101
1102         err = perf_target__validate(&rec->opts.target);
1103         if (err) {
1104                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1105                 ui__warning("%s", errbuf);
1106         }
1107
1108         err = perf_target__parse_uid(&rec->opts.target);
1109         if (err) {
1110                 int saved_errno = errno;
1111
1112                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1113                 ui__error("%s", errbuf);
1114
1115                 err = -saved_errno;
1116                 goto out_free_fd;
1117         }
1118
1119         err = -ENOMEM;
1120         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
1121                 usage_with_options(record_usage, record_options);
1122
1123         list_for_each_entry(pos, &evsel_list->entries, node) {
1124                 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
1125                         goto out_free_fd;
1126         }
1127
1128         if (rec->opts.user_interval != ULLONG_MAX)
1129                 rec->opts.default_interval = rec->opts.user_interval;
1130         if (rec->opts.user_freq != UINT_MAX)
1131                 rec->opts.freq = rec->opts.user_freq;
1132
1133         /*
1134          * User specified count overrides default frequency.
1135          */
1136         if (rec->opts.default_interval)
1137                 rec->opts.freq = 0;
1138         else if (rec->opts.freq) {
1139                 rec->opts.default_interval = rec->opts.freq;
1140         } else {
1141                 ui__error("frequency and count are zero, aborting\n");
1142                 err = -EINVAL;
1143                 goto out_free_fd;
1144         }
1145
1146         err = __cmd_record(&record, argc, argv);
1147 out_free_fd:
1148         perf_evlist__delete_maps(evsel_list);
1149 out_symbol_exit:
1150         symbol__exit();
1151         return err;
1152 }