/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include "debugfs.h"
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include <unistd.h>

#include "parse-events.h"

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	evlist->workload.pid = -1;
}

struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
				     struct thread_map *threads)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, cpus, threads);

	return evlist;
}

void perf_evlist__config_attrs(struct perf_evlist *evlist,
			       struct perf_record_opts *opts)
{
	struct perf_evsel *evsel;

	if (evlist->cpus->map[0] < 0)
		opts->no_inherit = true;

	list_for_each_entry(evsel, &evlist->entries, node) {
		perf_evsel__config(evsel, opts);

		if (evlist->nr_entries > 1)
			perf_evsel__set_sample_id(evsel);
	}
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	list_for_each_entry_safe(pos, n, &evlist->entries, node) {
		list_del_init(&pos->node);
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	free(evlist->mmap);
	free(evlist->pollfd);
	evlist->mmap = NULL;
	evlist->pollfd = NULL;
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	list_add_tail(&entry->node, &evlist->entries);
	++evlist->nr_entries;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list,
				   int nr_entries)
{
	list_splice_tail(list, &evlist->entries);
	evlist->nr_entries += nr_entries;
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);

	list_for_each_entry(evsel, list, node) {
		if (evsel != leader)
			evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries)
		__perf_evlist__set_leader(&evlist->entries);
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr, 0);
	if (evsel == NULL)
		goto error;

	/* use strdup() because free(evsel) assumes name is allocated */
	evsel->name = strdup("cycles");
	if (!evsel->name)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

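/*
 * Usage sketch (illustrative only, not part of this file): building a
 * minimal evlist with the default cycles event. The function name and the
 * bare-bones error handling are hypothetical.
 */
#if 0
int example_default_evlist(void)
{
	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);

	if (evlist == NULL)
		return -ENOMEM;

	/* Adds a HARDWARE/CPU_CYCLES evsel named "cycles". */
	if (perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		return -ENOMEM;
	}

	perf_evlist__delete(evlist);
	return 0;
}
#endif
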
static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head, nr_attrs);

	return 0;

out_delete_partial_list:
	list_for_each_entry_safe(evsel, n, &head, node)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel;

	evsel = perf_evsel__newtp(sys, name, evlist->nr_entries);
	if (evsel == NULL)
		return -1;

	evsel->handler.func = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

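/*
 * Usage sketch (illustrative only): wiring a tracepoint event to a handler.
 * handle_sched_switch() is a hypothetical callback; the handler.func slot is
 * consumed later by the tool's sample dispatch code, not by this file.
 */
#if 0
static int handle_sched_switch(struct perf_evsel *evsel,
			       struct perf_sample *sample); /* hypothetical */

static int add_sched_switch(struct perf_evlist *evlist)
{
	return perf_evlist__add_newtp(evlist, "sched", "sched_switch",
				      handle_sched_switch);
}
#endif
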
void perf_evlist__disable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;

	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
		list_for_each_entry(pos, &evlist->entries, node) {
			if (!perf_evsel__is_group_leader(pos))
				continue;
			for (thread = 0; thread < evlist->threads->nr; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_DISABLE, 0);
		}
	}
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;

	for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) {
		list_for_each_entry(pos, &evlist->entries, node) {
			if (!perf_evsel__is_group_leader(pos))
				continue;
			for (thread = 0; thread < evlist->threads->nr; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_ENABLE, 0);
		}
	}
}

static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nfds = cpu_map__nr(evlist->cpus) * evlist->threads->nr * evlist->nr_entries;
	evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
	return evlist->pollfd != NULL ? 0 : -ENOMEM;
}

void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	fcntl(fd, F_SETFL, O_NONBLOCK);
	evlist->pollfd[evlist->nr_fds].fd = fd;
	evlist->pollfd[evlist->nr_fds].events = POLLIN;
	evlist->nr_fds++;
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
				  struct perf_evsel *evsel,
				  int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
	return 0;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct hlist_node *pos;
	struct perf_sample_id *sid;
	int hash;

	if (evlist->nr_entries == 1)
		return perf_evlist__first(evlist);

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, pos, head, node)
		if (sid->id == id)
			return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

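/*
 * Usage sketch (illustrative only): mapping a sample back to the evsel that
 * produced it while dispatching PERF_RECORD_SAMPLEs. deliver_sample() is a
 * hypothetical consumer; sample->id comes from the parsed sample_id fields.
 */
#if 0
static void deliver_sample(struct perf_evsel *evsel,
			   struct perf_sample *sample); /* hypothetical */

static void dispatch_sample(struct perf_evlist *evlist,
			    struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__id2evsel(evlist, sample->id);

	if (evsel != NULL)
		deliver_sample(evsel, sample);
}
#endif
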
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;

	if (evlist->overwrite) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the head, we got messed up.
		 *
		 * In either case, truncate and restart at head.
		 */
		int diff = head - old;
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * head points to a known good entry, start there.
			 */
			old = head;
		}
	}

	if (old != head) {
		size_t size;

		event = (union perf_event *)&data[old & md->mask];
		size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = &evlist->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = &evlist->event_copy;
		}

		old += size;
	}

	md->prev = old;

	if (!evlist->overwrite)
		perf_mmap__write_tail(md, old);

	return event;
}

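/*
 * Usage sketch (illustrative only): draining all ring buffers after poll()
 * reports activity. process_event() is a hypothetical callback; the loop
 * shape follows how the perf tools consume perf_evlist__mmap_read(). In
 * !overwrite mode the tail update above tells the kernel the slot is free.
 */
#if 0
static void process_event(union perf_event *event); /* hypothetical */

static void drain_mmaps(struct perf_evlist *evlist)
{
	union perf_event *event;
	int i;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL)
			process_event(event);
	}
}
#endif
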
void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		if (evlist->mmap[i].base != NULL) {
			munmap(evlist->mmap[i].base, evlist->mmap_len);
			evlist->mmap[i].base = NULL;
		}
	}

	free(evlist->mmap);
	evlist->mmap = NULL;
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__all(evlist->cpus))
		evlist->nr_mmaps = evlist->threads->nr;
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	return evlist->mmap != NULL ? 0 : -ENOMEM;
}

static int __perf_evlist__mmap(struct perf_evlist *evlist,
			       int idx, int prot, int mask, int fd)
{
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		evlist->mmap[idx].base = NULL;
		return -1;
	}

	perf_evlist__add_pollfd(evlist, fd);
	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask)
{
	struct perf_evsel *evsel;
	int cpu, thread;

	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
		int output = -1;

		for (thread = 0; thread < evlist->threads->nr; thread++) {
			list_for_each_entry(evsel, &evlist->entries, node) {
				int fd = FD(evsel, cpu, thread);

				if (output == -1) {
					output = fd;
					if (__perf_evlist__mmap(evlist, cpu,
								prot, mask, output) < 0)
						goto out_unmap;
				} else {
					if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
						goto out_unmap;
				}

				if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
				    perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
					goto out_unmap;
			}
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
		if (evlist->mmap[cpu].base != NULL) {
			munmap(evlist->mmap[cpu].base, evlist->mmap_len);
			evlist->mmap[cpu].base = NULL;
		}
	}
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask)
{
	struct perf_evsel *evsel;
	int thread;

	for (thread = 0; thread < evlist->threads->nr; thread++) {
		int output = -1;

		list_for_each_entry(evsel, &evlist->entries, node) {
			int fd = FD(evsel, 0, thread);

			if (output == -1) {
				output = fd;
				if (__perf_evlist__mmap(evlist, thread,
							prot, mask, output) < 0)
					goto out_unmap;
			} else {
				if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
					goto out_unmap;
			}

			if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
			    perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0)
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (thread = 0; thread < evlist->threads->nr; thread++) {
		if (evlist->mmap[thread].base != NULL) {
			munmap(evlist->mmap[thread].base, evlist->mmap_len);
			evlist->mmap[thread].base = NULL;
		}
	}
	return -1;
}

/** perf_evlist__mmap - Create per cpu maps to receive events
 *
 * @evlist - list of events
 * @pages - map length in pages
 * @overwrite - overwrite older events?
 *
 * If overwrite is false the user needs to signal event consumption using:
 *
 *	struct perf_mmap *m = &evlist->mmap[cpu];
 *	unsigned int head = perf_mmap__read_head(m);
 *
 *	perf_mmap__write_tail(m, head)
 *
 * Using perf_evlist__read_on_cpu does this automatically.
 */
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask;

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (pages == UINT_MAX)
		pages = (512 * 1024) / page_size;
	else if (!is_power_of_2(pages))
		return -EINVAL;

	mask = pages * page_size - 1;

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = (pages + 1) * page_size;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__all(cpus))
		return perf_evlist__mmap_per_thread(evlist, prot, mask);

	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
}

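/*
 * Usage sketch (illustrative only): the typical setup sequence a tool goes
 * through before entering its event loop. setup_evlist() is hypothetical and
 * error cleanup is elided; UINT_MAX requests the default 512 kiB buffer
 * computed above.
 */
#if 0
static int setup_evlist(struct perf_evlist *evlist, struct perf_target *target)
{
	if (perf_evlist__create_maps(evlist, target) < 0)
		return -1;

	if (perf_evlist__open(evlist) < 0)
		return -1;

	return perf_evlist__mmap(evlist, UINT_MAX, false);
}
#endif
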
int perf_evlist__create_maps(struct perf_evlist *evlist,
			     struct perf_target *target)
{
	evlist->threads = thread_map__new_str(target->pid, target->tid,
					      target->uid);

	if (evlist->threads == NULL)
		return -1;

	if (perf_target__has_task(target))
		evlist->cpus = cpu_map__dummy_new();
	else if (!perf_target__has_cpu(target) && !target->uses_mmap)
		evlist->cpus = cpu_map__dummy_new();
	else
		evlist->cpus = cpu_map__new(target->cpu_list);

	if (evlist->cpus == NULL)
		goto out_delete_threads;

	return 0;

out_delete_threads:
	thread_map__delete(evlist->threads);
	return -1;
}

void perf_evlist__delete_maps(struct perf_evlist *evlist)
{
	cpu_map__delete(evlist->cpus);
	thread_map__delete(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
}

int perf_evlist__apply_filters(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = evlist->threads->nr;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (evsel->filter == NULL)
			continue;

		err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = evlist->threads->nr;

	list_for_each_entry(evsel, &evlist->entries, node) {
		err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter);
		if (err)
			break;
	}

	return err;
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	list_for_each_entry_continue(pos, &evlist->entries, node) {
		if (first->attr.sample_type != pos->attr.sample_type)
			return false;
	}

	return true;
}

u64 perf_evlist__sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_type;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
out:
	return size;
}

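/*
 * Worked example (added for illustration): with sample_id_all set and
 * sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID |
 * PERF_SAMPLE_CPU, the trailer is 8 (pid+tid) + 8 (time) + 8 (id) +
 * 8 (cpu+res) = 32 bytes appended to every non-sample event.
 */
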
bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	list_for_each_entry_continue(pos, &evlist->entries, node) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err, ncpus, nthreads;

	list_for_each_entry(evsel, &evlist->entries, node) {
		err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	ncpus = evlist->cpus ? evlist->cpus->nr : 1;
	nthreads = evlist->threads ? evlist->threads->nr : 1;

	list_for_each_entry_reverse(evsel, &evlist->entries, node)
		perf_evsel__close(evsel, ncpus, nthreads);

	errno = -err;
	return err;
}

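/*
 * Usage sketch (illustrative only): a caller-side open, assuming the
 * counters were configured to start disabled (attr.disabled) so that
 * perf_evlist__enable() is what actually starts counting. On failure the
 * out_err path above has already closed every fd.
 */
#if 0
static int open_counters(struct perf_evlist *evlist)
{
	if (perf_evlist__open(evlist) < 0)
		return -1;

	perf_evlist__enable(evlist);
	return 0;
}
#endif
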
int perf_evlist__prepare_workload(struct perf_evlist *evlist,
				  struct perf_record_opts *opts,
				  const char *argv[])
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		if (opts->pipe_output)
			dup2(2, 1);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Do a dummy execvp to get the PLT entry resolved,
		 * so we avoid the resolver overhead on the real
		 * execvp call.
		 */
		execvp("", (char **)argv);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		if (read(go_pipe[0], &bf, 1) == -1)
			perror("unable to read pipe");

		execvp(argv[0], (char **)argv);

		perror(argv[0]);
		kill(getppid(), SIGUSR1);
		exit(-1);
	}

	if (perf_target__none(&opts->target))
		evlist->threads->map[0] = evlist->workload.pid;

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		/*
		 * Remove the cork, let it rip!
		 */
		return close(evlist->workload.cork_fd);
	}

	return 0;
}

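/*
 * Usage sketch (illustrative only): the fork-then-cork flow. The child sits
 * in prepare_workload() blocked on the go_pipe read until start_workload()
 * closes the cork fd, so counters can be enabled before the exec runs.
 * run_workload() is hypothetical.
 */
#if 0
static int run_workload(struct perf_evlist *evlist,
			struct perf_record_opts *opts, const char *argv[])
{
	if (perf_evlist__prepare_workload(evlist, opts, argv) < 0)
		return -1;

	perf_evlist__enable(evlist);

	return perf_evlist__start_workload(evlist);
}
#endif
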
int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__first(evlist);
	return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	list_for_each_entry(evsel, &evlist->entries, node) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}