#include "perf.h"
#include "util/color.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
+#include "util/thread_map.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include <stdio.h>
#include <termios.h>
#include <unistd.h>
+#include <inttypes.h>
#include <errno.h>
#include <time.h>
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
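+/*
+ * The counters, their cpu/thread maps, poll fds and mmap rings now all live
+ * in this single evlist, replacing the old nr_counters/cpus/threads globals.
+ */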
+struct perf_evlist *evsel_list;
+
static bool system_wide = false;
static int default_interval = 0;
static int target_pid = -1;
static int target_tid = -1;
-static struct thread_map *threads;
static bool inherit = false;
-static struct cpu_map *cpus;
static int realtime_prio = 0;
static bool group = false;
static unsigned int page_size;
-static unsigned int mmap_pages = 16;
+static unsigned int mmap_pages = 128;
static int freq = 1000; /* 1 KHz */
static int delay_secs = 2;
len = sym->end - sym->start;
sprintf(command,
- "objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s",
+ "objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
line = syme->src->lines;
while (line) {
- for (i = 0; i < nr_counters; i++)
+ for (i = 0; i < evsel_list->nr_entries; i++)
line->count[i] = 0;
line = line->next;
}
struct source_line *line;
char pattern[PATTERN_LEN + 1];
- sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
+ sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
map__rip_2objdump(syme->map, symbol->start));
pthread_mutex_lock(&syme->src->lock);
}
/*
- * Symbols will be added here in event__process_sample and will get out
+ * Symbols will be added here in perf_event__process_sample and will get out
* after decayed.
*/
static LIST_HEAD(active_symbols);
if (!display_weighted)
return weight;
- for (counter = 1; counter < nr_counters-1; counter++)
+ for (counter = 1; counter < evsel_list->nr_entries - 1; counter++)
weight *= sym->count[counter];
weight /= (sym->count[counter] + 1);
rb_insert_color(&se->rb_node, tree);
}
-static void print_sym_table(void)
+static void print_sym_table(struct perf_session *session)
{
int printed = 0, j;
struct perf_evsel *counter;
rb_insert_active_sym(&tmp, syme);
sum_ksamples += syme->snap_count;
- for (j = 0; j < nr_counters; j++)
+ for (j = 0; j < evsel_list->nr_entries; j++)
syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
} else
list_remove_active_sym(syme);
puts(CONSOLE_CLEAR);
- printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
if (!perf_guest) {
printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
" exact: %4.1f%% [",
esamples_percent);
}
- if (nr_counters == 1 || !display_weighted) {
+ if (evsel_list->nr_entries == 1 || !display_weighted) {
struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
- printf("%Ld", first->attr.sample_period);
+ first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
+ printf("%" PRIu64, (uint64_t)first->attr.sample_period);
if (freq)
printf("Hz ");
else
if (!display_weighted)
printf("%s", event_name(sym_evsel));
- else list_for_each_entry(counter, &evsel_list, node) {
+ else list_for_each_entry(counter, &evsel_list->entries, node) {
if (counter->idx)
printf("/");
printf(" (all");
if (cpu_list)
- printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
+ printf(", CPU%s: %s)\n", evsel_list->cpus->nr > 1 ? "s" : "", cpu_list);
else {
if (target_tid != -1)
printf(")\n");
else
- printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
+ printf(", %d CPU%s)\n", evsel_list->cpus->nr,
+ evsel_list->cpus->nr > 1 ? "s" : "");
}
printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
+ if (session->hists.stats.total_lost != 0) {
+ color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
+ printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
+ session->hists.stats.total_lost);
+ }
+
if (sym_filter_entry) {
show_details(sym_filter_entry);
return;
sym_width = winsize.ws_col - dso_width - 29;
}
putchar('\n');
- if (nr_counters == 1)
+ if (evsel_list->nr_entries == 1)
printf(" samples pcnt");
else
printf(" weight samples pcnt");
printf(" RIP ");
printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
printf(" %s _______ _____",
- nr_counters == 1 ? " " : "______");
+ evsel_list->nr_entries == 1 ? " " : "______");
if (verbose)
printf(" ________________");
printf(" %-*.*s", sym_width, sym_width, graph_line);
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
sum_ksamples));
- if (nr_counters == 1 || !display_weighted)
+ if (evsel_list->nr_entries == 1 || !display_weighted)
printf("%20.2f ", syme->weight);
else
printf("%9.1f %10ld ", syme->weight, syme->snap_count);
percent_color_fprintf(stdout, "%4.1f%%", pcnt);
if (verbose)
- printf(" %016llx", sym->start);
+ printf(" %016" PRIx64, sym->start);
printf(" %-*.*s", sym_width, sym_width, sym->name);
printf(" %-*.*s\n", dso_width, dso_width,
dso_width >= syme->map->dso->long_name_len ?
fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
- if (nr_counters > 1)
+ if (evsel_list->nr_entries > 1)
fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel));
fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
fprintf(stdout, "\t[S] stop annotation.\n");
- if (nr_counters > 1)
+ if (evsel_list->nr_entries > 1)
fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
fprintf(stdout,
return 1;
case 'E':
case 'w':
- return nr_counters > 1 ? 1 : 0;
+ return evsel_list->nr_entries > 1 ? 1 : 0;
default:
break;
}
signal(SIGWINCH, SIG_DFL);
break;
case 'E':
- if (nr_counters > 1) {
+ if (evsel_list->nr_entries > 1) {
fprintf(stderr, "\nAvailable events:");
- list_for_each_entry(sym_evsel, &evsel_list, node)
+ list_for_each_entry(sym_evsel, &evsel_list->entries, node)
fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
prompt_integer(&sym_counter, "Enter details event counter");
- if (sym_counter >= nr_counters) {
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+ if (sym_counter >= evsel_list->nr_entries) {
+ sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node);
sym_counter = 0;
fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
sleep(1);
break;
}
- list_for_each_entry(sym_evsel, &evsel_list, node)
+ list_for_each_entry(sym_evsel, &evsel_list->entries, node)
if (sym_evsel->idx == sym_counter)
break;
} else sym_counter = 0;
getc(stdin);
do {
- print_sym_table();
+ print_sym_table(session);
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
c = getc(stdin);
/* Tag samples to be skipped. */
static const char *skip_symbols[] = {
"default_idle",
+ "native_safe_halt",
"cpu_idle",
"enter_idle",
"exit_idle",
return 0;
}
-static void event__process_sample(const event_t *self,
- struct sample_data *sample,
- struct perf_session *session,
- struct perf_evsel *evsel)
+static void perf_event__process_sample(const union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
- u64 ip = self->ip.ip;
+ u64 ip = event->ip.ip;
struct sym_entry *syme;
struct addr_location al;
struct machine *machine;
- u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
++samples;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
++guest_kernel_samples;
- machine = perf_session__find_machine(session, self->ip.pid);
+ machine = perf_session__find_machine(session, event->ip.pid);
break;
case PERF_RECORD_MISC_GUEST_USER:
++guest_us_samples;
if (!machine && perf_guest) {
pr_err("Can't find guest [%d]'s kernel information\n",
- self->ip.pid);
+ event->ip.pid);
return;
}
- if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
+ if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
exact_samples++;
- if (event__preprocess_sample(self, session, &al, sample,
- symbol_filter) < 0 ||
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ symbol_filter) < 0 ||
al.filtered)
return;
syme = symbol__priv(al.sym);
if (!syme->skip) {
- syme->count[evsel->idx]++;
+ struct perf_evsel *evsel;
+
syme->origin = origin;
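+ /* The sample ID tells us which event in the list produced this hit */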
+ evsel = perf_evlist__id2evsel(evsel_list, sample->id);
+ assert(evsel != NULL);
+ syme->count[evsel->idx]++;
record_precise_ip(syme, evsel->idx, ip);
pthread_mutex_lock(&active_symbols_lock);
if (list_empty(&syme->node) || !syme->node.next)
}
}
-struct mmap_data {
- void *base;
- int mask;
- unsigned int prev;
-};
-
-static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
- int ncpus, int nthreads)
-{
- evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
- return evsel->priv != NULL ? 0 : -ENOMEM;
-}
-
-static void perf_evsel__free_mmap(struct perf_evsel *evsel)
-{
- xyarray__delete(evsel->priv);
- evsel->priv = NULL;
-}
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- int head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
-
-static void perf_session__mmap_read_counter(struct perf_session *self,
- struct perf_evsel *evsel,
- int cpu, int thread_idx)
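+/*
+ * Read events from the per-cpu rings set up by perf_evlist__mmap();
+ * perf_evlist__read_on_cpu() now handles ring wrap-around and events that
+ * straddle the buffer boundary, which the removed mmap_data code did by hand.
+ */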
+static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
- unsigned int head = mmap_read_head(md);
- unsigned int old = md->prev;
- unsigned char *data = md->base + page_size;
- struct sample_data sample;
- int diff;
-
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the head, we got messed up.
- *
- * In either case, truncate and restart at head.
- */
- diff = head - old;
- if (diff > md->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * head points to a known good entry, start there.
- */
- old = head;
- }
-
- for (; old != head;) {
- event_t *event = (event_t *)&data[old & md->mask];
-
- event_t event_copy;
+ struct perf_sample sample;
+ union perf_event *event;
- size_t size = event->header.size;
+ while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) {
+ perf_session__parse_sample(self, event, &sample);
- /*
- * Event straddles the mmap boundary -- header should always
- * be inside due to u64 alignment of output.
- */
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
- unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = &event_copy;
-
- do {
- cpy = min(md->mask + 1 - (offset & md->mask), len);
- memcpy(dst, &data[offset & md->mask], cpy);
- offset += cpy;
- dst += cpy;
- len -= cpy;
- } while (len);
-
- event = &event_copy;
- }
-
- event__parse_sample(event, self, &sample);
if (event->header.type == PERF_RECORD_SAMPLE)
- event__process_sample(event, &sample, self, evsel);
+ perf_event__process_sample(event, &sample, self);
else
- event__process(event, &sample, self);
- old += size;
+ perf_event__process(event, &sample, self);
}
-
- md->prev = old;
}
-static struct pollfd *event_array;
-
static void perf_session__mmap_read(struct perf_session *self)
{
- struct perf_evsel *counter;
- int i, thread_index;
-
- for (i = 0; i < cpus->nr; i++) {
- list_for_each_entry(counter, &evsel_list, node) {
- for (thread_index = 0;
- thread_index < threads->nr;
- thread_index++) {
- perf_session__mmap_read_counter(self,
- counter, i, thread_index);
- }
- }
- }
-}
+ int i;
-int nr_poll;
-int group_fd;
+ for (i = 0; i < evsel_list->cpus->nr; i++)
+ perf_session__mmap_read_cpu(self, i);
+}
-static void start_counter(int i, struct perf_evsel *evsel)
+static void start_counters(struct perf_evlist *evlist)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *mm;
- struct perf_event_attr *attr;
- int cpu = -1;
- int thread_index;
-
- if (target_tid == -1)
- cpu = cpus->map[i];
+ struct perf_evsel *counter;
- attr = &evsel->attr;
+ list_for_each_entry(counter, &evlist->entries, node) {
+ struct perf_event_attr *attr = &counter->attr;
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = freq;
- }
+ if (freq) {
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+ attr->freq = 1;
+ attr->sample_freq = freq;
+ }
- attr->inherit = (cpu < 0) && inherit;
- attr->mmap = 1;
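+ /*
+  * With several events sharing the same mmap rings we need sample IDs so
+  * that perf_evlist__id2evsel() can map each sample back to its evsel.
+  */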
+ if (evlist->nr_entries > 1) {
+ attr->sample_type |= PERF_SAMPLE_ID;
+ attr->read_format |= PERF_FORMAT_ID;
+ }
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
+ attr->mmap = 1;
try_again:
- FD(evsel, i, thread_index) = sys_perf_event_open(attr,
- threads->map[thread_index], cpu, group_fd, 0);
-
- if (FD(evsel, i, thread_index) < 0) {
+ if (perf_evsel__open(counter, evsel_list->cpus,
+ evsel_list->threads, group, inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
die("Permission error - are you root?\n"
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid.\n");
- if (err == ENOENT)
- die("%s event is not supported. ", event_name(evsel));
/*
* If it's cycles then fall back to hrtimer
* based cpu-clock-tick sw counter, which
* is always available even if no PMU support:
*/
- if (attr->type == PERF_TYPE_HARDWARE
- && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+ if (attr->type == PERF_TYPE_HARDWARE &&
+ attr->config == PERF_COUNT_HW_CPU_CYCLES) {
if (verbose)
warning(" ... trying to fall back to cpu-clock-ticks\n");
goto try_again;
}
printf("\n");
- error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, i, thread_index), strerror(err));
+ error("sys_perf_event_open() syscall returned with %d "
+ "(%s). /bin/dmesg may provide additional information.\n",
+ err, strerror(err));
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
exit(-1);
}
- assert(FD(evsel, i, thread_index) >= 0);
- fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, i, thread_index);
-
- event_array[nr_poll].fd = FD(evsel, i, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mm = xyarray__entry(mmap_array, i, thread_index);
- mm->prev = 0;
- mm->mask = mmap_pages*page_size - 1;
- mm->base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
- if (mm->base == MAP_FAILED)
- die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
+
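+ /* One ring per cpu/thread; all counters in the list share it */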
+ if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
static int __cmd_top(void)
{
pthread_t thread;
- struct perf_evsel *counter;
- int i, ret;
+ struct perf_evsel *first;
+ int ret;
/*
* FIXME: perf_session__new should allow passing a O_MMAP, so that all this
* mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
return -ENOMEM;
if (target_tid != -1)
- event__synthesize_thread(target_tid, event__process, session);
+ perf_event__synthesize_thread(target_tid, perf_event__process,
+ session);
else
- event__synthesize_threads(event__process, session);
+ perf_event__synthesize_threads(perf_event__process, session);
- for (i = 0; i < cpus->nr; i++) {
- group_fd = -1;
- list_for_each_entry(counter, &evsel_list, node)
- start_counter(i, counter);
- }
+ start_counters(evsel_list);
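+ /* start_counters() gives every event the same sample_type, so the first entry suffices */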
+ first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
+ perf_session__set_sample_type(session, first->attr.sample_type);
/* Wait for a minimal set of events before starting the snapshot */
- poll(&event_array[0], nr_poll, 100);
+ poll(evsel_list->pollfd, evsel_list->nr_fds, 100);
perf_session__mmap_read(session);
perf_session__mmap_read(session);
if (hits == samples)
- ret = poll(event_array, nr_poll, 100);
+ ret = poll(evsel_list->pollfd, evsel_list->nr_fds, 100);
}
return 0;
};
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_INTEGER('c', "count", &default_interval,
struct perf_evsel *pos;
int status = -ENOMEM;
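+ /* cpu and thread maps are filled in later by perf_evlist__create_maps() */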
+ evsel_list = perf_evlist__new(NULL, NULL);
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
page_size = sysconf(_SC_PAGE_SIZE);
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)
usage_with_options(top_usage, options);
- if (target_pid != -1)
- target_tid = target_pid;
-
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
- pr_err("Problems finding threads of monitor\n");
- usage_with_options(top_usage, options);
- }
-
- event_array = malloc((sizeof(struct pollfd) *
- MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
- if (!event_array)
- return -ENOMEM;
-
/* CPU and PID are mutually exclusive */
if (target_tid > 0 && cpu_list) {
printf("WARNING: PID switch overriding CPU\n");
cpu_list = NULL;
}
- if (!nr_counters && perf_evsel_list__create_default() < 0) {
+ if (target_pid != -1)
+ target_tid = target_pid;
+
+ if (perf_evlist__create_maps(evsel_list, target_pid,
+ target_tid, cpu_list) < 0)
+ usage_with_options(top_usage, options);
+
+ if (!evsel_list->nr_entries &&
+ perf_evlist__add_default(evsel_list) < 0) {
pr_err("Not enough memory for event selector list\n");
return -ENOMEM;
}
exit(EXIT_FAILURE);
}
- if (target_tid != -1)
- cpus = cpu_map__dummy_new();
- else
- cpus = cpu_map__new(cpu_list);
-
- if (cpus == NULL)
- usage_with_options(top_usage, options);
-
- list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
- perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ list_for_each_entry(pos, &evsel_list->entries, node) {
+ if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
+ evsel_list->threads->nr) < 0)
goto out_free_fd;
/*
* Fill in the ones not specifically initialized via -c:
pos->attr.sample_period = default_interval;
}
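+ /*
+  * Replaces the fixed-size event_array: the pollfd and mmap arrays are
+  * sized from the evlist's own cpu and thread maps.
+  */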
+ if (perf_evlist__alloc_pollfd(evsel_list) < 0 ||
+ perf_evlist__alloc_mmap(evsel_list) < 0)
+ goto out_free_fd;
+
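+ /* Start with the first event selected for the symbol details view */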
+ sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node);
+
symbol_conf.priv_size = (sizeof(struct sym_entry) +
- (nr_counters + 1) * sizeof(unsigned long));
+ (evsel_list->nr_entries + 1) * sizeof(unsigned long));
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
if (symbol__init() < 0)
status = __cmd_top();
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
- perf_evsel__free_mmap(pos);
+ perf_evlist__delete(evsel_list);
return status;
}