4 #include "util/evsel.h"
5 #include "util/evlist.h"
7 #include "util/cache.h"
8 #include "util/symbol.h"
9 #include "util/thread.h"
10 #include "util/header.h"
11 #include "util/session.h"
13 #include "util/parse-options.h"
14 #include "util/trace-event.h"
15 #include "util/debug.h"
16 #include <lk/debugfs.h>
17 #include "util/tool.h"
18 #include "util/stat.h"
21 #include <sys/prctl.h>
22 #include <sys/timerfd.h>
25 #include <semaphore.h>
29 #if defined(__i386__) || defined(__x86_64__)
35 #define INVALID_KEY (~0ULL)
40 struct kvm_event_stats {
46 struct list_head hash_entry;
51 struct kvm_event_stats total;
53 #define DEFAULT_VCPU_NUM 8
55 struct kvm_event_stats *vcpu;
58 typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
60 struct kvm_event_key {
68 struct kvm_events_ops {
69 bool (*is_begin_event)(struct perf_evsel *evsel,
70 struct perf_sample *sample,
71 struct event_key *key);
72 bool (*is_end_event)(struct perf_evsel *evsel,
73 struct perf_sample *sample, struct event_key *key);
74 void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
79 struct exit_reasons_table {
80 unsigned long exit_code;
84 #define EVENTS_BITS 12
85 #define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS)
87 struct perf_kvm_stat {
88 struct perf_tool tool;
89 struct perf_record_opts opts;
90 struct perf_evlist *evlist;
91 struct perf_session *session;
93 const char *file_name;
94 const char *report_event;
98 struct exit_reasons_table *exit_reasons;
99 int exit_reasons_size;
100 const char *exit_reasons_isa;
102 struct kvm_events_ops *events_ops;
104 struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
110 struct rb_root result;
113 unsigned int display_time;
118 static void exit_event_get_key(struct perf_evsel *evsel,
119 struct perf_sample *sample,
120 struct event_key *key)
123 key->key = perf_evsel__intval(evsel, sample, "exit_reason");
126 static bool kvm_exit_event(struct perf_evsel *evsel)
128 return !strcmp(evsel->name, "kvm:kvm_exit");
/*
 * Begin-event predicate for the VM-exit report: a kvm_exit sample opens an
 * interval and fills @key; every other sample is rejected.
 */
static bool exit_event_begin(struct perf_evsel *evsel,
			     struct perf_sample *sample, struct event_key *key)
{
	if (!kvm_exit_event(evsel))
		return false;

	exit_event_get_key(evsel, sample, key);
	return true;
}
142 static bool kvm_entry_event(struct perf_evsel *evsel)
144 return !strcmp(evsel->name, "kvm:kvm_entry");
147 static bool exit_event_end(struct perf_evsel *evsel,
148 struct perf_sample *sample __maybe_unused,
149 struct event_key *key __maybe_unused)
151 return kvm_entry_event(evsel);
154 static struct exit_reasons_table vmx_exit_reasons[] = {
158 static struct exit_reasons_table svm_exit_reasons[] = {
162 static const char *get_exit_reason(struct perf_kvm_stat *kvm, u64 exit_code)
164 int i = kvm->exit_reasons_size;
165 struct exit_reasons_table *tbl = kvm->exit_reasons;
168 if (tbl->exit_code == exit_code)
173 pr_err("unknown kvm exit code:%lld on %s\n",
174 (unsigned long long)exit_code, kvm->exit_reasons_isa);
178 static void exit_event_decode_key(struct perf_kvm_stat *kvm,
179 struct event_key *key,
182 const char *exit_reason = get_exit_reason(kvm, key->key);
184 scnprintf(decode, 20, "%s", exit_reason);
187 static struct kvm_events_ops exit_events = {
188 .is_begin_event = exit_event_begin,
189 .is_end_event = exit_event_end,
190 .decode_key = exit_event_decode_key,
195 * For the mmio events, we treat:
196 * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
197 * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
199 static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
200 struct event_key *key)
202 key->key = perf_evsel__intval(evsel, sample, "gpa");
203 key->info = perf_evsel__intval(evsel, sample, "type");
206 #define KVM_TRACE_MMIO_READ_UNSATISFIED 0
207 #define KVM_TRACE_MMIO_READ 1
208 #define KVM_TRACE_MMIO_WRITE 2
210 static bool mmio_event_begin(struct perf_evsel *evsel,
211 struct perf_sample *sample, struct event_key *key)
213 /* MMIO read begin event in kernel. */
214 if (kvm_exit_event(evsel))
217 /* MMIO write begin event in kernel. */
218 if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
219 perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
220 mmio_event_get_key(evsel, sample, key);
227 static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
228 struct event_key *key)
230 /* MMIO write end event in kernel. */
231 if (kvm_entry_event(evsel))
234 /* MMIO read end event in kernel.*/
235 if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
236 perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
237 mmio_event_get_key(evsel, sample, key);
244 static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
245 struct event_key *key,
248 scnprintf(decode, 20, "%#lx:%s", (unsigned long)key->key,
249 key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
252 static struct kvm_events_ops mmio_events = {
253 .is_begin_event = mmio_event_begin,
254 .is_end_event = mmio_event_end,
255 .decode_key = mmio_event_decode_key,
256 .name = "MMIO Access"
259 /* The time of emulation pio access is from kvm_pio to kvm_entry. */
260 static void ioport_event_get_key(struct perf_evsel *evsel,
261 struct perf_sample *sample,
262 struct event_key *key)
264 key->key = perf_evsel__intval(evsel, sample, "port");
265 key->info = perf_evsel__intval(evsel, sample, "rw");
268 static bool ioport_event_begin(struct perf_evsel *evsel,
269 struct perf_sample *sample,
270 struct event_key *key)
272 if (!strcmp(evsel->name, "kvm:kvm_pio")) {
273 ioport_event_get_key(evsel, sample, key);
280 static bool ioport_event_end(struct perf_evsel *evsel,
281 struct perf_sample *sample __maybe_unused,
282 struct event_key *key __maybe_unused)
284 return kvm_entry_event(evsel);
287 static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
288 struct event_key *key,
291 scnprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key,
292 key->info ? "POUT" : "PIN");
295 static struct kvm_events_ops ioport_events = {
296 .is_begin_event = ioport_event_begin,
297 .is_end_event = ioport_event_end,
298 .decode_key = ioport_event_decode_key,
299 .name = "IO Port Access"
302 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
306 if (!strcmp(kvm->report_event, "vmexit"))
307 kvm->events_ops = &exit_events;
308 else if (!strcmp(kvm->report_event, "mmio"))
309 kvm->events_ops = &mmio_events;
310 else if (!strcmp(kvm->report_event, "ioport"))
311 kvm->events_ops = &ioport_events;
313 pr_err("Unknown report event:%s\n", kvm->report_event);
320 struct vcpu_event_record {
323 struct kvm_event *last_event;
327 static void init_kvm_event_record(struct perf_kvm_stat *kvm)
331 for (i = 0; i < EVENTS_CACHE_SIZE; i++)
332 INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
335 static void clear_events_cache_stats(struct list_head *kvm_events_cache)
337 struct list_head *head;
338 struct kvm_event *event;
341 for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
342 head = &kvm_events_cache[i];
343 list_for_each_entry(event, head, hash_entry) {
344 /* reset stats for event */
345 memset(&event->total, 0, sizeof(event->total));
346 memset(event->vcpu, 0,
347 event->max_vcpu * sizeof(*event->vcpu));
352 static int kvm_events_hash_fn(u64 key)
354 return key & (EVENTS_CACHE_SIZE - 1);
357 static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
359 int old_max_vcpu = event->max_vcpu;
362 if (vcpu_id < event->max_vcpu)
365 while (event->max_vcpu <= vcpu_id)
366 event->max_vcpu += DEFAULT_VCPU_NUM;
369 event->vcpu = realloc(event->vcpu,
370 event->max_vcpu * sizeof(*event->vcpu));
373 pr_err("Not enough memory\n");
377 memset(event->vcpu + old_max_vcpu, 0,
378 (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
382 static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
384 struct kvm_event *event;
386 event = zalloc(sizeof(*event));
388 pr_err("Not enough memory\n");
396 static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
397 struct event_key *key)
399 struct kvm_event *event;
400 struct list_head *head;
402 BUG_ON(key->key == INVALID_KEY);
404 head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
405 list_for_each_entry(event, head, hash_entry) {
406 if (event->key.key == key->key && event->key.info == key->info)
410 event = kvm_alloc_init_event(key);
414 list_add(&event->hash_entry, head);
418 static bool handle_begin_event(struct perf_kvm_stat *kvm,
419 struct vcpu_event_record *vcpu_record,
420 struct event_key *key, u64 timestamp)
422 struct kvm_event *event = NULL;
424 if (key->key != INVALID_KEY)
425 event = find_create_kvm_event(kvm, key);
427 vcpu_record->last_event = event;
428 vcpu_record->start_time = timestamp;
433 kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
435 kvm_stats->time += time_diff;
436 update_stats(&kvm_stats->stats, time_diff);
439 static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
441 struct kvm_event_stats *kvm_stats = &event->total;
444 kvm_stats = &event->vcpu[vcpu_id];
446 return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
447 avg_stats(&kvm_stats->stats));
450 static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
454 kvm_update_event_stats(&event->total, time_diff);
458 if (!kvm_event_expand(event, vcpu_id))
461 kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
465 static bool handle_end_event(struct perf_kvm_stat *kvm,
466 struct vcpu_event_record *vcpu_record,
467 struct event_key *key,
470 struct kvm_event *event;
471 u64 time_begin, time_diff;
474 if (kvm->trace_vcpu == -1)
477 vcpu = vcpu_record->vcpu_id;
479 event = vcpu_record->last_event;
480 time_begin = vcpu_record->start_time;
482 /* The begin event is not caught. */
487 * In some case, the 'begin event' only records the start timestamp,
488 * the actual event is recognized in the 'end event' (e.g. mmio-event).
491 /* Both begin and end events did not get the key. */
492 if (!event && key->key == INVALID_KEY)
496 event = find_create_kvm_event(kvm, key);
501 vcpu_record->last_event = NULL;
502 vcpu_record->start_time = 0;
504 /* seems to happen once in a while during live mode */
505 if (timestamp < time_begin) {
506 pr_debug("End time before begin time; skipping event.\n");
510 time_diff = timestamp - time_begin;
511 return update_kvm_event(event, vcpu, time_diff);
515 struct vcpu_event_record *per_vcpu_record(struct thread *thread,
516 struct perf_evsel *evsel,
517 struct perf_sample *sample)
519 /* Only kvm_entry records vcpu id. */
520 if (!thread->priv && kvm_entry_event(evsel)) {
521 struct vcpu_event_record *vcpu_record;
523 vcpu_record = zalloc(sizeof(*vcpu_record));
525 pr_err("%s: Not enough memory\n", __func__);
529 vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, "vcpu_id");
530 thread->priv = vcpu_record;
536 static bool handle_kvm_event(struct perf_kvm_stat *kvm,
537 struct thread *thread,
538 struct perf_evsel *evsel,
539 struct perf_sample *sample)
541 struct vcpu_event_record *vcpu_record;
542 struct event_key key = {.key = INVALID_KEY};
544 vcpu_record = per_vcpu_record(thread, evsel, sample);
548 /* only process events for vcpus user cares about */
549 if ((kvm->trace_vcpu != -1) &&
550 (kvm->trace_vcpu != vcpu_record->vcpu_id))
553 if (kvm->events_ops->is_begin_event(evsel, sample, &key))
554 return handle_begin_event(kvm, vcpu_record, &key, sample->time);
556 if (kvm->events_ops->is_end_event(evsel, sample, &key))
557 return handle_end_event(kvm, vcpu_record, &key, sample->time);
562 #define GET_EVENT_KEY(func, field) \
563 static u64 get_event_ ##func(struct kvm_event *event, int vcpu) \
566 return event->total.field; \
568 if (vcpu >= event->max_vcpu) \
571 return event->vcpu[vcpu].field; \
574 #define COMPARE_EVENT_KEY(func, field) \
575 GET_EVENT_KEY(func, field) \
576 static int compare_kvm_event_ ## func(struct kvm_event *one, \
577 struct kvm_event *two, int vcpu)\
579 return get_event_ ##func(one, vcpu) > \
580 get_event_ ##func(two, vcpu); \
583 GET_EVENT_KEY(time, time);
584 COMPARE_EVENT_KEY(count, stats.n);
585 COMPARE_EVENT_KEY(mean, stats.mean);
587 #define DEF_SORT_NAME_KEY(name, compare_key) \
588 { #name, compare_kvm_event_ ## compare_key }
590 static struct kvm_event_key keys[] = {
591 DEF_SORT_NAME_KEY(sample, count),
592 DEF_SORT_NAME_KEY(time, mean),
596 static bool select_key(struct perf_kvm_stat *kvm)
600 for (i = 0; keys[i].name; i++) {
601 if (!strcmp(keys[i].name, kvm->sort_key)) {
602 kvm->compare = keys[i].key;
607 pr_err("Unknown compare key:%s\n", kvm->sort_key);
611 static void insert_to_result(struct rb_root *result, struct kvm_event *event,
612 key_cmp_fun bigger, int vcpu)
614 struct rb_node **rb = &result->rb_node;
615 struct rb_node *parent = NULL;
619 p = container_of(*rb, struct kvm_event, rb);
622 if (bigger(event, p, vcpu))
623 rb = &(*rb)->rb_left;
625 rb = &(*rb)->rb_right;
628 rb_link_node(&event->rb, parent, rb);
629 rb_insert_color(&event->rb, result);
633 update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
635 int vcpu = kvm->trace_vcpu;
637 kvm->total_count += get_event_count(event, vcpu);
638 kvm->total_time += get_event_time(event, vcpu);
641 static bool event_is_valid(struct kvm_event *event, int vcpu)
643 return !!get_event_count(event, vcpu);
646 static void sort_result(struct perf_kvm_stat *kvm)
649 int vcpu = kvm->trace_vcpu;
650 struct kvm_event *event;
652 for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
653 list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
654 if (event_is_valid(event, vcpu)) {
655 update_total_count(kvm, event);
656 insert_to_result(&kvm->result, event,
663 /* returns left most element of result, and erase it */
664 static struct kvm_event *pop_from_result(struct rb_root *result)
666 struct rb_node *node = rb_first(result);
671 rb_erase(node, result);
672 return container_of(node, struct kvm_event, rb);
675 static void print_vcpu_info(struct perf_kvm_stat *kvm)
677 int vcpu = kvm->trace_vcpu;
679 pr_info("Analyze events for ");
682 if (kvm->opts.target.system_wide)
683 pr_info("all VMs, ");
684 else if (kvm->opts.target.pid)
685 pr_info("pid(s) %s, ", kvm->opts.target.pid);
687 pr_info("dazed and confused on what is monitored, ");
691 pr_info("all VCPUs:\n\n");
693 pr_info("VCPU %d:\n\n", vcpu);
/*
 * Print the current local wall-clock time as HH:MM:SS.uuuuuu through
 * pr_info(); prints an all-zero timestamp when the time cannot be broken
 * down by localtime_r().
 */
static void show_timeofday(void)
{
	char date[64];
	struct timeval tv;
	struct tm ltime;

	gettimeofday(&tv, NULL);
	if (localtime_r(&tv.tv_sec, &ltime)) {
		strftime(date, sizeof(date), "%H:%M:%S", &ltime);
		/* tv_usec is suseconds_t; cast so it matches %ld everywhere. */
		pr_info("%s.%06ld", date, (long)tv.tv_usec);
	} else {
		pr_info("00:00:00.000000");
	}
}
712 static void print_result(struct perf_kvm_stat *kvm)
715 struct kvm_event *event;
716 int vcpu = kvm->trace_vcpu;
724 print_vcpu_info(kvm);
725 pr_info("%20s ", kvm->events_ops->name);
726 pr_info("%10s ", "Samples");
727 pr_info("%9s ", "Samples%");
729 pr_info("%9s ", "Time%");
730 pr_info("%16s ", "Avg time");
733 while ((event = pop_from_result(&kvm->result))) {
736 ecount = get_event_count(event, vcpu);
737 etime = get_event_time(event, vcpu);
739 kvm->events_ops->decode_key(kvm, &event->key, decode);
740 pr_info("%20s ", decode);
741 pr_info("%10llu ", (unsigned long long)ecount);
742 pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
743 pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
744 pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
745 kvm_event_rel_stddev(vcpu, event));
749 pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
750 kvm->total_count, kvm->total_time / 1e3);
752 if (kvm->lost_events)
753 pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
756 static int process_lost_event(struct perf_tool *tool,
757 union perf_event *event __maybe_unused,
758 struct perf_sample *sample __maybe_unused,
759 struct machine *machine __maybe_unused)
761 struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
767 static int process_sample_event(struct perf_tool *tool,
768 union perf_event *event,
769 struct perf_sample *sample,
770 struct perf_evsel *evsel,
771 struct machine *machine)
773 struct thread *thread = machine__findnew_thread(machine, sample->tid);
774 struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
777 if (thread == NULL) {
778 pr_debug("problem processing %d event, skipping it.\n",
783 if (!handle_kvm_event(kvm, thread, evsel, sample))
789 static int cpu_isa_config(struct perf_kvm_stat *kvm)
791 char buf[64], *cpuid;
795 err = get_cpuid(buf, sizeof(buf));
797 pr_err("Failed to look up CPU type (Intel or AMD)\n");
802 cpuid = kvm->session->header.env.cpuid;
804 if (strstr(cpuid, "Intel"))
806 else if (strstr(cpuid, "AMD"))
809 pr_err("CPU %s is not supported.\n", cpuid);
814 kvm->exit_reasons = vmx_exit_reasons;
815 kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
816 kvm->exit_reasons_isa = "VMX";
/*
 * Validate a vcpu id: -1 and any non-negative id are accepted, every other
 * negative value is rejected with an error message.
 */
static bool verify_vcpu(int vcpu)
{
	bool acceptable = (vcpu >= -1);

	if (!acceptable)
		pr_err("Invalid vcpu:%d.\n", vcpu);

	return acceptable;
}
832 /* keeping the max events to a modest level to keep
833 * the processing of samples per mmap smooth.
835 #define PERF_KVM__MAX_EVENTS_PER_MMAP 25
837 static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
840 union perf_event *event;
841 struct perf_sample sample;
845 *mmap_time = ULLONG_MAX;
846 while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
847 err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
849 pr_err("Failed to parse sample\n");
853 err = perf_session_queue_event(kvm->session, event, &sample, 0);
855 pr_err("Failed to enqueue sample: %d\n", err);
859 /* save time stamp of our first sample for this mmap */
861 *mmap_time = sample.time;
863 /* limit events per mmap handled all at once */
865 if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
872 static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
874 int i, err, throttled = 0;
876 u64 flush_time = ULLONG_MAX, mmap_time;
878 for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
879 n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
883 /* flush time is going to be the minimum of all the individual
884 * mmap times. Essentially, we flush all the samples queued up
885 * from the last pass under our minimal start time -- that leaves
886 * a very small race for samples to come in with a lower timestamp.
887 * The ioctl to return the perf_clock timestamp should close the
890 if (mmap_time < flush_time)
891 flush_time = mmap_time;
894 if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
898 /* flush queue after each round in which we processed events */
900 kvm->session->ordered_samples.next_flush = flush_time;
901 err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session);
903 if (kvm->lost_events)
904 pr_info("\nLost events: %" PRIu64 "\n\n",
913 static volatile int done;
915 static void sig_handler(int sig __maybe_unused)
920 static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
922 struct itimerspec new_value;
925 kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
926 if (kvm->timerfd < 0) {
927 pr_err("timerfd_create failed\n");
931 new_value.it_value.tv_sec = kvm->display_time;
932 new_value.it_value.tv_nsec = 0;
933 new_value.it_interval.tv_sec = kvm->display_time;
934 new_value.it_interval.tv_nsec = 0;
936 if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
937 pr_err("timerfd_settime failed: %d\n", errno);
947 static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
952 rc = read(kvm->timerfd, &c, sizeof(uint64_t));
957 pr_err("Failed to read timer fd: %d\n", errno);
961 if (rc != sizeof(uint64_t)) {
962 pr_err("Error reading timer fd - invalid size returned\n");
967 pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
974 clear_events_cache_stats(kvm->kvm_events_cache);
975 kvm->total_count = 0;
977 kvm->lost_events = 0;
/*
 * Add O_NONBLOCK to the file status flags of @fd.  Returns 0 on success and
 * -1 (after logging) when the flags cannot be read or written.
 */
static int fd_set_nonblock(int fd)
{
	long flags = fcntl(fd, F_GETFL);

	if (flags < 0) {
		pr_err("Failed to get current flags for fd %d\n", fd);
		return -1;
	}

	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
		pr_err("Failed to set non-block option on fd %d\n", fd);
		return -1;
	}

	return 0;
}
1001 int perf_kvm__handle_stdin(struct termios *tc_now, struct termios *tc_save)
1005 tcsetattr(0, TCSANOW, tc_now);
1007 tcsetattr(0, TCSAFLUSH, tc_save);
1015 static int kvm_events_live_report(struct perf_kvm_stat *kvm)
1017 struct pollfd *pollfds = NULL;
1018 int nr_fds, nr_stdin, ret, err = -EINVAL;
1019 struct termios tc, save;
1021 /* live flag must be set first */
1024 ret = cpu_isa_config(kvm);
1028 if (!verify_vcpu(kvm->trace_vcpu) ||
1030 !register_kvm_events_ops(kvm)) {
1034 init_kvm_event_record(kvm);
1036 tcgetattr(0, &save);
1038 tc.c_lflag &= ~(ICANON | ECHO);
1042 signal(SIGINT, sig_handler);
1043 signal(SIGTERM, sig_handler);
1045 /* copy pollfds -- need to add timerfd and stdin */
1046 nr_fds = kvm->evlist->nr_fds;
1047 pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
1052 memcpy(pollfds, kvm->evlist->pollfd,
1053 sizeof(struct pollfd) * kvm->evlist->nr_fds);
1056 if (perf_kvm__timerfd_create(kvm) < 0) {
1061 pollfds[nr_fds].fd = kvm->timerfd;
1062 pollfds[nr_fds].events = POLLIN;
1065 pollfds[nr_fds].fd = fileno(stdin);
1066 pollfds[nr_fds].events = POLLIN;
1069 if (fd_set_nonblock(fileno(stdin)) != 0)
1072 /* everything is good - enable the events and process */
1073 perf_evlist__enable(kvm->evlist);
1078 rc = perf_kvm__mmap_read(kvm);
1082 err = perf_kvm__handle_timerfd(kvm);
1086 if (pollfds[nr_stdin].revents & POLLIN)
1087 done = perf_kvm__handle_stdin(&tc, &save);
1090 err = poll(pollfds, nr_fds, 100);
1093 perf_evlist__disable(kvm->evlist);
1101 if (kvm->timerfd >= 0)
1102 close(kvm->timerfd);
1110 static int kvm_live_open_events(struct perf_kvm_stat *kvm)
1113 struct perf_evsel *pos;
1114 struct perf_evlist *evlist = kvm->evlist;
1116 perf_evlist__config(evlist, &kvm->opts);
1119 * Note: exclude_{guest,host} do not apply here.
1120 * This command processes KVM tracepoints from host only
1122 list_for_each_entry(pos, &evlist->entries, node) {
1123 struct perf_event_attr *attr = &pos->attr;
1125 /* make sure these *are* set */
1126 attr->sample_type |= PERF_SAMPLE_TID;
1127 attr->sample_type |= PERF_SAMPLE_TIME;
1128 attr->sample_type |= PERF_SAMPLE_CPU;
1129 attr->sample_type |= PERF_SAMPLE_RAW;
1130 /* make sure these are *not*; want as small a sample as possible */
1131 attr->sample_type &= ~PERF_SAMPLE_PERIOD;
1132 attr->sample_type &= ~PERF_SAMPLE_IP;
1133 attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN;
1134 attr->sample_type &= ~PERF_SAMPLE_ADDR;
1135 attr->sample_type &= ~PERF_SAMPLE_READ;
1140 attr->sample_period = 1;
1142 attr->watermark = 0;
1143 attr->wakeup_events = 1000;
1145 /* will enable all once we are ready */
1149 err = perf_evlist__open(evlist);
1151 printf("Couldn't create the events: %s\n", strerror(errno));
1155 if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
1156 ui__error("Failed to mmap the events: %s\n", strerror(errno));
1157 perf_evlist__close(evlist);
1167 static int read_events(struct perf_kvm_stat *kvm)
1171 struct perf_tool eops = {
1172 .sample = process_sample_event,
1173 .comm = perf_event__process_comm,
1174 .ordered_samples = true,
1178 kvm->session = perf_session__new(kvm->file_name, O_RDONLY, 0, false,
1180 if (!kvm->session) {
1181 pr_err("Initializing perf session failed\n");
1185 if (!perf_session__has_traces(kvm->session, "kvm record"))
1189 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
1190 * traced in the old kernel.
1192 ret = cpu_isa_config(kvm);
1196 return perf_session__process_events(kvm->session, &kvm->tool);
1199 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
1202 int vcpu = kvm->trace_vcpu;
1204 if (!verify_vcpu(vcpu))
1207 if (!select_key(kvm))
1210 if (!register_kvm_events_ops(kvm))
1213 init_kvm_event_record(kvm);
1216 ret = read_events(kvm);
1227 static const char * const kvm_events_tp[] = {
1234 #define STRDUP_FAIL_EXIT(s) \
1243 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
1245 unsigned int rec_argc, i, j;
1246 const char **rec_argv;
1247 const char * const record_args[] = {
1255 rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
1256 2 * ARRAY_SIZE(kvm_events_tp);
1257 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1259 if (rec_argv == NULL)
1262 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1263 rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
1265 for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
1266 rec_argv[i++] = "-e";
1267 rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
1270 rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
1271 rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
1273 for (j = 1; j < (unsigned int)argc; j++, i++)
1274 rec_argv[i] = argv[j];
1276 return cmd_record(i, rec_argv, NULL);
1280 kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
1282 const struct option kvm_events_report_options[] = {
1283 OPT_STRING(0, "event", &kvm->report_event, "report event",
1284 "event for reporting: vmexit, mmio, ioport"),
1285 OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1286 "vcpu id to report"),
1287 OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1288 "key for sorting: sample(sort by samples number)"
1289 " time (sort by avg time)"),
1293 const char * const kvm_events_report_usage[] = {
1294 "perf kvm stat report [<options>]",
1301 argc = parse_options(argc, argv,
1302 kvm_events_report_options,
1303 kvm_events_report_usage, 0);
1305 usage_with_options(kvm_events_report_usage,
1306 kvm_events_report_options);
1309 return kvm_events_report_vcpu(kvm);
1312 static struct perf_evlist *kvm_live_event_list(void)
1314 struct perf_evlist *evlist;
1315 char *tp, *name, *sys;
1319 evlist = perf_evlist__new();
1323 for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
1325 tp = strdup(kvm_events_tp[j]);
1329 /* split tracepoint into subsystem and name */
1331 name = strchr(tp, ':');
1333 pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
1341 if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
1342 pr_err("Failed to add %s tracepoint to the list\n", kvm_events_tp[j]);
1354 perf_evlist__delete(evlist);
1361 static int kvm_events_live(struct perf_kvm_stat *kvm,
1362 int argc, const char **argv)
1364 char errbuf[BUFSIZ];
1367 const struct option live_options[] = {
1368 OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
1369 "record events on existing process id"),
1370 OPT_UINTEGER('m', "mmap-pages", &kvm->opts.mmap_pages,
1371 "number of mmap data pages"),
1372 OPT_INCR('v', "verbose", &verbose,
1373 "be more verbose (show counter open errors, etc)"),
1374 OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
1375 "system-wide collection from all CPUs"),
1376 OPT_UINTEGER('d', "display", &kvm->display_time,
1377 "time in seconds between display updates"),
1378 OPT_STRING(0, "event", &kvm->report_event, "report event",
1379 "event for reporting: vmexit, mmio, ioport"),
1380 OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1381 "vcpu id to report"),
1382 OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1383 "key for sorting: sample(sort by samples number)"
1384 " time (sort by avg time)"),
1387 const char * const live_usage[] = {
1388 "perf kvm stat live [<options>]",
1393 /* event handling */
1394 kvm->tool.sample = process_sample_event;
1395 kvm->tool.comm = perf_event__process_comm;
1396 kvm->tool.exit = perf_event__process_exit;
1397 kvm->tool.fork = perf_event__process_fork;
1398 kvm->tool.lost = process_lost_event;
1399 kvm->tool.ordered_samples = true;
1400 perf_tool__fill_defaults(&kvm->tool);
1403 kvm->display_time = 1;
1404 kvm->opts.user_interval = 1;
1405 kvm->opts.mmap_pages = 512;
1406 kvm->opts.target.uses_mmap = false;
1407 kvm->opts.target.uid_str = NULL;
1408 kvm->opts.target.uid = UINT_MAX;
1411 disable_buildid_cache();
1414 setup_browser(false);
1417 argc = parse_options(argc, argv, live_options,
1420 usage_with_options(live_usage, live_options);
1424 * target related setups
1426 err = perf_target__validate(&kvm->opts.target);
1428 perf_target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
1429 ui__warning("%s", errbuf);
1432 if (perf_target__none(&kvm->opts.target))
1433 kvm->opts.target.system_wide = true;
1437 * generate the event list
1439 kvm->evlist = kvm_live_event_list();
1440 if (kvm->evlist == NULL) {
1445 symbol_conf.nr_events = kvm->evlist->nr_entries;
1447 if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
1448 usage_with_options(live_usage, live_options);
1453 kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool);
1454 if (kvm->session == NULL) {
1458 kvm->session->evlist = kvm->evlist;
1459 perf_session__set_id_hdr_size(kvm->session);
1462 if (perf_target__has_task(&kvm->opts.target))
1463 perf_event__synthesize_thread_map(&kvm->tool,
1464 kvm->evlist->threads,
1465 perf_event__process,
1466 &kvm->session->machines.host);
1468 perf_event__synthesize_threads(&kvm->tool, perf_event__process,
1469 &kvm->session->machines.host);
1472 err = kvm_live_open_events(kvm);
1476 err = kvm_events_live_report(kvm);
1482 perf_session__delete(kvm->session);
1483 kvm->session = NULL;
1485 perf_evlist__delete_maps(kvm->evlist);
1486 perf_evlist__delete(kvm->evlist);
1492 static void print_kvm_stat_usage(void)
1494 printf("Usage: perf kvm stat <command>\n\n");
1496 printf("# Available commands:\n");
1497 printf("\trecord: record kvm events\n");
1498 printf("\treport: report statistical data of kvm events\n");
1499 printf("\tlive: live reporting of statistical data of kvm events\n");
1501 printf("\nOtherwise, it is the alias of 'perf stat':\n");
1504 static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
1506 struct perf_kvm_stat kvm = {
1507 .file_name = file_name,
1510 .report_event = "vmexit",
1511 .sort_key = "sample",
1513 .exit_reasons = svm_exit_reasons,
1514 .exit_reasons_size = ARRAY_SIZE(svm_exit_reasons),
1515 .exit_reasons_isa = "SVM",
1519 print_kvm_stat_usage();
1523 if (!strncmp(argv[1], "rec", 3))
1524 return kvm_events_record(&kvm, argc - 1, argv + 1);
1526 if (!strncmp(argv[1], "rep", 3))
1527 return kvm_events_report(&kvm, argc - 1 , argv + 1);
1529 if (!strncmp(argv[1], "live", 4))
1530 return kvm_events_live(&kvm, argc - 1 , argv + 1);
1533 return cmd_stat(argc, argv, NULL);
/*
 * Run "perf record -o <file_name>" followed by argv[1..argc-1].
 *
 * Returns the result of cmd_record(), or -ENOMEM when the argument vector
 * cannot be built.
 */
static int __cmd_record(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("record");
	rec_argv[i++] = strdup("-o");
	rec_argv[i++] = strdup(file_name);

	/* Do not hand NULL entries to the sub-command on strdup failure. */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		while (i--)
			free((char *)rec_argv[i]);
		free(rec_argv);
		return -ENOMEM;
	}

	/* argv[0] is the sub-command name itself and is replaced above. */
	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}
/*
 * Run "perf report -i <file_name>" followed by argv[1..argc-1].
 *
 * Returns the result of cmd_report(), or -ENOMEM when the argument vector
 * cannot be built.
 */
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("report");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);

	/* Do not hand NULL entries to the sub-command on strdup failure. */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		while (i--)
			free((char *)rec_argv[i]);
		free(rec_argv);
		return -ENOMEM;
	}

	/* argv[0] is the sub-command name itself and is replaced above. */
	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_report(i, rec_argv, NULL);
}
/*
 * Run "perf buildid-list -i <file_name>" followed by argv[1..argc-1].
 *
 * Returns the result of cmd_buildid_list(), or -ENOMEM when the argument
 * vector cannot be built.
 */
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("buildid-list");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);

	/* Do not hand NULL entries to the sub-command on strdup failure. */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		while (i--)
			free((char *)rec_argv[i]);
		free(rec_argv);
		return -ENOMEM;
	}

	/* argv[0] is the sub-command name itself and is replaced above. */
	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_buildid_list(i, rec_argv, NULL);
}
1592 int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
1594 const char *file_name = NULL;
1595 const struct option kvm_options[] = {
1596 OPT_STRING('i', "input", &file_name, "file",
1598 OPT_STRING('o', "output", &file_name, "file",
1599 "Output file name"),
1600 OPT_BOOLEAN(0, "guest", &perf_guest,
1601 "Collect guest os data"),
1602 OPT_BOOLEAN(0, "host", &perf_host,
1603 "Collect host os data"),
1604 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
1605 "guest mount directory under which every guest os"
1606 " instance has a subdir"),
1607 OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
1608 "file", "file saving guest os vmlinux"),
1609 OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
1610 "file", "file saving guest os /proc/kallsyms"),
1611 OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
1612 "file", "file saving guest os /proc/modules"),
1617 const char * const kvm_usage[] = {
1618 "perf kvm [<options>] {top|record|report|diff|buildid-list|stat}",
1625 argc = parse_options(argc, argv, kvm_options, kvm_usage,
1626 PARSE_OPT_STOP_AT_NON_OPTION);
1628 usage_with_options(kvm_usage, kvm_options);
1634 if (perf_host && !perf_guest)
1635 file_name = strdup("perf.data.host");
1636 else if (!perf_host && perf_guest)
1637 file_name = strdup("perf.data.guest");
1639 file_name = strdup("perf.data.kvm");
1642 pr_err("Failed to allocate memory for filename\n");
1647 if (!strncmp(argv[0], "rec", 3))
1648 return __cmd_record(file_name, argc, argv);
1649 else if (!strncmp(argv[0], "rep", 3))
1650 return __cmd_report(file_name, argc, argv);
1651 else if (!strncmp(argv[0], "diff", 4))
1652 return cmd_diff(argc, argv, NULL);
1653 else if (!strncmp(argv[0], "top", 3))
1654 return cmd_top(argc, argv, NULL);
1655 else if (!strncmp(argv[0], "buildid-list", 12))
1656 return __cmd_buildid_list(file_name, argc, argv);
1657 #if defined(__i386__) || defined(__x86_64__)
1658 else if (!strncmp(argv[0], "stat", 4))
1659 return kvm_cmd_stat(file_name, argc, argv);
1662 usage_with_options(kvm_usage, kvm_options);