// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */
9 #include <linux/kvm_host.h>
10 #include <linux/perf_event.h>
11 #include <linux/perf/arm_pmu.h>
12 #include <linux/uaccess.h>
13 #include <asm/kvm_emulate.h>
14 #include <kvm/arm_pmu.h>
15 #include <kvm/arm_vgic.h>
17 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
19 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
21 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
23 static u32 kvm_pmu_event_mask(struct kvm *kvm)
25 switch (kvm->arch.pmuver) {
26 case ID_AA64DFR0_PMUVER_8_0:
28 case ID_AA64DFR0_PMUVER_8_1:
29 case ID_AA64DFR0_PMUVER_8_4:
30 case ID_AA64DFR0_PMUVER_8_5:
31 return GENMASK(15, 0);
32 default: /* Shouldn't be here, just for sanity */
33 WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
39 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
40 * @vcpu: The vcpu pointer
41 * @select_idx: The counter index
43 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
45 return (select_idx == ARMV8_PMU_CYCLE_IDX &&
46 __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
49 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
52 struct kvm_vcpu_arch *vcpu_arch;
55 pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
56 vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
57 return container_of(vcpu_arch, struct kvm_vcpu, arch);
61 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
62 * @pmc: The PMU counter pointer
64 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
66 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
68 return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
72 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
73 * @select_idx: The counter index
75 static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
77 return select_idx & 0x1;
81 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
82 * @pmc: The PMU counter pointer
84 * When a pair of PMCs are chained together we use the low counter (canonical)
85 * to hold the underlying perf event.
87 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
89 if (kvm_pmu_pmc_is_chained(pmc) &&
90 kvm_pmu_idx_is_high_counter(pmc->idx))
95 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
97 if (kvm_pmu_idx_is_high_counter(pmc->idx))
104 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
105 * @vcpu: The vcpu pointer
106 * @select_idx: The counter index
108 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
114 if (select_idx == ARMV8_PMU_CYCLE_IDX)
117 reg = PMEVTYPER0_EL0 + select_idx;
118 eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
120 return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
124 * kvm_pmu_get_pair_counter_value - get PMU counter value
125 * @vcpu: The vcpu pointer
126 * @pmc: The PMU counter pointer
128 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
131 u64 counter, counter_high, reg, enabled, running;
133 if (kvm_pmu_pmc_is_chained(pmc)) {
134 pmc = kvm_pmu_get_canonical_pmc(pmc);
135 reg = PMEVCNTR0_EL0 + pmc->idx;
137 counter = __vcpu_sys_reg(vcpu, reg);
138 counter_high = __vcpu_sys_reg(vcpu, reg + 1);
140 counter = lower_32_bits(counter) | (counter_high << 32);
142 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
143 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
144 counter = __vcpu_sys_reg(vcpu, reg);
148 * The real counter value is equal to the value of counter register plus
149 * the value perf event counts.
152 counter += perf_event_read_value(pmc->perf_event, &enabled,
159 * kvm_pmu_get_counter_value - get PMU counter value
160 * @vcpu: The vcpu pointer
161 * @select_idx: The counter index
163 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
166 struct kvm_pmu *pmu = &vcpu->arch.pmu;
167 struct kvm_pmc *pmc = &pmu->pmc[select_idx];
169 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
171 if (kvm_pmu_pmc_is_chained(pmc) &&
172 kvm_pmu_idx_is_high_counter(select_idx))
173 counter = upper_32_bits(counter);
174 else if (select_idx != ARMV8_PMU_CYCLE_IDX)
175 counter = lower_32_bits(counter);
181 * kvm_pmu_set_counter_value - set PMU counter value
182 * @vcpu: The vcpu pointer
183 * @select_idx: The counter index
184 * @val: The counter value
186 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
190 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
191 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
192 __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
194 /* Recreate the perf event to reflect the updated sample_period */
195 kvm_pmu_create_perf_event(vcpu, select_idx);
199 * kvm_pmu_release_perf_event - remove the perf event
200 * @pmc: The PMU counter pointer
202 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
204 pmc = kvm_pmu_get_canonical_pmc(pmc);
205 if (pmc->perf_event) {
206 perf_event_disable(pmc->perf_event);
207 perf_event_release_kernel(pmc->perf_event);
208 pmc->perf_event = NULL;
213 * kvm_pmu_stop_counter - stop PMU counter
214 * @pmc: The PMU counter pointer
216 * If this counter has been configured to monitor some event, release it here.
218 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
220 u64 counter, reg, val;
222 pmc = kvm_pmu_get_canonical_pmc(pmc);
223 if (!pmc->perf_event)
226 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
228 if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
232 reg = PMEVCNTR0_EL0 + pmc->idx;
233 val = lower_32_bits(counter);
236 __vcpu_sys_reg(vcpu, reg) = val;
238 if (kvm_pmu_pmc_is_chained(pmc))
239 __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
241 kvm_pmu_release_perf_event(pmc);
245 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
246 * @vcpu: The vcpu pointer
249 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
252 struct kvm_pmu *pmu = &vcpu->arch.pmu;
254 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
259 * kvm_pmu_vcpu_reset - reset pmu state for cpu
260 * @vcpu: The vcpu pointer
263 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
265 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
266 struct kvm_pmu *pmu = &vcpu->arch.pmu;
269 for_each_set_bit(i, &mask, 32)
270 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
272 bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
276 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
277 * @vcpu: The vcpu pointer
280 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
283 struct kvm_pmu *pmu = &vcpu->arch.pmu;
285 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
286 kvm_pmu_release_perf_event(&pmu->pmc[i]);
287 irq_work_sync(&vcpu->arch.pmu.overflow_work);
290 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
292 u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
294 val &= ARMV8_PMU_PMCR_N_MASK;
296 return BIT(ARMV8_PMU_CYCLE_IDX);
298 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
302 * kvm_pmu_enable_counter_mask - enable selected PMU counters
303 * @vcpu: The vcpu pointer
304 * @val: the value guest writes to PMCNTENSET register
306 * Call perf_event_enable to start counting the perf event
308 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
311 struct kvm_pmu *pmu = &vcpu->arch.pmu;
314 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
317 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
323 /* A change in the enable state may affect the chain state */
324 kvm_pmu_update_pmc_chained(vcpu, i);
325 kvm_pmu_create_perf_event(vcpu, i);
327 /* At this point, pmc must be the canonical */
328 if (pmc->perf_event) {
329 perf_event_enable(pmc->perf_event);
330 if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
331 kvm_debug("fail to enable perf event\n");
337 * kvm_pmu_disable_counter_mask - disable selected PMU counters
338 * @vcpu: The vcpu pointer
339 * @val: the value guest writes to PMCNTENCLR register
341 * Call perf_event_disable to stop counting the perf event
343 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
346 struct kvm_pmu *pmu = &vcpu->arch.pmu;
352 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
358 /* A change in the enable state may affect the chain state */
359 kvm_pmu_update_pmc_chained(vcpu, i);
360 kvm_pmu_create_perf_event(vcpu, i);
362 /* At this point, pmc must be the canonical */
364 perf_event_disable(pmc->perf_event);
368 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
372 if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
373 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
374 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
375 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
381 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
383 struct kvm_pmu *pmu = &vcpu->arch.pmu;
386 if (!kvm_vcpu_has_pmu(vcpu))
389 overflow = !!kvm_pmu_overflow_status(vcpu);
390 if (pmu->irq_level == overflow)
393 pmu->irq_level = overflow;
395 if (likely(irqchip_in_kernel(vcpu->kvm))) {
396 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
397 pmu->irq_num, overflow, pmu);
402 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
404 struct kvm_pmu *pmu = &vcpu->arch.pmu;
405 struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
406 bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
408 if (likely(irqchip_in_kernel(vcpu->kvm)))
411 return pmu->irq_level != run_level;
415 * Reflect the PMU overflow interrupt output level into the kvm_run structure
417 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
419 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
421 /* Populate the timer bitmap for user space */
422 regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
423 if (vcpu->arch.pmu.irq_level)
424 regs->device_irq_level |= KVM_ARM_DEV_PMU;
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
452 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
454 * This is why we need a callback to do it once outside of the NMI context.
456 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
458 struct kvm_vcpu *vcpu;
461 pmu = container_of(work, struct kvm_pmu, overflow_work);
462 vcpu = kvm_pmc_to_vcpu(pmu->pmc);
468 * When the perf event overflows, set the overflow status and inform the vcpu.
470 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
471 struct perf_sample_data *data,
472 struct pt_regs *regs)
474 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
475 struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
476 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
480 cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
483 * Reset the sample period to the architectural limit,
484 * i.e. the point where the counter overflows.
486 period = -(local64_read(&perf_event->count));
488 if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
489 period &= GENMASK(31, 0);
491 local64_set(&perf_event->hw.period_left, 0);
492 perf_event->attr.sample_period = period;
493 perf_event->hw.sample_period = period;
495 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
497 if (kvm_pmu_overflow_status(vcpu)) {
498 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
503 irq_work_queue(&vcpu->arch.pmu.overflow_work);
506 cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
510 * kvm_pmu_software_increment - do software increment
511 * @vcpu: The vcpu pointer
512 * @val: the value guest writes to PMSWINC register
514 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
516 struct kvm_pmu *pmu = &vcpu->arch.pmu;
519 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
522 /* Weed out disabled counters */
523 val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
525 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
531 /* PMSWINC only applies to ... SW_INC! */
532 type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
533 type &= kvm_pmu_event_mask(vcpu->kvm);
534 if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
537 /* increment this even SW_INC counter */
538 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
539 reg = lower_32_bits(reg);
540 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
542 if (reg) /* no overflow on the low part */
545 if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
546 /* increment the high counter */
547 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
548 reg = lower_32_bits(reg);
549 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
550 if (!reg) /* mark overflow on the high counter */
551 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
553 /* mark overflow on low counter */
554 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
560 * kvm_pmu_handle_pmcr - handle PMCR register
561 * @vcpu: The vcpu pointer
562 * @val: the value guest writes to PMCR register
564 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
568 if (val & ARMV8_PMU_PMCR_E) {
569 kvm_pmu_enable_counter_mask(vcpu,
570 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
572 kvm_pmu_disable_counter_mask(vcpu,
573 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
576 if (val & ARMV8_PMU_PMCR_C)
577 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
579 if (val & ARMV8_PMU_PMCR_P) {
580 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
581 mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
582 for_each_set_bit(i, &mask, 32)
583 kvm_pmu_set_counter_value(vcpu, i, 0);
587 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
589 return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
590 (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
594 * kvm_pmu_create_perf_event - create a perf event for a counter
595 * @vcpu: The vcpu pointer
596 * @select_idx: The number of selected counter
598 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
600 struct kvm_pmu *pmu = &vcpu->arch.pmu;
602 struct perf_event *event;
603 struct perf_event_attr attr;
604 u64 eventsel, counter, reg, data;
607 * For chained counters the event type and filtering attributes are
608 * obtained from the low/even counter. We also use this counter to
609 * determine if the event is enabled/disabled.
611 pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
613 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
614 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
615 data = __vcpu_sys_reg(vcpu, reg);
617 kvm_pmu_stop_counter(vcpu, pmc);
618 if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
619 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
621 eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
623 /* Software increment event doesn't need to be backed by a perf event */
624 if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
628 * If we have a filter in place and that the event isn't allowed, do
629 * not install a perf event either.
631 if (vcpu->kvm->arch.pmu_filter &&
632 !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
635 memset(&attr, 0, sizeof(struct perf_event_attr));
636 attr.type = PERF_TYPE_RAW;
637 attr.size = sizeof(attr);
639 attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
640 attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
641 attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
642 attr.exclude_hv = 1; /* Don't count EL2 events */
643 attr.exclude_host = 1; /* Don't count host events */
644 attr.config = eventsel;
646 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
648 if (kvm_pmu_pmc_is_chained(pmc)) {
650 * The initial sample period (overflow count) of an event. For
651 * chained counters we only support overflow interrupts on the
654 attr.sample_period = (-counter) & GENMASK(63, 0);
655 attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
657 event = perf_event_create_kernel_counter(&attr, -1, current,
658 kvm_pmu_perf_overflow,
661 /* The initial sample period (overflow count) of an event. */
662 if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
663 attr.sample_period = (-counter) & GENMASK(63, 0);
665 attr.sample_period = (-counter) & GENMASK(31, 0);
667 event = perf_event_create_kernel_counter(&attr, -1, current,
668 kvm_pmu_perf_overflow, pmc);
672 pr_err_once("kvm: pmu event creation failed %ld\n",
677 pmc->perf_event = event;
681 * kvm_pmu_update_pmc_chained - update chained bitmap
682 * @vcpu: The vcpu pointer
683 * @select_idx: The number of selected counter
685 * Update the chained bitmap based on the event type written in the
686 * typer register and the enable state of the odd register.
688 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
690 struct kvm_pmu *pmu = &vcpu->arch.pmu;
691 struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
692 bool new_state, old_state;
694 old_state = kvm_pmu_pmc_is_chained(pmc);
695 new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
696 kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
698 if (old_state == new_state)
701 canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
702 kvm_pmu_stop_counter(vcpu, canonical_pmc);
705 * During promotion from !chained to chained we must ensure
706 * the adjacent counter is stopped and its event destroyed
708 kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
709 set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
712 clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
716 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
717 * @vcpu: The vcpu pointer
718 * @data: The data guest writes to PMXEVTYPER_EL0
719 * @select_idx: The number of selected counter
721 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
722 * event with given hardware event number. Here we call perf_event API to
723 * emulate this action and create a kernel perf event for it.
725 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
730 mask = ARMV8_PMU_EVTYPE_MASK;
731 mask &= ~ARMV8_PMU_EVTYPE_EVENT;
732 mask |= kvm_pmu_event_mask(vcpu->kvm);
734 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
735 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
737 __vcpu_sys_reg(vcpu, reg) = data & mask;
739 kvm_pmu_update_pmc_chained(vcpu, select_idx);
740 kvm_pmu_create_perf_event(vcpu, select_idx);
743 void kvm_host_pmu_init(struct arm_pmu *pmu)
745 if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
746 !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
747 static_branch_enable(&kvm_arm_pmu_available);
750 static int kvm_pmu_probe_pmuver(void)
752 struct perf_event_attr attr = { };
753 struct perf_event *event;
755 int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
758 * Create a dummy event that only counts user cycles. As we'll never
759 * leave this function with the event being live, it will never
760 * count anything. But it allows us to probe some of the PMU
761 * details. Yes, this is terrible.
763 attr.type = PERF_TYPE_RAW;
764 attr.size = sizeof(attr);
767 attr.exclude_user = 0;
768 attr.exclude_kernel = 1;
770 attr.exclude_host = 1;
771 attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
772 attr.sample_period = GENMASK(63, 0);
774 event = perf_event_create_kernel_counter(&attr, -1, current,
775 kvm_pmu_perf_overflow, &attr);
778 pr_err_once("kvm: pmu event creation failed %ld\n",
780 return ID_AA64DFR0_PMUVER_IMP_DEF;
784 pmu = to_arm_pmu(event->pmu);
786 pmuver = pmu->pmuver;
789 perf_event_disable(event);
790 perf_event_release_kernel(event);
795 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
797 unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
799 int base, i, nr_events;
802 val = read_sysreg(pmceid0_el0);
805 val = read_sysreg(pmceid1_el0);
807 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
810 if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
811 val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
818 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
820 for (i = 0; i < 32; i += 8) {
823 byte = bitmap_get_value8(bmap, base + i);
825 if (nr_events >= (0x4000 + base + 32)) {
826 byte = bitmap_get_value8(bmap, 0x4000 + base + i);
827 mask |= byte << (32 + i);
834 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
836 if (!kvm_vcpu_has_pmu(vcpu))
839 if (!vcpu->arch.pmu.created)
843 * A valid interrupt configuration for the PMU is either to have a
844 * properly configured interrupt number and using an in-kernel
845 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
847 if (irqchip_in_kernel(vcpu->kvm)) {
848 int irq = vcpu->arch.pmu.irq_num;
850 * If we are using an in-kernel vgic, at this point we know
851 * the vgic will be initialized, so we can check the PMU irq
852 * number against the dimensions of the vgic and make sure
855 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
857 } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
861 /* One-off reload of the PMU on first run */
862 kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
867 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
869 if (irqchip_in_kernel(vcpu->kvm)) {
873 * If using the PMU with an in-kernel virtual GIC
874 * implementation, we require the GIC to be already
875 * initialized when initializing the PMU.
877 if (!vgic_initialized(vcpu->kvm))
880 if (!kvm_arm_pmu_irq_initialized(vcpu))
883 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
889 init_irq_work(&vcpu->arch.pmu.overflow_work,
890 kvm_pmu_perf_overflow_notify_vcpu);
892 vcpu->arch.pmu.created = true;
897 * For one VM the interrupt type must be same for each vcpu.
898 * As a PPI, the interrupt number is the same for all vcpus,
899 * while as an SPI it must be a separate number per vcpu.
901 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
904 struct kvm_vcpu *vcpu;
906 kvm_for_each_vcpu(i, vcpu, kvm) {
907 if (!kvm_arm_pmu_irq_initialized(vcpu))
910 if (irq_is_ppi(irq)) {
911 if (vcpu->arch.pmu.irq_num != irq)
914 if (vcpu->arch.pmu.irq_num == irq)
922 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
924 if (!kvm_vcpu_has_pmu(vcpu))
927 if (vcpu->arch.pmu.created)
930 if (!vcpu->kvm->arch.pmuver)
931 vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
933 if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
936 switch (attr->attr) {
937 case KVM_ARM_VCPU_PMU_V3_IRQ: {
938 int __user *uaddr = (int __user *)(long)attr->addr;
941 if (!irqchip_in_kernel(vcpu->kvm))
944 if (get_user(irq, uaddr))
947 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
948 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
951 if (!pmu_irq_is_valid(vcpu->kvm, irq))
954 if (kvm_arm_pmu_irq_initialized(vcpu))
957 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
958 vcpu->arch.pmu.irq_num = irq;
961 case KVM_ARM_VCPU_PMU_V3_FILTER: {
962 struct kvm_pmu_event_filter __user *uaddr;
963 struct kvm_pmu_event_filter filter;
966 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
968 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
970 if (copy_from_user(&filter, uaddr, sizeof(filter)))
973 if (((u32)filter.base_event + filter.nevents) > nr_events ||
974 (filter.action != KVM_PMU_EVENT_ALLOW &&
975 filter.action != KVM_PMU_EVENT_DENY))
978 mutex_lock(&vcpu->kvm->lock);
980 if (!vcpu->kvm->arch.pmu_filter) {
981 vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
982 if (!vcpu->kvm->arch.pmu_filter) {
983 mutex_unlock(&vcpu->kvm->lock);
988 * The default depends on the first applied filter.
989 * If it allows events, the default is to deny.
990 * Conversely, if the first filter denies a set of
991 * events, the default is to allow.
993 if (filter.action == KVM_PMU_EVENT_ALLOW)
994 bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
996 bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
999 if (filter.action == KVM_PMU_EVENT_ALLOW)
1000 bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1002 bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1004 mutex_unlock(&vcpu->kvm->lock);
1008 case KVM_ARM_VCPU_PMU_V3_INIT:
1009 return kvm_arm_pmu_v3_init(vcpu);
1015 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1017 switch (attr->attr) {
1018 case KVM_ARM_VCPU_PMU_V3_IRQ: {
1019 int __user *uaddr = (int __user *)(long)attr->addr;
1022 if (!irqchip_in_kernel(vcpu->kvm))
1025 if (!kvm_vcpu_has_pmu(vcpu))
1028 if (!kvm_arm_pmu_irq_initialized(vcpu))
1031 irq = vcpu->arch.pmu.irq_num;
1032 return put_user(irq, uaddr);
1039 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1041 switch (attr->attr) {
1042 case KVM_ARM_VCPU_PMU_V3_IRQ:
1043 case KVM_ARM_VCPU_PMU_V3_INIT:
1044 case KVM_ARM_VCPU_PMU_V3_FILTER:
1045 if (kvm_vcpu_has_pmu(vcpu))