Merge tag 'kvm-x86-pmu-6.6-fixes' of https://github.com/kvm-x86/linux into HEAD
author Paolo Bonzini <pbonzini@redhat.com>
Sun, 15 Oct 2023 12:24:18 +0000 (08:24 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Sun, 15 Oct 2023 12:24:18 +0000 (08:24 -0400)
KVM x86/pmu fixes for 6.6:

 - Truncate writes to PMU counters to the counter's width to avoid spurious
   overflows when emulating counter events in software.

 - Set the LVTPC entry mask bit when handling a PMI (to match Intel-defined
   architectural behavior).

 - Treat KVM_REQ_PMI as a wake event instead of queueing host IRQ work to
   kick the guest out of emulated halt.
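
A minimal standalone sketch of the failure mode behind the first item (illustrative C, not kernel code; the 48-bit counter width and the sign-extended write value are assumptions picked for the example). If an MSR write is stored without truncating to the counter's width, stale bits above the width make the software-emulation overflow check ("did the value become smaller, i.e. wrap?") fire spuriously on the very next emulated event:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BITMASK ((1ull << 48) - 1)	/* assume a 48-bit general-purpose counter */

/* Emulate one counted event and report whether the counter "overflowed". */
static bool incr_overflows(uint64_t stored)
{
	uint64_t prev = stored;
	uint64_t now  = (stored + 1) & BITMASK;	/* new value is masked to the width */

	return now < prev;			/* overflow heuristic: value wrapped */
}

int main(void)
{
	/* A legacy (non-full-width) MSR write of 0x80000000, sign-extended. */
	uint64_t data = 0xffffffff80000000ull;

	printf("untruncated store overflows: %d\n", incr_overflows(data));		/* 1: spurious */
	printf("truncated store overflows:   %d\n", incr_overflows(data & BITMASK));	/* 0: correct */
	return 0;
}

The pmc_write_counter() helper added to pmu.h below applies exactly that truncation at write time.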

arch/x86/include/asm/kvm_host.h
arch/x86/kvm/lapic.c
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/x86.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb..70d1394 100644
@@ -528,7 +528,6 @@ struct kvm_pmu {
        u64 raw_event_mask;
        struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
        struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
-       struct irq_work irq_work;
 
        /*
         * Overlay the bitmap with a 64-bit atomic so that all bits can be
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dcd60b3..3e977db 100644
@@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
        int vector, mode, trig_mode;
+       int r;
 
        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
                vector = reg & APIC_VECTOR_MASK;
                mode = reg & APIC_MODE_MASK;
                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
-               return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
-                                       NULL);
+
+               r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
+               if (r && lvt_type == APIC_LVTPC)
+                       kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
+               return r;
        }
        return 0;
 }
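
As context for the lapic.c change above, here is a toy model (plain C, not kernel or guest code; the register is reduced to the two fields that matter) of the architectural behavior being implemented: delivering a PMI through the LVT Performance Counter entry sets that entry's mask bit, and no further PMI is delivered until the guest's handler rewrites LVTPC with the mask bit clear:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LVT_MASKED	(1u << 16)
#define LVT_VECTOR(r)	((r) & 0xff)

static uint32_t lvtpc = 0xf0;	/* unmasked, arbitrary vector for the example */

static bool deliver_pmi(void)
{
	if (lvtpc & LVT_MASKED)
		return false;	/* entry is masked: the PMI is not delivered */

	printf("PMI delivered, vector 0x%x\n", (unsigned)LVT_VECTOR(lvtpc));
	lvtpc |= LVT_MASKED;	/* architectural auto-mask on delivery */
	return true;
}

static void guest_pmi_handler(void)
{
	/* ...handle the overflow... then re-arm by clearing the mask bit. */
	lvtpc &= ~LVT_MASKED;
}

int main(void)
{
	deliver_pmi();		/* delivered; entry becomes masked */
	deliver_pmi();		/* suppressed until the handler re-arms */
	guest_pmi_handler();
	deliver_pmi();		/* delivered again */
	return 0;
}
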
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index edb89b5..9ae07db 100644
@@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 #undef __KVM_X86_PMU_OP
 }
 
-static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
-{
-       struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
-       struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
-       kvm_pmu_deliver_pmi(vcpu);
-}
-
 static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 {
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
        }
 
-       if (!pmc->intr || skip_pmi)
-               return;
-
-       /*
-        * Inject PMI. If vcpu was in a guest mode during NMI PMI
-        * can be ejected on a guest mode re-entry. Otherwise we can't
-        * be sure that vcpu wasn't executing hlt instruction at the
-        * time of vmexit and is not going to re-enter guest mode until
-        * woken up. So we should wake it, but this is impossible from
-        * NMI context. Do it from irq work instead.
-        */
-       if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
-               irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
-       else
+       if (pmc->intr && !skip_pmi)
                kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
 }
 
@@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 
 void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-
-       irq_work_sync(&pmu->irq_work);
        static_call(kvm_x86_pmu_reset)(vcpu);
 }
 
@@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
 
        memset(pmu, 0, sizeof(*pmu));
        static_call(kvm_x86_pmu_init)(vcpu);
-       init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
        pmu->event_count = 0;
        pmu->need_cleanup = false;
        kvm_pmu_refresh(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7d9ba30..1d64113 100644
@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
        return counter & pmc_bitmask(pmc);
 }
 
+static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+       pmc->counter += val - pmc_read_counter(pmc);
+       pmc->counter &= pmc_bitmask(pmc);
+}
+
 static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
 {
        if (pmc->perf_event) {
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cef5a3d..373ff6a 100644
@@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        /* MSR_PERFCTRn */
        pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
        if (pmc) {
-               pmc->counter += data - pmc_read_counter(pmc);
+               pmc_write_counter(pmc, data);
                pmc_update_sample_period(pmc);
                return 0;
        }
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f2efa0b..820d3e1 100644
@@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        if (!msr_info->host_initiated &&
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
-                       pmc->counter += data - pmc_read_counter(pmc);
+                       pmc_write_counter(pmc, data);
                        pmc_update_sample_period(pmc);
                        break;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
-                       pmc->counter += data - pmc_read_counter(pmc);
+                       pmc_write_counter(pmc, data);
                        pmc_update_sample_period(pmc);
                        break;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1e645f5..41cce50 100644
@@ -12854,6 +12854,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
                return true;
 #endif
 
+       if (kvm_test_request(KVM_REQ_PMI, vcpu))
+               return true;
+
        if (kvm_arch_interrupt_allowed(vcpu) &&
            (kvm_cpu_has_interrupt(vcpu) ||
            kvm_guest_apic_has_interrupt(vcpu)))
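
Finally, a toy model (illustrative C only; the structure and function names are made up, not KVM's) of why the kvm_vcpu_has_events() hunk above is needed once the irq_work path in pmu.c is gone: emulated halt blocks until the vCPU "has an event", so a pending KVM_REQ_PMI must itself count as a wake event, or a PMI raised while the guest is halted could leave the vCPU asleep:

#include <stdatomic.h>
#include <stdbool.h>

struct toy_vcpu {
	atomic_bool pmi_requested;	/* stands in for KVM_REQ_PMI being set */
	atomic_bool irq_pending;	/* stands in for an injectable interrupt */
};

static bool vcpu_has_wake_event(struct toy_vcpu *v)
{
	/* The fix, in spirit: a pending PMI request is itself a wake event. */
	if (atomic_load(&v->pmi_requested))
		return true;

	return atomic_load(&v->irq_pending);
}

static void emulate_hlt(struct toy_vcpu *v)
{
	while (!vcpu_has_wake_event(v))
		;	/* KVM blocks/schedules here rather than spinning */
}

int main(void)
{
	struct toy_vcpu v = { 0 };

	atomic_store(&v.pmi_requested, true);	/* PMI arrives while "halted" */
	emulate_hlt(&v);			/* returns immediately thanks to the check */
	return 0;
}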