Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Correctly save/restore PMUSERENR_EL0 when host userspace is using
     PMU counters directly

   - Fix GICv2 emulation on GICv3 after the locking rework

   - Don't use smp_processor_id() in kvm_pmu_probe_armpmu(), and
     document why

  Generic:

   - Avoid setting page table entries pointing to a deleted memslot if a
     host page table entry is changed concurrently with the deletion"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: Avoid illegal stage2 mapping on invalid memory slot
  KVM: arm64: Use raw_smp_processor_id() in kvm_pmu_probe_armpmu()
  KVM: arm64: Restore GICv2-on-GICv3 functionality
  KVM: arm64: PMU: Don't overwrite PMUSERENR with vcpu loaded
  KVM: arm64: PMU: Restore the host's PMUSERENR_EL0

arch/arm/include/asm/arm_pmuv3.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/pmu.c
arch/arm64/kvm/vgic/vgic-init.c
drivers/perf/arm_pmuv3.c
virt/kvm/kvm_main.c

index f4db3e7..f3cd04f 100644 (file)
@@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
        return false;
 }
 
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+       return false;
+}
+
 /* PMU Version in DFR Register */
 #define ARMV8_PMU_DFR_VER_NI        0
 #define ARMV8_PMU_DFR_VER_V3P4      0x5
index 7e7e19e..9787503 100644 (file)
@@ -699,6 +699,8 @@ struct kvm_vcpu_arch {
 #define SYSREGS_ON_CPU         __vcpu_single_flag(sflags, BIT(4))
 /* Software step state is Active-pending */
 #define DBG_SS_ACTIVE_PENDING  __vcpu_single_flag(sflags, BIT(5))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU       __vcpu_single_flag(sflags, BIT(6))
 
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1065,9 +1067,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
 #ifdef CONFIG_KVM
 void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
 void kvm_clr_pmu_events(u32 clr);
+bool kvm_set_pmuserenr(u64 val);
 #else
 static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+       return false;
+}
 #endif
 
 void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
index 5c15c58..4fe217e 100644 (file)
@@ -82,8 +82,14 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
         * EL1 instead of being trapped to EL2.
         */
        if (kvm_arm_support_pmu_v3()) {
+               struct kvm_cpu_context *hctxt;
+
                write_sysreg(0, pmselr_el0);
+
+               hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+               ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
                write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+               vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
        }
 
        vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
@@ -106,8 +112,13 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
        write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
 
        write_sysreg(0, hstr_el2);
-       if (kvm_arm_support_pmu_v3())
-               write_sysreg(0, pmuserenr_el0);
+       if (kvm_arm_support_pmu_v3()) {
+               struct kvm_cpu_context *hctxt;
+
+               hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+               write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
+               vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
+       }
 
        if (cpus_have_final_cap(ARM64_SME)) {
                sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
index 7a1aa51..b37e7c9 100644 (file)
@@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 }
 NOKPROBE_SYMBOL(__deactivate_traps);
 
+/*
+ * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * prevent a race condition between context switching of PMUSERENR_EL0
+ * in __{activate,deactivate}_traps_common() and IPIs that attempt to
+ * update PMUSERENR_EL0. See also kvm_set_pmuserenr().
+ */
 void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
 {
+       unsigned long flags;
+
+       local_irq_save(flags);
        __activate_traps_common(vcpu);
+       local_irq_restore(flags);
 }
 
 void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
 {
+       unsigned long flags;
+
+       local_irq_save(flags);
        __deactivate_traps_common(vcpu);
+       local_irq_restore(flags);
 }
 
 static const exit_handler_fn hyp_exit_handlers[] = {
index 491ca7e..5606509 100644 (file)
@@ -700,7 +700,25 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
 
        mutex_lock(&arm_pmus_lock);
 
-       cpu = smp_processor_id();
+       /*
+        * It is safe to use a stale cpu to iterate the list of PMUs so long as
+        * the same value is used for the entirety of the loop. Given this, and
+        * the fact that no percpu data is used for the lookup there is no need
+        * to disable preemption.
+        *
+        * It is still necessary to get a valid cpu, though, to probe for the
+        * default PMU instance as userspace is not required to specify a PMU
+        * type. In order to uphold the preexisting behavior KVM selects the
+        * PMU instance for the core where the first call to the
+        * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case
+        * would be a user with disdain of all things big.LITTLE that affines
+        * the VMM to a particular cluster of cores.
+        *
+        * In any case, userspace should just do the sane thing and use the UAPI
+        * to select a PMU type directly. But, be wary of the baggage being
+        * carried here.
+        */
+       cpu = raw_smp_processor_id();
        list_for_each_entry(entry, &arm_pmus, entry) {
                tmp = entry->arm_pmu;
 
index 7887133..121f1a1 100644 (file)
@@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
        kvm_vcpu_pmu_enable_el0(events_host);
        kvm_vcpu_pmu_disable_el0(events_guest);
 }
+
+/*
+ * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU
+ * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched
+ * to the value for the guest on vcpu_load().  The value for the host EL0
+ * will be restored on vcpu_put(), before returning to userspace.
+ * This isn't necessary for nVHE, as the register is context switched for
+ * every guest enter/exit.
+ *
+ * Return true if KVM takes care of the register. Otherwise return false.
+ */
+bool kvm_set_pmuserenr(u64 val)
+{
+       struct kvm_cpu_context *hctxt;
+       struct kvm_vcpu *vcpu;
+
+       if (!kvm_arm_support_pmu_v3() || !has_vhe())
+               return false;
+
+       vcpu = kvm_get_running_vcpu();
+       if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
+               return false;
+
+       hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+       ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
+       return true;
+}
index 6eafc2c..c8c3cb8 100644 (file)
@@ -446,6 +446,7 @@ int vgic_lazy_init(struct kvm *kvm)
 int kvm_vgic_map_resources(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
+       enum vgic_type type;
        gpa_t dist_base;
        int ret = 0;
 
@@ -460,10 +461,13 @@ int kvm_vgic_map_resources(struct kvm *kvm)
        if (!irqchip_in_kernel(kvm))
                goto out;
 
-       if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+       if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
                ret = vgic_v2_map_resources(kvm);
-       else
+               type = VGIC_V2;
+       } else {
                ret = vgic_v3_map_resources(kvm);
+               type = VGIC_V3;
+       }
 
        if (ret) {
                __kvm_vgic_destroy(kvm);
@@ -473,8 +477,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
        dist_base = dist->vgic_dist_base;
        mutex_unlock(&kvm->arch.config_lock);
 
-       ret = vgic_register_dist_iodev(kvm, dist_base,
-                                      kvm_vgic_global_state.type);
+       ret = vgic_register_dist_iodev(kvm, dist_base, type);
        if (ret) {
                kvm_err("Unable to register VGIC dist MMIO regions\n");
                kvm_vgic_destroy(kvm);
index c98e403..93b7edb 100644 (file)
@@ -677,9 +677,25 @@ static inline u32 armv8pmu_getreset_flags(void)
        return value;
 }
 
+static void update_pmuserenr(u64 val)
+{
+       lockdep_assert_irqs_disabled();
+
+       /*
+        * The current PMUSERENR_EL0 value might be the value for the guest.
+        * If that's the case, have KVM keep track of the register value
+        * for the host EL0 so that KVM can restore it before returning to
+        * the host EL0. Otherwise, update the register now.
+        */
+       if (kvm_set_pmuserenr(val))
+               return;
+
+       write_pmuserenr(val);
+}
+
 static void armv8pmu_disable_user_access(void)
 {
-       write_pmuserenr(0);
+       update_pmuserenr(0);
 }
 
 static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
@@ -695,8 +711,7 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
                        armv8pmu_write_evcntr(i, 0);
        }
 
-       write_pmuserenr(0);
-       write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
+       update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
 }
 
 static void armv8pmu_enable_event(struct perf_event *event)
index 479802a..65f94f5 100644 (file)
@@ -686,6 +686,24 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
 
        return __kvm_handle_hva_range(kvm, &range);
 }
+
+static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+       /*
+        * Skipping invalid memslots is correct if and only if change_pte() is
+        * surrounded by invalidate_range_{start,end}(), which is currently
+        * guaranteed by the primary MMU.  If that ever changes, KVM needs to
+        * unmap the memslot instead of skipping the memslot to ensure that KVM
+        * doesn't hold references to the old PFN.
+        */
+       WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
+
+       if (range->slot->flags & KVM_MEMSLOT_INVALID)
+               return false;
+
+       return kvm_set_spte_gfn(kvm, range);
+}
+
 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long address,
@@ -707,7 +725,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
        if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                return;
 
-       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
+       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
 }
 
 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,