KVM: arm64: Use local TLBI on permission relaxation
author Marc Zyngier <maz@kernel.org>
Wed, 26 Apr 2023 17:23:30 +0000
committer Oliver Upton <oliver.upton@linux.dev>
Tue, 16 May 2023 17:39:19 +0000
Broadcast TLB invalidations (TLBIs) targeting the Inner Shareable
domain are usually slower than their non-shareable counterparts. In
particular, we observed some implementations that take milliseconds
to complete parallel broadcast TLBIs.

It's safe to use non-shareable TLBIs when relaxing permissions on a
PTE in the KVM case.  According to the ARM ARM (0487I.a) section
D8.13.1 "Using break-before-make when updating translation table
entries", permission relaxation does not need break-before-make.
Specifically, R_WHZWS states that only the following changes require a
break-before-make sequence: a change of memory type (Shareability or
Cacheability), a change of address, or a change of block size.
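
For illustration, the broadcast and local invalidation sequences differ
only in the shareability of the barriers and TLBI operations. The
inner-shareable sequence below is paraphrased from the existing
__kvm_tlb_flush_vmid_ipa helper and is shown only for contrast; the
non-shareable sequence is the one this patch introduces. __tlbi() and
__tlbi_level() come from <asm/tlbflush.h>, dsb()/isb() from
<asm/barrier.h>, and the IPA is shifted by 12 because the TLBI address
field is encoded in 4KiB units:

	/*
	 * Broadcast (paraphrased from the existing helper, not part of
	 * this patch): invalidates TLB entries on every CPU in the
	 * Inner Shareable domain.
	 */
	dsb(ishst);
	__tlbi_level(ipas2e1is, ipa >> 12, level);
	dsb(ish);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();

	/*
	 * Local (what this patch uses for permission relaxation):
	 * invalidates only this CPU's TLB. A remote CPU still holding
	 * the old, more restrictive entry merely takes a spurious
	 * permission fault and re-walks the updated tables.
	 */
	dsb(nshst);
	__tlbi_level(ipas2e1, ipa >> 12, level);
	dsb(nsh);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();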

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/20230426172330.1439644-13-ricarkol@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
arch/arm64/include/asm/kvm_asm.h
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/tlb.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 43c3bc0..bb17b2e 100644
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+       __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
        __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
                                     int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+                                        phys_addr_t ipa,
+                                        int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 728e01d..c6bf1e4 100644
@@ -125,6 +125,15 @@ static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
        __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
 }
 
+static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+       DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
+       DECLARE_REG(int, level, host_ctxt, 3);
+
+       __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -315,6 +324,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_vcpu_run),
        HANDLE_FUNC(__kvm_flush_vm_context),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
        HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 9781791..b9991bb 100644
@@ -130,6 +130,58 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+                                 phys_addr_t ipa, int level)
+{
+       struct tlb_inv_context cxt;
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt, true);
+
+       /*
+        * We could do so much better if we had the VA as well.
+        * Instead, we invalidate Stage-2 for this IPA, and the
+        * whole of Stage-1. Weep...
+        */
+       ipa >>= 12;
+       __tlbi_level(ipas2e1, ipa, level);
+
+       /*
+        * We have to ensure completion of the invalidation at Stage-2,
+        * since a table walk on another CPU could refill a TLB with a
+        * complete (S1 + S2) walk based on the old Stage-2 mapping if
+        * the Stage-1 invalidation happened first.
+        */
+       dsb(nsh);
+       __tlbi(vmalle1);
+       dsb(nsh);
+       isb();
+
+       /*
+        * If the host is running at EL1 and we have a VPIPT I-cache,
+        * then we must perform I-cache maintenance at EL2 in order for
+        * it to have an effect on the guest. Since the guest cannot hit
+        * I-cache lines allocated with a different VMID, we don't need
+        * to worry about junk out of guest reset (we nuke the I-cache on
+        * VMID rollover), but we do need to be careful when remapping
+        * executable pages for the same guest. This can happen when KSM
+        * takes a CoW fault on an executable page, copies the page into
+        * a page that was previously mapped in the guest and then needs
+        * to invalidate the guest view of the I-cache for that page
+        * from EL1. To solve this, we invalidate the entire I-cache when
+        * unmapping a page from a guest if we have a VPIPT I-cache but
+        * the host is running at EL1. As above, we could do better if
+        * we had the VA.
+        *
+        * The moral of this story is: if you have a VPIPT I-cache, then
+        * you should be running with VHE enabled.
+        */
+       if (icache_is_vpipt())
+               icache_inval_all_pou();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 8b03cd6..2dd7e4a 100644
@@ -1189,7 +1189,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                                       KVM_PGTABLE_WALK_HANDLE_FAULT |
                                       KVM_PGTABLE_WALK_SHARED);
        if (!ret)
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
+               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
        return ret;
 }
 
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 24cef9b..e69da55 100644
@@ -111,6 +111,38 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+                                 phys_addr_t ipa, int level)
+{
+       struct tlb_inv_context cxt;
+
+       dsb(nshst);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt);
+
+       /*
+        * We could do so much better if we had the VA as well.
+        * Instead, we invalidate Stage-2 for this IPA, and the
+        * whole of Stage-1. Weep...
+        */
+       ipa >>= 12;
+       __tlbi_level(ipas2e1, ipa, level);
+
+       /*
+        * We have to ensure completion of the invalidation at Stage-2,
+        * since a table walk on another CPU could refill a TLB with a
+        * complete (S1 + S2) walk based on the old Stage-2 mapping if
+        * the Stage-1 invalidation happened first.
+        */
+       dsb(nsh);
+       __tlbi(vmalle1);
+       dsb(nsh);
+       isb();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;