Merge branch kvm-arm64/tlbi-range into kvmarm-master/next
author		Marc Zyngier <maz@kernel.org>
		Mon, 28 Aug 2023 08:29:02 +0000 (09:29 +0100)
committer	Marc Zyngier <maz@kernel.org>
		Mon, 28 Aug 2023 08:29:02 +0000 (09:29 +0100)
* kvm-arm64/tlbi-range:
  : .
  : FEAT_TLBIRANGE support, courtesy of Raghavendra Rao Ananta.
  : From the cover letter:
  :
  : "In certain code paths, KVM/ARM currently invalidates the entire VM's
  : page-tables instead of just invalidating a necessary range. For example,
  : when collapsing a table PTE to a block PTE, instead of iterating over
  : each PTE and flushing them, KVM uses 'vmalls12e1is' TLBI operation to
  : flush all the entries. This is inefficient since the guest would have
  : to refill the TLBs again, even for the addresses that aren't covered
  : by the table entry. The performance impact would scale poorly if many
  : addresses in the VM are going through this remapping.
  :
  : For architectures that implement FEAT_TLBIRANGE, KVM can replace such
  : inefficient paths by performing the invalidations only on the range of
  : addresses that are in scope. This series tries to achieve the same in
  : the areas of stage-2 map, unmap and write-protecting the pages."
  : .
  KVM: arm64: Use TLBI range-based instructions for unmap
  KVM: arm64: Invalidate the table entries upon a range
  KVM: arm64: Flush only the memslot after write-protect
  KVM: arm64: Implement kvm_arch_flush_remote_tlbs_range()
  KVM: arm64: Define kvm_tlb_flush_vmid_range()
  KVM: arm64: Implement __kvm_tlb_flush_vmid_range()
  arm64: tlb: Implement __flush_s2_tlb_range_op()
  arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
  KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code
  KVM: Allow range-based TLB invalidation from common code
  KVM: Remove CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
  KVM: arm64: Use kvm_arch_flush_remote_tlbs()
  KVM: Declare kvm_arch_flush_remote_tlbs() globally
  KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs()

Signed-off-by: Marc Zyngier <maz@kernel.org>
21 files changed:
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/kvm/Kconfig
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/mips/include/asm/kvm_host.h
arch/mips/kvm/mips.c
arch/riscv/kvm/mmu.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/Kconfig
virt/kvm/kvm_main.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 7d170aa..2c27cb8 100644
@@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+       __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
        __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
        __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
                                         phys_addr_t ipa,
                                         int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                                       phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 616dca4..967ee7e 100644
@@ -1120,6 +1120,10 @@ int __init kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
        return false;
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 929d355..d3e354b 100644
@@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  *        kvm_pgtable_prot format.
  */
 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu:       Stage-2 KVM MMU struct
+ * @addr:      The base Intermediate physical address from which to invalidate
+ * @size:      Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 412a3b9..93f4b39 100644
@@ -278,14 +278,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLBI_OPS   PTRS_PER_PTE
 
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op:        TLBI instruction that operates on a range (has 'r' prefix)
+ * @start:     The start address of the range
+ * @pages:     Range as the number of pages from 'start'
+ * @stride:    Flush granularity
+ * @asid:      The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ *              (typically for user ASIDs). 'false' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,                 \
+                               asid, tlb_level, tlbi_user)             \
+do {                                                                   \
+       int num = 0;                                                    \
+       int scale = 0;                                                  \
+       unsigned long addr;                                             \
+                                                                       \
+       while (pages > 0) {                                             \
+               if (!system_supports_tlb_range() ||                     \
+                   pages % 2 == 1) {                                   \
+                       addr = __TLBI_VADDR(start, asid);               \
+                       __tlbi_level(op, addr, tlb_level);              \
+                       if (tlbi_user)                                  \
+                               __tlbi_user_level(op, addr, tlb_level); \
+                       start += stride;                                \
+                       pages -= stride >> PAGE_SHIFT;                  \
+                       continue;                                       \
+               }                                                       \
+                                                                       \
+               num = __TLBI_RANGE_NUM(pages, scale);                   \
+               if (num >= 0) {                                         \
+                       addr = __TLBI_VADDR_RANGE(start, asid, scale,   \
+                                                 num, tlb_level);      \
+                       __tlbi(r##op, addr);                            \
+                       if (tlbi_user)                                  \
+                               __tlbi_user(r##op, addr);               \
+                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+                       pages -= __TLBI_RANGE_PAGES(num, scale);        \
+               }                                                       \
+               scale++;                                                \
+       }                                                               \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+       __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     unsigned long stride, bool last_level,
                                     int tlb_level)
 {
-       int num = 0;
-       int scale = 0;
-       unsigned long asid, addr, pages;
+       unsigned long asid, pages;
 
        start = round_down(start, stride);
        end = round_up(end, stride);
@@ -307,56 +370,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
        dsb(ishst);
        asid = ASID(vma->vm_mm);
 
-       /*
-        * When the CPU does not support TLB range operations, flush the TLB
-        * entries one by one at the granularity of 'stride'. If the TLB
-        * range ops are supported, then:
-        *
-        * 1. If 'pages' is odd, flush the first page through non-range
-        *    operations;
-        *
-        * 2. For remaining pages: the minimum range granularity is decided
-        *    by 'scale', so multiple range TLBI operations may be required.
-        *    Start from scale = 0, flush the corresponding number of pages
-        *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-        *    until no pages left.
-        *
-        * Note that certain ranges can be represented by either num = 31 and
-        * scale or num = 0 and scale + 1. The loop below favours the latter
-        * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-        */
-       while (pages > 0) {
-               if (!system_supports_tlb_range() ||
-                   pages % 2 == 1) {
-                       addr = __TLBI_VADDR(start, asid);
-                       if (last_level) {
-                               __tlbi_level(vale1is, addr, tlb_level);
-                               __tlbi_user_level(vale1is, addr, tlb_level);
-                       } else {
-                               __tlbi_level(vae1is, addr, tlb_level);
-                               __tlbi_user_level(vae1is, addr, tlb_level);
-                       }
-                       start += stride;
-                       pages -= stride >> PAGE_SHIFT;
-                       continue;
-               }
-
-               num = __TLBI_RANGE_NUM(pages, scale);
-               if (num >= 0) {
-                       addr = __TLBI_VADDR_RANGE(start, asid, scale,
-                                                 num, tlb_level);
-                       if (last_level) {
-                               __tlbi(rvale1is, addr);
-                               __tlbi_user(rvale1is, addr);
-                       } else {
-                               __tlbi(rvae1is, addr);
-                               __tlbi_user(rvae1is, addr);
-                       }
-                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-                       pages -= __TLBI_RANGE_PAGES(num, scale);
-               }
-               scale++;
-       }
+       if (last_level)
+               __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+       else
+               __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
        dsb(ish);
 }
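
[Editor's note] The comment block added to __flush_tlb_range_op() above describes the (num, scale) decomposition in prose. As a reading aid only, here is a small userspace model of that loop for the FEAT_TLBIRANGE-capable case with a PAGE_SIZE stride; RANGE_NUM() and RANGE_PAGES() are local restatements of the kernel's __TLBI_RANGE_NUM()/__TLBI_RANGE_PAGES() helpers, and printf() stands in for the actual TLBI instructions.

#include <stdio.h>

#define RANGE_PAGES(num, scale)	(((unsigned long)(num) + 1) << (5 * (scale) + 1))
#define RANGE_NUM(pages, scale)	((int)(((pages) >> (5 * (scale) + 1)) & 0x1f) - 1)

int main(void)
{
	unsigned long pages = 65;	/* e.g. 65 4KiB pages */
	int scale = 0, num;

	while (pages > 0) {
		if (pages % 2 == 1) {
			/* Odd count: flush the first page individually. */
			pages--;
			printf("single-page TLBI (%lu pages left)\n", pages);
			continue;
		}
		num = RANGE_NUM(pages, scale);
		if (num >= 0) {
			/* One range TLBI covers (num + 1) * 2^(5*scale + 1) pages. */
			printf("range TLBI: scale=%d num=%d -> %lu pages\n",
			       scale, num, RANGE_PAGES(num, scale));
			pages -= RANGE_PAGES(num, scale);
		}
		scale++;
	}
	return 0;
}

With pages = 65 this prints one single-page invalidation followed by a single range TLBI with scale = 1, num = 0 covering the remaining 64 pages; those 64 pages could in principle also be encoded as num = 31, scale = 0, but the loop favours num = 0 at the next scale, as the comment above notes.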
 
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index f4f0b19..83c1e09 100644
@@ -25,7 +25,6 @@ menuconfig KVM
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select HAVE_KVM_CPU_RELAX_INTERCEPT
-       select HAVE_KVM_ARCH_TLB_FLUSH_ALL
        select KVM_MMIO
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select KVM_XFER_TO_GUEST_WORK
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 183786f..1cad736 100644
@@ -1534,12 +1534,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
                                        struct kvm_arm_device_addr *dev_addr)
 {
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a169c61..857d9bc 100644
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
        __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
 }
 
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+       DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+       DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+       __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
        HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
        HANDLE_FUNC(__vgic_v3_read_vmcr),
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b9991bb..1b26571 100644
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt, false);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+       if (icache_is_vpipt())
+               icache_inval_all_pou();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f7a93ef..f155b8c 100644
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
        return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
 }
 
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size)
+{
+       unsigned long pages, inval_pages;
+
+       if (!system_supports_tlb_range()) {
+               kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+               return;
+       }
+
+       pages = size >> PAGE_SHIFT;
+       while (pages > 0) {
+               inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+               kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+               addr += inval_pages << PAGE_SHIFT;
+               pages -= inval_pages;
+       }
+}
+
 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
 
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
                 * evicted pte value (if any).
                 */
                if (kvm_pte_table(ctx->old, ctx->level))
-                       kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+                       kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+                                               kvm_granule_size(ctx->level));
                else if (kvm_pte_valid(ctx->old))
                        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
                                     ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
        smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-                          struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+       /*
+        * If FEAT_TLBIRANGE is implemented, defer the individual
+        * TLB invalidations until the entire walk is finished, and
+        * then use the range-based TLBI instructions to do the
+        * invalidations. Condition deferred TLB invalidation on the
+        * system supporting FWB as the optimization is entirely
+        * pointless when the unmap walker needs to perform CMOs.
+        */
+       return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+                               struct kvm_s2_mmu *mmu,
+                               struct kvm_pgtable_mm_ops *mm_ops)
 {
+       struct kvm_pgtable *pgt = ctx->arg;
+
        /*
-        * Clear the existing PTE, and perform break-before-make with
-        * TLB maintenance if it was valid.
+        * Clear the existing PTE, and perform break-before-make if it was
+        * valid. Depending on the system support, defer the TLB maintenance
+        * for the same until the entire unmap walk is completed.
         */
        if (kvm_pte_valid(ctx->old)) {
                kvm_clear_pte(ctx->ptep);
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+               if (!stage2_unmap_defer_tlb_flush(pgt))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                       ctx->addr, ctx->level);
        }
 
        mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
         * block entry and rely on the remaining portions being faulted
         * back lazily.
         */
-       stage2_put_pte(ctx, mmu, mm_ops);
+       stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
        if (need_flush && mm_ops->dcache_clean_inval_poc)
                mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+       int ret;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_unmap_walker,
                .arg    = pgt,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
        };
 
-       return kvm_pgtable_walk(pgt, addr, size, &walker);
+       ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+       if (stage2_unmap_defer_tlb_flush(pgt))
+               /* Perform the deferred TLB invalidations */
+               kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+       return ret;
 }
 
 struct stage2_attr_data {
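
[Editor's note] A sizing note on the kvm_tlb_flush_vmid_range() helper added above: each hypercall is capped at MAX_TLBI_RANGE_PAGES. As a rough worked example, assuming 4KiB pages, the existing __TLBI_RANGE_PAGES(31, 3) definition works out to 2,097,152 pages (8GiB), so invalidating a 10GiB range (2,621,440 pages) is split into two __kvm_tlb_flush_vmid_range() calls: one covering 2,097,152 pages and one covering the remaining 524,288.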
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index e69da55..46bd43f 100644
@@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       dsb(ishst);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d3b4fee..b16aff3 100644
@@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 
 /**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
  * @kvm:       pointer to kvm structure.
  *
  * Interface to HYP function to flush all VM TLB entries
  */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-       ++kvm->stat.generic.remote_tlb_flush_requests;
        kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+       return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                     gfn_t gfn, u64 nr_pages)
+{
+       kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+                               gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+       return 0;
 }
 
 static bool kvm_is_device_pfn(unsigned long pfn)
@@ -1075,7 +1083,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
        write_lock(&kvm->mmu_lock);
        stage2_wp_range(&kvm->arch.mmu, start, end);
        write_unlock(&kvm->mmu_lock);
-       kvm_flush_remote_tlbs(kvm);
+       kvm_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 /**
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 04cedf9..54a85f1 100644
@@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index aa5583a..231ac05 100644
@@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
        /* Flush slot from GPA */
        kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
                              slot->base_gfn + slot->npages - 1);
-       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+       kvm_flush_remote_tlbs_memslot(kvm, slot);
        spin_unlock(&kvm->mmu_lock);
 }
 
@@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
                                        new->base_gfn + new->npages - 1);
                if (needs_flush)
-                       kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+                       kvm_flush_remote_tlbs_memslot(kvm, new);
                spin_unlock(&kvm->mmu_lock);
        }
 }
@@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        kvm_mips_callbacks->prepare_flush_shadow(kvm);
        return 1;
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        int r;
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index f2eb479..97e1296 100644
@@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
 {
 }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28bd383..b547d17 100644
@@ -1794,8 +1794,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        if (kvm_x86_ops.flush_remote_tlbs &&
            !static_call(kvm_x86_flush_remote_tlbs)(kvm))
@@ -1804,6 +1804,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
                return -ENOTSUPP;
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ec169f5..dbf3c6c 100644
@@ -278,16 +278,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
        return kvm_x86_ops.flush_remote_tlbs_range;
 }
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
 {
-       int ret = -EOPNOTSUPP;
+       if (!kvm_x86_ops.flush_remote_tlbs_range)
+               return -EOPNOTSUPP;
 
-       if (kvm_x86_ops.flush_remote_tlbs_range)
-               ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
-                                                                  nr_pages);
-       if (ret)
-               kvm_flush_remote_tlbs(kvm);
+       return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
 }
 
 static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -6670,7 +6666,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
         */
        if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
                            PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
-               kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               kvm_flush_remote_tlbs_memslot(kvm, slot);
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@@ -6689,20 +6685,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        }
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       /*
-        * All current use cases for flushing the TLBs for a specific memslot
-        * related to dirty logging, and many do the TLB flush out of mmu_lock.
-        * The interaction between the various operations on memslot must be
-        * serialized by slots_locks to ensure the TLB flush from one operation
-        * is observed by any other operation on the same memslot.
-        */
-       lockdep_assert_held(&kvm->slots_lock);
-       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
-
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot)
 {
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index d39af56..86cb83b 100644
@@ -170,9 +170,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn,
                                    int min_level);
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages);
-
 /* Flush the given page (huge or not) of guest memory. */
 static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a6b9bea..faeb2e3 100644
@@ -12751,7 +12751,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                 * See is_writable_pte() for more details (the case involving
                 * access-tracked SPTEs is particularly relevant).
                 */
-               kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+               kvm_flush_remote_tlbs_memslot(kvm, new);
        }
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d3ac77..394db2c 100644
@@ -1359,6 +1359,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot);
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@@ -1387,10 +1390,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                        unsigned long mask);
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
 
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
                      int *is_dirty, struct kvm_memory_slot **memslot);
@@ -1479,11 +1479,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        return -ENOTSUPP;
 }
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                                   gfn_t gfn, u64 nr_pages)
+{
+       return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
 #endif
 
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
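
[Editor's note] To make the new arch hooks above concrete, the sketch below shows the shape of an architecture opting into both of them, modelled on the arm64 hunks earlier in this merge. The arch_flush_vm_tlbs() and arch_flush_vm_tlbs_range() calls are hypothetical placeholders for an architecture's own TLB primitives, not real kernel functions.

/* arch/<arch>/include/asm/kvm_host.h (illustrative sketch) */
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE

/* arch/<arch>/kvm/mmu.c (illustrative sketch) */
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
	arch_flush_vm_tlbs(kvm);	/* hypothetical arch primitive */
	return 0;	/* 0: TLBs flushed, no KVM_REQ_TLB_FLUSH fallback needed */
}

int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
	/*
	 * Returning non-zero makes kvm_flush_remote_tlbs_range() fall back
	 * to a full kvm_flush_remote_tlbs().
	 */
	return arch_flush_vm_tlbs_range(kvm, gfn << PAGE_SHIFT,
					nr_pages << PAGE_SHIFT);
}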
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index b74916d..484d087 100644
@@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 config KVM_VFIO
        bool
 
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
-       bool
-
 config HAVE_KVM_INVALID_WAKEUPS
        bool
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dfbaafb..5d4d2e0 100644
@@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 }
 EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
        ++kvm->stat.generic.remote_tlb_flush_requests;
@@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
         * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
         * barrier here.
         */
-       if (!kvm_arch_flush_remote_tlb(kvm)
+       if (!kvm_arch_flush_remote_tlbs(kvm)
            || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.generic.remote_tlb_flush;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
+
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
+{
+       if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
+               return;
+
+       /*
+        * Fall back to flushing the entire TLB if the architecture range-based
+        * TLB invalidation is unsupported or can't be performed for whatever
+        * reason.
+        */
+       kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot)
+{
+       /*
+        * All current use cases for flushing the TLBs for a specific memslot
+        * are related to dirty logging, and many do the TLB flush out of
+        * mmu_lock. The interaction between the various operations on memslot
+        * must be serialized by slots_lock to ensure the TLB flush from one
+        * operation is observed by any other operation on the same memslot.
+        */
+       lockdep_assert_held(&kvm->slots_lock);
+       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
+}
 
 static void kvm_flush_shadow_all(struct kvm *kvm)
 {
@@ -2180,7 +2205,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
        }
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
                return -EFAULT;
@@ -2297,7 +2322,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
        KVM_MMU_UNLOCK(kvm);
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        return 0;
 }