KVM: x86/mmu: Reintroduce fast invalidate/zap for flushing memslot

author Sean Christopherson <sean.j.christopherson@intel.com>

Fri, 13 Sep 2019 02:46:02 +0000 (19:46 -0700)

committer Paolo Bonzini <pbonzini@redhat.com>

Sat, 14 Sep 2019 07:25:11 +0000 (09:25 +0200)
author Sean Christopherson <sean.j.christopherson@intel.com>
Fri, 13 Sep 2019 02:46:02 +0000 (19:46 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Sat, 14 Sep 2019 07:25:11 +0000 (09:25 +0200)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 74e88e5..bdc16b0 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -335,6 +335,7 @@ struct kvm_mmu_page {
         int root_count;          /* Currently serving as active root */
         unsigned int unsync_children;
         struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
+       unsigned long mmu_valid_gen;
         DECLARE_BITMAP(unsync_child_bitmap, 512);
  
  #ifdef CONFIG_X86_32
@@ -856,6 +857,7 @@ struct kvm_arch {
         unsigned long n_requested_mmu_pages;
         unsigned long n_max_mmu_pages;
         unsigned int indirect_shadow_pages;
+       unsigned long mmu_valid_gen;
         struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
         /*
          * Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index 218b277..a63964e 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2095,6 +2095,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
         if (!direct)
                 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
         set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+       /*
+        * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
+        * depends on valid pages being added to the head of the list.  See
+        * comments in kvm_zap_obsolete_pages().
+        */
         list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
         kvm_mod_used_mmu_pages(vcpu->kvm, +1);
         return sp;
@@ -2244,7 +2250,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
  #define for_each_valid_sp(_kvm, _sp, _gfn)                             \
         hlist_for_each_entry(_sp,                                       \
           &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-               if ((_sp)->role.invalid) {    \
+               if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {    \
                 } else
  
  #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)                        \
@@ -2301,6 +2307,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
  static void mmu_audit_disable(void) { }
  #endif
  
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+       return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
  static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                          struct list_head *invalid_list)
  {
@@ -2525,6 +2536,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                 if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                         flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
         }
+       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
         clear_page(sp->spt);
         trace_kvm_mmu_get_page(sp, true);
  
@@ -4233,6 +4245,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
                         return false;
  
                 if (cached_root_available(vcpu, new_cr3, new_role)) {
+                       /*
+                        * It is possible that the cached previous root page is
+                        * obsolete because of a change in the MMU generation
+                        * number. However, changing the generation number is
+                        * accompanied by KVM_REQ_MMU_RELOAD, which will free
+                        * the root set here and allocate a new one.
+                        */
                         kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
                         if (!skip_tlb_flush) {
                                 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5649,11 +5668,89 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
         return alloc_mmu_pages(vcpu);
  }
  
+
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
+{
+       struct kvm_mmu_page *sp, *node;
+       LIST_HEAD(invalid_list);
+       int ign;
+
+restart:
+       list_for_each_entry_safe_reverse(sp, node,
+             &kvm->arch.active_mmu_pages, link) {
+               /*
+                * Pages are added at the head and we walk from the tail, so no
+                * obsolete valid page remains once a current-gen page is seen.
+                */
+               if (!is_obsolete_sp(kvm, sp))
+                       break;
+
+               /*
+                * Do not repeatedly zap a root page to avoid unnecessary
+                * KVM_REQ_MMU_RELOAD, otherwise we may not be able to make
+                * progress:
+                *    vcpu 0                        vcpu 1
+                *                         call vcpu_enter_guest():
+                *                            1): handle KVM_REQ_MMU_RELOAD
+                *                                and require mmu-lock to
+                *                                load mmu
+                * repeat:
+                *    1): zap root page and
+                *        send KVM_REQ_MMU_RELOAD
+                *
+                *    2): if (cond_resched_lock(mmu-lock))
+                *
+                *                            2): hold mmu-lock and load mmu
+                *
+                *                            3): see KVM_REQ_MMU_RELOAD bit
+                *                                on vcpu->requests is set
+                *                                then return 1 to call
+                *                                vcpu_enter_guest() again.
+                *            goto repeat;
+                *
+                * Since we are walking the list in reverse and the invalid
+                * list will be moved to the head, skipping invalid pages
+                * lets us avoid walking the list forever.
+                */
+               if (sp->role.invalid)
+                       continue;
+
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                       cond_resched_lock(&kvm->mmu_lock);
+                       goto restart;
+               }
+
+               if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+                       goto restart;
+       }
+
+       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+}
+
+/*
+ * Fast invalidate all shadow pages and use lock-break technique
+ * to zap obsolete pages.
+ *
+ * It's required when a memslot is being deleted or the VM is being
+ * destroyed.  In these cases, we must ensure that the KVM MMU does
+ * not use any resource of the slot being deleted (or of any slot)
+ * after this function returns.
+ */
+static void kvm_mmu_zap_all_fast(struct kvm *kvm)
+{
+       spin_lock(&kvm->mmu_lock);
+       kvm->arch.mmu_valid_gen++;
+
+       kvm_zap_obsolete_pages(kvm);
+       spin_unlock(&kvm->mmu_lock);
+}
+
  static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
                         struct kvm_memory_slot *slot,
                         struct kvm_page_track_notifier_node *node)
  {
-       kvm_mmu_zap_all(kvm);
+       kvm_mmu_zap_all_fast(kvm);
  }
  
  void kvm_mmu_init_vm(struct kvm *kvm)
author	Sean Christopherson <sean.j.christopherson@intel.com>
	Fri, 13 Sep 2019 02:46:02 +0000 (19:46 -0700)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Sat, 14 Sep 2019 07:25:11 +0000 (09:25 +0200)
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/kvm/mmu.c		patch \| blob \| history