KVM: x86/mmu: Allow enabling/disabling dirty logging under MMU read lock
author Ben Gardon <bgardon@google.com>
Thu, 1 Apr 2021 23:37:34 +0000 (16:37 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 19 Apr 2021 13:06:04 +0000 (09:06 -0400)
To reduce lock contention and interference with page fault handlers,
allow the TDP MMU functions which enable and disable dirty logging
to operate under the MMU read lock.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210401233736.638171-12-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
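
In outline, the change splits each slot-wide pass in two: the rmap-based
work stays under the exclusive lock, and the TDP MMU pass moves out to the
shared lock. A minimal userspace sketch of that pattern, using POSIX
rwlocks and illustrative names rather than KVM's API:

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t mmu_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-ins for the real rmap and TDP MMU write-protection passes. */
static bool wrprot_rmaps(void)   { return true; }
static bool wrprot_tdp_mmu(void) { return true; }

static bool remove_write_access(bool tdp_mmu_enabled)
{
	bool flush;

	/* The shadow MMU rmaps still require the exclusive (write) lock. */
	pthread_rwlock_wrlock(&mmu_lock);
	flush = wrprot_rmaps();
	pthread_rwlock_unlock(&mmu_lock);

	/*
	 * The TDP MMU pass now runs under the shared (read) lock, so page
	 * fault handlers, which take the lock for read as well, can make
	 * progress concurrently.
	 */
	if (tdp_mmu_enabled) {
		pthread_rwlock_rdlock(&mmu_lock);
		flush |= wrprot_tdp_mmu();
		pthread_rwlock_unlock(&mmu_lock);
	}

	return flush;
}
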
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/tdp_mmu.c

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2390e8a..61c0d10 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5532,10 +5532,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
        write_lock(&kvm->mmu_lock);
        flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
                                start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
-       if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
        write_unlock(&kvm->mmu_lock);
 
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+               flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
+               read_unlock(&kvm->mmu_lock);
+       }
+
        /*
         * We can flush all the TLBs out of the mmu lock without TLB
         * corruption since we just change the spte from writable to
@@ -5638,10 +5642,14 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 
        write_lock(&kvm->mmu_lock);
        flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
-       if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
        write_unlock(&kvm->mmu_lock);
 
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+               flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+               read_unlock(&kvm->mmu_lock);
+       }
+
        /*
         * It's also safe to flush TLBs out of mmu lock here as currently this
         * function is only used for dirty logging, in which case flushing TLB
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6a3b7ba..335c126 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -495,8 +495,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 }
 
 /*
- * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
- * associated bookkeeping
+ * tdp_mmu_set_spte_atomic_no_dirty_log - Set a TDP MMU SPTE atomically
+ * and handle the associated bookkeeping, but do not mark the page dirty
+ * in KVM's dirty bitmaps.
  *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
@@ -504,9 +505,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
  * Returns: true if the SPTE was set, false if it was not. If false is returned,
  *         this function will have no side-effects.
  */
-static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
-                                          struct tdp_iter *iter,
-                                          u64 new_spte)
+static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
+                                                       struct tdp_iter *iter,
+                                                       u64 new_spte)
 {
        lockdep_assert_held_read(&kvm->mmu_lock);
 
@@ -521,12 +522,25 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
                      new_spte) != iter->old_spte)
                return false;
 
-       handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-                           new_spte, iter->level, true);
+       __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+                             new_spte, iter->level, true);
+       handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level);
 
        return true;
 }
 
+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+                                          struct tdp_iter *iter,
+                                          u64 new_spte)
+{
+       if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
+               return false;
+
+       handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
+                                     iter->old_spte, new_spte, iter->level);
+       return true;
+}
+
 static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
                                           struct tdp_iter *iter)
 {
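
The comment above states the key contract: when the compare-exchange loses
a race, the function returns false with no side effects, so the caller can
simply re-read the SPTE and retry. A self-contained C11 sketch of that
idiom, with illustrative names rather than the kernel's helpers:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Try to install new_spte; fails, with no side effects, if *sptep no
 * longer holds old_spte (e.g. a concurrent fault handler changed it).
 */
static bool set_spte_atomic(_Atomic uint64_t *sptep, uint64_t old_spte,
			    uint64_t new_spte)
{
	return atomic_compare_exchange_strong(sptep, &old_spte, new_spte);
}

static void wrprot_one(_Atomic uint64_t *sptep)
{
	uint64_t old_spte = atomic_load(sptep);

	/*
	 * On failure, re-read the current value and retry, mirroring the
	 * "goto retry" in the hunks below. (Bit 1 is the x86 writable bit,
	 * standing in for PT_WRITABLE_MASK.)
	 */
	while (!set_spte_atomic(sptep, old_spte, old_spte & ~(1ull << 1)))
		old_spte = atomic_load(sptep);
}

The wrprot_gfn_range() and clear_dirty_gfn_range() hunks below apply
exactly this retry loop, expressed with a goto.
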
@@ -1082,7 +1096,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
        for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
                                   min_level, start, end) {
-               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
 
                if (!is_shadow_present_pte(iter.old_spte) ||
@@ -1092,7 +1107,15 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
                new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
 
-               tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+               if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+                                                         new_spte)) {
+                       /*
+                        * The iter must explicitly re-read the SPTE because
+                        * the atomic cmpxchg failed.
+                        */
+                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+                       goto retry;
+               }
                spte_set = true;
        }
 
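Note the trailing argument to tdp_mmu_iter_cond_resched() and, below, to
for_each_tdp_mmu_root_yield_safe() flipping from false to true: it tells
the helpers that mmu_lock is now held for read, so that when they yield
they drop and reacquire the lock in shared rather than exclusive mode. A
userspace sketch of that idea (pthreads; illustrative, not the kernel
helper):

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

/*
 * Yield the CPU without holding the lock across the reschedule point,
 * reacquiring it in the same mode (shared vs. exclusive) the caller held.
 */
static void cond_yield(pthread_rwlock_t *lock, bool shared)
{
	pthread_rwlock_unlock(lock);
	sched_yield();
	if (shared)
		pthread_rwlock_rdlock(lock);
	else
		pthread_rwlock_wrlock(lock);
}
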
@@ -1111,7 +1134,9 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
        struct kvm_mmu_page *root;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+       lockdep_assert_held_read(&kvm->mmu_lock);
+
+       for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
                spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
                             slot->base_gfn + slot->npages, min_level);
 
@@ -1135,7 +1160,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
        rcu_read_lock();
 
        tdp_root_for_each_leaf_pte(iter, root, start, end) {
-               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
 
                if (spte_ad_need_write_protect(iter.old_spte)) {
@@ -1150,7 +1176,15 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                                continue;
                }
 
-               tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+               if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+                                                         new_spte)) {
+                       /*
+                        * The iter must explicitly re-read the SPTE because
+                        * the atomic cmpxchg failed.
+                        */
+                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+                       goto retry;
+               }
                spte_set = true;
        }
 
@@ -1170,7 +1204,9 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
        struct kvm_mmu_page *root;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+       lockdep_assert_held_read(&kvm->mmu_lock);
+
+       for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
                spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
                                slot->base_gfn + slot->npages);