KVM: x86/mmu: BUG() in rmap helpers iff CONFIG_BUG_ON_DATA_CORRUPTION=y
authorSean Christopherson <seanjc@google.com>
Sat, 29 Jul 2023 00:47:22 +0000 (17:47 -0700)
committerPaolo Bonzini <pbonzini@redhat.com>
Thu, 31 Aug 2023 17:48:50 +0000 (13:48 -0400)
Introduce KVM_BUG_ON_DATA_CORRUPTION() and use it in the low-level rmap
helpers to convert the existing BUG()s to WARN_ON_ONCE() when the kernel
is built with CONFIG_BUG_ON_DATA_CORRUPTION=n, i.e. does NOT want to BUG()
on corruption of host kernel data structures.  Environments that don't
have infrastructure to automatically capture crash dumps, i.e. aren't
likely to enable CONFIG_BUG_ON_DATA_CORRUPTION=y, are typically better
served overall by WARN-and-continue behavior (for the kernel, the VM is
dead regardless), as a BUG() while holding mmu_lock all but guarantees
the _best_ case scenario is a panic().

Make the BUG()s conditional instead of removing/replacing them entirely as
there's a non-zero chance (though by no means a guarantee) that the damage
isn't contained to the target VM, e.g. if no rmap is found for a SPTE then
KVM may be double-zapping the SPTE, i.e. has already freed the memory the
SPTE pointed at and thus KVM is reading/writing memory that KVM no longer
owns.

Link: https://lore.kernel.org/all/20221129191237.31447-1-mizhang@google.com
Suggested-by: Mingwei Zhang <mizhang@google.com>
Cc: David Matlack <dmatlack@google.com>
Cc: Jim Mattson <jmattson@google.com>
Reviewed-by: Mingwei Zhang <mizhang@google.com>
Link: https://lore.kernel.org/r/20230729004722.1056172-13-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu/mmu.c
include/linux/kvm_host.h

index f518dd5..0420944 100644 (file)
@@ -973,7 +973,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm,
         * when adding an entry and the previous head is full, and heads are
         * removed (this flow) when they become empty.
         */
-       BUG_ON(j < 0);
+       KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
 
        /*
         * Replace the to-be-freed SPTE with the last valid entry from the head
@@ -1004,14 +1004,13 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
        struct pte_list_desc *desc;
        int i;
 
-       if (!rmap_head->val) {
-               pr_err("%s: %p 0->BUG\n", __func__, spte);
-               BUG();
-       } else if (!(rmap_head->val & 1)) {
-               if ((u64 *)rmap_head->val != spte) {
-                       pr_err("%s:  %p 1->BUG\n", __func__, spte);
-                       BUG();
-               }
+       if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
+               return;
+
+       if (!(rmap_head->val & 1)) {
+               if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
+                       return;
+
                rmap_head->val = 0;
        } else {
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
@@ -1025,8 +1024,8 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
                        }
                        desc = desc->more;
                }
-               pr_err("%s: %p many->many\n", __func__, spte);
-               BUG();
+
+               KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
        }
 }
 
index 1b583f3..fb6c610 100644 (file)
@@ -867,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
        unlikely(__ret);                                        \
 })
 
+/*
+ * Note, "data corruption" refers to corruption of host kernel data structures,
+ * not guest data.  Guest data corruption, suspected or confirmed, that is tied
+ * and contained to a single VM should *never* BUG() and potentially panic the
+ * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
+ * is corrupted and that corruption can have a cascading effect to other parts
+ * of the hosts and/or to other VMs.
+ */
+#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm)                  \
+({                                                             \
+       bool __ret = !!(cond);                                  \
+                                                               \
+       if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION))          \
+               BUG_ON(__ret);                                  \
+       else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))      \
+               kvm_vm_bugged(kvm);                             \
+       unlikely(__ret);                                        \
+})
+
 static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PROVE_RCU