kvm: Nested KVM MMUs need PAE root too
author Jiří Paleček <jpalecek@web.de>
Sat, 22 Jun 2019 17:42:04 +0000 (19:42 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 11 Sep 2019 16:02:25 +0000 (18:02 +0200)
On AMD processors, in 32-bit PAE mode, nested KVM instances don't
work. The L0 host gets a kernel OOPS caused by arch.mmu->pae_root
being NULL.

The reason is that when setting up a nested KVM instance, arch.mmu is
set to &arch.guest_mmu (while normally it would be &arch.root_mmu).
However, the initialization and allocation of pae_root only creates it
for root_mmu. KVM code (i.e. in mmu_alloc_shadow_roots) then accesses
arch.mmu->pae_root, which is the unallocated arch.guest_mmu->pae_root.
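
For illustration, a heavily simplified, self-contained sketch of the
failing pattern follows; the structures are condensed stand-ins (e.g.
struct vcpu_arch here), not verbatim kernel code:

	/*
	 * Simplified sketch of the bug pattern (condensed stand-in
	 * structures, not verbatim kernel code).
	 */
	#include <stdlib.h>

	struct kvm_mmu {
		unsigned long *pae_root;
	};

	struct vcpu_arch {
		struct kvm_mmu *mmu;       /* the currently active MMU */
		struct kvm_mmu root_mmu;   /* pae_root allocated here only */
		struct kvm_mmu guest_mmu;  /* pae_root stays NULL */
	};

	int main(void)
	{
		struct vcpu_arch arch = { 0 };

		/* Old behaviour: pae_root is allocated for root_mmu only. */
		arch.root_mmu.pae_root = calloc(4, sizeof(*arch.root_mmu.pae_root));
		arch.mmu = &arch.root_mmu;

		/* Setting up the nested instance switches the active MMU ... */
		arch.mmu = &arch.guest_mmu;

		/*
		 * ... so when the shadow roots are built, the unallocated
		 * guest_mmu.pae_root is dereferenced -> NULL pointer OOPS.
		 */
		arch.mmu->pae_root[0] = 0;

		free(arch.root_mmu.pae_root);
		return 0;
	}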

The fix allocates (and frees) pae_root in both guest_mmu and root_mmu
(and lm_root as well, if it was allocated). The allocation is subject
to the previous restrictions, i.e. it still won't allocate anything on
64-bit hosts and, AFAIK, not on Intel.

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=203923
Fixes: 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu")
Signed-off-by: Jiri Palecek <jpalecek@web.de>
Tested-by: Jiri Palecek <jpalecek@web.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu.c

index 4c45ff0..9086ee4 100644
@@ -5595,13 +5595,13 @@ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
                                 PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
 }
 
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
+static void free_mmu_pages(struct kvm_mmu *mmu)
 {
-       free_page((unsigned long)vcpu->arch.mmu->pae_root);
-       free_page((unsigned long)vcpu->arch.mmu->lm_root);
+       free_page((unsigned long)mmu->pae_root);
+       free_page((unsigned long)mmu->lm_root);
 }
 
-static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
+static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 {
        struct page *page;
        int i;
@@ -5622,9 +5622,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
        if (!page)
                return -ENOMEM;
 
-       vcpu->arch.mmu->pae_root = page_address(page);
+       mmu->pae_root = page_address(page);
        for (i = 0; i < 4; ++i)
-               vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
+               mmu->pae_root[i] = INVALID_PAGE;
 
        return 0;
 }
@@ -5632,6 +5632,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
        uint i;
+       int ret;
 
        vcpu->arch.mmu = &vcpu->arch.root_mmu;
        vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
@@ -5649,7 +5650,19 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
                vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
        vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
-       return alloc_mmu_pages(vcpu);
+
+       ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu);
+       if (ret)
+               return ret;
+
+       ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu);
+       if (ret)
+               goto fail_allocate_root;
+
+       return ret;
+ fail_allocate_root:
+       free_mmu_pages(&vcpu->arch.guest_mmu);
+       return ret;
 }
 
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
@@ -6074,7 +6087,8 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
        kvm_mmu_unload(vcpu);
-       free_mmu_pages(vcpu);
+       free_mmu_pages(&vcpu->arch.root_mmu);
+       free_mmu_pages(&vcpu->arch.guest_mmu);
        mmu_free_memory_caches(vcpu);
 }