KVM: arm/arm64: Properly handle faulting of device mappings
Author:     Marc Zyngier <maz@kernel.org>
AuthorDate: Wed, 11 Dec 2019 16:56:48 +0000
Commit:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CommitDate: Tue, 31 Dec 2019 15:46:24 +0000
commit 6d674e28f642e3ff676fbae2d8d1b872814d32b6 upstream.

A device mapping is normally mapped at Stage-2 up front, since there
is very little gain in having it faulted in on demand.

Nonetheless, it is possible to end up in a situation where the device
mapping has been removed from Stage-2 (userspace munmap()ed the VFIO
region, and the MMU notifier did its job), but is still present in a
userspace mapping (userspace has mapped it back at the same address).
In such a situation, the device mapping will be demand-paged as the
guest performs memory accesses, as sketched below.
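
For illustration, a minimal userspace sketch of that sequence. This is
an assumption-laden example, not part of the patch: "device_fd",
"offset" and "size" are hypothetical, standing for a VFIO device fd and
the mmap-able region geometry a real program would obtain via the
VFIO_DEVICE_GET_REGION_INFO ioctl. Error handling is omitted.

#include <sys/mman.h>
#include <sys/types.h>

static void remap_vfio_region(int device_fd, off_t offset, size_t size)
{
	/*
	 * Initial mapping of the VFIO region; assume it is registered
	 * with KVM as a memslot, at which point KVM maps the device
	 * at Stage-2 up front.
	 */
	void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, device_fd, offset);

	/* Unmapping it makes the MMU notifier tear down Stage-2. */
	munmap(addr, size);

	/*
	 * Map the region back at the same address. The memslot is
	 * unchanged, but Stage-2 is now empty, so subsequent guest
	 * accesses demand-fault the device mapping back in.
	 */
	mmap(addr, size, PROT_READ | PROT_WRITE,
	     MAP_SHARED | MAP_FIXED, device_fd, offset);
}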

This requires care when dealing with the mapping size and with cache
management, and the fault path must also handle potential execution of
a device mapping: mappings of VM_PFNMAP regions are forced down to page
granularity, D-cache maintenance is skipped for device memory, and a
guest attempt to execute from a device mapping is turned into an
injected prefetch abort.

Reported-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Tested-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20191211165651.7889-2-maz@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 38b4c91..f23c9cd 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -38,6 +38,11 @@ static unsigned long io_map_base;
 #define KVM_S2PTE_FLAG_IS_IOMAP                (1UL << 0)
 #define KVM_S2_FLAG_LOGGING_ACTIVE     (1UL << 1)
 
+static bool is_iomap(unsigned long flags)
+{
+       return flags & KVM_S2PTE_FLAG_IS_IOMAP;
+}
+
 static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 {
        return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -1698,6 +1703,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
        vma_pagesize = vma_kernel_pagesize(vma);
        if (logging_active ||
+           (vma->vm_flags & VM_PFNMAP) ||
            !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
                force_pte = true;
                vma_pagesize = PAGE_SIZE;
@@ -1760,6 +1766,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        writable = false;
        }
 
+       if (exec_fault && is_iomap(flags))
+               return -ENOEXEC;
+
        spin_lock(&kvm->mmu_lock);
        if (mmu_notifier_retry(kvm, mmu_seq))
                goto out_unlock;
@@ -1781,7 +1790,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (writable)
                kvm_set_pfn_dirty(pfn);
 
-       if (fault_status != FSC_PERM)
+       if (fault_status != FSC_PERM && !is_iomap(flags))
                clean_dcache_guest_page(pfn, vma_pagesize);
 
        if (exec_fault)
@@ -1948,9 +1957,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
        if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
                if (is_iabt) {
                        /* Prefetch Abort on I/O address */
-                       kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-                       ret = 1;
-                       goto out_unlock;
+                       ret = -ENOEXEC;
+                       goto out;
                }
 
                /*
@@ -1992,6 +2000,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
        ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
        if (ret == 0)
                ret = 1;
+out:
+       if (ret == -ENOEXEC) {
+               kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+               ret = 1;
+       }
 out_unlock:
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        return ret;
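
To summarize the control flow the diff adds, here is a small
self-contained model, compiled in userspace as an illustration rather
than the kernel code itself. Only is_iomap() and the flag name mirror
the patch; every other name is hypothetical.

#include <stdbool.h>
#include <stdio.h>

#define KVM_S2PTE_FLAG_IS_IOMAP	(1UL << 0)

/* Mirrors the helper introduced at the top of the patch. */
static bool is_iomap(unsigned long flags)
{
	return flags & KVM_S2PTE_FLAG_IS_IOMAP;
}

/*
 * The three decisions the patch adds for device (VM_PFNMAP) mappings:
 * page-sized mappings only, no execution, no D-cache maintenance.
 * Returns 0 when handled, -1 when the guest gets a prefetch abort.
 */
static int model_device_fault(bool pfnmap_vma, bool exec_fault,
			      bool perm_fault, unsigned long flags)
{
	/* Device mappings are never block-mapped (force_pte). */
	if (pfnmap_vma)
		printf("mapping at PAGE_SIZE granularity\n");

	/* user_mem_abort() returns -ENOEXEC for exec on an iomap. */
	if (exec_fault && is_iomap(flags))
		return -1;

	/* D-cache maintenance is skipped for device memory. */
	if (!perm_fault && !is_iomap(flags))
		printf("clean_dcache_guest_page()\n");

	return 0;
}

int main(void)
{
	/* An exec fault on an iomap ends up as a guest prefetch abort. */
	if (model_device_fault(true, true, false, KVM_S2PTE_FLAG_IS_IOMAP))
		printf("kvm_inject_pabt()\n");
	return 0;
}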