Merge tag 'kvmarm-fixes-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmar...
author	Paolo Bonzini <pbonzini@redhat.com>
	Fri, 11 Sep 2020 17:12:11 +0000 (13:12 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Fri, 11 Sep 2020 17:12:11 +0000 (13:12 -0400)
KVM/arm64 fixes for Linux 5.9, take #1

- Multiple stolen time fixes, with a new capability to match x86
  (see the userspace sketch after this list)
- Fix for hugetlbfs mappings when PUD and PMD are the same level
- Fix for hugetlbfs mappings when PTE mappings are enforced
  (dirty logging, for example)
- Fix tracing output of 64-bit values
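
A minimal userspace sketch (not part of the patch) of how a VMM might probe
the new capability. It only assumes that the kernel headers in use already
define KVM_CAP_STEAL_TIME; KVM_CHECK_EXTENSION on /dev/kvm is the standard
way to query capabilities.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
		int r;

		if (kvm < 0) {
			perror("open /dev/kvm");
			return 1;
		}

		/*
		 * KVM_CHECK_EXTENSION returns a positive value when the
		 * capability is present; with this series, stolen-time
		 * support is reported on arm64 as well as x86.
		 */
		r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_STEAL_TIME);
		printf("KVM_CAP_STEAL_TIME: %s\n",
		       r > 0 ? "supported" : "not supported");

		close(kvm);
		return 0;
	}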

arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/mmu.c
arch/x86/kvm/x86.c
virt/kvm/kvm_main.c

@@@ -368,7 -368,6 +368,6 @@@ struct kvm_vcpu_arch 
  
        /* Guest PV state */
        struct {
-               u64 steal;
                u64 last_steal;
                gpa_t base;
        } steal;
@@@ -473,7 -472,7 +472,7 @@@ int __kvm_arm_vcpu_set_events(struct kv
  
  #define KVM_ARCH_WANT_MMU_NOTIFIER
  int kvm_unmap_hva_range(struct kvm *kvm,
 -                      unsigned long start, unsigned long end);
 +                      unsigned long start, unsigned long end, unsigned flags);
  int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
  int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
  int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@@ -544,6 -543,7 +543,7 @@@ long kvm_hypercall_pv_features(struct k
  gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
  void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
  
+ bool kvm_arm_pvtime_supported(void);
  int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
                            struct kvm_device_attr *attr);
  int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
diff --combined arch/arm64/kvm/mmu.c
@@@ -343,8 -343,7 +343,8 @@@ static void unmap_stage2_p4ds(struct kv
   * destroying the VM), otherwise another faulting VCPU may come in and mess
   * with things behind our backs.
   */
 -static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
 +static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
 +                               bool may_block)
  {
        struct kvm *kvm = mmu->kvm;
        pgd_t *pgd;
                 * If the range is too large, release the kvm->mmu_lock
                 * to prevent starvation and lockup detector warnings.
                 */
 -              if (next != end)
 +              if (may_block && next != end)
                        cond_resched_lock(&kvm->mmu_lock);
        } while (pgd++, addr = next, addr != end);
  }
  
 +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
 +{
 +      __unmap_stage2_range(mmu, start, size, true);
 +}
 +
  static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
                              phys_addr_t addr, phys_addr_t end)
  {
@@@ -1877,6 -1871,7 +1877,7 @@@ static int user_mem_abort(struct kvm_vc
            !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
                force_pte = true;
                vma_pagesize = PAGE_SIZE;
+               vma_shift = PAGE_SHIFT;
        }
  
        /*
                (fault_status == FSC_PERM &&
                 stage2_is_exec(mmu, fault_ipa, vma_pagesize));
  
-       if (vma_pagesize == PUD_SIZE) {
+       /*
+        * If PUD_SIZE == PMD_SIZE, there is no real PUD level, and
+        * all we have is a 2-level page table. Trying to map a PUD in
+        * this case would be fatally wrong.
+        */
+       if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE) {
                pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
  
                new_pud = kvm_pud_mkhuge(new_pud);
@@@ -2214,21 -2214,18 +2220,21 @@@ static int handle_hva_to_gpa(struct kv
  
  static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
  {
 -      unmap_stage2_range(&kvm->arch.mmu, gpa, size);
 +      unsigned flags = *(unsigned *)data;
 +      bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
 +
 +      __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
        return 0;
  }
  
  int kvm_unmap_hva_range(struct kvm *kvm,
 -                      unsigned long start, unsigned long end)
 +                      unsigned long start, unsigned long end, unsigned flags)
  {
        if (!kvm->arch.mmu.pgd)
                return 0;
  
        trace_kvm_unmap_hva_range(start, end);
 -      handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
 +      handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
        return 0;
  }
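
For illustration only, a hypothetical walker (walk_gpa_range is not in the
patch) showing the pattern the arm64 change adopts: the MMU notifier's
blockable hint, propagated through the new flags argument, decides whether
a long teardown may drop kvm->mmu_lock to reschedule. It uses the real
kernel symbols MMU_NOTIFIER_RANGE_BLOCKABLE and cond_resched_lock().

	static void walk_gpa_range(struct kvm *kvm, phys_addr_t addr,
				   phys_addr_t end, unsigned int flags)
	{
		bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
		phys_addr_t next;

		assert_spin_locked(&kvm->mmu_lock);
		do {
			next = min(end, addr + SZ_2M);	/* arbitrary step for the sketch */
			/* ... tear down stage-2 mappings for [addr, next) ... */
			if (may_block && next != end)
				cond_resched_lock(&kvm->mmu_lock);
		} while (addr = next, addr != end);
	}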
  
diff --combined arch/x86/kvm/x86.c
@@@ -975,7 -975,7 +975,7 @@@ int kvm_set_cr4(struct kvm_vcpu *vcpu, 
  {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
 -                                 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
 +                                 X86_CR4_SMEP;
  
        if (kvm_valid_cr4(vcpu, cr4))
                return 1;
@@@ -3581,6 -3581,9 +3581,9 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_SMALLER_MAXPHYADDR:
                r = (int) allow_smaller_maxphyaddr;
                break;
+       case KVM_CAP_STEAL_TIME:
+               r = sched_info_on();
+               break;
        default:
                break;
        }
@@@ -10667,11 -10670,17 +10670,17 @@@ int kvm_arch_irq_bypass_add_producer(st
  {
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
+       int ret;
  
        irqfd->producer = prod;
+       kvm_arch_start_assignment(irqfd->kvm);
+       ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
+                                        prod->irq, irqfd->gsi, 1);
+       if (ret)
+               kvm_arch_end_assignment(irqfd->kvm);
  
-       return kvm_x86_ops.update_pi_irte(irqfd->kvm,
-                                          prod->irq, irqfd->gsi, 1);
+       return ret;
  }
  
  void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
        if (ret)
                printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
                       " fails: %d\n", irqfd->consumer.token, ret);
+       kvm_arch_end_assignment(irqfd->kvm);
  }
  
  int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
@@@ -10743,11 -10754,9 +10754,11 @@@ EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_va
  void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
  {
        struct x86_exception fault;
 +      u32 access = error_code &
 +              (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
  
        if (!(error_code & PFERR_PRESENT_MASK) ||
 -          vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, error_code, &fault) != UNMAPPED_GVA) {
 +          vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
                /*
                 * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
                 * tables probably do not match the TLB.  Just proceed
diff --combined virt/kvm/kvm_main.c
@@@ -482,8 -482,7 +482,8 @@@ static int kvm_mmu_notifier_invalidate_
         * count is also read inside the mmu_lock critical section.
         */
        kvm->mmu_notifier_count++;
 -      need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end);
 +      need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
 +                                           range->flags);
        need_tlb_flush |= kvm->tlbs_dirty;
        /* we've to flush the tlb before the pages can be freed */
        if (need_tlb_flush)
@@@ -1894,7 -1893,7 +1894,7 @@@ static int hva_to_pfn_remapped(struct v
                 * not call the fault handler, so do it here.
                 */
                bool unlocked = false;
-               r = fixup_user_fault(current, current->mm, addr,
+               r = fixup_user_fault(current->mm, addr,
                                     (write_fault ? FAULT_FLAG_WRITE : 0),
                                     &unlocked);
                if (unlocked)