Merge tag 'kvmarm-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm...
author     Paolo Bonzini <pbonzini@redhat.com>
           Wed, 26 Apr 2023 19:46:52 +0000 (15:46 -0400)
committer  Paolo Bonzini <pbonzini@redhat.com>
           Wed, 26 Apr 2023 19:46:52 +0000 (15:46 -0400)
KVM/arm64 updates for 6.4

- Numerous fixes for the pathological lock inversion issue that
  plagued KVM/arm64 since... forever.

- New framework allowing SMCCC-compliant hypercalls to be forwarded
  to userspace, hopefully paving the way for some more features
  being moved to VMMs rather than being implemented in the kernel.

- Large rework of the timer code to allow a VM-wide offset to be
  applied to both virtual and physical counters as well as a
  per-timer, per-vcpu offset that complements the global one.
  This last part allows the NV timer code to be implemented on
  top.  (A userspace sketch of the new counter offset ioctl follows
  this list.)

- A small set of fixes to make sure that we don't change anything
  affecting the EL1&0 translation regime just after having
  taken an exception to EL2 until we have executed a DSB. This
  ensures that speculative walks started in EL1&0 have completed.

- The usual selftest fixes and improvements.

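As trailed in the timer item above, here is a minimal, hypothetical userspace
sketch of the new VM-wide counter offset API.  The kernel-side declaration
(kvm_vm_ioctl_set_counter_offset() taking a struct kvm_arm_counter_offset,
visible in the arch/arm64/include/asm/kvm_host.h hunk below) fixes the struct
name; the ioctl name KVM_ARM_SET_COUNTER_OFFSET, the counter_offset field, its
CNTVOFF-style "subtracted from the host counter" semantics, and the "configure
before any vCPU runs" restriction are assumptions to be checked against the
updated api.rst rather than statements of the final ABI.

/*
 * Hypothetical sketch: make the guest's virtual and physical counters start
 * near zero by programming the VM-wide offset with the current host counter.
 * ASSUMPTIONS (see lead-in): the ioctl and field names, and that the offset
 * is subtracted from the host counter, as with CNTVOFF_EL2.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static inline uint64_t read_host_cntvct(void)
{
	uint64_t val;

	/* The generic timer counter is readable from EL0 on Linux. */
	asm volatile("mrs %0, cntvct_el0" : "=r"(val));
	return val;
}

static int zero_guest_counters(int vm_fd)
{
	struct kvm_arm_counter_offset off = {
		.counter_offset = read_host_cntvct(),	/* .reserved stays 0 */
	};

	/* Assumed restriction: must be issued before any vCPU has run. */
	if (ioctl(vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &off) < 0) {
		perror("KVM_ARM_SET_COUNTER_OFFSET");
		return -1;
	}
	return 0;
}

The per-timer, per-vcpu offsets mentioned in the same item complement this
global value, which is what lets the NV timer code be built on top of it.
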
33 files changed:
Documentation/virt/kvm/api.rst
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/guest.c
arch/mips/kvm/mips.c
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/powerpc.c
arch/riscv/kvm/vm.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/pci.c
arch/s390/kvm/vsie.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/hyperv.c
arch/x86/kvm/vmx/hyperv.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/vmx/vmx_ops.h
arch/x86/kvm/x86.c
include/linux/kvm_host.h
include/uapi/linux/kvm.h
tools/include/uapi/linux/kvm.h
virt/kvm/kvm_main.c

index 42403df..e54fdfa 100644 (file)
@@ -5645,7 +5645,8 @@ with the KVM_XEN_VCPU_GET_ATTR ioctl.
   };
 
 Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The
-``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr``
+``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned.
+``length`` must not be bigger than 2^31 - PAGE_SIZE bytes. The ``addr``
 field must point to a buffer which the tags will be copied to or from.
 
 ``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or
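
For orientation, a hypothetical userspace sketch of the ioctl this paragraph
documents.  The struct name and its guest_ipa/length/addr/flags fields come
from the hunks in this series; the ioctl name KVM_ARM_MTE_COPY_TAGS, the need
to enable KVM_CAP_ARM_MTE on the VM beforehand, and the "return value is the
number of bytes handled" behaviour (the reason for the new INT_MAX cap in the
guest.c hunk below) should be treated as assumptions to verify against the
full api.rst section.

/*
 * Hypothetical sketch: read the MTE tags backing a PAGE_SIZE-aligned range
 * of guest IPA space into a caller-supplied buffer.  Per the documentation
 * above, guest_ipa and length must be PAGE_SIZE aligned and length must be
 * smaller than 2^31 - PAGE_SIZE.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_guest_mte_tags(int vm_fd, uint64_t guest_ipa,
			       uint64_t length, void *tag_buf)
{
	struct kvm_arm_copy_mte_tags copy = {
		.guest_ipa = guest_ipa,			/* PAGE_SIZE aligned */
		.length    = length,			/* PAGE_SIZE aligned */
		.addr      = tag_buf,			/* buffer the tag bytes go to */
		.flags     = KVM_ARM_TAGS_FROM_GUEST,	/* guest -> buffer */
	};
	int ret;

	/* Assumed: >= 0 is the number of bytes whose tags were copied. */
	ret = ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
	if (ret < 0)
		perror("KVM_ARM_MTE_COPY_TAGS");
	return ret;
}
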
index ce75309..b9e3661 100644 (file)
@@ -1023,8 +1023,8 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
                               struct kvm_device_attr *attr);
 
-long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
-                               struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+                              struct kvm_arm_copy_mte_tags *copy_tags);
 int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
                                    struct kvm_arm_counter_offset *offset);
 
index bb21d0c..95b715c 100644 (file)
@@ -1499,8 +1499,7 @@ static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
        }
 }
 
-long kvm_arch_vm_ioctl(struct file *filp,
-                      unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
index faae7ec..20280a5 100644 (file)
@@ -1026,8 +1026,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
        return ret;
 }
 
-long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
-                               struct kvm_arm_copy_mte_tags *copy_tags)
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+                              struct kvm_arm_copy_mte_tags *copy_tags)
 {
        gpa_t guest_ipa = copy_tags->guest_ipa;
        size_t length = copy_tags->length;
@@ -1048,6 +1048,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
        if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
                return -EINVAL;
 
+       /* Lengths above INT_MAX cannot be represented in the return value */
+       if (length > INT_MAX)
+               return -EINVAL;
+
        gfn = gpa_to_gfn(guest_ipa);
 
        mutex_lock(&kvm->slots_lock);
index 36c8991..884be4e 100644 (file)
@@ -993,9 +993,9 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
        kvm_flush_remote_tlbs(kvm);
 }
 
-long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
-       long r;
+       int r;
 
        switch (ioctl) {
        default:
index 6bef23d..5b9f851 100644 (file)
@@ -156,7 +156,7 @@ extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
 extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
-extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
+extern int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
 extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern void kvmppc_rmap_reset(struct kvm *kvm);
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
@@ -170,7 +170,7 @@ extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
 extern void kvmppc_setup_partition_table(struct kvm *kvm);
 
-extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+extern int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                                struct kvm_create_spapr_tce_64 *args);
 #define kvmppc_ioba_validate(stt, ioba, npages)                         \
                (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
@@ -211,10 +211,10 @@ extern void kvmppc_bookehv_exit(void);
 extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
-extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
-                                           struct kvm_ppc_resize_hpt *rhpt);
-extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+extern int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
                                           struct kvm_ppc_resize_hpt *rhpt);
+extern int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+                                         struct kvm_ppc_resize_hpt *rhpt);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
@@ -286,8 +286,8 @@ struct kvmppc_ops {
        int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
        int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
        void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
-       long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
-                             unsigned long arg);
+       int (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
+                            unsigned long arg);
        int (*hcall_implemented)(unsigned long hcall);
        int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
                                       struct irq_bypass_producer *);
index 7006bcb..1f4896d 100644 (file)
@@ -124,9 +124,9 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
                 info->virt, (long)info->order, kvm->arch.lpid);
 }
 
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
+int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 {
-       long err = -EBUSY;
+       int err = -EBUSY;
        struct kvm_hpt_info info;
 
        mutex_lock(&kvm->arch.mmu_setup_lock);
@@ -1468,8 +1468,8 @@ static void resize_hpt_prepare_work(struct work_struct *work)
        mutex_unlock(&kvm->arch.mmu_setup_lock);
 }
 
-long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
-                                    struct kvm_ppc_resize_hpt *rhpt)
+int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+                                   struct kvm_ppc_resize_hpt *rhpt)
 {
        unsigned long flags = rhpt->flags;
        unsigned long shift = rhpt->shift;
@@ -1534,13 +1534,13 @@ static void resize_hpt_boot_vcpu(void *opaque)
        /* Nothing to do, just force a KVM exit */
 }
 
-long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
-                                   struct kvm_ppc_resize_hpt *rhpt)
+int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+                                  struct kvm_ppc_resize_hpt *rhpt)
 {
        unsigned long flags = rhpt->flags;
        unsigned long shift = rhpt->shift;
        struct kvm_resize_hpt *resize;
-       long ret;
+       int ret;
 
        if (flags != 0 || kvm_is_radix(kvm))
                return -EINVAL;
index 95e738e..93b695b 100644 (file)
@@ -288,8 +288,8 @@ static const struct file_operations kvm_spapr_tce_fops = {
        .release        = kvm_spapr_tce_release,
 };
 
-long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
-                                  struct kvm_create_spapr_tce_64 *args)
+int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+                                 struct kvm_create_spapr_tce_64 *args)
 {
        struct kvmppc_spapr_tce_table *stt = NULL;
        struct kvmppc_spapr_tce_table *siter;
index 6ba68dd..cd139a1 100644 (file)
@@ -5779,12 +5779,12 @@ static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
 }
 #endif
 
-static long kvm_arch_vm_ioctl_hv(struct file *filp,
-                                unsigned int ioctl, unsigned long arg)
+static int kvm_arch_vm_ioctl_hv(struct file *filp,
+                               unsigned int ioctl, unsigned long arg)
 {
        struct kvm *kvm __maybe_unused = filp->private_data;
        void __user *argp = (void __user *)arg;
-       long r;
+       int r;
 
        switch (ioctl) {
 
index 9fc4dd8..5908b51 100644 (file)
@@ -2042,8 +2042,8 @@ static int kvmppc_core_check_processor_compat_pr(void)
        return 0;
 }
 
-static long kvm_arch_vm_ioctl_pr(struct file *filp,
-                                unsigned int ioctl, unsigned long arg)
+static int kvm_arch_vm_ioctl_pr(struct file *filp,
+                               unsigned int ioctl, unsigned long arg)
 {
        return -ENOTTY;
 }
index 4c5405f..c0bac9c 100644 (file)
@@ -2371,12 +2371,11 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
 }
 #endif
 
-long kvm_arch_vm_ioctl(struct file *filp,
-                       unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        struct kvm *kvm __maybe_unused = filp->private_data;
        void __user *argp = (void __user *)arg;
-       long r;
+       int r;
 
        switch (ioctl) {
        case KVM_PPC_GET_PVINFO: {
index 65a964d..c13130a 100644 (file)
@@ -87,8 +87,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        return r;
 }
 
-long kvm_arch_vm_ioctl(struct file *filp,
-                      unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        return -EINVAL;
 }
index 9250fde..da6dac3 100644 (file)
@@ -305,7 +305,7 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
 
 static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
 {
-       return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa;
+       return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
 }
 
 static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
@@ -3168,7 +3168,7 @@ void kvm_s390_gisa_init(struct kvm *kvm)
        hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        gi->timer.function = gisa_vcpu_kicker;
        memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
-       gi->origin->next_alert = (u32)(u64)gi->origin;
+       gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
        VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
 }
 
index 39b3656..4c3edcc 100644 (file)
@@ -1989,7 +1989,7 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
        return ret;
 }
 
-static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
        uint8_t *keys;
        uint64_t hva;
@@ -2037,7 +2037,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        return r;
 }
 
-static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
        uint8_t *keys;
        uint64_t hva;
@@ -2898,8 +2898,7 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
        }
 }
 
-long kvm_arch_vm_ioctl(struct file *filp,
-                      unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
index b124d58..7dab00f 100644 (file)
@@ -112,7 +112,7 @@ static int zpci_reset_aipb(u8 nisc)
                return -EINVAL;
 
        aift->sbv = zpci_aif_sbv;
-       aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
+       aift->gait = phys_to_virt(zpci_aipb->aipb.gait);
 
        return 0;
 }
index b6a0219..8d6b765 100644 (file)
@@ -138,11 +138,15 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 }
 /* Copy to APCB FORMAT1 from APCB FORMAT0 */
 static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
-                       unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+                       unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
 {
        struct kvm_s390_apcb0 tmp;
+       unsigned long apcb_gpa;
 
-       if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+       apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+       if (read_guest_real(vcpu, apcb_gpa, &tmp,
+                           sizeof(struct kvm_s390_apcb0)))
                return -EFAULT;
 
        apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
@@ -157,15 +161,19 @@ static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
  * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
  * @vcpu: pointer to the virtual CPU
  * @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original apcb in the guest2
+ * @crycb_gpa: guest physical address to start of original guest crycb
  * @apcb_h: pointer to start of apcb in the guest1
  *
  * Returns 0 and -EFAULT on error reading guest apcb
  */
 static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
-                       unsigned long apcb_o, unsigned long *apcb_h)
+                       unsigned long crycb_gpa, unsigned long *apcb_h)
 {
-       if (read_guest_real(vcpu, apcb_o, apcb_s,
+       unsigned long apcb_gpa;
+
+       apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+       if (read_guest_real(vcpu, apcb_gpa, apcb_s,
                            sizeof(struct kvm_s390_apcb0)))
                return -EFAULT;
 
@@ -178,16 +186,20 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
  * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
  * @vcpu: pointer to the virtual CPU
  * @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original guest apcb
+ * @crycb_gpa: guest physical address to start of original guest crycb
  * @apcb_h: pointer to start of apcb in the host
  *
  * Returns 0 and -EFAULT on error reading guest apcb
  */
 static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
-                       unsigned long apcb_o,
+                       unsigned long crycb_gpa,
                        unsigned long *apcb_h)
 {
-       if (read_guest_real(vcpu, apcb_o, apcb_s,
+       unsigned long apcb_gpa;
+
+       apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);
+
+       if (read_guest_real(vcpu, apcb_gpa, apcb_s,
                            sizeof(struct kvm_s390_apcb1)))
                return -EFAULT;
 
@@ -200,7 +212,7 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
  * setup_apcb - Create a shadow copy of the apcb.
  * @vcpu: pointer to the virtual CPU
  * @crycb_s: pointer to shadow crycb
- * @crycb_o: pointer to original guest crycb
+ * @crycb_gpa: guest physical address of original guest crycb
  * @crycb_h: pointer to the host crycb
  * @fmt_o: format of the original guest crycb.
  * @fmt_h: format of the host crycb.
@@ -211,50 +223,46 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
  * Return 0 or an error number if the guest and host crycb are incompatible.
  */
 static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
-              const u32 crycb_o,
+              const u32 crycb_gpa,
               struct kvm_s390_crypto_cb *crycb_h,
               int fmt_o, int fmt_h)
 {
-       struct kvm_s390_crypto_cb *crycb;
-
-       crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
-
        switch (fmt_o) {
        case CRYCB_FORMAT2:
-               if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+               if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
                        return -EACCES;
                if (fmt_h != CRYCB_FORMAT2)
                        return -EINVAL;
                return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
-                                   (unsigned long) &crycb->apcb1,
+                                   crycb_gpa,
                                    (unsigned long *)&crycb_h->apcb1);
        case CRYCB_FORMAT1:
                switch (fmt_h) {
                case CRYCB_FORMAT2:
                        return setup_apcb10(vcpu, &crycb_s->apcb1,
-                                           (unsigned long) &crycb->apcb0,
+                                           crycb_gpa,
                                            &crycb_h->apcb1);
                case CRYCB_FORMAT1:
                        return setup_apcb00(vcpu,
                                            (unsigned long *) &crycb_s->apcb0,
-                                           (unsigned long) &crycb->apcb0,
+                                           crycb_gpa,
                                            (unsigned long *) &crycb_h->apcb0);
                }
                break;
        case CRYCB_FORMAT0:
-               if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+               if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
                        return -EACCES;
 
                switch (fmt_h) {
                case CRYCB_FORMAT2:
                        return setup_apcb10(vcpu, &crycb_s->apcb1,
-                                           (unsigned long) &crycb->apcb0,
+                                           crycb_gpa,
                                            &crycb_h->apcb1);
                case CRYCB_FORMAT1:
                case CRYCB_FORMAT0:
                        return setup_apcb00(vcpu,
                                            (unsigned long *) &crycb_s->apcb0,
-                                           (unsigned long) &crycb->apcb0,
+                                           crycb_gpa,
                                            (unsigned long *) &crycb_h->apcb0);
                }
        }
index 15bda40..a0e58ca 100644 (file)
@@ -947,23 +947,6 @@ struct kvm_vcpu_arch {
 
        u64 msr_kvm_poll_control;
 
-       /*
-        * Indicates the guest is trying to write a gfn that contains one or
-        * more of the PTEs used to translate the write itself, i.e. the access
-        * is changing its own translation in the guest page tables.  KVM exits
-        * to userspace if emulation of the faulting instruction fails and this
-        * flag is set, as KVM cannot make forward progress.
-        *
-        * If emulation fails for a write to guest page tables, KVM unprotects
-        * (zaps) the shadow page for the target gfn and resumes the guest to
-        * retry the non-emulatable instruction (on hardware).  Unprotecting the
-        * gfn doesn't allow forward progress for a self-changing access because
-        * doing so also zaps the translation for the gfn, i.e. retrying the
-        * instruction will hit a !PRESENT fault, which results in a new shadow
-        * page and sends KVM back to square one.
-        */
-       bool write_fault_to_shadow_pgtable;
-
        /* set at EPT violation at this point */
        unsigned long exit_qualification;
 
@@ -1907,6 +1890,25 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
  * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
  *                              state and inject single-step #DBs after skipping
  *                              an instruction (after completing userspace I/O).
+ *
+ * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that
+ *                          is attempting to write a gfn that contains one or
+ *                          more of the PTEs used to translate the write itself,
+ *                          and the owning page table is being shadowed by KVM.
+ *                          If emulation of the faulting instruction fails and
+ *                          this flag is set, KVM will exit to userspace instead
+ *                          of retrying emulation as KVM cannot make forward
+ *                          progress.
+ *
+ *                          If emulation fails for a write to guest page tables,
+ *                          KVM unprotects (zaps) the shadow page for the target
+ *                          gfn and resumes the guest to retry the non-emulatable
+ *                          instruction (on hardware).  Unprotecting the gfn
+ *                          doesn't allow forward progress for a self-changing
+ *                          access because doing so also zaps the translation for
+ *                          the gfn, i.e. retrying the instruction will hit a
+ *                          !PRESENT fault, which results in a new shadow page
+ *                          and sends KVM back to square one.
  */
 #define EMULTYPE_NO_DECODE         (1 << 0)
 #define EMULTYPE_TRAP_UD           (1 << 1)
@@ -1916,6 +1918,7 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
 #define EMULTYPE_VMWARE_GP         (1 << 5)
 #define EMULTYPE_PF                (1 << 6)
 #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
+#define EMULTYPE_WRITE_PF_TO_SP            (1 << 8)
 
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
index 599aebe..9583a11 100644 (file)
@@ -653,7 +653,7 @@ void kvm_set_cpu_caps(void)
                F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
                F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
                F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
-               F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
+               F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
        );
 
        /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
index c8ebe54..144c5a0 100644 (file)
@@ -4203,7 +4203,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
              work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
                return;
 
-       kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
+       kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
 }
 
 static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -5664,7 +5664,8 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 
        if (r == RET_PF_INVALID) {
                r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
-                                         lower_32_bits(error_code), false);
+                                         lower_32_bits(error_code), false,
+                                         &emulation_type);
                if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
                        return -EIO;
        }
index cc58631..2cbb155 100644 (file)
@@ -240,6 +240,13 @@ struct kvm_page_fault {
        kvm_pfn_t pfn;
        hva_t hva;
        bool map_writable;
+
+       /*
+        * Indicates the guest is trying to write a gfn that contains one or
+        * more of the PTEs used to translate the write itself, i.e. the access
+        * is changing its own translation in the guest page tables.
+        */
+       bool write_fault_to_shadow_pgtable;
 };
 
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
@@ -273,7 +280,7 @@ enum {
 };
 
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-                                       u32 err, bool prefetch)
+                                       u32 err, bool prefetch, int *emulation_type)
 {
        struct kvm_page_fault fault = {
                .addr = cr2_or_gpa,
@@ -312,6 +319,9 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        else
                r = vcpu->arch.mmu->page_fault(vcpu, &fault);
 
+       if (fault.write_fault_to_shadow_pgtable && emulation_type)
+               *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
+
        /*
         * Similar to above, prefetch faults aren't truly spurious, and the
         * async #PF path doesn't do emulation.  Do count faults that are fixed
index 57f0b75..a056f27 100644 (file)
@@ -685,8 +685,17 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 
                if (sp != ERR_PTR(-EEXIST))
                        link_shadow_page(vcpu, it.sptep, sp);
+
+               if (fault->write && table_gfn == fault->gfn)
+                       fault->write_fault_to_shadow_pgtable = true;
        }
 
+       /*
+        * Adjust the hugepage size _after_ resolving indirect shadow pages.
+        * KVM doesn't support mapping hugepages into the guest for gfns that
+        * are being shadowed by KVM, i.e. allocating a new shadow page may
+        * affect the allowed hugepage size.
+        */
        kvm_mmu_hugepage_adjust(vcpu, fault);
 
        trace_kvm_mmu_spte_requested(fault);
@@ -731,46 +740,6 @@ out_gpte_changed:
        return RET_PF_RETRY;
 }
 
- /*
- * To see whether the mapped gfn can write its page table in the current
- * mapping.
- *
- * It is the helper function of FNAME(page_fault). When guest uses large page
- * size to map the writable gfn which is used as current page table, we should
- * force kvm to use small page size to map it because new shadow page will be
- * created when kvm establishes shadow page table that stop kvm using large
- * page size. Do it early can avoid unnecessary #PF and emulation.
- *
- * @write_fault_to_shadow_pgtable will return true if the fault gfn is
- * currently used as its page table.
- *
- * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok
- * since the PDPT is always shadowed, that means, we can not use large page
- * size to map the gfn which is used as PDPT.
- */
-static bool
-FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
-                             struct guest_walker *walker, bool user_fault,
-                             bool *write_fault_to_shadow_pgtable)
-{
-       int level;
-       gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
-       bool self_changed = false;
-
-       if (!(walker->pte_access & ACC_WRITE_MASK ||
-           (!is_cr0_wp(vcpu->arch.mmu) && !user_fault)))
-               return false;
-
-       for (level = walker->level; level <= walker->max_level; level++) {
-               gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
-
-               self_changed |= !(gfn & mask);
-               *write_fault_to_shadow_pgtable |= !gfn;
-       }
-
-       return self_changed;
-}
-
 /*
  * Page fault handler.  There are several causes for a page fault:
  *   - there is no shadow pte for the guest pte
@@ -789,7 +758,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 {
        struct guest_walker walker;
        int r;
-       bool is_self_change_mapping;
 
        pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
        WARN_ON_ONCE(fault->is_tdp);
@@ -814,6 +782,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        }
 
        fault->gfn = walker.gfn;
+       fault->max_level = walker.level;
        fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
 
        if (page_fault_handle_page_track(vcpu, fault)) {
@@ -825,16 +794,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        if (r)
                return r;
 
-       vcpu->arch.write_fault_to_shadow_pgtable = false;
-
-       is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
-             &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
-
-       if (is_self_change_mapping)
-               fault->max_level = PG_LEVEL_4K;
-       else
-               fault->max_level = walker.level;
-
        r = kvm_faultin_pfn(vcpu, fault, walker.pte_access);
        if (r != RET_PF_CONTINUE)
                return r;
index 252e7f3..57f241c 100644 (file)
@@ -95,6 +95,7 @@ static const struct svm_direct_access_msrs {
 #endif
        { .index = MSR_IA32_SPEC_CTRL,                  .always = false },
        { .index = MSR_IA32_PRED_CMD,                   .always = false },
+       { .index = MSR_IA32_FLUSH_CMD,                  .always = false },
        { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
        { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
        { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
@@ -2872,7 +2873,7 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
 static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       int r;
+       int ret = 0;
 
        u32 ecx = msr->index;
        u64 data = msr->data;
@@ -2942,21 +2943,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 */
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
                break;
-       case MSR_IA32_PRED_CMD:
-               if (!msr->host_initiated &&
-                   !guest_has_pred_cmd_msr(vcpu))
-                       return 1;
-
-               if (data & ~PRED_CMD_IBPB)
-                       return 1;
-               if (!boot_cpu_has(X86_FEATURE_IBPB))
-                       return 1;
-               if (!data)
-                       break;
-
-               wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
-               break;
        case MSR_AMD64_VIRT_SPEC_CTRL:
                if (!msr->host_initiated &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
@@ -3009,10 +2995,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 * guest via direct_access_msrs, and switch it via user return.
                 */
                preempt_disable();
-               r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
+               ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
                preempt_enable();
-               if (r)
-                       return 1;
+               if (ret)
+                       break;
 
                svm->tsc_aux = data;
                break;
@@ -3070,7 +3056,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
        default:
                return kvm_set_msr_common(vcpu, msr);
        }
-       return 0;
+       return ret;
 }
 
 static int msr_interception(struct kvm_vcpu *vcpu)
@@ -4151,6 +4137,14 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
        svm_recalc_instruction_intercepts(vcpu, svm);
 
+       if (boot_cpu_has(X86_FEATURE_IBPB))
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0,
+                                    !!guest_has_pred_cmd_msr(vcpu));
+
+       if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
+                                    !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
        /* For sev guests, the memory encryption bit is not reserved in CR3.  */
        if (sev_guest(vcpu->kvm)) {
                best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
index 22daca7..79450e1 100644 (file)
 
 #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
 
-DEFINE_STATIC_KEY_FALSE(enable_evmcs);
+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ *     POSTED_INTR_NV                  = 0x00000002,
+ *     GUEST_INTR_STATUS               = 0x00000810,
+ *     APIC_ACCESS_ADDR                = 0x00002014,
+ *     POSTED_INTR_DESC_ADDR           = 0x00002016,
+ *     EOI_EXIT_BITMAP0                = 0x0000201c,
+ *     EOI_EXIT_BITMAP1                = 0x0000201e,
+ *     EOI_EXIT_BITMAP2                = 0x00002020,
+ *     EOI_EXIT_BITMAP3                = 0x00002022,
+ *     GUEST_PML_INDEX                 = 0x00000812,
+ *     PML_ADDRESS                     = 0x0000200e,
+ *     VM_FUNCTION_CONTROL             = 0x00002018,
+ *     EPTP_LIST_ADDRESS               = 0x00002024,
+ *     VMREAD_BITMAP                   = 0x00002026,
+ *     VMWRITE_BITMAP                  = 0x00002028,
+ *
+ *     TSC_MULTIPLIER                  = 0x00002032,
+ *     PLE_GAP                         = 0x00004020,
+ *     PLE_WINDOW                      = 0x00004022,
+ *     VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+ *
+ * Currently unsupported in KVM:
+ *     GUEST_IA32_RTIT_CTL             = 0x00002814,
+ */
+#define EVMCS1_SUPPORTED_PINCTRL                                       \
+       (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |                          \
+        PIN_BASED_EXT_INTR_MASK |                                      \
+        PIN_BASED_NMI_EXITING |                                        \
+        PIN_BASED_VIRTUAL_NMIS)
+
+#define EVMCS1_SUPPORTED_EXEC_CTRL                                     \
+       (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |                          \
+        CPU_BASED_HLT_EXITING |                                        \
+        CPU_BASED_CR3_LOAD_EXITING |                                   \
+        CPU_BASED_CR3_STORE_EXITING |                                  \
+        CPU_BASED_UNCOND_IO_EXITING |                                  \
+        CPU_BASED_MOV_DR_EXITING |                                     \
+        CPU_BASED_USE_TSC_OFFSETTING |                                 \
+        CPU_BASED_MWAIT_EXITING |                                      \
+        CPU_BASED_MONITOR_EXITING |                                    \
+        CPU_BASED_INVLPG_EXITING |                                     \
+        CPU_BASED_RDPMC_EXITING |                                      \
+        CPU_BASED_INTR_WINDOW_EXITING |                                \
+        CPU_BASED_CR8_LOAD_EXITING |                                   \
+        CPU_BASED_CR8_STORE_EXITING |                                  \
+        CPU_BASED_RDTSC_EXITING |                                      \
+        CPU_BASED_TPR_SHADOW |                                         \
+        CPU_BASED_USE_IO_BITMAPS |                                     \
+        CPU_BASED_MONITOR_TRAP_FLAG |                                  \
+        CPU_BASED_USE_MSR_BITMAPS |                                    \
+        CPU_BASED_NMI_WINDOW_EXITING |                                 \
+        CPU_BASED_PAUSE_EXITING |                                      \
+        CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+
+#define EVMCS1_SUPPORTED_2NDEXEC                                       \
+       (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |                        \
+        SECONDARY_EXEC_WBINVD_EXITING |                                \
+        SECONDARY_EXEC_ENABLE_VPID |                                   \
+        SECONDARY_EXEC_ENABLE_EPT |                                    \
+        SECONDARY_EXEC_UNRESTRICTED_GUEST |                            \
+        SECONDARY_EXEC_DESC |                                          \
+        SECONDARY_EXEC_ENABLE_RDTSCP |                                 \
+        SECONDARY_EXEC_ENABLE_INVPCID |                                \
+        SECONDARY_EXEC_XSAVES |                                        \
+        SECONDARY_EXEC_RDSEED_EXITING |                                \
+        SECONDARY_EXEC_RDRAND_EXITING |                                \
+        SECONDARY_EXEC_TSC_SCALING |                                   \
+        SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |                         \
+        SECONDARY_EXEC_PT_USE_GPA |                                    \
+        SECONDARY_EXEC_PT_CONCEAL_VMX |                                \
+        SECONDARY_EXEC_BUS_LOCK_DETECTION |                            \
+        SECONDARY_EXEC_NOTIFY_VM_EXITING |                             \
+        SECONDARY_EXEC_ENCLS_EXITING)
+
+#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
+
+#define EVMCS1_SUPPORTED_VMEXIT_CTRL                                   \
+       (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |                            \
+        VM_EXIT_SAVE_DEBUG_CONTROLS |                                  \
+        VM_EXIT_ACK_INTR_ON_EXIT |                                     \
+        VM_EXIT_HOST_ADDR_SPACE_SIZE |                                 \
+        VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |                           \
+        VM_EXIT_SAVE_IA32_PAT |                                        \
+        VM_EXIT_LOAD_IA32_PAT |                                        \
+        VM_EXIT_SAVE_IA32_EFER |                                       \
+        VM_EXIT_LOAD_IA32_EFER |                                       \
+        VM_EXIT_CLEAR_BNDCFGS |                                        \
+        VM_EXIT_PT_CONCEAL_PIP |                                       \
+        VM_EXIT_CLEAR_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMENTRY_CTRL                                  \
+       (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |                           \
+        VM_ENTRY_LOAD_DEBUG_CONTROLS |                                 \
+        VM_ENTRY_IA32E_MODE |                                          \
+        VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |                          \
+        VM_ENTRY_LOAD_IA32_PAT |                                       \
+        VM_ENTRY_LOAD_IA32_EFER |                                      \
+        VM_ENTRY_LOAD_BNDCFGS |                                        \
+        VM_ENTRY_PT_CONCEAL_PIP |                                      \
+        VM_ENTRY_LOAD_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMFUNC (0)
 
 #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
 #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
@@ -506,6 +609,8 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
 }
 
 #if IS_ENABLED(CONFIG_HYPERV)
+DEFINE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
 /*
  * KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption
  * is: in case a feature has corresponding fields in eVMCS described and it was
index 78d1766..9623fe1 100644 (file)
 
 struct vmcs_config;
 
-DECLARE_STATIC_KEY_FALSE(enable_evmcs);
-
 #define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
 
 #define KVM_EVMCS_VERSION 1
 
-/*
- * Enlightened VMCSv1 doesn't support these:
- *
- *     POSTED_INTR_NV                  = 0x00000002,
- *     GUEST_INTR_STATUS               = 0x00000810,
- *     APIC_ACCESS_ADDR                = 0x00002014,
- *     POSTED_INTR_DESC_ADDR           = 0x00002016,
- *     EOI_EXIT_BITMAP0                = 0x0000201c,
- *     EOI_EXIT_BITMAP1                = 0x0000201e,
- *     EOI_EXIT_BITMAP2                = 0x00002020,
- *     EOI_EXIT_BITMAP3                = 0x00002022,
- *     GUEST_PML_INDEX                 = 0x00000812,
- *     PML_ADDRESS                     = 0x0000200e,
- *     VM_FUNCTION_CONTROL             = 0x00002018,
- *     EPTP_LIST_ADDRESS               = 0x00002024,
- *     VMREAD_BITMAP                   = 0x00002026,
- *     VMWRITE_BITMAP                  = 0x00002028,
- *
- *     TSC_MULTIPLIER                  = 0x00002032,
- *     PLE_GAP                         = 0x00004020,
- *     PLE_WINDOW                      = 0x00004022,
- *     VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
- *
- * Currently unsupported in KVM:
- *     GUEST_IA32_RTIT_CTL             = 0x00002814,
- */
-#define EVMCS1_SUPPORTED_PINCTRL                                       \
-       (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |                          \
-        PIN_BASED_EXT_INTR_MASK |                                      \
-        PIN_BASED_NMI_EXITING |                                        \
-        PIN_BASED_VIRTUAL_NMIS)
-
-#define EVMCS1_SUPPORTED_EXEC_CTRL                                     \
-       (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |                          \
-        CPU_BASED_HLT_EXITING |                                        \
-        CPU_BASED_CR3_LOAD_EXITING |                                   \
-        CPU_BASED_CR3_STORE_EXITING |                                  \
-        CPU_BASED_UNCOND_IO_EXITING |                                  \
-        CPU_BASED_MOV_DR_EXITING |                                     \
-        CPU_BASED_USE_TSC_OFFSETTING |                                 \
-        CPU_BASED_MWAIT_EXITING |                                      \
-        CPU_BASED_MONITOR_EXITING |                                    \
-        CPU_BASED_INVLPG_EXITING |                                     \
-        CPU_BASED_RDPMC_EXITING |                                      \
-        CPU_BASED_INTR_WINDOW_EXITING |                                \
-        CPU_BASED_CR8_LOAD_EXITING |                                   \
-        CPU_BASED_CR8_STORE_EXITING |                                  \
-        CPU_BASED_RDTSC_EXITING |                                      \
-        CPU_BASED_TPR_SHADOW |                                         \
-        CPU_BASED_USE_IO_BITMAPS |                                     \
-        CPU_BASED_MONITOR_TRAP_FLAG |                                  \
-        CPU_BASED_USE_MSR_BITMAPS |                                    \
-        CPU_BASED_NMI_WINDOW_EXITING |                                 \
-        CPU_BASED_PAUSE_EXITING |                                      \
-        CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
-
-#define EVMCS1_SUPPORTED_2NDEXEC                                       \
-       (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |                        \
-        SECONDARY_EXEC_WBINVD_EXITING |                                \
-        SECONDARY_EXEC_ENABLE_VPID |                                   \
-        SECONDARY_EXEC_ENABLE_EPT |                                    \
-        SECONDARY_EXEC_UNRESTRICTED_GUEST |                            \
-        SECONDARY_EXEC_DESC |                                          \
-        SECONDARY_EXEC_ENABLE_RDTSCP |                                 \
-        SECONDARY_EXEC_ENABLE_INVPCID |                                \
-        SECONDARY_EXEC_XSAVES |                                        \
-        SECONDARY_EXEC_RDSEED_EXITING |                                \
-        SECONDARY_EXEC_RDRAND_EXITING |                                \
-        SECONDARY_EXEC_TSC_SCALING |                                   \
-        SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |                         \
-        SECONDARY_EXEC_PT_USE_GPA |                                    \
-        SECONDARY_EXEC_PT_CONCEAL_VMX |                                \
-        SECONDARY_EXEC_BUS_LOCK_DETECTION |                            \
-        SECONDARY_EXEC_NOTIFY_VM_EXITING |                             \
-        SECONDARY_EXEC_ENCLS_EXITING)
-
-#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
-
-#define EVMCS1_SUPPORTED_VMEXIT_CTRL                                   \
-       (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |                            \
-        VM_EXIT_SAVE_DEBUG_CONTROLS |                                  \
-        VM_EXIT_ACK_INTR_ON_EXIT |                                     \
-        VM_EXIT_HOST_ADDR_SPACE_SIZE |                                 \
-        VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |                           \
-        VM_EXIT_SAVE_IA32_PAT |                                        \
-        VM_EXIT_LOAD_IA32_PAT |                                        \
-        VM_EXIT_SAVE_IA32_EFER |                                       \
-        VM_EXIT_LOAD_IA32_EFER |                                       \
-        VM_EXIT_CLEAR_BNDCFGS |                                        \
-        VM_EXIT_PT_CONCEAL_PIP |                                       \
-        VM_EXIT_CLEAR_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMENTRY_CTRL                                  \
-       (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |                           \
-        VM_ENTRY_LOAD_DEBUG_CONTROLS |                                 \
-        VM_ENTRY_IA32E_MODE |                                          \
-        VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |                          \
-        VM_ENTRY_LOAD_IA32_PAT |                                       \
-        VM_ENTRY_LOAD_IA32_EFER |                                      \
-        VM_ENTRY_LOAD_BNDCFGS |                                        \
-        VM_ENTRY_PT_CONCEAL_PIP |                                      \
-        VM_ENTRY_LOAD_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMFUNC (0)
-
 struct evmcs_field {
        u16 offset;
        u16 clean_field;
@@ -174,6 +67,13 @@ static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
 
 #if IS_ENABLED(CONFIG_HYPERV)
 
+DECLARE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
+static __always_inline bool kvm_is_using_evmcs(void)
+{
+       return static_branch_unlikely(&__kvm_is_using_evmcs);
+}
+
 static __always_inline int get_evmcs_offset(unsigned long field,
                                            u16 *clean_field)
 {
@@ -263,6 +163,7 @@ static inline void evmcs_load(u64 phys_addr)
 
 void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
 #else /* !IS_ENABLED(CONFIG_HYPERV) */
+static __always_inline bool kvm_is_using_evmcs(void) { return false; }
 static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
 static __always_inline void evmcs_write32(unsigned long field, u32 value) {}
 static __always_inline void evmcs_write16(unsigned long field, u16 value) {}
index 1bc2b80..f63b28f 100644 (file)
@@ -654,6 +654,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
        nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
                                         MSR_IA32_PRED_CMD, MSR_TYPE_W);
 
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+
        kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
        vmx->nested.force_msr_bitmap_recalc = false;
index d2d6e1b..56e0c7a 100644 (file)
@@ -164,6 +164,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
        MSR_IA32_SPEC_CTRL,
        MSR_IA32_PRED_CMD,
+       MSR_IA32_FLUSH_CMD,
        MSR_IA32_TSC,
 #ifdef CONFIG_X86_64
        MSR_FS_BASE,
@@ -579,7 +580,7 @@ static __init void hv_init_evmcs(void)
 
                if (enlightened_vmcs) {
                        pr_info("Using Hyper-V Enlightened VMCS\n");
-                       static_branch_enable(&enable_evmcs);
+                       static_branch_enable(&__kvm_is_using_evmcs);
                }
 
                if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
@@ -595,7 +596,7 @@ static void hv_reset_evmcs(void)
 {
        struct hv_vp_assist_page *vp_ap;
 
-       if (!static_branch_unlikely(&enable_evmcs))
+       if (!kvm_is_using_evmcs())
                return;
 
        /*
@@ -2285,33 +2286,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
                        return 1;
                goto find_uret_msr;
-       case MSR_IA32_PRED_CMD:
-               if (!msr_info->host_initiated &&
-                   !guest_has_pred_cmd_msr(vcpu))
-                       return 1;
-
-               if (data & ~PRED_CMD_IBPB)
-                       return 1;
-               if (!boot_cpu_has(X86_FEATURE_IBPB))
-                       return 1;
-               if (!data)
-                       break;
-
-               wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-
-               /*
-                * For non-nested:
-                * When it's written (to non-zero) for the first time, pass
-                * it through.
-                *
-                * For nested:
-                * The handling of the MSR bitmap for L2 guests is done in
-                * nested_vmx_prepare_msr_bitmap. We should not touch the
-                * vmcs02.msr_bitmap here since it gets completely overwritten
-                * in the merging.
-                */
-               vmx_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
-               break;
        case MSR_IA32_CR_PAT:
                if (!kvm_pat_valid(data))
                        return 1;
@@ -2816,8 +2790,7 @@ static int vmx_hardware_enable(void)
         * This can happen if we hot-added a CPU but failed to allocate
         * VP assist page for it.
         */
-       if (static_branch_unlikely(&enable_evmcs) &&
-           !hv_get_vp_assist_page(cpu))
+       if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
                return -EFAULT;
 
        intel_pt_handle_vmx(1);
@@ -2869,7 +2842,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
        memset(vmcs, 0, vmcs_config.size);
 
        /* KVM supports Enlightened VMCS v1 only */
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
        else
                vmcs->hdr.revision_id = vmcs_config.revision_id;
@@ -2964,7 +2937,7 @@ static __init int alloc_kvm_area(void)
                 * still be marked with revision_id reported by
                 * physical CPU.
                 */
-               if (static_branch_unlikely(&enable_evmcs))
+               if (kvm_is_using_evmcs())
                        vmcs->hdr.revision_id = vmcs_config.revision_id;
 
                per_cpu(vmxarea, cpu) = vmcs;
@@ -3931,7 +3904,7 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
         * 'Enlightened MSR Bitmap' feature L0 needs to know that MSR
         * bitmap has changed.
         */
-       if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs)) {
+       if (kvm_is_using_evmcs()) {
                struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
 
                if (evmcs->hv_enlightenments_control.msr_bitmap)
@@ -7310,7 +7283,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
 
        /* All fields are clean at this point */
-       if (static_branch_unlikely(&enable_evmcs)) {
+       if (kvm_is_using_evmcs()) {
                current_evmcs->hv_clean_fields |=
                        HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
@@ -7440,7 +7413,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
         * feature only for vmcs01, KVM currently isn't equipped to realize any
         * performance benefits from enabling it for vmcs02.
         */
-       if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
+       if (kvm_is_using_evmcs() &&
            (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
                struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
 
@@ -7744,6 +7717,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
                                          !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
 
+       if (boot_cpu_has(X86_FEATURE_IBPB))
+               vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+                                         !guest_has_pred_cmd_msr(vcpu));
+
+       if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+               vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+                                         !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
 
        set_cr4_guest_host_mask(vmx);
 
index 2acdc54..cb766f6 100644 (file)
@@ -369,7 +369,7 @@ struct vcpu_vmx {
        struct lbr_desc lbr_desc;
 
        /* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS  15
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS  16
        struct {
                DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
                DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
index db95bde..ce47dc2 100644 (file)
@@ -147,7 +147,7 @@ do_exception:
 static __always_inline u16 vmcs_read16(unsigned long field)
 {
        vmcs_check16(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_read16(field);
        return __vmcs_readl(field);
 }
@@ -155,7 +155,7 @@ static __always_inline u16 vmcs_read16(unsigned long field)
 static __always_inline u32 vmcs_read32(unsigned long field)
 {
        vmcs_check32(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_read32(field);
        return __vmcs_readl(field);
 }
@@ -163,7 +163,7 @@ static __always_inline u32 vmcs_read32(unsigned long field)
 static __always_inline u64 vmcs_read64(unsigned long field)
 {
        vmcs_check64(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_read64(field);
 #ifdef CONFIG_X86_64
        return __vmcs_readl(field);
@@ -175,7 +175,7 @@ static __always_inline u64 vmcs_read64(unsigned long field)
 static __always_inline unsigned long vmcs_readl(unsigned long field)
 {
        vmcs_checkl(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_read64(field);
        return __vmcs_readl(field);
 }
@@ -222,7 +222,7 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
 static __always_inline void vmcs_write16(unsigned long field, u16 value)
 {
        vmcs_check16(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_write16(field, value);
 
        __vmcs_writel(field, value);
@@ -231,7 +231,7 @@ static __always_inline void vmcs_write16(unsigned long field, u16 value)
 static __always_inline void vmcs_write32(unsigned long field, u32 value)
 {
        vmcs_check32(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_write32(field, value);
 
        __vmcs_writel(field, value);
@@ -240,7 +240,7 @@ static __always_inline void vmcs_write32(unsigned long field, u32 value)
 static __always_inline void vmcs_write64(unsigned long field, u64 value)
 {
        vmcs_check64(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_write64(field, value);
 
        __vmcs_writel(field, value);
@@ -252,7 +252,7 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
 static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
 {
        vmcs_checkl(field);
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_write64(field, value);
 
        __vmcs_writel(field, value);
@@ -262,7 +262,7 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
 {
        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
                         "vmcs_clear_bits does not support 64-bit fields");
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_write32(field, evmcs_read32(field) & ~mask);
 
        __vmcs_writel(field, __vmcs_readl(field) & ~mask);
@@ -272,7 +272,7 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 {
        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
                         "vmcs_set_bits does not support 64-bit fields");
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_write32(field, evmcs_read32(field) | mask);
 
        __vmcs_writel(field, __vmcs_readl(field) | mask);
@@ -289,7 +289,7 @@ static inline void vmcs_load(struct vmcs *vmcs)
 {
        u64 phys_addr = __pa(vmcs);
 
-       if (static_branch_unlikely(&enable_evmcs))
+       if (kvm_is_using_evmcs())
                return evmcs_load(phys_addr);
 
        vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);
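The mechanical conversions above assume a small helper in place of open-coded checks of the enable_evmcs static key; roughly something along these lines, with the key itself presumably renamed to __kvm_is_using_evmcs (a sketch, not the exact upstream definition):

	/*
	 * Sketch: a wrapper around the static key, so the accessors above keep
	 * their branchless fast path when enlightened VMCS is not in use.
	 */
	DECLARE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);

	static __always_inline bool kvm_is_using_evmcs(void)
	{
		return static_branch_unlikely(&__kvm_is_using_evmcs);
	}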
index 27a1d5c..9cf1c31 100644 (file)
@@ -3617,6 +3617,29 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                vcpu->arch.perf_capabilities = data;
                kvm_pmu_refresh(vcpu);
                return 0;
+       case MSR_IA32_PRED_CMD:
+               if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
+                       return 1;
+
+               if (!boot_cpu_has(X86_FEATURE_IBPB) || (data & ~PRED_CMD_IBPB))
+                       return 1;
+               if (!data)
+                       break;
+
+               wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               if (!msr_info->host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+                       return 1;
+
+               if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
+                       return 1;
+               if (!data)
+                       break;
+
+               wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+               break;
        case MSR_EFER:
                return set_efer(vcpu, msr_info);
        case MSR_K7_HWCR:
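For the new MSR_IA32_PRED_CMD / MSR_IA32_FLUSH_CMD handling, writes arriving via KVM_SET_MSRS are host_initiated and therefore skip the guest CPUID check, but still hit the boot_cpu_has() and reserved-bit checks. A hedged userspace sketch (vcpu_fd is assumed to be an already-created vCPU fd; only bit 0 of either MSR may be set):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_pred_cmd(int vcpu_fd)
	{
		/* struct kvm_msrs ends in a flexible array, hence the wrapper. */
		struct {
			struct kvm_msrs hdr;
			struct kvm_msr_entry entry;
		} msrs = {
			.hdr.nmsrs   = 1,
			.entry.index = 0x49,	/* MSR_IA32_PRED_CMD */
			.entry.data  = 1,	/* PRED_CMD_IBPB */
		};

		/* KVM_SET_MSRS returns the number of MSRs processed. */
		return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs) == 1 ? 0 : -1;
	}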
@@ -6021,11 +6044,6 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
        return 0;
 }
 
-static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
-{
-       return kvm->arch.n_max_mmu_pages;
-}
-
 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 {
        struct kvm_pic *pic = kvm->arch.vpic;
@@ -6672,8 +6690,7 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
        return 0;
 }
 
-long kvm_arch_vm_ioctl(struct file *filp,
-                      unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
@@ -6711,9 +6728,6 @@ set_identity_unlock:
        case KVM_SET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
                break;
-       case KVM_GET_NR_MMU_PAGES:
-               r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
-               break;
        case KVM_CREATE_IRQCHIP: {
                mutex_lock(&kvm->lock);
 
@@ -8463,7 +8477,6 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-                                 bool write_fault_to_shadow_pgtable,
                                  int emulation_type)
 {
        gpa_t gpa = cr2_or_gpa;
@@ -8534,7 +8547,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
         * be fixed by unprotecting shadow page and it should
         * be reported to userspace.
         */
-       return !write_fault_to_shadow_pgtable;
+       return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
 }
 
 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -8782,20 +8795,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        int r;
        struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
        bool writeback = true;
-       bool write_fault_to_spt;
 
        if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
                return 1;
 
        vcpu->arch.l1tf_flush_l1d = true;
 
-       /*
-        * Clear write_fault_to_shadow_pgtable here to ensure it is
-        * never reused.
-        */
-       write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
-       vcpu->arch.write_fault_to_shadow_pgtable = false;
-
        if (!(emulation_type & EMULTYPE_NO_DECODE)) {
                kvm_clear_exception_queue(vcpu);
 
@@ -8816,7 +8821,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                                return 1;
                        }
                        if (reexecute_instruction(vcpu, cr2_or_gpa,
-                                                 write_fault_to_spt,
                                                  emulation_type))
                                return 1;
 
@@ -8895,8 +8899,7 @@ restart:
                return 1;
 
        if (r == EMULATION_FAILED) {
-               if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
-                                       emulation_type))
+               if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type))
                        return 1;
 
                return handle_emulation_failure(vcpu, emulation_type);
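The last three hunks retire the per-vCPU write_fault_to_shadow_pgtable bool; the "this write fault hit a shadow page table" hint now travels in the emulation_type flags as EMULTYPE_WRITE_PF_TO_SP. How the flag gets set is not shown here; presumably the MMU fault path does something along these lines (a sketch under that assumption, names hypothetical where not visible above):

	/*
	 * Sketch: the page-fault handler is assumed to OR the hint into the
	 * emulation type it later passes to x86_emulate_instruction(), instead
	 * of stashing it in vcpu->arch.
	 */
	if (fault.write_fault_to_shadow_pgtable && emulation_type)
		*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;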
index 8ada237..90edc16 100644 (file)
@@ -1397,8 +1397,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
                        bool line_status);
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                            struct kvm_enable_cap *cap);
-long kvm_arch_vm_ioctl(struct file *filp,
-                      unsigned int ioctl, unsigned long arg);
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
 long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
                              unsigned long arg);
 
index 16287a9..737318b 100644 (file)
@@ -1457,7 +1457,7 @@ struct kvm_vfio_spapr_tce {
 #define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
-#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)  /* deprecated */
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
                                        struct kvm_userspace_memory_region)
 #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
index d77aef8..4003a16 100644 (file)
@@ -1451,7 +1451,7 @@ struct kvm_vfio_spapr_tce {
 #define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
-#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)  /* deprecated */
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
                                        struct kvm_userspace_memory_region)
 #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
index d255964..f40b72e 100644 (file)
@@ -4467,7 +4467,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
        return 0;
 }
 
-static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 {
        switch (arg) {
        case KVM_CAP_USER_MEMORY:
@@ -5045,7 +5045,7 @@ put_fd:
 static long kvm_dev_ioctl(struct file *filp,
                          unsigned int ioctl, unsigned long arg)
 {
-       long r = -EINVAL;
+       int r = -EINVAL;
 
        switch (ioctl) {
        case KVM_GET_API_VERSION: