};
Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The
-``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr``
+``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned.
+``length`` must not be bigger than 2^31 - PAGE_SIZE bytes. The ``addr``
field must point to a buffer which the tags will be copied to or from.
``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or
``KVM_ARM_TAGS_FROM_GUEST``.
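For illustration, a minimal userspace sketch of driving this ioctl follows. It
is a sketch under stated assumptions, not code from this series: it assumes an
arm64 host with MTE, a 4 KiB ``PAGE_SIZE``, an already-open VM file descriptor
``vm_fd``, and an arbitrary, hypothetical guest IPA; error handling is elided.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>	/* struct kvm_arm_copy_mte_tags, KVM_ARM_MTE_COPY_TAGS */

/* Read the MTE tags of one guest page into tag_buf. MTE keeps one tag
 * per 16-byte granule, so tag_buf must hold at least 4096 / 16 bytes.
 */
int read_page_tags(int vm_fd, uint8_t *tag_buf)
{
	struct kvm_arm_copy_mte_tags copy = {
		.guest_ipa = 0x80000000,	/* hypothetical; PAGE_SIZE aligned */
		.length    = 4096,		/* PAGE_SIZE aligned, below 2^31 - PAGE_SIZE */
		.addr      = tag_buf,
		.flags     = KVM_ARM_TAGS_FROM_GUEST,
	};

	/* On success the return value reports the bytes handled, which is
	 * why the kernel caps length at INT_MAX (see the check below). */
	return ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
}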
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
- struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
struct kvm_arm_counter_offset *offset);
}
}
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
return ret;
}
-long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
- struct kvm_arm_copy_mte_tags *copy_tags)
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags)
{
gpa_t guest_ipa = copy_tags->guest_ipa;
size_t length = copy_tags->length;
if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
return -EINVAL;
+ /* Lengths above INT_MAX cannot be represented in the return value */
+ if (length > INT_MAX)
+ return -EINVAL;
+
gfn = gpa_to_gfn(guest_ipa);
mutex_lock(&kvm->slots_lock);
kvm_flush_remote_tlbs(kvm);
}
-long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
- long r;
+ int r;
switch (ioctl) {
default:
extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
-extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
+extern int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
extern void kvmppc_rmap_reset(struct kvm *kvm);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
extern void kvmppc_setup_partition_table(struct kvm *kvm);
-extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+extern int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
#define kvmppc_ioba_validate(stt, ioba, npages) \
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
-extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
- struct kvm_ppc_resize_hpt *rhpt);
-extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
- struct kvm_ppc_resize_hpt *rhpt);
+extern int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt);
+extern int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
- long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
- unsigned long arg);
+ int (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
+ unsigned long arg);
int (*hcall_implemented)(unsigned long hcall);
int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
info->virt, (long)info->order, kvm->arch.lpid);
}
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
+int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
{
- long err = -EBUSY;
+ int err = -EBUSY;
struct kvm_hpt_info info;
mutex_lock(&kvm->arch.mmu_setup_lock);
mutex_unlock(&kvm->arch.mmu_setup_lock);
}
-long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
- struct kvm_ppc_resize_hpt *rhpt)
+int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
{
unsigned long flags = rhpt->flags;
unsigned long shift = rhpt->shift;
/* Nothing to do, just force a KVM exit */
}
-long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
- struct kvm_ppc_resize_hpt *rhpt)
+int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
{
unsigned long flags = rhpt->flags;
unsigned long shift = rhpt->shift;
struct kvm_resize_hpt *resize;
- long ret;
+ int ret;
if (flags != 0 || kvm_is_radix(kvm))
return -EINVAL;
.release = kvm_spapr_tce_release,
};
-long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce_64 *args)
+int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
}
#endif
-static long kvm_arch_vm_ioctl_hv(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static int kvm_arch_vm_ioctl_hv(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
+ int r;
switch (ioctl) {
return 0;
}
-static long kvm_arch_vm_ioctl_pr(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static int kvm_arch_vm_ioctl_pr(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
return -ENOTTY;
}
}
#endif
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
+ int r;
switch (ioctl) {
case KVM_PPC_GET_PVINFO: {
return r;
}
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
return -EINVAL;
}
static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
{
- return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa;
+ return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
}
static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
gi->timer.function = gisa_vcpu_kicker;
memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
- gi->origin->next_alert = (u32)(u64)gi->origin;
+ gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
}
return ret;
}
-static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
return r;
}
-static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
}
}
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
return -EINVAL;
aift->sbv = zpci_aif_sbv;
- aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
+ aift->gait = phys_to_virt(zpci_aipb->aipb.gait);
return 0;
}
}
/* Copy to APCB FORMAT1 from APCB FORMAT0 */
static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
- unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+ unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
{
struct kvm_s390_apcb0 tmp;
+ unsigned long apcb_gpa;
- if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+ apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+ if (read_guest_real(vcpu, apcb_gpa, &tmp,
+ sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
* setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
* @vcpu: pointer to the virtual CPU
* @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original apcb in the guest2
+ * @crycb_gpa: guest physical address to start of original guest crycb
* @apcb_h: pointer to start of apcb in the guest1
*
* Returns 0 on success, -EFAULT on error reading guest apcb
*/
static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
- unsigned long apcb_o, unsigned long *apcb_h)
+ unsigned long crycb_gpa, unsigned long *apcb_h)
{
- if (read_guest_real(vcpu, apcb_o, apcb_s,
+ unsigned long apcb_gpa;
+
+ apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+ if (read_guest_real(vcpu, apcb_gpa, apcb_s,
sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
* setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
* @vcpu: pointer to the virtual CPU
* @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original guest apcb
+ * @crycb_gpa: guest physical address to start of original guest crycb
* @apcb_h: pointer to start of apcb in the host
*
* Returns 0 on success, -EFAULT on error reading guest apcb
*/
static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
- unsigned long apcb_o,
+ unsigned long crycb_gpa,
unsigned long *apcb_h)
{
- if (read_guest_real(vcpu, apcb_o, apcb_s,
+ unsigned long apcb_gpa;
+
+ apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);
+
+ if (read_guest_real(vcpu, apcb_gpa, apcb_s,
sizeof(struct kvm_s390_apcb1)))
return -EFAULT;
* setup_apcb - Create a shadow copy of the apcb.
* @vcpu: pointer to the virtual CPU
* @crycb_s: pointer to shadow crycb
- * @crycb_o: pointer to original guest crycb
+ * @crycb_gpa: guest physical address of original guest crycb
* @crycb_h: pointer to the host crycb
* @fmt_o: format of the original guest crycb.
* @fmt_h: format of the host crycb.
* Return 0 or an error number if the guest and host crycb are incompatible.
*/
static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
- const u32 crycb_o,
+ const u32 crycb_gpa,
struct kvm_s390_crypto_cb *crycb_h,
int fmt_o, int fmt_h)
{
- struct kvm_s390_crypto_cb *crycb;
-
- crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
-
switch (fmt_o) {
case CRYCB_FORMAT2:
- if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+ if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
return -EACCES;
if (fmt_h != CRYCB_FORMAT2)
return -EINVAL;
return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
- (unsigned long) &crycb->apcb1,
+ crycb_gpa,
(unsigned long *)&crycb_h->apcb1);
case CRYCB_FORMAT1:
switch (fmt_h) {
case CRYCB_FORMAT2:
return setup_apcb10(vcpu, &crycb_s->apcb1,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
&crycb_h->apcb1);
case CRYCB_FORMAT1:
return setup_apcb00(vcpu,
(unsigned long *) &crycb_s->apcb0,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
(unsigned long *) &crycb_h->apcb0);
}
break;
case CRYCB_FORMAT0:
- if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+ if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
return -EACCES;
switch (fmt_h) {
case CRYCB_FORMAT2:
return setup_apcb10(vcpu, &crycb_s->apcb1,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
&crycb_h->apcb1);
case CRYCB_FORMAT1:
case CRYCB_FORMAT0:
return setup_apcb00(vcpu,
(unsigned long *) &crycb_s->apcb0,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
(unsigned long *) &crycb_h->apcb0);
}
}
u64 msr_kvm_poll_control;
- /*
- * Indicates the guest is trying to write a gfn that contains one or
- * more of the PTEs used to translate the write itself, i.e. the access
- * is changing its own translation in the guest page tables. KVM exits
- * to userspace if emulation of the faulting instruction fails and this
- * flag is set, as KVM cannot make forward progress.
- *
- * If emulation fails for a write to guest page tables, KVM unprotects
- * (zaps) the shadow page for the target gfn and resumes the guest to
- * retry the non-emulatable instruction (on hardware). Unprotecting the
- * gfn doesn't allow forward progress for a self-changing access because
- * doing so also zaps the translation for the gfn, i.e. retrying the
- * instruction will hit a !PRESENT fault, which results in a new shadow
- * page and sends KVM back to square one.
- */
- bool write_fault_to_shadow_pgtable;
-
/* set at EPT violation at this point */
unsigned long exit_qualification;
* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
* state and inject single-step #DBs after skipping
* an instruction (after completing userspace I/O).
+ *
+ * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that
+ * is attempting to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself,
+ * and the owning page table is being shadowed by KVM.
+ * If emulation of the faulting instruction fails and
+ * this flag is set, KVM will exit to userspace instead
+ * of retrying emulation as KVM cannot make forward
+ * progress.
+ *
+ * If emulation fails for a write to guest page tables,
+ * KVM unprotects (zaps) the shadow page for the target
+ * gfn and resumes the guest to retry the non-emulatable
+ * instruction (on hardware). Unprotecting the gfn
+ * doesn't allow forward progress for a self-changing
+ * access because doing so also zaps the translation for
+ * the gfn, i.e. retrying the instruction will hit a
+ * !PRESENT fault, which results in a new shadow page
+ * and sends KVM back to square one.
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_VMWARE_GP (1 << 5)
#define EMULTYPE_PF (1 << 6)
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
+#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
return;
- kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
+ kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
}
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (r == RET_PF_INVALID) {
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
- lower_32_bits(error_code), false);
+ lower_32_bits(error_code), false,
+ &emulation_type);
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
return -EIO;
}
kvm_pfn_t pfn;
hva_t hva;
bool map_writable;
+
+ /*
+ * Indicates the guest is trying to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself, i.e. the access
+ * is changing its own translation in the guest page tables.
+ */
+ bool write_fault_to_shadow_pgtable;
};
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
};
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u32 err, bool prefetch)
+ u32 err, bool prefetch, int *emulation_type)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
else
r = vcpu->arch.mmu->page_fault(vcpu, &fault);
+ if (fault.write_fault_to_shadow_pgtable && emulation_type)
+ *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
+
/*
* Similar to above, prefetch faults aren't truly spurious, and the
* async #PF path doesn't do emulation. Do count faults that are fixed
if (sp != ERR_PTR(-EEXIST))
link_shadow_page(vcpu, it.sptep, sp);
+
+ if (fault->write && table_gfn == fault->gfn)
+ fault->write_fault_to_shadow_pgtable = true;
}
+ /*
+ * Adjust the hugepage size _after_ resolving indirect shadow pages.
+ * KVM doesn't support mapping hugepages into the guest for gfns that
+ * are being shadowed by KVM, i.e. allocating a new shadow page may
+ * affect the allowed hugepage size.
+ */
kvm_mmu_hugepage_adjust(vcpu, fault);
trace_kvm_mmu_spte_requested(fault);
return RET_PF_RETRY;
}
- /*
- * To see whether the mapped gfn can write its page table in the current
- * mapping.
- *
- * It is the helper function of FNAME(page_fault). When guest uses large page
- * size to map the writable gfn which is used as current page table, we should
- * force kvm to use small page size to map it because new shadow page will be
- * created when kvm establishes shadow page table that stop kvm using large
- * page size. Do it early can avoid unnecessary #PF and emulation.
- *
- * @write_fault_to_shadow_pgtable will return true if the fault gfn is
- * currently used as its page table.
- *
- * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok
- * since the PDPT is always shadowed, that means, we can not use large page
- * size to map the gfn which is used as PDPT.
- */
-static bool
-FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
- struct guest_walker *walker, bool user_fault,
- bool *write_fault_to_shadow_pgtable)
-{
- int level;
- gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
- bool self_changed = false;
-
- if (!(walker->pte_access & ACC_WRITE_MASK ||
- (!is_cr0_wp(vcpu->arch.mmu) && !user_fault)))
- return false;
-
- for (level = walker->level; level <= walker->max_level; level++) {
- gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
-
- self_changed |= !(gfn & mask);
- *write_fault_to_shadow_pgtable |= !gfn;
- }
-
- return self_changed;
-}
-
/*
* Page fault handler. There are several causes for a page fault:
* - there is no shadow pte for the guest pte
{
struct guest_walker walker;
int r;
- bool is_self_change_mapping;
pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
WARN_ON_ONCE(fault->is_tdp);
}
fault->gfn = walker.gfn;
+ fault->max_level = walker.level;
fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
if (page_fault_handle_page_track(vcpu, fault)) {
if (r)
return r;
- vcpu->arch.write_fault_to_shadow_pgtable = false;
-
- is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
- &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
-
- if (is_self_change_mapping)
- fault->max_level = PG_LEVEL_4K;
- else
- fault->max_level = walker.level;
-
r = kvm_faultin_pfn(vcpu, fault, walker.pte_access);
if (r != RET_PF_CONTINUE)
return r;
#endif
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
{ .index = MSR_IA32_PRED_CMD, .always = false },
+ { .index = MSR_IA32_FLUSH_CMD, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct vcpu_svm *svm = to_svm(vcpu);
- int r;
+ int ret = 0;
u32 ecx = msr->index;
u64 data = msr->data;
*/
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
break;
- case MSR_IA32_PRED_CMD:
- if (!msr->host_initiated &&
- !guest_has_pred_cmd_msr(vcpu))
- return 1;
-
- if (data & ~PRED_CMD_IBPB)
- return 1;
- if (!boot_cpu_has(X86_FEATURE_IBPB))
- return 1;
- if (!data)
- break;
-
- wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
- break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
* guest via direct_access_msrs, and switch it via user return.
*/
preempt_disable();
- r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
+ ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable();
- if (r)
- return 1;
+ if (ret)
+ break;
svm->tsc_aux = data;
break;
default:
return kvm_set_msr_common(vcpu, msr);
}
- return 0;
+ return ret;
}
static int msr_interception(struct kvm_vcpu *vcpu)
svm_recalc_instruction_intercepts(vcpu, svm);
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0,
+ !!guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
+ !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
-DEFINE_STATIC_KEY_FALSE(enable_evmcs);
+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ * VM_FUNCTION_CONTROL = 0x00002018,
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ *
+ * TSC_MULTIPLIER = 0x00002032,
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ *
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+#define EVMCS1_SUPPORTED_PINCTRL \
+ (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
+ PIN_BASED_EXT_INTR_MASK | \
+ PIN_BASED_NMI_EXITING | \
+ PIN_BASED_VIRTUAL_NMIS)
+
+#define EVMCS1_SUPPORTED_EXEC_CTRL \
+ (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
+ CPU_BASED_HLT_EXITING | \
+ CPU_BASED_CR3_LOAD_EXITING | \
+ CPU_BASED_CR3_STORE_EXITING | \
+ CPU_BASED_UNCOND_IO_EXITING | \
+ CPU_BASED_MOV_DR_EXITING | \
+ CPU_BASED_USE_TSC_OFFSETTING | \
+ CPU_BASED_MWAIT_EXITING | \
+ CPU_BASED_MONITOR_EXITING | \
+ CPU_BASED_INVLPG_EXITING | \
+ CPU_BASED_RDPMC_EXITING | \
+ CPU_BASED_INTR_WINDOW_EXITING | \
+ CPU_BASED_CR8_LOAD_EXITING | \
+ CPU_BASED_CR8_STORE_EXITING | \
+ CPU_BASED_RDTSC_EXITING | \
+ CPU_BASED_TPR_SHADOW | \
+ CPU_BASED_USE_IO_BITMAPS | \
+ CPU_BASED_MONITOR_TRAP_FLAG | \
+ CPU_BASED_USE_MSR_BITMAPS | \
+ CPU_BASED_NMI_WINDOW_EXITING | \
+ CPU_BASED_PAUSE_EXITING | \
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+
+#define EVMCS1_SUPPORTED_2NDEXEC \
+ (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
+ SECONDARY_EXEC_WBINVD_EXITING | \
+ SECONDARY_EXEC_ENABLE_VPID | \
+ SECONDARY_EXEC_ENABLE_EPT | \
+ SECONDARY_EXEC_UNRESTRICTED_GUEST | \
+ SECONDARY_EXEC_DESC | \
+ SECONDARY_EXEC_ENABLE_RDTSCP | \
+ SECONDARY_EXEC_ENABLE_INVPCID | \
+ SECONDARY_EXEC_XSAVES | \
+ SECONDARY_EXEC_RDSEED_EXITING | \
+ SECONDARY_EXEC_RDRAND_EXITING | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
+ SECONDARY_EXEC_PT_USE_GPA | \
+ SECONDARY_EXEC_PT_CONCEAL_VMX | \
+ SECONDARY_EXEC_BUS_LOCK_DETECTION | \
+ SECONDARY_EXEC_NOTIFY_VM_EXITING | \
+ SECONDARY_EXEC_ENCLS_EXITING)
+
+#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
+
+#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
+ (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
+ VM_EXIT_SAVE_DEBUG_CONTROLS | \
+ VM_EXIT_ACK_INTR_ON_EXIT | \
+ VM_EXIT_HOST_ADDR_SPACE_SIZE | \
+ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_IA32_PAT | \
+ VM_EXIT_LOAD_IA32_PAT | \
+ VM_EXIT_SAVE_IA32_EFER | \
+ VM_EXIT_LOAD_IA32_EFER | \
+ VM_EXIT_CLEAR_BNDCFGS | \
+ VM_EXIT_PT_CONCEAL_PIP | \
+ VM_EXIT_CLEAR_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
+ (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
+ VM_ENTRY_LOAD_DEBUG_CONTROLS | \
+ VM_ENTRY_IA32E_MODE | \
+ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_ENTRY_LOAD_IA32_PAT | \
+ VM_ENTRY_LOAD_IA32_EFER | \
+ VM_ENTRY_LOAD_BNDCFGS | \
+ VM_ENTRY_PT_CONCEAL_PIP | \
+ VM_ENTRY_LOAD_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMFUNC (0)
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
}
#if IS_ENABLED(CONFIG_HYPERV)
+DEFINE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
/*
* KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption
* is: in case a feature has corresponding fields in eVMCS described and it was
struct vmcs_config;
-DECLARE_STATIC_KEY_FALSE(enable_evmcs);
-
#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
#define KVM_EVMCS_VERSION 1
-/*
- * Enlightened VMCSv1 doesn't support these:
- *
- * POSTED_INTR_NV = 0x00000002,
- * GUEST_INTR_STATUS = 0x00000810,
- * APIC_ACCESS_ADDR = 0x00002014,
- * POSTED_INTR_DESC_ADDR = 0x00002016,
- * EOI_EXIT_BITMAP0 = 0x0000201c,
- * EOI_EXIT_BITMAP1 = 0x0000201e,
- * EOI_EXIT_BITMAP2 = 0x00002020,
- * EOI_EXIT_BITMAP3 = 0x00002022,
- * GUEST_PML_INDEX = 0x00000812,
- * PML_ADDRESS = 0x0000200e,
- * VM_FUNCTION_CONTROL = 0x00002018,
- * EPTP_LIST_ADDRESS = 0x00002024,
- * VMREAD_BITMAP = 0x00002026,
- * VMWRITE_BITMAP = 0x00002028,
- *
- * TSC_MULTIPLIER = 0x00002032,
- * PLE_GAP = 0x00004020,
- * PLE_WINDOW = 0x00004022,
- * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- *
- * Currently unsupported in KVM:
- * GUEST_IA32_RTIT_CTL = 0x00002814,
- */
-#define EVMCS1_SUPPORTED_PINCTRL \
- (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
- PIN_BASED_EXT_INTR_MASK | \
- PIN_BASED_NMI_EXITING | \
- PIN_BASED_VIRTUAL_NMIS)
-
-#define EVMCS1_SUPPORTED_EXEC_CTRL \
- (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
- CPU_BASED_HLT_EXITING | \
- CPU_BASED_CR3_LOAD_EXITING | \
- CPU_BASED_CR3_STORE_EXITING | \
- CPU_BASED_UNCOND_IO_EXITING | \
- CPU_BASED_MOV_DR_EXITING | \
- CPU_BASED_USE_TSC_OFFSETTING | \
- CPU_BASED_MWAIT_EXITING | \
- CPU_BASED_MONITOR_EXITING | \
- CPU_BASED_INVLPG_EXITING | \
- CPU_BASED_RDPMC_EXITING | \
- CPU_BASED_INTR_WINDOW_EXITING | \
- CPU_BASED_CR8_LOAD_EXITING | \
- CPU_BASED_CR8_STORE_EXITING | \
- CPU_BASED_RDTSC_EXITING | \
- CPU_BASED_TPR_SHADOW | \
- CPU_BASED_USE_IO_BITMAPS | \
- CPU_BASED_MONITOR_TRAP_FLAG | \
- CPU_BASED_USE_MSR_BITMAPS | \
- CPU_BASED_NMI_WINDOW_EXITING | \
- CPU_BASED_PAUSE_EXITING | \
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
-
-#define EVMCS1_SUPPORTED_2NDEXEC \
- (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
- SECONDARY_EXEC_WBINVD_EXITING | \
- SECONDARY_EXEC_ENABLE_VPID | \
- SECONDARY_EXEC_ENABLE_EPT | \
- SECONDARY_EXEC_UNRESTRICTED_GUEST | \
- SECONDARY_EXEC_DESC | \
- SECONDARY_EXEC_ENABLE_RDTSCP | \
- SECONDARY_EXEC_ENABLE_INVPCID | \
- SECONDARY_EXEC_XSAVES | \
- SECONDARY_EXEC_RDSEED_EXITING | \
- SECONDARY_EXEC_RDRAND_EXITING | \
- SECONDARY_EXEC_TSC_SCALING | \
- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
- SECONDARY_EXEC_PT_USE_GPA | \
- SECONDARY_EXEC_PT_CONCEAL_VMX | \
- SECONDARY_EXEC_BUS_LOCK_DETECTION | \
- SECONDARY_EXEC_NOTIFY_VM_EXITING | \
- SECONDARY_EXEC_ENCLS_EXITING)
-
-#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
-
-#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
- (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
- VM_EXIT_SAVE_DEBUG_CONTROLS | \
- VM_EXIT_ACK_INTR_ON_EXIT | \
- VM_EXIT_HOST_ADDR_SPACE_SIZE | \
- VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_EXIT_SAVE_IA32_PAT | \
- VM_EXIT_LOAD_IA32_PAT | \
- VM_EXIT_SAVE_IA32_EFER | \
- VM_EXIT_LOAD_IA32_EFER | \
- VM_EXIT_CLEAR_BNDCFGS | \
- VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
- (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
- VM_ENTRY_LOAD_DEBUG_CONTROLS | \
- VM_ENTRY_IA32E_MODE | \
- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_ENTRY_LOAD_IA32_PAT | \
- VM_ENTRY_LOAD_IA32_EFER | \
- VM_ENTRY_LOAD_BNDCFGS | \
- VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMFUNC (0)
-
struct evmcs_field {
u16 offset;
u16 clean_field;
#if IS_ENABLED(CONFIG_HYPERV)
+DECLARE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
+static __always_inline bool kvm_is_using_evmcs(void)
+{
+ return static_branch_unlikely(&__kvm_is_using_evmcs);
+}
+
static __always_inline int get_evmcs_offset(unsigned long field,
u16 *clean_field)
{
void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
+static __always_inline bool kvm_is_using_evmcs(void) { return false; }
static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
static __always_inline void evmcs_write32(unsigned long field, u32 value) {}
static __always_inline void evmcs_write16(unsigned long field, u16 value) {}
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PRED_CMD, MSR_TYPE_W);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
vmx->nested.force_msr_bitmap_recalc = false;
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SPEC_CTRL,
MSR_IA32_PRED_CMD,
+ MSR_IA32_FLUSH_CMD,
MSR_IA32_TSC,
#ifdef CONFIG_X86_64
MSR_FS_BASE,
if (enlightened_vmcs) {
pr_info("Using Hyper-V Enlightened VMCS\n");
- static_branch_enable(&enable_evmcs);
+ static_branch_enable(&__kvm_is_using_evmcs);
}
if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
{
struct hv_vp_assist_page *vp_ap;
- if (!static_branch_unlikely(&enable_evmcs))
+ if (!kvm_is_using_evmcs())
return;
/*
if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
return 1;
goto find_uret_msr;
- case MSR_IA32_PRED_CMD:
- if (!msr_info->host_initiated &&
- !guest_has_pred_cmd_msr(vcpu))
- return 1;
-
- if (data & ~PRED_CMD_IBPB)
- return 1;
- if (!boot_cpu_has(X86_FEATURE_IBPB))
- return 1;
- if (!data)
- break;
-
- wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-
- /*
- * For non-nested:
- * When it's written (to non-zero) for the first time, pass
- * it through.
- *
- * For nested:
- * The handling of the MSR bitmap for L2 guests is done in
- * nested_vmx_prepare_msr_bitmap. We should not touch the
- * vmcs02.msr_bitmap here since it gets completely overwritten
- * in the merging.
- */
- vmx_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
- break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
* This can happen if we hot-added a CPU but failed to allocate
* VP assist page for it.
*/
- if (static_branch_unlikely(&enable_evmcs) &&
- !hv_get_vp_assist_page(cpu))
+ if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
return -EFAULT;
intel_pt_handle_vmx(1);
memset(vmcs, 0, vmcs_config.size);
/* KVM supports Enlightened VMCS v1 only */
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
else
vmcs->hdr.revision_id = vmcs_config.revision_id;
* still be marked with revision_id reported by
* physical CPU.
*/
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
vmcs->hdr.revision_id = vmcs_config.revision_id;
per_cpu(vmxarea, cpu) = vmcs;
* 'Enlightened MSR Bitmap' feature L0 needs to know that MSR
* bitmap has changed.
*/
- if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs)) {
+ if (kvm_is_using_evmcs()) {
struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
if (evmcs->hv_enlightenments_control.msr_bitmap)
vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
/* All fields are clean at this point */
- if (static_branch_unlikely(&enable_evmcs)) {
+ if (kvm_is_using_evmcs()) {
current_evmcs->hv_clean_fields |=
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
* feature only for vmcs01, KVM currently isn't equipped to realize any
* performance benefits from enabling it for vmcs02.
*/
- if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
+ if (kvm_is_using_evmcs() &&
(ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
!guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+ !guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
set_cr4_guest_host_mask(vmx);
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
static __always_inline u16 vmcs_read16(unsigned long field)
{
vmcs_check16(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read16(field);
return __vmcs_readl(field);
}
static __always_inline u32 vmcs_read32(unsigned long field)
{
vmcs_check32(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read32(field);
return __vmcs_readl(field);
}
static __always_inline u64 vmcs_read64(unsigned long field)
{
vmcs_check64(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read64(field);
#ifdef CONFIG_X86_64
return __vmcs_readl(field);
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
vmcs_checkl(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_read64(field);
return __vmcs_readl(field);
}
static __always_inline void vmcs_write16(unsigned long field, u16 value)
{
vmcs_check16(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write16(field, value);
__vmcs_writel(field, value);
static __always_inline void vmcs_write32(unsigned long field, u32 value)
{
vmcs_check32(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write32(field, value);
__vmcs_writel(field, value);
static __always_inline void vmcs_write64(unsigned long field, u64 value)
{
vmcs_check64(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write64(field, value);
__vmcs_writel(field, value);
static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
{
vmcs_checkl(field);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write64(field, value);
__vmcs_writel(field, value);
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_clear_bits does not support 64-bit fields");
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write32(field, evmcs_read32(field) & ~mask);
__vmcs_writel(field, __vmcs_readl(field) & ~mask);
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_set_bits does not support 64-bit fields");
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_write32(field, evmcs_read32(field) | mask);
__vmcs_writel(field, __vmcs_readl(field) | mask);
{
u64 phys_addr = __pa(vmcs);
- if (static_branch_unlikely(&enable_evmcs))
+ if (kvm_is_using_evmcs())
return evmcs_load(phys_addr);
vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);
vcpu->arch.perf_capabilities = data;
kvm_pmu_refresh(vcpu);
return 0;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
+ return 1;
+
+ if (!boot_cpu_has(X86_FEATURE_IBPB) || (data & ~PRED_CMD_IBPB))
+ return 1;
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+ return 1;
+
+ if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
+ return 1;
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ break;
case MSR_EFER:
return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
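As a hedged illustration of the host_initiated path through the consolidated
handler above: writes issued via the KVM_SET_MSRS vCPU ioctl arrive with
msr_info->host_initiated set, so they skip the guest CPUID checks but still
fail unless the host CPU has the feature. The sketch below is illustrative
only; vcpu_fd is assumed to be an open vCPU fd, and the MSR constants are
defined locally because linux/kvm.h does not export them.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Architectural values (Intel SDM); not provided by linux/kvm.h. */
#define MSR_IA32_FLUSH_CMD	0x0000010b
#define L1D_FLUSH		(1ULL << 0)

/* Hypothetical helper: host-initiated write of MSR_IA32_FLUSH_CMD.
 * KVM validates the payload in kvm_set_msr_common() and, because the
 * MSR is a write-only command, performs the L1D flush on the host.
 */
int set_flush_cmd(int vcpu_fd)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs = {
		.hdr   = { .nmsrs = 1 },
		.entry = { .index = MSR_IA32_FLUSH_CMD, .data = L1D_FLUSH },
	};

	/* KVM_SET_MSRS returns the number of MSRs processed: 1 on success. */
	return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs.hdr);
}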
return 0;
}
-static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
-{
- return kvm->arch.n_max_mmu_pages;
-}
-
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
struct kvm_pic *pic = kvm->arch.vpic;
return 0;
}
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
break;
- case KVM_GET_NR_MMU_PAGES:
- r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
- break;
case KVM_CREATE_IRQCHIP: {
mutex_lock(&kvm->lock);
}
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- bool write_fault_to_shadow_pgtable,
int emulation_type)
{
gpa_t gpa = cr2_or_gpa;
* be fixed by unprotecting shadow page and it should
* be reported to userspace.
*/
- return !write_fault_to_shadow_pgtable;
+ return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
}
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
int r;
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
bool writeback = true;
- bool write_fault_to_spt;
if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
return 1;
vcpu->arch.l1tf_flush_l1d = true;
- /*
- * Clear write_fault_to_shadow_pgtable here to ensure it is
- * never reused.
- */
- write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
- vcpu->arch.write_fault_to_shadow_pgtable = false;
-
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
kvm_clear_exception_queue(vcpu);
return 1;
}
- if (reexecute_instruction(vcpu, cr2_or_gpa,
- write_fault_to_spt,
- emulation_type))
+ if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type))
return 1;
return 1;
if (r == EMULATION_FAILED) {
- if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
- emulation_type))
+ if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type))
return 1;
return handle_emulation_failure(vcpu, emulation_type);
bool line_status);
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap);
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg);
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg);
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
+#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
+#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
return 0;
}
-static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
{
switch (arg) {
case KVM_CAP_USER_MEMORY:
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
- long r = -EINVAL;
+ int r = -EINVAL;
switch (ioctl) {
case KVM_GET_API_VERSION: