Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 31 Oct 2021 18:19:02 +0000 (11:19 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 31 Oct 2021 18:19:02 +0000 (11:19 -0700)
Pull kvm fixes from Paolo Bonzini:

 - Fixes for s390 interrupt delivery

 - Fixes for Xen emulator bugs showing up as debug kernel WARNs

 - Fix another issue with SEV/ES string I/O VMGEXITs

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Take srcu lock in post_kvm_run_save()
  KVM: SEV-ES: fix another issue with string I/O VMGEXITs
  KVM: x86/xen: Fix kvm_xen_has_interrupt() sleeping in kvm_vcpu_block()
  KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock
  KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
  KVM: s390: clear kicked_mask before sleeping again

1  2 
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/sev.c
arch/x86/kvm/x86.c

@@@ -702,8 -702,7 +702,8 @@@ struct kvm_vcpu_arch 
  
        struct kvm_pio_request pio;
        void *pio_data;
 -      void *guest_ins_data;
 +      void *sev_pio_data;
 +      unsigned sev_pio_count;
  
        u8 event_exit_inst_len;
  
@@@ -1098,7 -1097,7 +1098,7 @@@ struct kvm_arch 
        u64 cur_tsc_generation;
        int nr_vcpus_matched_tsc;
  
-       spinlock_t pvclock_gtod_sync_lock;
+       raw_spinlock_t pvclock_gtod_sync_lock;
        bool use_master_clock;
        u64 master_kernel_ns;
        u64 master_cycle_now;
diff --combined arch/x86/kvm/svm/sev.c
@@@ -618,12 -618,7 +618,12 @@@ static int __sev_launch_update_vmsa(str
        vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
        vmsa.address = __sme_pa(svm->vmsa);
        vmsa.len = PAGE_SIZE;
 -      return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
 +      ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
 +      if (ret)
 +        return ret;
 +
 +      vcpu->arch.guest_state_protected = true;
 +      return 0;
  }
  
  static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@@ -1484,13 -1479,6 +1484,13 @@@ static int sev_receive_update_data(stru
                goto e_free_trans;
        }
  
 +      /*
 +       * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
 +       * encrypts the written data with the guest's key, and the cache may
 +       * contain dirty, unencrypted data.
 +       */
 +      sev_clflush_pages(guest_page, n);
 +
        /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
        data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
        data.guest_address |= sev_me_mask;
@@@ -2591,11 -2579,20 +2591,20 @@@ int sev_handle_vmgexit(struct kvm_vcpu 
  
  int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
  {
-       if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+       int count;
+       int bytes;
+       if (svm->vmcb->control.exit_info_2 > INT_MAX)
+               return -EINVAL;
+       count = svm->vmcb->control.exit_info_2;
+       if (unlikely(check_mul_overflow(count, size, &bytes)))
+               return -EINVAL;
+       if (!setup_vmgexit_scratch(svm, in, bytes))
                return -EINVAL;
  
-       return kvm_sev_es_string_io(&svm->vcpu, size, port,
-                                   svm->ghcb_sa, svm->ghcb_sa_len / size, in);
+       return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
  }
  
  void sev_es_init_vmcb(struct vcpu_svm *svm)
diff --combined arch/x86/kvm/x86.c
@@@ -2542,7 -2542,7 +2542,7 @@@ static void kvm_synchronize_tsc(struct 
        kvm_vcpu_write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
  
-       spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
        if (!matched) {
                kvm->arch.nr_vcpus_matched_tsc = 0;
        } else if (!already_matched) {
        }
  
        kvm_track_tsc_matching(vcpu);
-       spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
  }
  
  static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@@ -2780,9 -2780,9 +2780,9 @@@ static void kvm_gen_update_masterclock(
        kvm_make_mclock_inprogress_request(kvm);
  
        /* no guest entries from this point */
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        pvclock_update_vm_gtod_copy(kvm);
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
  
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@@ -2800,15 -2800,15 +2800,15 @@@ u64 get_kvmclock_ns(struct kvm *kvm
        unsigned long flags;
        u64 ret;
  
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        if (!ka->use_master_clock) {
-               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                return get_kvmclock_base_ns() + ka->kvmclock_offset;
        }
  
        hv_clock.tsc_timestamp = ka->master_cycle_now;
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
  
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
@@@ -2902,13 -2902,13 +2902,13 @@@ static int kvm_guest_time_update(struc
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        use_master_clock = ka->use_master_clock;
        if (use_master_clock) {
                host_tsc = ka->master_cycle_now;
                kernel_ns = ka->master_kernel_ns;
        }
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
  
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@@ -6100,13 -6100,13 +6100,13 @@@ set_pit2_out
                 * is slightly ahead) here we risk going negative on unsigned
                 * 'system_time' when 'user_ns.clock' is very small.
                 */
-               spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+               raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
                if (kvm->arch.use_master_clock)
                        now_ns = ka->master_kernel_ns;
                else
                        now_ns = get_kvmclock_base_ns();
                ka->kvmclock_offset = user_ns.clock - now_ns;
-               spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+               raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
  
                kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
@@@ -6906,7 -6906,7 +6906,7 @@@ static int kernel_pio(struct kvm_vcpu *
  }
  
  static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
 -                             unsigned short port, void *val,
 +                             unsigned short port,
                               unsigned int count, bool in)
  {
        vcpu->arch.pio.port = port;
        vcpu->arch.pio.count  = count;
        vcpu->arch.pio.size = size;
  
 -      if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
 -              vcpu->arch.pio.count = 0;
 +      if (!kernel_pio(vcpu, vcpu->arch.pio_data))
                return 1;
 -      }
  
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
        return 0;
  }
  
 -static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
 -                         unsigned short port, void *val, unsigned int count)
 +static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
 +                           unsigned short port, unsigned int count)
  {
 -      int ret;
 +      WARN_ON(vcpu->arch.pio.count);
 +      memset(vcpu->arch.pio_data, 0, size * count);
 +      return emulator_pio_in_out(vcpu, size, port, count, true);
 +}
  
 -      if (vcpu->arch.pio.count)
 -              goto data_avail;
 +static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
 +{
 +      int size = vcpu->arch.pio.size;
 +      unsigned count = vcpu->arch.pio.count;
 +      memcpy(val, vcpu->arch.pio_data, size * count);
 +      trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
 +      vcpu->arch.pio.count = 0;
 +}
  
 -      memset(vcpu->arch.pio_data, 0, size * count);
 +static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
 +                         unsigned short port, void *val, unsigned int count)
 +{
 +      if (vcpu->arch.pio.count) {
 +              /* Complete previous iteration.  */
 +      } else {
 +              int r = __emulator_pio_in(vcpu, size, port, count);
 +              if (!r)
 +                      return r;
  
 -      ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
 -      if (ret) {
 -data_avail:
 -              memcpy(val, vcpu->arch.pio_data, size * count);
 -              trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
 -              vcpu->arch.pio.count = 0;
 -              return 1;
 +              /* Results already available, fall through.  */
        }
  
 -      return 0;
 +      WARN_ON(count != vcpu->arch.pio.count);
 +      complete_emulator_pio_in(vcpu, val);
 +      return 1;
  }
  
  static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
@@@ -6974,15 -6963,9 +6974,15 @@@ static int emulator_pio_out(struct kvm_
                            unsigned short port, const void *val,
                            unsigned int count)
  {
 +      int ret;
 +
        memcpy(vcpu->arch.pio_data, val, size * count);
        trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
 -      return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
 +      ret = emulator_pio_in_out(vcpu, size, port, count, false);
 +      if (ret)
 +                vcpu->arch.pio.count = 0;
 +
 +        return ret;
  }
  
  static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
@@@ -8156,9 -8139,9 +8156,9 @@@ static void kvm_hyperv_tsc_notifier(voi
        list_for_each_entry(kvm, &vm_list, vm_list) {
                struct kvm_arch *ka = &kvm->arch;
  
-               spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                pvclock_update_vm_gtod_copy(kvm);
-               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
  
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@@ -8800,9 -8783,17 +8800,17 @@@ static void post_kvm_run_save(struct kv
  
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
+       /*
+        * The call to kvm_ready_for_interrupt_injection() may end up in
+        * kvm_xen_has_interrupt() which may require the srcu lock to be
+        * held, to protect against changes in the vcpu_info address.
+        */
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        kvm_run->ready_for_interrupt_injection =
                pic_in_kernel(vcpu->kvm) ||
                kvm_vcpu_ready_for_interrupt_injection(vcpu);
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
  
        if (is_smm(vcpu))
                kvm_run->flags |= KVM_RUN_X86_SMM;
@@@ -9660,14 -9651,14 +9668,14 @@@ static int vcpu_enter_guest(struct kvm_
                if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
                        break;
  
 -                if (unlikely(kvm_vcpu_exit_request(vcpu))) {
 +              if (vcpu->arch.apicv_active)
 +                      static_call(kvm_x86_sync_pir_to_irr)(vcpu);
 +
 +              if (unlikely(kvm_vcpu_exit_request(vcpu))) {
                        exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
                        break;
                }
 -
 -              if (vcpu->arch.apicv_active)
 -                      static_call(kvm_x86_sync_pir_to_irr)(vcpu);
 -        }
 +      }
  
        /*
         * Do this here before restoring debug registers on the host.  And
@@@ -11199,7 -11190,7 +11207,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
  
        raw_spin_lock_init(&kvm->arch.tsc_write_lock);
        mutex_init(&kvm->arch.apic_map_lock);
-       spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+       raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
  
        kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
        pvclock_update_vm_gtod_copy(kvm);
@@@ -12385,81 -12376,44 +12393,81 @@@ int kvm_sev_es_mmio_read(struct kvm_vcp
  }
  EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
  
 -static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
 +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
 +                         unsigned int port);
 +
 +static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
  {
 -      memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
 -             vcpu->arch.pio.count * vcpu->arch.pio.size);
 -      vcpu->arch.pio.count = 0;
 +      int size = vcpu->arch.pio.size;
 +      int port = vcpu->arch.pio.port;
  
 +      vcpu->arch.pio.count = 0;
 +      if (vcpu->arch.sev_pio_count)
 +              return kvm_sev_es_outs(vcpu, size, port);
        return 1;
  }
  
  static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
 -                         unsigned int port, void *data,  unsigned int count)
 +                         unsigned int port)
  {
 -      int ret;
 -
 -      ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
 -                                      data, count);
 -      if (ret)
 -              return ret;
 +      for (;;) {
 +              unsigned int count =
 +                      min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
 +              int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
 +
 +              /* memcpy done already by emulator_pio_out.  */
 +              vcpu->arch.sev_pio_count -= count;
 +              vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
 +              if (!ret)
 +                      break;
  
 -      vcpu->arch.pio.count = 0;
 +              /* Emulation done by the kernel.  */
 +              if (!vcpu->arch.sev_pio_count)
 +                      return 1;
 +      }
  
 +      vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
        return 0;
  }
  
  static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
 -                        unsigned int port, void *data, unsigned int count)
 +                        unsigned int port);
 +
 +static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
  {
 -      int ret;
 +      unsigned count = vcpu->arch.pio.count;
 +      complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
 +      vcpu->arch.sev_pio_count -= count;
 +      vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
 +}
  
 -      ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port,
 -                                     data, count);
 -      if (ret) {
 -              vcpu->arch.pio.count = 0;
 -      } else {
 -              vcpu->arch.guest_ins_data = data;
 -              vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
 +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
 +{
 +      int size = vcpu->arch.pio.size;
 +      int port = vcpu->arch.pio.port;
 +
 +      advance_sev_es_emulated_ins(vcpu);
 +      if (vcpu->arch.sev_pio_count)
 +              return kvm_sev_es_ins(vcpu, size, port);
 +      return 1;
 +}
 +
 +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
 +                        unsigned int port)
 +{
 +      for (;;) {
 +              unsigned int count =
 +                      min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
 +              if (!__emulator_pio_in(vcpu, size, port, count))
 +                      break;
 +
 +              /* Emulation done by the kernel.  */
 +              advance_sev_es_emulated_ins(vcpu);
 +              if (!vcpu->arch.sev_pio_count)
 +                      return 1;
        }
  
 +      vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
        return 0;
  }
  
@@@ -12467,10 -12421,8 +12475,10 @@@ int kvm_sev_es_string_io(struct kvm_vcp
                         unsigned int port, void *data,  unsigned int count,
                         int in)
  {
 -      return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
 -                : kvm_sev_es_outs(vcpu, size, port, data, count);
 +      vcpu->arch.sev_pio_data = data;
 +      vcpu->arch.sev_pio_count = count;
 +      return in ? kvm_sev_es_ins(vcpu, size, port)
 +                : kvm_sev_es_outs(vcpu, size, port);
  }
  EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);