KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_info
author     David Woodhouse <dwmw@amazon.co.uk>	Thu, 3 Mar 2022 15:41:15 +0000
committer  Paolo Bonzini <pbonzini@redhat.com>	Sat, 2 Apr 2022 09:41:13 +0000
Currently, the fast path of kvm_xen_set_evtchn_fast() doesn't set the
index bits in the target vCPU's evtchn_pending_sel, because it only has
a userspace virtual address with which to do so. It just sets them in
the kernel, and kvm_xen_has_interrupt() then completes the delivery to
the actual vcpu_info structure when the vCPU runs.

Using a gfn_to_pfn_cache allows kvm_xen_set_evtchn_fast() to do the full
delivery in the common case.

Clean up the fallback case too, by moving the deferred delivery out into
a separate kvm_xen_inject_pending_events() function which, unlike
__kvm_xen_has_interrupt(), is never called in atomic contexts.
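
The access pattern this adopts, visible in the xen.c hunks below, is to
take the cache's read lock, check that the cached mapping is still
valid, and drop the lock to refresh (which may sleep) whenever the
check fails. Condensed from the patch, with the actual delivery elided:

    read_lock_irqsave(&gpc->lock, flags);
    while (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa,
                                       sizeof(struct vcpu_info))) {
            read_unlock_irqrestore(&gpc->lock, flags);

            /* May sleep to bring the page in. */
            if (kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
                                             sizeof(struct vcpu_info)))
                    return;    /* Userspace broke the mapping. */

            read_lock_irqsave(&gpc->lock, flags);
    }
    /* gpc->khva is now a valid kernel mapping of the vcpu_info. */
    ...
    read_unlock_irqrestore(&gpc->lock, flags);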

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-6-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5a0ff6a..fce4dca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -606,9 +606,8 @@ struct kvm_vcpu_hv {
 struct kvm_vcpu_xen {
        u64 hypercall_rip;
        u32 current_runstate;
-       bool vcpu_info_set;
        bool vcpu_time_info_set;
-       struct gfn_to_hva_cache vcpu_info_cache;
+       struct gfn_to_pfn_cache vcpu_info_cache;
        struct gfn_to_hva_cache vcpu_time_info_cache;
        struct gfn_to_pfn_cache runstate_cache;
        u64 last_steal;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5d43b4..d67487e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3158,9 +3158,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
        if (vcpu->pv_time.active)
                kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
-       if (vcpu->xen.vcpu_info_set)
-               kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
-                                      offsetof(struct compat_vcpu_info, time));
+       if (vcpu->xen.vcpu_info_cache.active)
+               kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
+                                       offsetof(struct compat_vcpu_info, time));
        if (vcpu->xen.vcpu_time_info_set)
                kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
        if (!v->vcpu_idx)
@@ -10424,6 +10424,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
                        break;
 
                kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
+               if (kvm_xen_has_pending_events(vcpu))
+                       kvm_xen_inject_pending_events(vcpu);
+
                if (kvm_cpu_has_pending_timer(vcpu))
                        kvm_inject_pending_timer_irqs(vcpu);
 
@@ -12236,6 +12239,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
            kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
                return true;
 
+       if (kvm_xen_has_pending_events(vcpu))
+               return true;
+
        return false;
 }
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index de1b3be..56e8a69 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -8,6 +8,7 @@
 
 #include "x86.h"
 #include "xen.h"
+#include "lapic.h"
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
@@ -246,23 +247,79 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
        mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
 }
 
-int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+/*
+ * On event channel delivery, the vcpu_info may not have been accessible.
+ * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which
+ * need to be marked into the vcpu_info (and evtchn_upcall_pending set).
+ * Do so now that we can sleep in the context of the vCPU to bring the
+ * page in, and refresh the pfn cache for it.
+ */
+void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 {
        unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
-       bool atomic = in_atomic() || !task_is_running(current);
-       int err;
+       struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
+       unsigned long flags;
+
+       if (!evtchn_pending_sel)
+               return;
+
+       /*
+        * Yes, this is an open-coded loop. But that's just what put_user()
+        * does anyway. Page it in and retry the instruction. We're just a
+        * little more honest about it.
+        */
+       read_lock_irqsave(&gpc->lock, flags);
+       while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+                                          sizeof(struct vcpu_info))) {
+               read_unlock_irqrestore(&gpc->lock, flags);
+
+               if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+                                                sizeof(struct vcpu_info)))
+                       return;
+
+               read_lock_irqsave(&gpc->lock, flags);
+       }
+
+       /* Now gpc->khva is a valid kernel address for the vcpu_info */
+       if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
+               struct vcpu_info *vi = gpc->khva;
+
+               asm volatile(LOCK_PREFIX "orq %0, %1\n"
+                            "notq %0\n"
+                            LOCK_PREFIX "andq %0, %2\n"
+                            : "=r" (evtchn_pending_sel),
+                              "+m" (vi->evtchn_pending_sel),
+                              "+m" (v->arch.xen.evtchn_pending_sel)
+                            : "0" (evtchn_pending_sel));
+               WRITE_ONCE(vi->evtchn_upcall_pending, 1);
+       } else {
+               u32 evtchn_pending_sel32 = evtchn_pending_sel;
+               struct compat_vcpu_info *vi = gpc->khva;
+
+               asm volatile(LOCK_PREFIX "orl %0, %1\n"
+                            "notl %0\n"
+                            LOCK_PREFIX "andl %0, %2\n"
+                            : "=r" (evtchn_pending_sel32),
+                              "+m" (vi->evtchn_pending_sel),
+                              "+m" (v->arch.xen.evtchn_pending_sel)
+                            : "0" (evtchn_pending_sel32));
+               WRITE_ONCE(vi->evtchn_upcall_pending, 1);
+       }
+       read_unlock_irqrestore(&gpc->lock, flags);
+
+       mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+}
+
+int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+{
+       struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
+       unsigned long flags;
        u8 rc = 0;
 
        /*
         * If the global upcall vector (HVMIRQ_callback_vector) is set and
         * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
         */
-       struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
-       struct kvm_memslots *slots = kvm_memslots(v->kvm);
-       bool ghc_valid = slots->generation == ghc->generation &&
-               !kvm_is_error_hva(ghc->hva) && ghc->memslot;
-
-       unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
 
        /* No need for compat handling here */
        BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
@@ -272,101 +329,35 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
        BUILD_BUG_ON(sizeof(rc) !=
                     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
-       /*
-        * For efficiency, this mirrors the checks for using the valid
-        * cache in kvm_read_guest_offset_cached(), but just uses
-        * __get_user() instead. And falls back to the slow path.
-        */
-       if (!evtchn_pending_sel && ghc_valid) {
-               /* Fast path */
-               pagefault_disable();
-               err = __get_user(rc, (u8 __user *)ghc->hva + offset);
-               pagefault_enable();
-               if (!err)
-                       return rc;
-       }
-
-       /* Slow path */
+       read_lock_irqsave(&gpc->lock, flags);
+       while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+                                          sizeof(struct vcpu_info))) {
+               read_unlock_irqrestore(&gpc->lock, flags);
 
-       /*
-        * This function gets called from kvm_vcpu_block() after setting the
-        * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
-        * from a HLT. So we really mustn't sleep. If the page ended up absent
-        * at that point, just return 1 in order to trigger an immediate wake,
-        * and we'll end up getting called again from a context where we *can*
-        * fault in the page and wait for it.
-        */
-       if (atomic)
-               return 1;
+               /*
+                * This function gets called from kvm_vcpu_block() after setting the
+                * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+                * from a HLT. So we really mustn't sleep. If the page ended up absent
+                * at that point, just return 1 in order to trigger an immediate wake,
+                * and we'll end up getting called again from a context where we *can*
+                * fault in the page and wait for it.
+                */
+               if (in_atomic() || !task_is_running(current))
+                       return 1;
 
-       if (!ghc_valid) {
-               err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
-               if (err || !ghc->memslot) {
+               if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+                                                sizeof(struct vcpu_info))) {
                        /*
                         * If this failed, userspace has screwed up the
                         * vcpu_info mapping. No interrupts for you.
                         */
                        return 0;
                }
+               read_lock_irqsave(&gpc->lock, flags);
        }
 
-       /*
-        * Now we have a valid (protected by srcu) userspace HVA in
-        * ghc->hva which points to the struct vcpu_info. If there
-        * are any bits in the in-kernel evtchn_pending_sel then
-        * we need to write those to the guest vcpu_info and set
-        * its evtchn_upcall_pending flag. If there aren't any bits
-        * to add, we only want to *check* evtchn_upcall_pending.
-        */
-       if (evtchn_pending_sel) {
-               bool long_mode = v->kvm->arch.xen.long_mode;
-
-               if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
-                       return 0;
-
-               if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
-                       struct vcpu_info __user *vi = (void __user *)ghc->hva;
-
-                       /* Attempt to set the evtchn_pending_sel bits in the
-                        * guest, and if that succeeds then clear the same
-                        * bits in the in-kernel version. */
-                       asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
-                                    "\tnotq %0\n"
-                                    "\t" LOCK_PREFIX "andq %0, %2\n"
-                                    "2:\n"
-                                    _ASM_EXTABLE_UA(1b, 2b)
-                                    : "=r" (evtchn_pending_sel),
-                                      "+m" (vi->evtchn_pending_sel),
-                                      "+m" (v->arch.xen.evtchn_pending_sel)
-                                    : "0" (evtchn_pending_sel));
-               } else {
-                       struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
-                       u32 evtchn_pending_sel32 = evtchn_pending_sel;
-
-                       /* Attempt to set the evtchn_pending_sel bits in the
-                        * guest, and if that succeeds then clear the same
-                        * bits in the in-kernel version. */
-                       asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
-                                    "\tnotl %0\n"
-                                    "\t" LOCK_PREFIX "andl %0, %2\n"
-                                    "2:\n"
-                                    _ASM_EXTABLE_UA(1b, 2b)
-                                    : "=r" (evtchn_pending_sel32),
-                                      "+m" (vi->evtchn_pending_sel),
-                                      "+m" (v->arch.xen.evtchn_pending_sel)
-                                    : "0" (evtchn_pending_sel32));
-               }
-               rc = 1;
-               unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
-
-       err:
-               user_access_end();
-
-               mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
-       } else {
-               __get_user(rc, (u8 __user *)ghc->hva + offset);
-       }
-
+       rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
+       read_unlock_irqrestore(&gpc->lock, flags);
        return rc;
 }
 
@@ -456,25 +447,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                             offsetof(struct compat_vcpu_info, time));
 
                if (data->u.gpa == GPA_INVALID) {
-                       vcpu->arch.xen.vcpu_info_set = false;
+                       kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
                        r = 0;
                        break;
                }
 
-               /* It must fit within a single page */
-               if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
-                       r = -EINVAL;
-                       break;
-               }
-
-               r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+               r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
                                              &vcpu->arch.xen.vcpu_info_cache,
-                                             data->u.gpa,
+                                             NULL, KVM_HOST_USES_PFN, data->u.gpa,
                                              sizeof(struct vcpu_info));
-               if (!r) {
-                       vcpu->arch.xen.vcpu_info_set = true;
+               if (!r)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-               }
+
                break;
 
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
@@ -630,7 +614,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
        switch (data->type) {
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
-               if (vcpu->arch.xen.vcpu_info_set)
+               if (vcpu->arch.xen.vcpu_info_cache.active)
                        data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
                else
                        data->u.gpa = GPA_INVALID;
@@ -903,16 +887,17 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
        if (!vcpu)
                return -1;
 
-       if (!vcpu->arch.xen.vcpu_info_set)
+       if (!vcpu->arch.xen.vcpu_info_cache.active)
                return -1;
 
        if (e->xen_evtchn.port >= max_evtchn_port(kvm))
                return -1;
 
        rc = -EWOULDBLOCK;
-       read_lock_irqsave(&gpc->lock, flags);
 
        idx = srcu_read_lock(&kvm->srcu);
+
+       read_lock_irqsave(&gpc->lock, flags);
        if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
                goto out_rcu;
 
@@ -940,17 +925,44 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
        } else if (test_bit(e->xen_evtchn.port, mask_bits)) {
                rc = -1; /* Masked */
        } else {
-               rc = 1; /* Delivered. But was the vCPU waking already? */
-               if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
-                       kick_vcpu = true;
+               rc = 1; /* Delivered to the bitmap in shared_info. */
+               /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
+               read_unlock_irqrestore(&gpc->lock, flags);
+               gpc = &vcpu->arch.xen.vcpu_info_cache;
+
+               read_lock_irqsave(&gpc->lock, flags);
+               if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+                       /*
+                        * Could not access the vcpu_info. Set the bit in-kernel
+                        * and prod the vCPU to deliver it for itself.
+                        */
+                       if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
+                               kick_vcpu = true;
+                       goto out_rcu;
+               }
+
+               if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+                       struct vcpu_info *vcpu_info = gpc->khva;
+                       if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
+                               WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+                               kick_vcpu = true;
+                       }
+               } else {
+                       struct compat_vcpu_info *vcpu_info = gpc->khva;
+                       if (!test_and_set_bit(port_word_bit,
+                                             (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
+                               WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+                               kick_vcpu = true;
+                       }
+               }
        }
 
  out_rcu:
-       srcu_read_unlock(&kvm->srcu, idx);
        read_unlock_irqrestore(&gpc->lock, flags);
+       srcu_read_unlock(&kvm->srcu, idx);
 
        if (kick_vcpu) {
-               kvm_make_request(KVM_REQ_EVENT, vcpu);
+               kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
                kvm_vcpu_kick(vcpu);
        }
 
@@ -1052,4 +1064,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 {
        kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
                                     &vcpu->arch.xen.runstate_cache);
+       kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+                                    &vcpu->arch.xen.vcpu_info_cache);
 }
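
The LOCK or/not/and sequences in kvm_xen_inject_pending_events() above
atomically transfer set bits from the in-kernel shadow of
evtchn_pending_sel into the guest's copy, then clear exactly those bits
from the shadow. A portable sketch of the same transfer using GCC
atomic builtins, for illustration only (the patch keeps the hand-rolled
asm, and transfer_pending_sel() is a hypothetical name):

    static void transfer_pending_sel(unsigned long *guest_sel,
                                     unsigned long *shadow_sel,
                                     unsigned long bits)
    {
            /* Set the pending bits in the guest's vcpu_info... */
            __atomic_fetch_or(guest_sel, bits, __ATOMIC_RELAXED);
            /* ...then clear the same bits from the in-kernel shadow. */
            __atomic_fetch_and(shadow_sel, ~bits, __ATOMIC_RELAXED);
    }
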
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 54b2bf4..7dd0590 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -15,6 +15,7 @@
 extern struct static_key_false_deferred kvm_xen_enabled;
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu);
 int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -46,11 +47,19 @@ static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 {
        if (static_branch_unlikely(&kvm_xen_enabled.key) &&
-           vcpu->arch.xen.vcpu_info_set && vcpu->kvm->arch.xen.upcall_vector)
+           vcpu->arch.xen.vcpu_info_cache.active &&
+           vcpu->kvm->arch.xen.upcall_vector)
                return __kvm_xen_has_interrupt(vcpu);
 
        return 0;
 }
+
+static inline bool kvm_xen_has_pending_events(struct kvm_vcpu *vcpu)
+{
+       return static_branch_unlikely(&kvm_xen_enabled.key) &&
+               vcpu->arch.xen.evtchn_pending_sel;
+}
+
 #else
 static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 {
@@ -83,6 +92,15 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 {
        return 0;
 }
+
+static inline void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline bool kvm_xen_has_pending_events(struct kvm_vcpu *vcpu)
+{
+       return false;
+}
 #endif
 
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
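
For completeness: userspace reaches the attribute plumbing changed here
through the existing KVM_XEN_VCPU_SET_ATTR ioctl on the vCPU fd. A
minimal sketch, with error handling elided and vcpu_fd/vcpu_info_gpa
assumed to be set up by the caller (GPA_INVALID being the all-ones GPA,
as in the set_attr hunk above):

    struct kvm_xen_vcpu_attr attr = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
            .u.gpa = vcpu_info_gpa,
    };
    ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr);

    /* Passing the invalid GPA tears the mapping down again. */
    attr.u.gpa = GPA_INVALID;
    ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr);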