Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 4 Mar 2021 19:26:17 +0000 (11:26 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 4 Mar 2021 19:26:17 +0000 (11:26 -0800)
Pull KVM fixes from Paolo Bonzini:

 - Doc fixes

 - selftests fixes

 - Add runstate information to the new Xen support

 - Allow compiling out the Xen interface

 - 32-bit PAE without EPT bugfix

 - NULL pointer dereference bugfix

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Clear the CR4 register on reset
  KVM: x86/xen: Add support for vCPU runstate information
  KVM: x86/xen: Fix return code when clearing vcpu_info and vcpu_time_info
  selftests: kvm: Mmap the entire vcpu mmap area
  KVM: Documentation: Fix index for KVM_CAP_PPC_DAWR1
  KVM: x86: allow compiling out the Xen hypercall interface
  KVM: xen: flush deferred static key before checking it
  KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled
  KVM: x86: hyper-v: Fix Hyper-V context null-ptr-deref
  KVM: x86: remove misplaced comment on active_mmu_pages
  KVM: Documentation: rectify rst markup in kvm_run->flags
  Documentation: kvm: fix messy conversion from .txt to .rst

13 files changed:
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/hyperv.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
include/uapi/linux/kvm.h
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index aed52b0..1a2b521 100644
@@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits.
          -EFAULT if struct kvm_reinject_control cannot be read,
         -EINVAL if the supplied shift or flags are invalid,
         -ENOMEM if unable to allocate the new HPT,
-        -ENOSPC if there was a hash collision
-
-::
-
-  struct kvm_ppc_rmmu_info {
-       struct kvm_ppc_radix_geom {
-               __u8    page_shift;
-               __u8    level_bits[4];
-               __u8    pad[3];
-       }       geometries[8];
-       __u32   ap_encodings[8];
-  };
-
-The geometries[] field gives up to 8 supported geometries for the
-radix page table, in terms of the log base 2 of the smallest page
-size, and the number of bits indexed at each level of the tree, from
-the PTE level up to the PGD level in that order.  Any unused entries
-will have 0 in the page_shift field.
-
-The ap_encodings gives the supported page sizes and their AP field
-encodings, encoded with the AP value in the top 3 bits and the log
-base 2 of the page size in the bottom 6 bits.
-
-4.102 KVM_PPC_RESIZE_HPT_PREPARE
---------------------------------
-
-:Capability: KVM_CAP_SPAPR_RESIZE_HPT
-:Architectures: powerpc
-:Type: vm ioctl
-:Parameters: struct kvm_ppc_resize_hpt (in)
-:Returns: 0 on successful completion,
-        >0 if a new HPT is being prepared, the value is an estimated
-         number of milliseconds until preparation is complete,
-         -EFAULT if struct kvm_reinject_control cannot be read,
-        -EINVAL if the supplied shift or flags are invalid,when moving existing
-         HPT entries to the new HPT,
-        -EIO on other error conditions
 
 Used to implement the PAPR extension for runtime resizing of a guest's
 Hashed Page Table (HPT).  Specifically this starts, stops or monitors
 the preparation of a new potential HPT for the guest, essentially
 implementing the H_RESIZE_HPT_PREPARE hypercall.
 
+::
+
+  struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+  };
+
 If called with shift > 0 when there is no pending HPT for the guest,
 this begins preparation of a new pending HPT of size 2^(shift) bytes.
 It then returns a positive integer with the estimated number of
@@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until
 it returns <= 0.  The first call will initiate preparation, subsequent
 ones will monitor preparation until it completes or fails.
 
-::
-
-  struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-  };
-
 4.103 KVM_PPC_RESIZE_HPT_COMMIT
 -------------------------------
 
@@ -3956,6 +3919,14 @@ Hashed Page Table (HPT).  Specifically this requests that the guest be
 transferred to working with the new HPT, essentially implementing the
 H_RESIZE_HPT_COMMIT hypercall.
 
+::
+
+  struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+  };
+
 This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
 returned 0 with the same parameters.  In other cases
 KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
@@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded.
 
 On failure, the guest will still be operating on its previous HPT.
 
-::
-
-  struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-  };
-
 4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
 -----------------------------------
 
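The resize flow documented in the hunks above boils down to calling KVM_PPC_RESIZE_HPT_PREPARE repeatedly with the same parameters until it stops returning a positive estimate, then issuing KVM_PPC_RESIZE_HPT_COMMIT. A minimal userspace sketch of that loop follows (an editorial illustration, not part of this commit; the vm_fd descriptor and the millisecond polling are assumptions, while the ioctls and struct kvm_ppc_resize_hpt are the documented ones)::

  /* Sketch only: drive the documented HPT resize sequence from userspace. */
  #include <poll.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int resize_hpt(int vm_fd, __u32 new_shift)
  {
          struct kvm_ppc_resize_hpt rhpt = {
                  .flags = 0,
                  .shift = new_shift,
          };
          int ret;

          /*
           * PREPARE returns a positive estimate in milliseconds while the
           * new HPT is still being prepared, 0 once it is ready, and -1
           * with errno set on failure.  Keep calling with the same args.
           */
          do {
                  ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
                  if (ret > 0)
                          poll(NULL, 0, ret);
          } while (ret > 0);

          if (ret < 0)
                  return ret;

          /* Move the guest onto the new HPT; parameters must match PREPARE. */
          return ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
  }
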
@@ -4915,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above.
        union {
                __u64 gpa;
                __u64 pad[4];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
        } u;
   };
 
@@ -4927,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
   Sets the guest physical address of an additional pvclock structure
   for a given vCPU. This is typically used for guest vsyscall support.
 
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
+  Sets the guest physical address of the vcpu_runstate_info for a given
+  vCPU. This is how a Xen guest tracks CPU state such as steal time.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
+  Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
+  the given vCPU from the .u.runstate.state member of the structure.
+  KVM automatically accounts running and runnable time but blocked
+  and offline states are only entered explicitly.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
+  Sets all fields of the vCPU runstate data from the .u.runstate member
+  of the structure, including the current runstate. The state_entry_time
+  must equal the sum of the other four times.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
+  This *adds* the contents of the .u.runstate members of the structure
+  to the corresponding members of the given vCPU's runstate data, thus
+  permitting atomic adjustments to the runstate times. The adjustment
+  to the state_entry_time must equal the sum of the adjustments to the
+  other four times. The state field must be set to -1, or to a valid
+  runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
+  or RUNSTATE_offline) to set the current accounted state as of the
+  adjusted state_entry_time.
+
 4.130 KVM_XEN_VCPU_GET_ATTR
 ---------------------------
 
@@ -4939,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
 Allows Xen vCPU attributes to be read. For the structure and types,
 see KVM_XEN_VCPU_SET_ATTR above.
 
+The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
+with the KVM_XEN_VCPU_GET_ATTR ioctl.
+
 5. The kvm_run structure
 ========================
 
@@ -5000,7 +4999,8 @@ local APIC is not used.
        __u16 flags;
 
 More architecture-specific flags detailing state of the VCPU that may
-affect the device's behavior. Current defined flags:
+affect the device's behavior. Current defined flags::
+
   /* x86, set if the VCPU is in system management mode */
   #define KVM_RUN_X86_SMM     (1 << 0)
   /* x86, set if bus lock detected in VM */
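Userspace reads these from the flags field of the kvm_run structure mmap'd from the vCPU file descriptor after KVM_RUN returns. A trivial illustrative check, not taken from this patch (the run pointer is assumed to be that mapping)::

  #include <linux/kvm.h>

  /* Sketch only: 'run' is the struct kvm_run mmap'd from the vCPU fd. */
  static int vcpu_in_smm(const struct kvm_run *run)
  {
          return !!(run->flags & KVM_RUN_X86_SMM);
  }
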
@@ -6217,7 +6217,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
 notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
 KVM_RUN_BUS_LOCK flag is used to distinguish between them.
 
-7.22 KVM_CAP_PPC_DAWR1
+7.23 KVM_CAP_PPC_DAWR1
 ----------------------
 
 :Architectures: ppc
@@ -6702,6 +6702,7 @@ PVHVM guests. Valid flags are::
   #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR     (1 << 0)
   #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL   (1 << 1)
   #define KVM_XEN_HVM_CONFIG_SHARED_INFO       (1 << 2)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE          (1 << 3)
 
 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
 ioctl is available, for the guest to set its hypercall page.
@@ -6716,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
 KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
 for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
 vcpu_info is set.
+
+The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
+features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
+supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
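Putting the new pieces together: a VMM probes KVM_CAP_XEN_HVM for the KVM_XEN_HVM_CONFIG_RUNSTATE flag before touching the runstate attributes, then registers the guest physical address of the vcpu_runstate_info. The sketch below is an editorial illustration rather than code from this series; vm_fd, vcpu_fd, runstate_gpa and the 1000ns adjustment are assumptions, while the ioctls, flags and structure fields are the ones documented above::

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int xen_setup_runstate(int vm_fd, int vcpu_fd, __u64 runstate_gpa)
  {
          struct kvm_xen_vcpu_attr attr = {
                  .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                  .u.gpa = runstate_gpa,
          };
          int caps;

          /* KVM_CHECK_EXTENSION on the VM fd reports the Xen config flags. */
          caps = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XEN_HVM);
          if (caps < 0 || !(caps & KVM_XEN_HVM_CONFIG_RUNSTATE))
                  return -1;

          /* Tell KVM where the guest's vcpu_runstate_info lives. */
          if (ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr))
                  return -1;

          /*
           * Example atomic adjustment: credit 1000ns of blocked time.  As
           * documented, the state_entry_time delta must equal the sum of
           * the four time deltas, and state == -1 leaves the currently
           * accounted runstate unchanged.
           */
          memset(&attr, 0, sizeof(attr));
          attr.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
          attr.u.runstate.state = (__u64)-1;
          attr.u.runstate.time_blocked = 1000;
          attr.u.runstate.state_entry_time = 1000;
          return ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr);
  }
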
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0cf71ff..877a402 100644
@@ -535,10 +535,16 @@ struct kvm_vcpu_hv {
 /* Xen HVM per vcpu emulation context */
 struct kvm_vcpu_xen {
        u64 hypercall_rip;
+       u32 current_runstate;
        bool vcpu_info_set;
        bool vcpu_time_info_set;
+       bool runstate_set;
        struct gfn_to_hva_cache vcpu_info_cache;
        struct gfn_to_hva_cache vcpu_time_info_cache;
+       struct gfn_to_hva_cache runstate_cache;
+       u64 last_steal;
+       u64 runstate_entry_time;
+       u64 runstate_times[4];
 };
 
 struct kvm_vcpu_arch {
@@ -939,9 +945,6 @@ struct kvm_arch {
        unsigned int indirect_shadow_pages;
        u8 mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
-       /*
-        * Hash table of struct kvm_mmu_page.
-        */
        struct list_head active_mmu_pages;
        struct list_head zapped_obsolete_pages;
        struct list_head lpage_disallowed_mmu_pages;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 7ac5926..a788d51 100644
@@ -103,6 +103,15 @@ config KVM_AMD_SEV
          Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
          with Encrypted State (SEV-ES) on AMD processors.
 
+config KVM_XEN
+       bool "Support for Xen hypercall interface"
+       depends on KVM
+       help
+         Provides KVM support for hosting Xen HVM guests and
+         passing Xen hypercalls to userspace.
+
+         If in doubt, say "N".
+
 config KVM_MMU_AUDIT
        bool "Audit KVM MMU"
        depends on KVM && TRACEPOINTS
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index aeab168..1b4766f 100644
@@ -14,11 +14,12 @@ kvm-y                       += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
                                $(KVM)/dirty_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
-kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
+kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o \
                           i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
                           hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
                           mmu/spte.o
 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
+kvm-$(CONFIG_KVM_XEN)  += xen.o
 
 kvm-intel-y            += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
                           vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 7d2dae9..58fa8c0 100644
@@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
        struct kvm_vcpu_hv_synic *synic;
 
        vcpu = get_vcpu_by_vpidx(kvm, vpidx);
-       if (!vcpu)
+       if (!vcpu || !to_hv_vcpu(vcpu))
                return NULL;
        synic = to_hv_synic(vcpu);
        return (synic->active) ? synic : NULL;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 72b0928..ec4fc28 100644
@@ -81,15 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
 static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
 {
        /*
-        * When using the EPT page-modification log, the GPAs in the log
-        * would come from L2 rather than L1.  Therefore, we need to rely
-        * on write protection to record dirty pages.  This also bypasses
-        * PML, since writes now result in a vmexit.  Note, this helper will
-        * tag SPTEs as needing write-protection even if PML is disabled or
-        * unsupported, but that's ok because the tag is consumed if and only
-        * if PML is enabled.  Omit the PML check to save a few uops.
+        * When using the EPT page-modification log, the GPAs in the CPU dirty
+        * log would come from L2 rather than L1.  Therefore, we need to rely
+        * on write protection to record dirty pages, which bypasses PML, since
+        * writes now result in a vmexit.  Note, the check on CPU dirty logging
+        * being enabled is mandatory as the bits used to denote WP-only SPTEs
+        * are reserved for NPT w/ PAE (32-bit KVM).
         */
-       return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
+       return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
+              kvm_x86_ops.cpu_dirty_log_size;
 }
 
 bool is_nx_huge_page_enabled(void);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c636021..baee91c 100644
@@ -1200,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
+       svm_set_cr4(&svm->vcpu, 0);
        svm_set_efer(&svm->vcpu, 0);
        save->dr6 = 0xffff0ff0;
        kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3712bb5..2a20ce6 100644
@@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (kvm_xen_msr_enabled(vcpu->kvm)) {
+               kvm_xen_runstate_set_running(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
                r = 1;
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_CAP_XEN_HVM:
                r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
                    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
                    KVM_XEN_HVM_CONFIG_SHARED_INFO;
+               if (sched_info_on())
+                       r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
                break;
+#endif
        case KVM_CAP_SYNC_REGS:
                r = KVM_SYNC_X86_VALID_FIELDS;
                break;
@@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        if (vcpu->preempted && !vcpu->arch.guest_state_protected)
                vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
 
-       kvm_steal_time_set_preempted(vcpu);
+       if (kvm_xen_msr_enabled(vcpu->kvm))
+               kvm_xen_runstate_set_preempted(vcpu);
+       else
+               kvm_steal_time_set_preempted(vcpu);
+
        static_call(kvm_x86_vcpu_put)(vcpu);
        vcpu->arch.last_host_tsc = rdtsc();
        /*
@@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_GET_SUPPORTED_HV_CPUID:
                r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_XEN_VCPU_GET_ATTR: {
                struct kvm_xen_vcpu_attr xva;
 
@@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_xen_vcpu_set_attr(vcpu, &xva);
                break;
        }
+#endif
        default:
                r = -EINVAL;
        }
@@ -5654,6 +5669,7 @@ set_pit2_out:
                        kvm->arch.bsp_vcpu_id = arg;
                mutex_unlock(&kvm->lock);
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_XEN_HVM_CONFIG: {
                struct kvm_xen_hvm_config xhc;
                r = -EFAULT;
@@ -5682,6 +5698,7 @@ set_pit2_out:
                r = kvm_xen_hvm_set_attr(kvm, &xha);
                break;
        }
+#endif
        case KVM_SET_CLOCK: {
                struct kvm_clock_data user_ns;
                u64 now_ns;
@@ -8040,7 +8057,10 @@ void kvm_arch_exit(void)
        kvm_mmu_module_exit();
        free_percpu(user_return_msrs);
        kmem_cache_destroy(x86_fpu_cache);
+#ifdef CONFIG_KVM_XEN
+       static_key_deferred_flush(&kvm_xen_enabled);
        WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
+#endif
 }
 
 static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index af8f656..ae17250 100644
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -61,6 +63,132 @@ out:
        return ret;
 }
 
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+{
+       struct kvm_vcpu_xen *vx = &v->arch.xen;
+       u64 now = get_kvmclock_ns(v->kvm);
+       u64 delta_ns = now - vx->runstate_entry_time;
+       u64 run_delay = current->sched_info.run_delay;
+
+       if (unlikely(!vx->runstate_entry_time))
+               vx->current_runstate = RUNSTATE_offline;
+
+       /*
+        * Time waiting for the scheduler isn't "stolen" if the
+        * vCPU wasn't running anyway.
+        */
+       if (vx->current_runstate == RUNSTATE_running) {
+               u64 steal_ns = run_delay - vx->last_steal;
+
+               delta_ns -= steal_ns;
+
+               vx->runstate_times[RUNSTATE_runnable] += steal_ns;
+       }
+       vx->last_steal = run_delay;
+
+       vx->runstate_times[vx->current_runstate] += delta_ns;
+       vx->current_runstate = state;
+       vx->runstate_entry_time = now;
+}
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+{
+       struct kvm_vcpu_xen *vx = &v->arch.xen;
+       uint64_t state_entry_time;
+       unsigned int offset;
+
+       kvm_xen_update_runstate(v, state);
+
+       if (!vx->runstate_set)
+               return;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+       offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+#ifdef CONFIG_X86_64
+       /*
+        * The only difference is alignment of uint64_t in 32-bit.
+        * So the first field 'state' is accessed directly using
+        * offsetof() (where its offset happens to be zero), while the
+        * remaining fields which are all uint64_t, start at 'offset'
+        * which we tweak here by adding 4.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+       if (v->kvm->arch.xen.long_mode)
+               offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+       /*
+        * First write the updated state_entry_time at the appropriate
+        * location determined by 'offset'.
+        */
+       state_entry_time = vx->runstate_entry_time;
+       state_entry_time |= XEN_RUNSTATE_UPDATE;
+
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+                    sizeof(state_entry_time));
+       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+                    sizeof(state_entry_time));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state_entry_time, offset,
+                                         sizeof(state_entry_time)))
+               return;
+       smp_wmb();
+
+       /*
+        * Next, write the new runstate. This is in the *same* place
+        * for 32-bit and 64-bit guests, asserted here for paranoia.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(vx->current_runstate));
+       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+                    sizeof(vx->current_runstate));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &vx->current_runstate,
+                                         offsetof(struct vcpu_runstate_info, state),
+                                         sizeof(vx->current_runstate)))
+               return;
+
+       /*
+        * Write the actual runstate times immediately after the
+        * runstate_entry_time.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(vx->runstate_times));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &vx->runstate_times[0],
+                                         offset + sizeof(u64),
+                                         sizeof(vx->runstate_times)))
+               return;
+
+       smp_wmb();
+
+       /*
+        * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
+        * runstate_entry_time field.
+        */
+
+       state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state_entry_time, offset,
+                                         sizeof(state_entry_time)))
+               return;
+}
+
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
        u8 rc = 0;
@@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                /* No compat necessary here. */
                BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                             sizeof(struct compat_vcpu_info));
+               BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
+                            offsetof(struct compat_vcpu_info, time));
 
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_info_set = false;
+                       r = 0;
                        break;
                }
 
@@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_time_info_set = false;
+                       r = 0;
                        break;
                }
 
@@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.gpa == GPA_INVALID) {
+                       vcpu->arch.xen.runstate_set = false;
+                       r = 0;
+                       break;
+               }
+
+               r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+                                             &vcpu->arch.xen.runstate_cache,
+                                             data->u.gpa,
+                                             sizeof(struct vcpu_runstate_info));
+               if (!r) {
+                       vcpu->arch.xen.runstate_set = true;
+               }
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (data->u.runstate.state_entry_time !=
+                   (data->u.runstate.time_running +
+                    data->u.runstate.time_runnable +
+                    data->u.runstate.time_blocked +
+                    data->u.runstate.time_offline)) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (get_kvmclock_ns(vcpu->kvm) <
+                   data->u.runstate.state_entry_time) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               vcpu->arch.xen.current_runstate = data->u.runstate.state;
+               vcpu->arch.xen.runstate_entry_time =
+                       data->u.runstate.state_entry_time;
+               vcpu->arch.xen.runstate_times[RUNSTATE_running] =
+                       data->u.runstate.time_running;
+               vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
+                       data->u.runstate.time_runnable;
+               vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
+                       data->u.runstate.time_blocked;
+               vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
+                       data->u.runstate.time_offline;
+               vcpu->arch.xen.last_steal = current->sched_info.run_delay;
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline &&
+                   data->u.runstate.state != (u64)-1) {
+                       r = -EINVAL;
+                       break;
+               }
+               /* The adjustment must add up */
+               if (data->u.runstate.state_entry_time !=
+                   (data->u.runstate.time_running +
+                    data->u.runstate.time_runnable +
+                    data->u.runstate.time_blocked +
+                    data->u.runstate.time_offline)) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               if (get_kvmclock_ns(vcpu->kvm) <
+                   (vcpu->arch.xen.runstate_entry_time +
+                    data->u.runstate.state_entry_time)) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               vcpu->arch.xen.runstate_entry_time +=
+                       data->u.runstate.state_entry_time;
+               vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
+                       data->u.runstate.time_running;
+               vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
+                       data->u.runstate.time_runnable;
+               vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
+                       data->u.runstate.time_blocked;
+               vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
+                       data->u.runstate.time_offline;
+
+               if (data->u.runstate.state <= RUNSTATE_offline)
+                       kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+               r = 0;
+               break;
+
        default:
                break;
        }
@@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                r = 0;
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (vcpu->arch.xen.runstate_set) {
+                       data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+                       r = 0;
+               }
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate.state = vcpu->arch.xen.current_runstate;
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate.state = vcpu->arch.xen.current_runstate;
+               data->u.runstate.state_entry_time =
+                       vcpu->arch.xen.runstate_entry_time;
+               data->u.runstate.time_running =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_running];
+               data->u.runstate.time_runnable =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
+               data->u.runstate.time_blocked =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
+               data->u.runstate.time_offline =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_offline];
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+               r = -EINVAL;
+               break;
+
        default:
                break;
        }
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index b66a921..463a784 100644
@@ -9,6 +9,7 @@
 #ifndef __ARCH_X86_KVM_XEN_H__
 #define __ARCH_X86_KVM_XEN_H__
 
+#ifdef CONFIG_KVM_XEN
 #include <linux/jump_label_ratelimit.h>
 
 extern struct static_key_false_deferred kvm_xen_enabled;
@@ -18,11 +19,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
-int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
 void kvm_xen_destroy_vm(struct kvm *kvm);
 
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+       return static_branch_unlikely(&kvm_xen_enabled.key) &&
+               kvm->arch.xen_hvm_config.msr;
+}
+
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
        return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -38,11 +44,59 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 
        return 0;
 }
+#else
+static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
+{
+       return 1;
+}
+
+static inline void kvm_xen_destroy_vm(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+       return false;
+}
+
+static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
+{
+       return false;
+}
+
+static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+#endif
+
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 
-/* 32-bit compatibility definitions, also used natively in 32-bit build */
 #include <asm/pvclock-abi.h>
 #include <asm/xen/interface.h>
+#include <xen/interface/vcpu.h>
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
 
+static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
+{
+       kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
+}
+
+static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+       /*
+        * If the vCPU wasn't preempted but took a normal exit for
+        * some reason (hypercalls, I/O, etc.), that is accounted as
+        * still RUNSTATE_running, as the VMM is still operating on
+        * behalf of the vCPU. Only if the VMM does actually block
+        * does it need to enter RUNSTATE_blocked.
+        */
+       if (vcpu->preempted)
+               kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+}
+
+/* 32-bit compatibility definitions, also used natively in 32-bit build */
 struct compat_arch_vcpu_info {
        unsigned int cr2;
        unsigned int pad[5];
@@ -75,4 +129,10 @@ struct compat_shared_info {
        struct compat_arch_shared_info arch;
 };
 
+struct compat_vcpu_runstate_info {
+    int state;
+    uint64_t state_entry_time;
+    uint64_t time[4];
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8b281f7..f6afee2 100644
@@ -1154,6 +1154,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
 #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
 
 struct kvm_xen_hvm_config {
        __u32 flags;
@@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr {
        union {
                __u64 gpa;
                __u64 pad[8];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
        } u;
 };
 
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d787cb8..e5fbf16 100644
@@ -21,6 +21,8 @@
 #define KVM_UTIL_PGS_PER_HUGEPG 512
 #define KVM_UTIL_MIN_PFN       2
 
+static int vcpu_mmap_sz(void);
+
 /* Aligns x up to the next multiple of size. Size must be a power of 2. */
 static void *align(void *x, size_t size)
 {
@@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
                vcpu->dirty_gfns = NULL;
        }
 
-       ret = munmap(vcpu->state, sizeof(*vcpu->state));
+       ret = munmap(vcpu->state, vcpu_mmap_sz());
        TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
                "errno: %i", ret, errno);
        close(vcpu->fd);
@@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
        TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
                "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
                vcpu_mmap_sz(), sizeof(*vcpu->state));
-       vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+       vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
                PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
        TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
                "vcpu id: %u errno: %i", vcpuid, errno);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 9246ea3..804ff5f 100644
 
 #include <stdint.h>
 #include <time.h>
+#include <sched.h>
+#include <sys/syscall.h>
 
 #define VCPU_ID                5
 
+#define SHINFO_REGION_GVA      0xc0000000ULL
 #define SHINFO_REGION_GPA      0xc0000000ULL
 #define SHINFO_REGION_SLOT     10
 #define PAGE_SIZE              4096
 
 #define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
 
 static struct kvm_vm *vm;
 
 #define XEN_HYPERCALL_MSR      0x40000000
 
+#define MIN_STEAL_TIME         50000
+
 struct pvclock_vcpu_time_info {
         u32   version;
         u32   pad0;
@@ -43,11 +51,67 @@ struct pvclock_wall_clock {
         u32   nsec;
 } __attribute__((__packed__));
 
+struct vcpu_runstate_info {
+    uint32_t state;
+    uint64_t state_entry_time;
+    uint64_t time[4];
+};
+
+#define RUNSTATE_running  0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked  2
+#define RUNSTATE_offline  3
+
 static void guest_code(void)
 {
+       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+
+       /* Test having the host set runstates manually */
+       GUEST_SYNC(RUNSTATE_runnable);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(RUNSTATE_blocked);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(RUNSTATE_offline);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       /* Test runstate time adjust */
+       GUEST_SYNC(4);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+       /* Test runstate time set */
+       GUEST_SYNC(5);
+       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+       /* sched_yield() should result in some 'runnable' time */
+       GUEST_SYNC(6);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
        GUEST_DONE();
 }
 
+static long get_run_delay(void)
+{
+        char path[64];
+        long val[2];
+        FILE *fp;
+
+        sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+        fp = fopen(path, "r");
+        fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+        fclose(fp);
+
+        return val[1];
+}
+
 static int cmp_timespec(struct timespec *a, struct timespec *b)
 {
        if (a->tv_sec > b->tv_sec)
@@ -66,12 +130,14 @@ int main(int argc, char *argv[])
 {
        struct timespec min_ts, max_ts, vm_ts;
 
-       if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
-             KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
                print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
                exit(KSFT_SKIP);
        }
 
+       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+
        clock_gettime(CLOCK_REALTIME, &min_ts);
 
        vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
@@ -80,6 +146,7 @@ int main(int argc, char *argv[])
        /* Map a region for the shared_info page */
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
                                    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
 
        struct kvm_xen_hvm_config hvmc = {
                .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -111,6 +178,17 @@ int main(int argc, char *argv[])
        };
        vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
 
+       if (do_runstate_tests) {
+               struct kvm_xen_vcpu_attr st = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                       .u.gpa = RUNSTATE_ADDR,
+               };
+               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+       }
+
+       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+       rs->state = 0x5a;
+
        for (;;) {
                volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
                struct ucall uc;
@@ -126,8 +204,56 @@ int main(int argc, char *argv[])
                case UCALL_ABORT:
                        TEST_FAIL("%s", (const char *)uc.args[0]);
                        /* NOT REACHED */
-               case UCALL_SYNC:
+               case UCALL_SYNC: {
+                       struct kvm_xen_vcpu_attr rst;
+                       long rundelay;
+
+                       /* If no runstate support, bail out early */
+                       if (!do_runstate_tests)
+                               goto done;
+
+                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                   rs->time[1] + rs->time[2] + rs->time[3],
+                                   "runstate times don't add up");
+
+                       switch (uc.args[1]) {
+                       case RUNSTATE_running...RUNSTATE_offline:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+                               rst.u.runstate.state = uc.args[1];
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+                       case 4:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = (uint64_t)-1;
+                               rst.u.runstate.time_blocked =
+                                       0x5a - rs->time[RUNSTATE_blocked];
+                               rst.u.runstate.time_offline =
+                                       0x6b6b - rs->time[RUNSTATE_offline];
+                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+                                       rst.u.runstate.time_offline;
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case 5:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = RUNSTATE_running;
+                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+                               rst.u.runstate.time_blocked = 0x6b6b;
+                               rst.u.runstate.time_offline = 0x5a;
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+                       case 6:
+                               /* Yield until scheduler delay exceeds target */
+                               rundelay = get_run_delay() + MIN_STEAL_TIME;
+                               do {
+                                       sched_yield();
+                               } while (get_run_delay() < rundelay);
+                               break;
+                       }
                        break;
+               }
                case UCALL_DONE:
                        goto done;
                default:
@@ -162,6 +288,33 @@ int main(int argc, char *argv[])
        TEST_ASSERT(ti2->version && !(ti2->version & 1),
                    "Bad time_info version %x", ti->version);
 
+       if (do_runstate_tests) {
+               /*
+                * Fetch runstate and check sanity. Strictly speaking in the
+                * general case we might not expect the numbers to be identical
+                * but in this case we know we aren't running the vCPU any more.
+                */
+               struct kvm_xen_vcpu_attr rst = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+               };
+               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+               TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+               TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+                           "State entry time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                           "Running time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                           "Runnable time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                           "Blocked time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                           "Offline time mismatch");
+
+               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                           rs->time[1] + rs->time[2] + rs->time[3],
+                           "runstate times don't add up");
+       }
        kvm_vm_free(vm);
        return 0;
 }