KVM: x86/xen: Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured

author David Woodhouse <dwmw@amazon.co.uk>

Sun, 27 Nov 2022 12:22:10 +0000 (12:22 +0000)

committer Paolo Bonzini <pbonzini@redhat.com>

Wed, 30 Nov 2022 15:59:37 +0000 (10:59 -0500)
author David Woodhouse <dwmw@amazon.co.uk>
Sun, 27 Nov 2022 12:22:10 +0000 (12:22 +0000)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 30 Nov 2022 15:59:37 +0000 (10:59 -0500)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst

index 9175d41..5617bc4 100644 (file)
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5339,6 +5339,7 @@ KVM_PV_ASYNC_CLEANUP_PERFORM
         union {
                 __u8 long_mode;
                 __u8 vector;
+               __u8 runstate_update_flag;
                 struct {
                         __u64 gfn;
                 } shared_info;
@@ -5416,6 +5417,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION
    event channel delivery, so responding within the kernel without
    exiting to userspace is beneficial.
  
+KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
+  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+  support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
+  XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
+  other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
+  the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
+  hypercall.
+
  4.127 KVM_XEN_HVM_GET_ATTR
  --------------------------
  
@@ -8059,12 +8068,13 @@ to userspace.
  This capability indicates the features that Xen supports for hosting Xen
  PVHVM guests. Valid flags are::
  
-  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR     (1 << 0)
-  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL   (1 << 1)
-  #define KVM_XEN_HVM_CONFIG_SHARED_INFO       (1 << 2)
-  #define KVM_XEN_HVM_CONFIG_RUNSTATE          (1 << 3)
-  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL     (1 << 4)
-  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND       (1 << 5)
+  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR             (1 << 0)
+  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL           (1 << 1)
+  #define KVM_XEN_HVM_CONFIG_SHARED_INFO               (1 << 2)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE                  (1 << 3)
+  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL             (1 << 4)
+  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND               (1 << 5)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG      (1 << 6)
  
  The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
  ioctl is available, for the guest to set its hypercall page.
@@ -8096,6 +8106,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes.
  related to event channel delivery, timers, and the XENVER_version
  interception.
  
+The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports
+the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR
+and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the
+XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during
+updates of the runstate information. Note that versions of KVM which support
+the RUNSTATE feature above, but not thie RUNSTATE_UPDATE_FLAG feature, will
+always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure,
+which is perhaps counterintuitive. When this flag is advertised, KVM will
+behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
+specifically enabled (by the guest making the hypercall, causing the VMM
+to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
+
  8.31 KVM_CAP_PPC_MULTITCE
  -------------------------
  
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 70af724..283cbb8 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1113,6 +1113,7 @@ struct msr_bitmap_range {
  struct kvm_xen {
         u32 xen_version;
         bool long_mode;
+       bool runstate_update_flag;
         u8 upcall_vector;
         struct gfn_to_pfn_cache shinfo_cache;
         struct idr evtchn_ports;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 72ac6bf..59fd55b 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4431,7 +4431,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                     KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
                     KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
                 if (sched_info_on())
-                       r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
+                       r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
+                            KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
                 break;
  #endif
         case KVM_CAP_SYNC_REGS:
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c

index cfc1c07..7acac5d 100644 (file)
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -179,7 +179,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
         struct vcpu_runstate_info rs;
         unsigned long flags;
         size_t times_ofs;
-       uint8_t *update_bit;
+       uint8_t *update_bit = NULL;
+       uint64_t entry_time;
         uint64_t *rs_times;
         int *rs_state;
  
@@ -297,7 +298,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
                  */
                 rs_state = gpc1->khva;
                 rs_times = gpc1->khva + times_ofs;
-               update_bit = ((void *)(&rs_times[1])) - 1;
+               if (v->kvm->arch.xen.runstate_update_flag)
+                       update_bit = ((void *)(&rs_times[1])) - 1;
         } else {
                 /*
                  * The guest's runstate_info is split across two pages and we
@@ -351,12 +353,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
                  * The update_bit is still directly in the guest memory,
                  * via one GPC or the other.
                  */
-               if (user_len1 >= times_ofs + sizeof(uint64_t))
-                       update_bit = gpc1->khva + times_ofs +
-                               sizeof(uint64_t) - 1;
-               else
-                       update_bit = gpc2->khva + times_ofs +
-                               sizeof(uint64_t) - 1 - user_len1;
+               if (v->kvm->arch.xen.runstate_update_flag) {
+                       if (user_len1 >= times_ofs + sizeof(uint64_t))
+                               update_bit = gpc1->khva + times_ofs +
+                                       sizeof(uint64_t) - 1;
+                       else
+                               update_bit = gpc2->khva + times_ofs +
+                                       sizeof(uint64_t) - 1 - user_len1;
+               }
  
  #ifdef CONFIG_X86_64
                 /*
@@ -376,8 +380,12 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
          * different cache line to the rest of the 64-bit word, due to
          * the (lack of) alignment constraints.
          */
-       *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
-       smp_wmb();
+       entry_time = vx->runstate_entry_time;
+       if (update_bit) {
+               entry_time |= XEN_RUNSTATE_UPDATE;
+               *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
+               smp_wmb();
+       }
  
         /*
          * Now assemble the actual structure, either on our kernel stack
@@ -385,7 +393,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
          * rs_times pointers were set up above.
          */
         *rs_state = vx->current_runstate;
-       rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
+       rs_times[0] = entry_time;
         memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
  
         /* For the split case, we have to then copy it to the guest. */
@@ -396,8 +404,11 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
         smp_wmb();
  
         /* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
-       *update_bit = vx->runstate_entry_time >> 56;
-       smp_wmb();
+       if (update_bit) {
+               entry_time &= ~XEN_RUNSTATE_UPDATE;
+               *update_bit = entry_time >> 56;
+               smp_wmb();
+       }
  
         if (user_len2)
                 read_unlock(&gpc2->lock);
@@ -619,6 +630,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 r = 0;
                 break;
  
+       case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               mutex_lock(&kvm->lock);
+               kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
+               mutex_unlock(&kvm->lock);
+               r = 0;
+               break;
+
         default:
                 break;
         }
@@ -656,6 +678,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 r = 0;
                 break;
  
+       case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
+               r = 0;
+               break;
+
         default:
                 break;
         }
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

index 8844839..64dfe9c 100644 (file)
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1271,6 +1271,7 @@ struct kvm_x86_mce {
  #define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
  
  struct kvm_xen_hvm_config {
         __u32 flags;
@@ -1776,6 +1777,7 @@ struct kvm_xen_hvm_attr {
         union {
                 __u8 long_mode;
                 __u8 vector;
+               __u8 runstate_update_flag;
                 struct {
                         __u64 gfn;
                 } shared_info;
@@ -1816,6 +1818,8 @@ struct kvm_xen_hvm_attr {
  /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
  #define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
  #define KVM_XEN_ATTR_TYPE_XEN_VERSION          0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
  
  /* Per-vCPU Xen attributes */
  #define KVM_XEN_VCPU_GET_ATTR  _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

index 7f39815..c9b0110 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -440,6 +440,7 @@ int main(int argc, char *argv[])
         TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
  
         bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+       bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
         bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
         bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
  
@@ -475,6 +476,19 @@ int main(int argc, char *argv[])
         };
         vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
  
+       if (do_runstate_flag) {
+               struct kvm_xen_hvm_attr ruf = {
+                       .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+                       .u.runstate_update_flag = 1,
+               };
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+               ruf.u.runstate_update_flag = 0;
+               vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+               TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+                           "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+       }
+
         struct kvm_xen_hvm_attr ha = {
                 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
                 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
author	David Woodhouse <dwmw@amazon.co.uk>
	Sun, 27 Nov 2022 12:22:10 +0000 (12:22 +0000)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 30 Nov 2022 15:59:37 +0000 (10:59 -0500)
Documentation/virt/kvm/api.rst		patch \| blob \| history
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history
arch/x86/kvm/xen.c		patch \| blob \| history
include/uapi/linux/kvm.h		patch \| blob \| history
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c		patch \| blob \| history