kvm: x86: Disable interception for IA32_XFD on demand
authorKevin Tian <kevin.tian@intel.com>
Wed, 5 Jan 2022 12:35:32 +0000 (04:35 -0800)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 14 Jan 2022 18:44:43 +0000 (13:44 -0500)
Always intercepting IA32_XFD causes non-negligible overhead when this
register is updated frequently in the guest.

Disable r/w emulation after intercepting the first WRMSR(IA32_XFD)
with a non-zero value.

Disable WRMSR emulation implies that IA32_XFD becomes out-of-sync
with the software states in fpstate and the per-cpu xfd cache. This
leads to two additional changes accordingly:

  - Call fpu_sync_guest_vmexit_xfd_state() after vm-exit to bring
    software states back in-sync with the MSR, before handle_exit_irqoff()
    is called.

  - Always trap #NM once write interception is disabled for IA32_XFD.
    The #NM exception is rare if the guest doesn't use dynamic
    features. Otherwise, there is at most one exception per guest
    task given a dynamic feature.

p.s. We have confirmed that SDM is being revised to say that
when setting IA32_XFD[18] the AMX register state is not guaranteed
to be preserved. This clarification avoids adding mess for a creative
guest which sets IA32_XFD[18]=1 before saving active AMX state to
its own storage.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <20220105123532.12586-22-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c

index 6cbf97a..89d1fdb 100644 (file)
@@ -647,6 +647,7 @@ struct kvm_vcpu_arch {
        u64 smi_count;
        bool tpr_access_reporting;
        bool xsaves_enabled;
+       bool xfd_no_write_intercept;
        u64 ia32_xss;
        u64 microcode_version;
        u64 arch_capabilities;
index b8b7f5c..15e3060 100644 (file)
@@ -162,6 +162,7 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
        MSR_FS_BASE,
        MSR_GS_BASE,
        MSR_KERNEL_GS_BASE,
+       MSR_IA32_XFD,
        MSR_IA32_XFD_ERR,
 #endif
        MSR_IA32_SYSENTER_CS,
@@ -764,10 +765,11 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
        }
 
        /*
-        * Trap #NM if guest xfd contains a non-zero value so guest XFD_ERR
-        * can be saved timely.
+        * Disabling xfd interception indicates that dynamic xfeatures
+        * might be used in the guest. Always trap #NM in this case
+        * to save guest xfd_err timely.
         */
-       if (vcpu->arch.guest_fpu.fpstate->xfd)
+       if (vcpu->arch.xfd_no_write_intercept)
                eb |= (1u << NM_VECTOR);
 
        vmcs_write32(EXCEPTION_BITMAP, eb);
@@ -1978,9 +1980,21 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_XFD:
                ret = kvm_set_msr_common(vcpu, msr_info);
-               /* Update #NM interception according to guest xfd */
-               if (!ret)
+               /*
+                * Always intercepting WRMSR could incur non-negligible
+                * overhead given xfd might be changed frequently in
+                * guest context switch. Disable write interception
+                * upon the first write with a non-zero value (indicating
+                * potential usage on dynamic xfeatures). Also update
+                * exception bitmap to trap #NM for proper virtualization
+                * of guest xfd_err.
+                */
+               if (!ret && data) {
+                       vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD,
+                                                     MSR_TYPE_RW);
+                       vcpu->arch.xfd_no_write_intercept = true;
                        vmx_update_exception_bitmap(vcpu);
+               }
                break;
 #endif
        case MSR_IA32_SYSENTER_CS:
index 69dd2f8..f8fc744 100644 (file)
@@ -349,7 +349,7 @@ struct vcpu_vmx {
        struct lbr_desc lbr_desc;
 
        /* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS  14
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS  15
        struct {
                DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
                DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
index bde18ca..60da233 100644 (file)
@@ -10083,6 +10083,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
 
+       /*
+        * Sync xfd before calling handle_exit_irqoff() which may
+        * rely on the fact that guest_fpu::xfd is up-to-date (e.g.
+        * in #NM irqoff handler).
+        */
+       if (vcpu->arch.xfd_no_write_intercept)
+               fpu_sync_guest_vmexit_xfd_state();
+
        static_call(kvm_x86_handle_exit_irqoff)(vcpu);
 
        if (vcpu->arch.guest_fpu.xfd_err)