From caa057a2cad647fb368a12c8e6c410ac4c28e063 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 12 Mar 2018 04:53:03 -0700 Subject: [PATCH] KVM: X86: Provide a capability to disable HLT intercepts MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If host CPUs are dedicated to a VM, we can avoid VM exits on HLT. This patch adds the per-VM capability to disable them. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jan H. Schönherr Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 2 ++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 5 +++++ arch/x86/kvm/svm.c | 4 +++- arch/x86/kvm/vmx.c | 24 ++++++++++++++++++++++++ arch/x86/kvm/x86.c | 3 +++ arch/x86/kvm/x86.h | 9 ++++++++- 7 files changed, 46 insertions(+), 2 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 744a202..e9c7487 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4367,6 +4367,7 @@ Returns: 0 on success, -EINVAL when args[0] contains invalid exits Valid bits in args[0] are #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HLT (1 << 1) Enabling this capability on a VM provides userspace with a way to no longer intercept some instructions for improved latency in some @@ -4375,6 +4376,7 @@ physical CPUs. More bits can be added in the future; userspace can just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable all such vmexits. +Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits. 8. Other capabilities. ---------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a85b640..31f4186 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -812,6 +812,7 @@ struct kvm_arch { gpa_t wall_clock; bool mwait_in_guest; + bool hlt_in_guest; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e2d3050..82055b9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) return -EINVAL; } + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + if (kvm_hlt_in_guest(vcpu->kvm) && best && + (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f6578ce..0f801d8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1380,7 +1380,6 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_RDPMC); set_intercept(svm, INTERCEPT_CPUID); set_intercept(svm, INTERCEPT_INVD); - set_intercept(svm, INTERCEPT_HLT); set_intercept(svm, INTERCEPT_INVLPG); set_intercept(svm, INTERCEPT_INVLPGA); set_intercept(svm, INTERCEPT_IOIO_PROT); @@ -1403,6 +1402,9 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_MWAIT); } + if (!kvm_hlt_in_guest(svm->vcpu.kvm)) + set_intercept(svm, INTERCEPT_HLT); + control->iopm_base_pa = __sme_set(iopm_base); control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cef183..65747d1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2556,6 +2556,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit return 0; } +static void vmx_clear_hlt(struct kvm_vcpu *vcpu) +{ + /* + * Ensure that we clear the HLT state in the VMCS. We don't need to + * explicitly skip the instruction because if the HLT state is set, + * then the instruction is already executing and RIP has already been + * advanced. + */ + if (kvm_hlt_in_guest(vcpu->kvm) && + vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); +} + static void vmx_queue_exception(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2586,6 +2599,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) intr_info |= INTR_TYPE_HARD_EXCEPTION; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); + + vmx_clear_hlt(vcpu); } static bool vmx_rdtscp_supported(void) @@ -5545,6 +5560,8 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) if (kvm_mwait_in_guest(vmx->vcpu.kvm)) exec_control &= ~(CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING); + if (kvm_hlt_in_guest(vmx->vcpu.kvm)) + exec_control &= ~CPU_BASED_HLT_EXITING; return exec_control; } @@ -5906,6 +5923,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) update_exception_bitmap(vcpu); vpid_sync_context(vmx->vpid); + if (init_event) + vmx_clear_hlt(vcpu); } /* @@ -5976,6 +5995,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) } else intr |= INTR_TYPE_EXT_INTR; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); + + vmx_clear_hlt(vcpu); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -6006,6 +6027,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + + vmx_clear_hlt(vcpu); } static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) @@ -12347,6 +12370,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) vmx->nested.smm.vmxon = vmx->nested.vmxon; vmx->nested.vmxon = false; + vmx_clear_hlt(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index db95d4d6..9e6aaf6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2878,6 +2878,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_TSC_STABLE; break; case KVM_CAP_X86_DISABLE_EXITS: + r |= KVM_X86_DISABLE_EXITS_HTL; if(kvm_can_mwait_in_guest()) r |= KVM_X86_DISABLE_EXITS_MWAIT; break; @@ -4232,6 +4233,8 @@ split_irqchip_unlock: if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && kvm_can_mwait_in_guest()) kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL) + kvm->arch.hlt_in_guest = true; r = 0; break; default: diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 026b239..6d14589 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -265,11 +265,18 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) }) #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL) static inline bool kvm_mwait_in_guest(struct kvm *kvm) { return kvm->arch.mwait_in_guest; } +static inline bool kvm_hlt_in_guest(struct kvm *kvm) +{ + return kvm->arch.hlt_in_guest; +} + #endif -- 2.7.4