kvm: x86: Guest BNDCFGS requires guest MPX support
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5fd459..a339e97 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -248,6 +248,7 @@ struct __packed vmcs12 {
        u64 xss_exit_bitmap;
        u64 guest_physical_address;
        u64 vmcs_link_pointer;
+       u64 pml_address;
        u64 guest_ia32_debugctl;
        u64 guest_ia32_pat;
        u64 guest_ia32_efer;
@@ -369,6 +370,7 @@ struct __packed vmcs12 {
        u16 guest_ldtr_selector;
        u16 guest_tr_selector;
        u16 guest_intr_status;
+       u16 guest_pml_index;
        u16 host_es_selector;
        u16 host_cs_selector;
        u16 host_ss_selector;
@@ -407,6 +409,7 @@ struct nested_vmx {
        /* Has the level1 guest done vmxon? */
        bool vmxon;
        gpa_t vmxon_ptr;
+       bool pml_full;
 
        /* The guest-physical address of the current VMCS L1 keeps for L2 */
        gpa_t current_vmptr;
@@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
        FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
        FIELD(GUEST_INTR_STATUS, guest_intr_status),
+       FIELD(GUEST_PML_INDEX, guest_pml_index),
        FIELD(HOST_ES_SELECTOR, host_es_selector),
        FIELD(HOST_CS_SELECTOR, host_cs_selector),
        FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
        FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
        FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
+       FIELD64(PML_ADDRESS, pml_address),
        FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
        FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
        FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
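For context, vmcs_field_to_offset_table maps architectural VMCS field encodings to byte offsets within struct vmcs12, and a 64-bit field also registers a _HIGH alias so 32-bit L1 hypervisors can reach the upper half. In this vintage of vmx.c the plumbing looks roughly like the sketch below (reproduced in spirit from the surrounding file, so treat details as approximate):

	#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
	#define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
	#define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
					[number##_HIGH] = VMCS12_OFFSET(name)+4

	/* Returns -ENOENT for encodings with no vmcs12 backing. */
	static inline short vmcs_field_to_offset(unsigned long field)
	{
		BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);

		if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
		    vmcs_field_to_offset_table[field] == 0)
			return -ENOENT;

		return vmcs_field_to_offset_table[field];
	}

Without the two new FIELD entries, an L1 VMWRITE to PML_ADDRESS or GUEST_PML_INDEX would fail even with the control bit exposed.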
@@ -1314,6 +1319,11 @@ static inline bool report_flexpriority(void)
        return flexpriority_enabled;
 }
 
+static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
+{
+       return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
        return vmcs12->cpu_based_vm_exec_control & bit;
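The new wrapper reads the CR3-target count back out of the virtualized IA32_VMX_MISC value handed to L1. The underlying helper extracts bits 24:16 of the MSR, which the SDM defines as the number of CR3-target values supported at VM entry (four on shipping processors); a minimal sketch, assuming the asm/vmx.h definition of this era:

	/* IA32_VMX_MISC[24:16]: number of CR3-target values (SDM Vol. 3, A.6) */
	static inline int vmx_misc_cr3_count(u64 vmx_misc)
	{
		return (vmx_misc & GENMASK_ULL(24, 16)) >> 16;
	}

check_vmentry_prereqs() below uses the wrapper to reject a vmcs12 whose cr3_target_count exceeds the advertised limit.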
@@ -1348,6 +1358,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
                vmx_xsaves_supported();
 }
 
+static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
+{
+       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
+}
+
 static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
 {
        return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
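nested_cpu_has_pml() follows the file's established pattern: nested_cpu_has2() reports a secondary execution control as set only when the primary control that activates the secondary set is also on, roughly:

	static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
	{
		return (vmcs12->cpu_based_vm_exec_control &
				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
			(vmcs12->secondary_vm_exec_control & bit);
	}

so a vmcs12 that sets SECONDARY_EXEC_ENABLE_PML without activating secondary controls is treated as having PML off.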
@@ -2751,8 +2766,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
                        VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
                        VMX_EPT_1GB_PAGE_BIT;
-               if (enable_ept_ad_bits)
-                       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+               if (enable_ept_ad_bits) {
+                       vmx->nested.nested_vmx_secondary_ctls_high |=
+                               SECONDARY_EXEC_ENABLE_PML;
+                       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+               }
        } else
                vmx->nested.nested_vmx_ept_caps = 0;
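PML is advertised to L1 only when enable_ept_ad_bits is set because hardware PML logs a guest-physical address exactly when it sets an EPT dirty bit; without A/D emulation there is nothing to hang the emulated log on. Adding the bit to nested_vmx_secondary_ctls_high makes it allowed rather than mandatory, so an L1 that never sets SECONDARY_EXEC_ENABLE_PML sees no behavioral change.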
 
@@ -3177,7 +3195,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!kvm_mpx_supported())
+               if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
                        return 1;
                msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
@@ -3259,7 +3277,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                vmcs_writel(GUEST_SYSENTER_ESP, data);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!kvm_mpx_supported())
+               if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
                        return 1;
                vmcs_write64(GUEST_BNDCFGS, data);
                break;
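Both MSR paths now also require that userspace actually exposed MPX in the guest's CPUID; host support (kvm_mpx_supported()) alone no longer lets a guest touch BNDCFGS. The cpuid.h helper of this era amounts to testing CPUID.(EAX=7,ECX=0):EBX bit 14, roughly:

	/* Sketch of the contemporaneous cpuid.h helper. */
	static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
	{
		struct kvm_cpuid_entry2 *best;

		best = kvm_find_cpuid_entry(vcpu, 7, 0);
		return best && (best->ebx & bit(X86_FEATURE_MPX));
	}

With this in place, a guest whose CPUID lacks MPX gets #GP on BNDCFGS accesses, matching bare metal.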
@@ -6486,7 +6504,7 @@ static __init int hardware_setup(void)
                enable_ept_ad_bits = 0;
        }
 
-       if (!cpu_has_vmx_ept_ad_bits())
+       if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
                enable_ept_ad_bits = 0;
 
        if (!cpu_has_vmx_unrestricted_guest())
@@ -6529,7 +6547,6 @@ static __init int hardware_setup(void)
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
 
        memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
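Dropping the vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, ...) call is what gives the new guest_cpuid_has_mpx() checks teeth: while BNDCFGS was passed through in the MSR bitmap, guest accesses went straight to hardware and vmx_get_msr()/vmx_set_msr() never ran, so the CPUID gate could not be enforced. Keeping the intercept routes every access through the exit path where the check applies.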
@@ -6896,97 +6913,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-/*
- * This function performs the various checks including
- * - if it's 4KB aligned
- * - No bits beyond the physical address width are set
- * - Returns 0 on success or else 1
- * (Intel SDM Section 30.3)
- */
-static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
-                                 gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 {
        gva_t gva;
-       gpa_t vmptr;
        struct x86_exception e;
-       struct page *page;
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
                return 1;
 
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-                               sizeof(vmptr), &e)) {
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
+                               sizeof(*vmpointer), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
 
-       switch (exit_reason) {
-       case EXIT_REASON_VMON:
-               /*
-                * SDM 3: 24.11.5
-                * The first 4 bytes of VMXON region contain the supported
-                * VMCS revision identifier
-                *
-                * Note - IA32_VMX_BASIC[48] will never be 1
-                * for the nested case;
-                * which replaces physical address width with 32
-                *
-                */
-               if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-                       nested_vmx_failInvalid(vcpu);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-
-               page = nested_get_page(vcpu, vmptr);
-               if (page == NULL) {
-                       nested_vmx_failInvalid(vcpu);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-               if (*(u32 *)kmap(page) != VMCS12_REVISION) {
-                       kunmap(page);
-                       nested_release_page_clean(page);
-                       nested_vmx_failInvalid(vcpu);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-               kunmap(page);
-               nested_release_page_clean(page);
-               vmx->nested.vmxon_ptr = vmptr;
-               break;
-       case EXIT_REASON_VMCLEAR:
-               if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-                       nested_vmx_failValid(vcpu,
-                                            VMXERR_VMCLEAR_INVALID_ADDRESS);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-
-               if (vmptr == vmx->nested.vmxon_ptr) {
-                       nested_vmx_failValid(vcpu,
-                                            VMXERR_VMCLEAR_VMXON_POINTER);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-               break;
-       case EXIT_REASON_VMPTRLD:
-               if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-                       nested_vmx_failValid(vcpu,
-                                            VMXERR_VMPTRLD_INVALID_ADDRESS);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-
-               if (vmptr == vmx->nested.vmxon_ptr) {
-                       nested_vmx_failValid(vcpu,
-                                            VMXERR_VMPTRLD_VMXON_POINTER);
-                       return kvm_skip_emulated_instruction(vcpu);
-               }
-               break;
-       default:
-               return 1; /* shouldn't happen */
-       }
-
-       if (vmpointer)
-               *vmpointer = vmptr;
        return 0;
 }
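The refactor splits the old nested_vmx_check_vmptr() into a plain operand fetch, nested_vmx_get_vmptr() above, and per-instruction validation that moves into the callers: the handle_vmon(), handle_vmclear() and handle_vmptrld() hunks below carry the alignment, physical-address-width and VMXON-pointer checks that used to live in the exit_reason switch. That also retires the unreachable "shouldn't happen" default arm.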
 
@@ -7048,6 +6989,8 @@ out_msr_bitmap:
 static int handle_vmon(struct kvm_vcpu *vcpu)
 {
        int ret;
+       gpa_t vmptr;
+       struct page *page;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
                | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7077,9 +7020,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+       if (nested_vmx_get_vmptr(vcpu, &vmptr))
                return 1;
+
+       /*
+        * SDM 3: 24.11.5
+        * The first 4 bytes of the VMXON region contain the supported
+        * VMCS revision identifier.
+        *
+        * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case,
+        * which would replace the physical-address width with 32.
+        */
+       if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+               nested_vmx_failInvalid(vcpu);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+
+       page = nested_get_page(vcpu, vmptr);
+       if (page == NULL) {
+               nested_vmx_failInvalid(vcpu);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+       if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+               kunmap(page);
+               nested_release_page_clean(page);
+               nested_vmx_failInvalid(vcpu);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+       kunmap(page);
+       nested_release_page_clean(page);
+
+       vmx->nested.vmxon_ptr = vmptr;
        ret = enter_vmx_operation(vcpu);
        if (ret)
                return ret;
@@ -7195,9 +7166,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
+       if (nested_vmx_get_vmptr(vcpu, &vmptr))
                return 1;
 
+       if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+               nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+
+       if (vmptr == vmx->nested.vmxon_ptr) {
+               nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+
        if (vmptr == vmx->nested.current_vmptr)
                nested_release_vmcs12(vmx);
 
@@ -7527,9 +7508,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
+       if (nested_vmx_get_vmptr(vcpu, &vmptr))
                return 1;
 
+       if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+               nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+
+       if (vmptr == vmx->nested.vmxon_ptr) {
+               nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
+
        if (vmx->nested.current_vmptr != vmptr) {
                struct vmcs12 *new_vmcs12;
                struct page *page;
@@ -7895,11 +7886,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 {
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        int cr = exit_qualification & 15;
-       int reg = (exit_qualification >> 8) & 15;
-       unsigned long val = kvm_register_readl(vcpu, reg);
+       int reg;
+       unsigned long val;
 
        switch ((exit_qualification >> 4) & 3) {
        case 0: /* mov to cr */
+               reg = (exit_qualification >> 8) & 15;
+               val = kvm_register_readl(vcpu, reg);
                switch (cr) {
                case 0:
                        if (vmcs12->cr0_guest_host_mask &
@@ -7954,6 +7947,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
                 * cr0. Other attempted changes are ignored, with no exit.
                 */
+               val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
                if (vmcs12->cr0_guest_host_mask & 0xe &
                    (val ^ vmcs12->cr0_read_shadow))
                        return true;
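The old code read reg and val before looking at the access type, but the register operand in exit-qualification bits 11:8 is defined only for MOV-to/from-CR; for LMSW the source operand lives in bits 31:16, so the unconditional kvm_register_readl() fed garbage into the cr0_guest_host_mask test. A decode sketch of the CR-access exit qualification (layout per the Intel SDM; the macro names are illustrative, only LMSW_SOURCE_DATA_SHIFT exists in the file):

	#define CRQ_CR_NUM(eq)		((eq) & 15)		/* bits 3:0 */
	#define CRQ_ACCESS_TYPE(eq)	(((eq) >> 4) & 3)	/* 0 mov-to, 1 mov-from, 2 clts, 3 lmsw */
	#define CRQ_GP_REG(eq)		(((eq) >> 8) & 15)	/* mov to/from CR only */
	#define CRQ_LMSW_DATA(eq)	(((eq) >> 16) & 0xffff)	/* lmsw source operand */

The fix masks the LMSW operand down to its low four bits, since LMSW can only load PE/MP/EM/TS into CR0.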
@@ -8114,7 +8108,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_PREEMPTION_TIMER:
                return false;
        case EXIT_REASON_PML_FULL:
-               /* We don't expose PML support to L1. */
+               /* We emulate PML support to L1. */
                return false;
        default:
                return true;
@@ -9364,13 +9358,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
                struct x86_exception *fault)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exit_reason;
+       unsigned long exit_qualification = vcpu->arch.exit_qualification;
 
-       if (fault->error_code & PFERR_RSVD_MASK)
+       if (vmx->nested.pml_full) {
+               exit_reason = EXIT_REASON_PML_FULL;
+               vmx->nested.pml_full = false;
+               exit_qualification &= INTR_INFO_UNBLOCK_NMI;
+       } else if (fault->error_code & PFERR_RSVD_MASK)
                exit_reason = EXIT_REASON_EPT_MISCONFIG;
        else
                exit_reason = EXIT_REASON_EPT_VIOLATION;
-       nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
+
+       nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
        vmcs12->guest_physical_address = fault->address;
 }
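vmx_write_pml_buffer(), added further down, latches nested.pml_full when L1's log has no free slots; the fault that triggered the logging attempt is then reflected to L1 as EXIT_REASON_PML_FULL instead of an EPT violation. Masking the saved exit qualification with INTR_INFO_UNBLOCK_NMI (bit 12) keeps only the "NMI unblocking due to IRET" bit, the single exit-qualification bit the SDM defines for PML-full exits.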
 
@@ -9713,6 +9714,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
+                                        struct vmcs12 *vmcs12)
+{
+       u64 address = vmcs12->pml_address;
+       int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+       if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
+               if (!nested_cpu_has_ept(vmcs12) ||
+                   !IS_ALIGNED(address, 4096)  ||
+                   address >> maxphyaddr)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
                                       struct vmx_msr_entry *e)
 {
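These checks mirror the hardware VM-entry rules for the PML address: enabling PML without "enable EPT" is an invalid control combination, and the log page must be 4 KiB-aligned and within the guest's physical-address width. The -EINVAL is mapped to VMXERR_ENTRY_INVALID_CONTROL_FIELD by check_vmentry_prereqs() below, so a malformed vmcs12 fails VM entry the way real hardware would.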
@@ -9886,7 +9903,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                          bool from_vmentry, u32 *entry_failure_code)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u32 exec_control;
+       u32 exec_control, vmcs12_exec_ctrl;
 
        vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
        vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10017,8 +10034,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
-                               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
-                       exec_control |= vmcs12->secondary_vm_exec_control;
+                                  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
+                       vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
+                               ~SECONDARY_EXEC_ENABLE_PML;
+                       exec_control |= vmcs12_exec_ctrl;
+               }
 
                if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
                        vmcs_write64(EOI_EXIT_BITMAP0,
@@ -10248,6 +10268,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+       if (nested_vmx_check_pml_controls(vcpu, vmcs12))
+               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
        if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
                                vmx->nested.nested_vmx_procbased_ctls_low,
                                vmx->nested.nested_vmx_procbased_ctls_high) ||
@@ -10266,6 +10289,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                                vmx->nested.nested_vmx_entry_ctls_high))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+       if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
+               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
        if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
            !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
            !nested_cr3_valid(vcpu, vmcs12->host_cr3))
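This enforces the limit that nested_cpu_vmx_misc_cr3_count() reads back from the virtualized IA32_VMX_MISC: a vmcs12 requesting more CR3-target values than advertised (in practice more than four) now fails VM entry with an invalid-control-field error instead of being silently accepted.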
@@ -11143,6 +11169,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
        kvm_flush_pml_buffers(kvm);
 }
 
+static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
+{
+       struct vmcs12 *vmcs12;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       gpa_t gpa;
+       struct page *page = NULL;
+       u64 *pml_address;
+
+       if (is_guest_mode(vcpu)) {
+               WARN_ON_ONCE(vmx->nested.pml_full);
+
+               /*
+                * Check if PML is enabled for the nested guest.
+                * Whether eptp bit 6 is set is already checked
+                * as part of A/D emulation.
+                */
+               vmcs12 = get_vmcs12(vcpu);
+               if (!nested_cpu_has_pml(vmcs12))
+                       return 0;
+
+               if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
+                       vmx->nested.pml_full = true;
+                       return 1;
+               }
+
+               gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+
+               page = nested_get_page(vcpu, vmcs12->pml_address);
+               if (!page)
+                       return 0;
+
+               pml_address = kmap(page);
+               pml_address[vmcs12->guest_pml_index--] = gpa;
+               kunmap(page);
+               nested_release_page_clean(page);
+       }
+
+       return 0;
+}
+
 static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
                                           struct kvm_memory_slot *memslot,
                                           gfn_t offset, unsigned long mask)
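vmx_write_pml_buffer() is reached through the new .write_log_dirty hook (wired up in the final hunk) from the shadow-paging code that emulates EPT A/D bits: when L0 is about to set a dirty bit on L2's behalf, it first appends the guest-physical address to L1's PML page and decrements guest_pml_index, exactly as hardware PML would. The MMU-side plumbing is a companion change outside this file; it looks roughly like:

	/* Sketch of the companion hook in arch/x86/kvm/mmu.c (not part of this diff). */
	int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
	{
		if (kvm_x86_ops->write_log_dirty)
			return kvm_x86_ops->write_log_dirty(vcpu);

		return 0;
	}

A non-zero return aborts the A/D update, and the resulting fault is converted by nested_ept_inject_page_fault() above into the PML-full exit for L1. Note that prepare_vmcs02() strips SECONDARY_EXEC_ENABLE_PML from vmcs02, so L2 never runs with hardware PML on L1's behalf; L0's own dirty logging decides whether hardware PML is active.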
@@ -11502,6 +11568,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
        .flush_log_dirty = vmx_flush_log_dirty,
        .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+       .write_log_dirty = vmx_write_pml_buffer,
 
        .pre_block = vmx_pre_block,
        .post_block = vmx_post_block,