static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *hsave = svm->nested.hsave;
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer,
+ kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
+ svm->vmcb01.ptr->save.efer,
svm->nested.ctl.nested_cr3);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
return;
c = &svm->vmcb->control;
- h = &svm->nested.hsave->control;
+ h = &svm->vmcb01.ptr->control;
g = &svm->nested.ctl;
for (i = 0; i < MAX_INTERCEPT; i++)
return 0;
}
+void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
+{
+ if (!svm->nested.vmcb02.ptr)
+ return;
+
+ /* FIXME: merge g_pat from vmcb01 and vmcb12. */
+ svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
+}
+
static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
+ nested_vmcb02_compute_g_pat(svm);
+
/* Load the nested guest state */
svm->vmcb->save.es = vmcb12->save.es;
svm->vmcb->save.cs = vmcb12->save.cs;
{
const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ /* FIXME: go through each field one by one. */
+ svm->nested.vmcb02.ptr->control = svm->vmcb01.ptr->control;
+
if (nested_npt_enabled(svm))
nested_svm_init_mmu_context(&svm->vcpu);
svm->vmcb->control.int_ctl =
(svm->nested.ctl.int_ctl & ~mask) |
- (svm->nested.hsave->control.int_ctl & mask);
+ (svm->vmcb01.ptr->control.int_ctl & mask);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
svm->nested.vmcb12_gpa = vmcb12_gpa;
+
+ WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
+
+ nested_svm_vmloadsave(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
load_nested_vmcb_control(svm, &vmcb12->control);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_prepare_vmcb_control(svm);
nested_prepare_vmcb_save(svm, vmcb12);
{
int ret;
struct vmcb *vmcb12;
- struct vmcb *hsave = svm->nested.hsave;
- struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
u64 vmcb12_gpa;
kvm_clear_interrupt_queue(&svm->vcpu);
/*
- * Save the old vmcb, so we don't need to pick what we save, but can
- * restore everything when a VMEXIT occurs
+ * Since vmcb01 is not in use, we can use it to store some of the L1
+ * state.
*/
- hsave->save.es = vmcb->save.es;
- hsave->save.cs = vmcb->save.cs;
- hsave->save.ss = vmcb->save.ss;
- hsave->save.ds = vmcb->save.ds;
- hsave->save.gdtr = vmcb->save.gdtr;
- hsave->save.idtr = vmcb->save.idtr;
- hsave->save.efer = svm->vcpu.arch.efer;
- hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
- hsave->save.cr4 = svm->vcpu.arch.cr4;
- hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
- hsave->save.rip = kvm_rip_read(&svm->vcpu);
- hsave->save.rsp = vmcb->save.rsp;
- hsave->save.rax = vmcb->save.rax;
- if (npt_enabled)
- hsave->save.cr3 = vmcb->save.cr3;
- else
- hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
-
- copy_vmcb_control_area(&hsave->control, &vmcb->control);
+ svm->vmcb01.ptr->save.efer = svm->vcpu.arch.efer;
+ svm->vmcb01.ptr->save.cr0 = kvm_read_cr0(&svm->vcpu);
+ svm->vmcb01.ptr->save.cr4 = svm->vcpu.arch.cr4;
+ svm->vmcb01.ptr->save.rflags = kvm_get_rflags(&svm->vcpu);
+ svm->vmcb01.ptr->save.rip = kvm_rip_read(&svm->vcpu);
+
+ if (!npt_enabled)
+ svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(&svm->vcpu);
svm->nested.nested_run_pending = 1;
{
int rc;
struct vmcb *vmcb12;
- struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
vmcb12->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
- /* Restore the original control entries */
- copy_vmcb_control_area(&vmcb->control, &hsave->control);
+ nested_svm_vmloadsave(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
+
+ svm_switch_vmcb(svm, &svm->vmcb01);
- /* On vmexit the GIF is set to false */
+ /*
+ * On vmexit the GIF is set to false and
+ * no event can be injected in L1.
+ */
svm_set_gif(svm, false);
+ svm->vmcb->control.exit_int_info = 0;
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
svm->vcpu.arch.l1_tsc_offset;
svm->nested.ctl.nested_cr3 = 0;
- /* Restore selected save entries */
- svm->vmcb->save.es = hsave->save.es;
- svm->vmcb->save.cs = hsave->save.cs;
- svm->vmcb->save.ss = hsave->save.ss;
- svm->vmcb->save.ds = hsave->save.ds;
- svm->vmcb->save.gdtr = hsave->save.gdtr;
- svm->vmcb->save.idtr = hsave->save.idtr;
- kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
- kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED);
- svm_set_efer(&svm->vcpu, hsave->save.efer);
- svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
- svm_set_cr4(&svm->vcpu, hsave->save.cr4);
- kvm_rax_write(&svm->vcpu, hsave->save.rax);
- kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
- kvm_rip_write(&svm->vcpu, hsave->save.rip);
- svm->vmcb->save.dr7 = DR7_FIXED_1;
- svm->vmcb->save.cpl = 0;
- svm->vmcb->control.exit_int_info = 0;
+ /*
+ * Restore processor state that had been saved in vmcb01
+ */
+ kvm_set_rflags(&svm->vcpu, svm->vmcb->save.rflags | X86_EFLAGS_FIXED);
+ svm_set_efer(&svm->vcpu, svm->vmcb->save.efer);
+ svm_set_cr0(&svm->vcpu, svm->vmcb->save.cr0 | X86_CR0_PE);
+ svm_set_cr4(&svm->vcpu, svm->vmcb->save.cr4);
+ kvm_rax_write(&svm->vcpu, svm->vmcb->save.rax);
+ kvm_rsp_write(&svm->vcpu, svm->vmcb->save.rsp);
+ kvm_rip_write(&svm->vcpu, svm->vmcb->save.rip);
+
+ svm->vcpu.arch.dr7 = DR7_FIXED_1;
+ kvm_update_dr7(&svm->vcpu);
vmcb_mark_all_dirty(svm->vmcb);
nested_svm_uninit_mmu_context(&svm->vcpu);
- rc = nested_svm_load_cr3(&svm->vcpu, hsave->save.cr3, false);
+ rc = nested_svm_load_cr3(&svm->vcpu, svm->vmcb->save.cr3, false);
if (rc)
return 1;
- if (npt_enabled)
- svm->vmcb->save.cr3 = hsave->save.cr3;
-
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
* doesn't end up in L1.
int svm_allocate_nested(struct vcpu_svm *svm)
{
- struct page *hsave_page;
+ struct page *vmcb02_page;
if (svm->nested.initialized)
return 0;
- hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!hsave_page)
+ vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!vmcb02_page)
return -ENOMEM;
- svm->nested.hsave = page_address(hsave_page);
+ svm->nested.vmcb02.ptr = page_address(vmcb02_page);
+ svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
svm->nested.msrpm = svm_vcpu_alloc_msrpm();
if (!svm->nested.msrpm)
- goto err_free_hsave;
+ goto err_free_vmcb02;
svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
svm->nested.initialized = true;
return 0;
-err_free_hsave:
- __free_page(hsave_page);
+err_free_vmcb02:
+ __free_page(vmcb02_page);
return -ENOMEM;
}
svm_vcpu_free_msrpm(svm->nested.msrpm);
svm->nested.msrpm = NULL;
- __free_page(virt_to_page(svm->nested.hsave));
- svm->nested.hsave = NULL;
+ __free_page(virt_to_page(svm->nested.vmcb02.ptr));
+ svm->nested.vmcb02.ptr = NULL;
svm->nested.initialized = false;
}
void svm_leave_nested(struct vcpu_svm *svm)
{
if (is_guest_mode(&svm->vcpu)) {
- struct vmcb *hsave = svm->nested.hsave;
- struct vmcb *vmcb = svm->vmcb;
-
svm->nested.nested_run_pending = 0;
leave_guest_mode(&svm->vcpu);
- copy_vmcb_control_area(&vmcb->control, &hsave->control);
+
+ /*
+ * Guest mode has just been left, so make L1's vmcb01 the current
+ * VMCB again (same direction as the nested VMEXIT path). vmcb02 is
+ * only selected when entering L2.
+ */
+ svm_switch_vmcb(svm, &svm->vmcb01);
+
nested_svm_uninit_mmu_context(&svm->vcpu);
vmcb_mark_all_dirty(svm->vmcb);
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (get_host_vmcb(svm)->control.intercepts[INTERCEPT_EXCEPTION] &
- excp_bits)
+ if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
+ excp_bits)
return NESTED_EXIT_HOST;
else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
svm->vcpu.arch.apf.host_apf_flags)
if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
sizeof(user_vmcb->control)))
return -EFAULT;
- if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
+ if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
sizeof(user_vmcb->save)))
return -EFAULT;
-
out:
return kvm_state.size;
}
struct kvm_nested_state *kvm_state)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *hsave = svm->nested.hsave;
struct vmcb __user *user_vmcb = (struct vmcb __user *)
&user_kvm_nested_state->data.svm[0];
struct vmcb_control_area *ctl;
goto out_free;
/*
- * All checks done, we can enter guest mode. L1 control fields
- * come from the nested save state. Guest state is already
- * in the registers, the save area of the nested state instead
- * contains saved L1 state.
+ * All checks done, we can enter guest mode. Userspace provides
+ * vmcb12.control, which will be combined with L1 and stored into
+ * vmcb02, and the L1 save state which we store in vmcb01.
+ * L2 registers if needed are moved from the current VMCB to VMCB02.
*/
svm->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
- copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
- hsave->save = *save;
-
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
+ if (svm->current_vmcb == &svm->vmcb01)
+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+ svm->vmcb01.ptr->save = *save;
load_nested_vmcb_control(svm, ctl);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+
nested_prepare_vmcb_control(svm);
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
if (is_guest_mode(vcpu)) {
/* Write L1's TSC offset. */
g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->nested.hsave->control.tsc_offset;
- svm->nested.hsave->control.tsc_offset = offset;
+ svm->vmcb01.ptr->control.tsc_offset;
+ svm->vmcb01.ptr->control.tsc_offset = offset;
}
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
}
+void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
+{
+ svm->current_vmcb = target_vmcb;
+ svm->vmcb = target_vmcb->ptr;
+ svm->vmcb_pa = target_vmcb->pa;
+
+ /*
+ * Workaround: we don't yet track the ASID generation
+ * that was active the last time target_vmcb was run.
+ */
+
+ svm->asid_generation = 0;
+
+ /*
+ * Workaround: we don't yet track the physical CPU that
+ * target_vmcb has run on.
+ */
+
+ vmcb_mark_all_dirty(svm->vmcb);
+}
+
static int svm_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
- struct page *vmcb_page;
+ struct page *vmcb01_page;
struct page *vmsa_page = NULL;
int err;
svm = to_svm(vcpu);
err = -ENOMEM;
- vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!vmcb_page)
+ vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!vmcb01_page)
goto out;
if (sev_es_guest(svm->vcpu.kvm)) {
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
- svm->vmcb = page_address(vmcb_page);
- svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+ svm->vmcb01.ptr = page_address(vmcb01_page);
+ svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
if (vmsa_page)
svm->vmsa = page_address(vmsa_page);
svm->asid_generation = 0;
svm->guest_state_loaded = false;
+
+ svm_switch_vmcb(svm, &svm->vmcb01);
init_vmcb(svm);
svm_init_osvw(vcpu);
if (vmsa_page)
__free_page(vmsa_page);
error_free_vmcb_page:
- __free_page(vmcb_page);
+ __free_page(vmcb01_page);
out:
return err;
}
sev_free_vcpu(vcpu);
- __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
+ __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
}
/* Drop int_ctl fields related to VINTR injection. */
svm->vmcb->control.int_ctl &= mask;
if (is_guest_mode(&svm->vcpu)) {
- svm->nested.hsave->control.int_ctl &= mask;
+ svm->vmcb01.ptr->control.int_ctl &= mask;
WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
(svm->nested.ctl.int_ctl & V_TPR_MASK));
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
- svm->vmcb->save.g_pat = data;
+ svm->vmcb01.ptr->save.g_pat = data;
+ if (is_guest_mode(vcpu))
+ nested_vmcb02_compute_g_pat(svm);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
break;
case MSR_IA32_SPEC_CTRL:
} else if (is_guest_mode(vcpu)) {
/* As long as interrupts are being delivered... */
if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
- ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
+ ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
: !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
return true;
struct kvm_vcpu;
+struct kvm_vmcb_info {
+ struct vmcb *ptr;
+ unsigned long pa;
+};
+
struct svm_nested_state {
- struct vmcb *hsave;
+ struct kvm_vmcb_info vmcb02;
u64 hsave_msr;
u64 vm_cr_msr;
u64 vmcb12_gpa;
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
unsigned long vmcb_pa;
+ struct kvm_vmcb_info vmcb01;
+ struct kvm_vmcb_info *current_vmcb;
struct svm_cpu_data *svm_data;
u32 asid;
uint64_t asid_generation;
return container_of(vcpu, struct vcpu_svm, vcpu);
}
-static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
-{
- if (is_guest_mode(&svm->vcpu))
- return svm->nested.hsave;
- else
- return svm->vmcb;
-}
-
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
if (!sev_es_guest(svm->vcpu.kvm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb->control.intercepts[INTERCEPT_DR] = 0;
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
WARN_ON_ONCE(bit >= 32);
vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
WARN_ON_ONCE(bit >= 32);
vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb_set_intercept(&vmcb->control, bit);
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb_clr_intercept(&vmcb->control, bit);
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void sync_nested_vmcb_control(struct vcpu_svm *svm);
+void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
+void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
extern struct kvm_x86_nested_ops svm_nested_ops;