KVM: SVM: Add a proper field for Hyper-V VMCB enlightenments
[platform/kernel/linux-starfive.git] / arch/x86/kvm/svm/nested.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM support
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  *   Yaniv Kamay  <yaniv@qumranet.com>
12  *   Avi Kivity   <avi@qumranet.com>
13  */
14
15 #define pr_fmt(fmt) "SVM: " fmt
16
17 #include <linux/kvm_types.h>
18 #include <linux/kvm_host.h>
19 #include <linux/kernel.h>
20
21 #include <asm/msr-index.h>
22 #include <asm/debugreg.h>
23
24 #include "kvm_emulate.h"
25 #include "trace.h"
26 #include "mmu.h"
27 #include "x86.h"
28 #include "cpuid.h"
29 #include "lapic.h"
30 #include "svm.h"
31 #include "hyperv.h"
32
33 #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
34
35 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
36                                        struct x86_exception *fault)
37 {
38         struct vcpu_svm *svm = to_svm(vcpu);
39         struct vmcb *vmcb = svm->vmcb;
40
41         if (vmcb->control.exit_code != SVM_EXIT_NPF) {
42                 /*
43                  * TODO: track the cause of the nested page fault, and
44                  * correctly fill in the high bits of exit_info_1.
45                  */
46                 vmcb->control.exit_code = SVM_EXIT_NPF;
47                 vmcb->control.exit_code_hi = 0;
48                 vmcb->control.exit_info_1 = (1ULL << 32);
49                 vmcb->control.exit_info_2 = fault->address;
50         }
51
52         vmcb->control.exit_info_1 &= ~0xffffffffULL;
53         vmcb->control.exit_info_1 |= fault->error_code;
54
55         nested_svm_vmexit(svm);
56 }
57
58 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
59 {
60         struct vcpu_svm *svm = to_svm(vcpu);
61         u64 cr3 = svm->nested.ctl.nested_cr3;
62         u64 pdpte;
63         int ret;
64
65         ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
66                                        offset_in_page(cr3) + index * 8, 8);
67         if (ret)
68                 return 0;
69         return pdpte;
70 }
71
72 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
73 {
74         struct vcpu_svm *svm = to_svm(vcpu);
75
76         return svm->nested.ctl.nested_cr3;
77 }
78
79 static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
80 {
81         struct vcpu_svm *svm = to_svm(vcpu);
82
83         WARN_ON(mmu_is_nested(vcpu));
84
85         vcpu->arch.mmu = &vcpu->arch.guest_mmu;
86
87         /*
88          * The NPT format depends on L1's CR4 and EFER, which are in vmcb01.  Note,
89          * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
90          * vCPU state.  CR0.WP is explicitly ignored, while CR0.PG is required.
91          */
92         kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
93                                 svm->vmcb01.ptr->save.efer,
94                                 svm->nested.ctl.nested_cr3);
95         vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
96         vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
97         vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
98         vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
99 }
100
101 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
102 {
103         vcpu->arch.mmu = &vcpu->arch.root_mmu;
104         vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
105 }
106
107 static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
108 {
109         if (!svm->v_vmload_vmsave_enabled)
110                 return true;
111
112         if (!nested_npt_enabled(svm))
113                 return true;
114
115         if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
116                 return true;
117
118         return false;
119 }
120
121 void recalc_intercepts(struct vcpu_svm *svm)
122 {
123         struct vmcb_control_area *c, *h;
124         struct vmcb_ctrl_area_cached *g;
125         unsigned int i;
126
127         vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
128
129         if (!is_guest_mode(&svm->vcpu))
130                 return;
131
132         c = &svm->vmcb->control;
133         h = &svm->vmcb01.ptr->control;
134         g = &svm->nested.ctl;
135
136         for (i = 0; i < MAX_INTERCEPT; i++)
137                 c->intercepts[i] = h->intercepts[i];
138
139         if (g->int_ctl & V_INTR_MASKING_MASK) {
140                 /* We only want the cr8 intercept bits of L1 */
141                 vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
142                 vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
143
144                 /*
145                  * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
146                  * affect any interrupt we may want to inject; therefore,
147                  * interrupt window vmexits are irrelevant to L0.
148                  */
149                 vmcb_clr_intercept(c, INTERCEPT_VINTR);
150         }
151
152         /* We don't want to see VMMCALLs from a nested guest */
153         vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
154
155         for (i = 0; i < MAX_INTERCEPT; i++)
156                 c->intercepts[i] |= g->intercepts[i];
157
158         /* If SMI is not intercepted, ignore guest SMI intercept as well  */
159         if (!intercept_smi)
160                 vmcb_clr_intercept(c, INTERCEPT_SMI);
161
162         if (nested_vmcb_needs_vls_intercept(svm)) {
163                 /*
164                  * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
165                  * we must intercept these instructions to correctly
166                  * emulate them in case L1 doesn't intercept them.
167                  */
168                 vmcb_set_intercept(c, INTERCEPT_VMLOAD);
169                 vmcb_set_intercept(c, INTERCEPT_VMSAVE);
170         } else {
171                 WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
172         }
173 }
174
175 /*
176  * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function
177  * is optimized in that it only merges the parts where the KVM MSR permission bitmap
178  * may contain zero bits.
179  */
180 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
181 {
182         struct hv_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
183         int i;
184
185         /*
186          * MSR bitmap update can be skipped when:
187          * - MSR bitmap for L1 hasn't changed.
188          * - Nested hypervisor (L1) is attempting to launch the same L2 as
189          *   before.
190          * - Nested hypervisor (L1) is using the Hyper-V emulation interface and
191          *   tells KVM (L0) there were no changes in the MSR bitmap for L2.
192          */
193         if (!svm->nested.force_msr_bitmap_recalc &&
194             kvm_hv_hypercall_enabled(&svm->vcpu) &&
195             hve->hv_enlightenments_control.msr_bitmap &&
196             (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
197                 goto set_msrpm_base_pa;
198
199         if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
200                 return true;
201
202         for (i = 0; i < MSRPM_OFFSETS; i++) {
203                 u32 value, p;
204                 u64 offset;
205
206                 if (msrpm_offsets[i] == 0xffffffff)
207                         break;
208
209                 p      = msrpm_offsets[i];
210
211                 /* x2apic MSRs are always intercepted for the nested guest */
212                 if (is_x2apic_msrpm_offset(p))
213                         continue;
214
215                 offset = svm->nested.ctl.msrpm_base_pa + (p * 4);
216
217                 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
218                         return false;
219
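                /* A set bit means "intercept", so OR-ing applies both L0's and L1's intercepts. */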
220                 svm->nested.msrpm[p] = svm->msrpm[p] | value;
221         }
222
223         svm->nested.force_msr_bitmap_recalc = false;
224
225 set_msrpm_base_pa:
226         svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
227
228         return true;
229 }
230
231 /*
232  * Bits 11:0 of bitmap address are ignored by hardware
233  */
234 static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
235 {
236         u64 addr = PAGE_ALIGN(pa);
237
238         return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
239             kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
240 }
241
242 static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
243 {
244         /* Nested FLUSHBYASID is not supported yet.  */
245         switch (tlb_ctl) {
246                 case TLB_CONTROL_DO_NOTHING:
247                 case TLB_CONTROL_FLUSH_ALL_ASID:
248                         return true;
249                 default:
250                         return false;
251         }
252 }
253
254 static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
255                                          struct vmcb_ctrl_area_cached *control)
256 {
257         if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
258                 return false;
259
260         if (CC(control->asid == 0))
261                 return false;
262
263         if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
264                 return false;
265
266         if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
267                                            MSRPM_SIZE)))
268                 return false;
269         if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
270                                            IOPM_SIZE)))
271                 return false;
272
273         if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
274                 return false;
275
276         return true;
277 }
278
279 /* Common checks that apply to both L1 and L2 state.  */
280 static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
281                                      struct vmcb_save_area_cached *save)
282 {
283         if (CC(!(save->efer & EFER_SVME)))
284                 return false;
285
286         if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
287             CC(save->cr0 & ~0xffffffffULL))
288                 return false;
289
290         if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
291                 return false;
292
293         /*
294          * These checks are also performed by KVM_SET_SREGS,
295          * except that EFER.LMA is not checked by SVM against
296          * CR0.PG && EFER.LME.
297          */
298         if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
299                 if (CC(!(save->cr4 & X86_CR4_PAE)) ||
300                     CC(!(save->cr0 & X86_CR0_PE)) ||
301                     CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
302                         return false;
303         }
304
305         /* Note, SVM doesn't have any additional restrictions on CR4. */
306         if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
307                 return false;
308
309         if (CC(!kvm_valid_efer(vcpu, save->efer)))
310                 return false;
311
312         return true;
313 }
314
315 static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
316 {
317         struct vcpu_svm *svm = to_svm(vcpu);
318         struct vmcb_save_area_cached *save = &svm->nested.save;
319
320         return __nested_vmcb_check_save(vcpu, save);
321 }
322
323 static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
324 {
325         struct vcpu_svm *svm = to_svm(vcpu);
326         struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
327
328         return __nested_vmcb_check_controls(vcpu, ctl);
329 }
330
331 static
332 void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
333                                          struct vmcb_ctrl_area_cached *to,
334                                          struct vmcb_control_area *from)
335 {
336         unsigned int i;
337
338         for (i = 0; i < MAX_INTERCEPT; i++)
339                 to->intercepts[i] = from->intercepts[i];
340
341         to->iopm_base_pa        = from->iopm_base_pa;
342         to->msrpm_base_pa       = from->msrpm_base_pa;
343         to->tsc_offset          = from->tsc_offset;
344         to->tlb_ctl             = from->tlb_ctl;
345         to->int_ctl             = from->int_ctl;
346         to->int_vector          = from->int_vector;
347         to->int_state           = from->int_state;
348         to->exit_code           = from->exit_code;
349         to->exit_code_hi        = from->exit_code_hi;
350         to->exit_info_1         = from->exit_info_1;
351         to->exit_info_2         = from->exit_info_2;
352         to->exit_int_info       = from->exit_int_info;
353         to->exit_int_info_err   = from->exit_int_info_err;
354         to->nested_ctl          = from->nested_ctl;
355         to->event_inj           = from->event_inj;
356         to->event_inj_err       = from->event_inj_err;
357         to->next_rip            = from->next_rip;
358         to->nested_cr3          = from->nested_cr3;
359         to->virt_ext            = from->virt_ext;
360         to->pause_filter_count  = from->pause_filter_count;
361         to->pause_filter_thresh = from->pause_filter_thresh;
362
363         /* Copy asid here because nested_vmcb_check_controls will check it.  */
364         to->asid           = from->asid;
365         to->msrpm_base_pa &= ~0x0fffULL;
366         to->iopm_base_pa  &= ~0x0fffULL;
367
368         /* Hyper-V extensions (Enlightened VMCB) */
369         if (kvm_hv_hypercall_enabled(vcpu)) {
370                 to->clean = from->clean;
371                 memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
372                        sizeof(to->hv_enlightenments));
373         }
374 }
375
376 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
377                                        struct vmcb_control_area *control)
378 {
379         __nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control);
380 }
381
382 static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
383                                              struct vmcb_save_area *from)
384 {
385         /*
386          * Copy only fields that are validated, as we need them
387          * to avoid TOC/TOU (time-of-check/time-of-use) races.
388          */
389         to->efer = from->efer;
390         to->cr0 = from->cr0;
391         to->cr3 = from->cr3;
392         to->cr4 = from->cr4;
393
394         to->dr6 = from->dr6;
395         to->dr7 = from->dr7;
396 }
397
398 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
399                                     struct vmcb_save_area *save)
400 {
401         __nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
402 }
403
404 /*
405  * Synchronize fields that are written by the processor, so that
406  * they can be copied back into the vmcb12.
407  */
408 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
409 {
410         u32 mask;
411         svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
412         svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
413
414         /* Only a few fields of int_ctl are written by the processor.  */
415         mask = V_IRQ_MASK | V_TPR_MASK;
416         if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
417             svm_is_intercept(svm, INTERCEPT_VINTR)) {
418                 /*
419                  * In order to request an interrupt window, L0 is usurping
420                  * svm->vmcb->control.int_ctl and possibly setting V_IRQ
421                  * even if it was clear in L1's VMCB.  Restoring it would be
422                  * wrong.  However, in this case V_IRQ will remain true until
423                  * interrupt_window_interception calls svm_clear_vintr and
424                  * restores int_ctl.  We can just leave it aside.
425                  */
426                 mask &= ~V_IRQ_MASK;
427         }
428
429         if (nested_vgif_enabled(svm))
430                 mask |= V_GIF_MASK;
431
432         svm->nested.ctl.int_ctl        &= ~mask;
433         svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
434 }
435
436 /*
437  * Transfer any event that L0 or L1 wanted to inject into L2 to
438  * EXIT_INT_INFO.
439  */
440 static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
441                                                 struct vmcb *vmcb12)
442 {
443         struct kvm_vcpu *vcpu = &svm->vcpu;
444         u32 exit_int_info = 0;
445         unsigned int nr;
446
447         if (vcpu->arch.exception.injected) {
448                 nr = vcpu->arch.exception.vector;
449                 exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
450
451                 if (vcpu->arch.exception.has_error_code) {
452                         exit_int_info |= SVM_EVTINJ_VALID_ERR;
453                         vmcb12->control.exit_int_info_err =
454                                 vcpu->arch.exception.error_code;
455                 }
456
457         } else if (vcpu->arch.nmi_injected) {
458                 exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
459
460         } else if (vcpu->arch.interrupt.injected) {
461                 nr = vcpu->arch.interrupt.nr;
462                 exit_int_info = nr | SVM_EVTINJ_VALID;
463
464                 if (vcpu->arch.interrupt.soft)
465                         exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
466                 else
467                         exit_int_info |= SVM_EVTINJ_TYPE_INTR;
468         }
469
470         vmcb12->control.exit_int_info = exit_int_info;
471 }
472
473 static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
474 {
475         /*
476          * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
477          * things to fix before this can be conditional:
478          *
479          *  - Flush TLBs for both L1 and L2 remote TLB flush
480          *  - Honor L1's request to flush an ASID on nested VMRUN
481          *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
482          *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
483          *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
484          *
485          * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
486          *     NPT guest-physical mappings on VMRUN.
487          */
488         kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
489         kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
490 }
491
492 /*
493  * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
494  * if we are emulating VM-Entry into a guest with NPT enabled.
495  */
496 static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
497                                bool nested_npt, bool reload_pdptrs)
498 {
499         if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
500                 return -EINVAL;
501
502         if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
503             CC(!load_pdptrs(vcpu, cr3)))
504                 return -EINVAL;
505
506         vcpu->arch.cr3 = cr3;
507
508         /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
509         kvm_init_mmu(vcpu);
510
511         if (!nested_npt)
512                 kvm_mmu_new_pgd(vcpu, cr3);
513
514         return 0;
515 }
516
517 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
518 {
519         if (!svm->nested.vmcb02.ptr)
520                 return;
521
522         /* FIXME: merge g_pat from vmcb01 and vmcb12.  */
523         svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
524 }
525
526 static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
527 {
528         bool new_vmcb12 = false;
529         struct vmcb *vmcb01 = svm->vmcb01.ptr;
530         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
531
532         nested_vmcb02_compute_g_pat(svm);
533
534         /* Load the nested guest state */
535         if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
536                 new_vmcb12 = true;
537                 svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
538                 svm->nested.force_msr_bitmap_recalc = true;
539         }
540
541         if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
542                 vmcb02->save.es = vmcb12->save.es;
543                 vmcb02->save.cs = vmcb12->save.cs;
544                 vmcb02->save.ss = vmcb12->save.ss;
545                 vmcb02->save.ds = vmcb12->save.ds;
546                 vmcb02->save.cpl = vmcb12->save.cpl;
547                 vmcb_mark_dirty(vmcb02, VMCB_SEG);
548         }
549
550         if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
551                 vmcb02->save.gdtr = vmcb12->save.gdtr;
552                 vmcb02->save.idtr = vmcb12->save.idtr;
553                 vmcb_mark_dirty(vmcb02, VMCB_DT);
554         }
555
556         kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
557
558         svm_set_efer(&svm->vcpu, svm->nested.save.efer);
559
560         svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
561         svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
562
563         svm->vcpu.arch.cr2 = vmcb12->save.cr2;
564
565         kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
566         kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
567         kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
568
569         /* In case we don't even reach vcpu_run, the fields are not updated */
570         vmcb02->save.rax = vmcb12->save.rax;
571         vmcb02->save.rsp = vmcb12->save.rsp;
572         vmcb02->save.rip = vmcb12->save.rip;
573
574         /* These bits will be set properly on the first execution when new_vmcb12 is true */
575         if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
576                 vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
577                 svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
578                 vmcb_mark_dirty(vmcb02, VMCB_DR);
579         }
580
581         if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
582                 /*
583                  * Reserved bits of DEBUGCTL are ignored.  Be consistent with
584                  * svm_set_msr's definition of reserved bits.
585                  */
586                 svm_copy_lbrs(vmcb02, vmcb12);
587                 vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
588                 svm_update_lbrv(&svm->vcpu);
589
590         } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
591                 svm_copy_lbrs(vmcb02, vmcb01);
592         }
593 }
594
595 static inline bool is_evtinj_soft(u32 evtinj)
596 {
597         u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
598         u8 vector = evtinj & SVM_EVTINJ_VEC_MASK;
599
600         if (!(evtinj & SVM_EVTINJ_VALID))
601                 return false;
602
603         if (type == SVM_EVTINJ_TYPE_SOFT)
604                 return true;
605
606         return type == SVM_EVTINJ_TYPE_EXEPT && kvm_exception_is_soft(vector);
607 }
608
609 static bool is_evtinj_nmi(u32 evtinj)
610 {
611         u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
612
613         if (!(evtinj & SVM_EVTINJ_VALID))
614                 return false;
615
616         return type == SVM_EVTINJ_TYPE_NMI;
617 }
618
619 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
620                                           unsigned long vmcb12_rip,
621                                           unsigned long vmcb12_csbase)
622 {
623         u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
624         u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
625
626         struct kvm_vcpu *vcpu = &svm->vcpu;
627         struct vmcb *vmcb01 = svm->vmcb01.ptr;
628         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
629         u32 pause_count12;
630         u32 pause_thresh12;
631
632         /*
633          * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
634          * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
635          */
636
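        /*
         * int_ctl is assembled from both VMCBs below: V_INTR_MASKING always
         * comes from vmcb01, the TPR/V_IRQ injection bits come from vmcb12,
         * and the V_GIF bits come from vmcb12 when nested vGIF is in use,
         * from vmcb01 otherwise.
         */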
637         if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
638                 int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
639         else
640                 int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
641
642         /* Copied from vmcb01.  msrpm_base can be overwritten later.  */
643         vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
644         vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
645         vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
646
647         /* Done at vmrun: asid.  */
648
649         /* Also overwritten later if necessary.  */
650         vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
651
652         /* nested_cr3.  */
653         if (nested_npt_enabled(svm))
654                 nested_svm_init_mmu_context(vcpu);
655
656         vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
657                         vcpu->arch.l1_tsc_offset,
658                         svm->nested.ctl.tsc_offset,
659                         svm->tsc_ratio_msr);
660
661         vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
662
663         if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
664                 WARN_ON(!svm->tsc_scaling_enabled);
665                 nested_svm_update_tsc_ratio_msr(vcpu);
666         }
667
668         vmcb02->control.int_ctl             =
669                 (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
670                 (vmcb01->control.int_ctl & int_ctl_vmcb01_bits);
671
672         vmcb02->control.int_vector          = svm->nested.ctl.int_vector;
673         vmcb02->control.int_state           = svm->nested.ctl.int_state;
674         vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
675         vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
676
677         /*
678          * next_rip is consumed on VMRUN as the return address pushed on the
679          * stack for injected soft exceptions/interrupts.  If nrips is exposed
680          * to L1, take it verbatim from vmcb12.  If nrips is supported in
681          * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
682          * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
683          * prior to injecting the event).
684          */
685         if (svm->nrips_enabled)
686                 vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
687         else if (boot_cpu_has(X86_FEATURE_NRIPS))
688                 vmcb02->control.next_rip    = vmcb12_rip;
689
690         svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
691         if (is_evtinj_soft(vmcb02->control.event_inj)) {
692                 svm->soft_int_injected = true;
693                 svm->soft_int_csbase = vmcb12_csbase;
694                 svm->soft_int_old_rip = vmcb12_rip;
695                 if (svm->nrips_enabled)
696                         svm->soft_int_next_rip = svm->nested.ctl.next_rip;
697                 else
698                         svm->soft_int_next_rip = vmcb12_rip;
699         }
700
701         vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
702                                               LBR_CTL_ENABLE_MASK;
703         if (svm->lbrv_enabled)
704                 vmcb02->control.virt_ext  |=
705                         (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
706
707         if (!nested_vmcb_needs_vls_intercept(svm))
708                 vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
709
710         pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
711         pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
712         if (kvm_pause_in_guest(svm->vcpu.kvm)) {
713                 /* use guest values since host doesn't intercept PAUSE */
714                 vmcb02->control.pause_filter_count = pause_count12;
715                 vmcb02->control.pause_filter_thresh = pause_thresh12;
716
717         } else {
718                 /* start from host values otherwise */
719                 vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
720                 vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
721
722                 /* ... but ensure filtering is disabled if so requested.  */
723                 if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
724                         if (!pause_count12)
725                                 vmcb02->control.pause_filter_count = 0;
726                         if (!pause_thresh12)
727                                 vmcb02->control.pause_filter_thresh = 0;
728                 }
729         }
730
731         nested_svm_transition_tlb_flush(vcpu);
732
733         /* Enter Guest-Mode */
734         enter_guest_mode(vcpu);
735
736         /*
737          * Merge guest and host intercepts - must be called with vcpu in
738          * guest-mode to take effect.
739          */
740         recalc_intercepts(svm);
741 }
742
743 static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
744 {
745         /*
746          * Some VMCB state is shared between L1 and L2 and thus has to be
747          * moved at the time of nested vmrun and vmexit.
748          *
749          * VMLOAD/VMSAVE state would also belong in this category, but KVM
750          * always performs VMLOAD and VMSAVE from the VMCB01.
751          */
752         to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
753 }
754
755 int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
756                          struct vmcb *vmcb12, bool from_vmrun)
757 {
758         struct vcpu_svm *svm = to_svm(vcpu);
759         int ret;
760
761         trace_kvm_nested_vmenter(svm->vmcb->save.rip,
762                                  vmcb12_gpa,
763                                  vmcb12->save.rip,
764                                  vmcb12->control.int_ctl,
765                                  vmcb12->control.event_inj,
766                                  vmcb12->control.nested_ctl,
767                                  vmcb12->control.nested_cr3,
768                                  vmcb12->save.cr3,
769                                  KVM_ISA_SVM);
770
771         trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
772                                     vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
773                                     vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
774                                     vmcb12->control.intercepts[INTERCEPT_WORD3],
775                                     vmcb12->control.intercepts[INTERCEPT_WORD4],
776                                     vmcb12->control.intercepts[INTERCEPT_WORD5]);
777
778
779         svm->nested.vmcb12_gpa = vmcb12_gpa;
780
781         WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
782
783         nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
784
785         svm_switch_vmcb(svm, &svm->nested.vmcb02);
786         nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
787         nested_vmcb02_prepare_save(svm, vmcb12);
788
789         ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
790                                   nested_npt_enabled(svm), from_vmrun);
791         if (ret)
792                 return ret;
793
794         if (!from_vmrun)
795                 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
796
797         svm_set_gif(svm, true);
798
799         if (kvm_vcpu_apicv_active(vcpu))
800                 kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
801
802         return 0;
803 }
804
805 int nested_svm_vmrun(struct kvm_vcpu *vcpu)
806 {
807         struct vcpu_svm *svm = to_svm(vcpu);
808         int ret;
809         struct vmcb *vmcb12;
810         struct kvm_host_map map;
811         u64 vmcb12_gpa;
812         struct vmcb *vmcb01 = svm->vmcb01.ptr;
813
814         if (!svm->nested.hsave_msr) {
815                 kvm_inject_gp(vcpu, 0);
816                 return 1;
817         }
818
819         if (is_smm(vcpu)) {
820                 kvm_queue_exception(vcpu, UD_VECTOR);
821                 return 1;
822         }
823
824         vmcb12_gpa = svm->vmcb->save.rax;
825         ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
826         if (ret == -EINVAL) {
827                 kvm_inject_gp(vcpu, 0);
828                 return 1;
829         } else if (ret) {
830                 return kvm_skip_emulated_instruction(vcpu);
831         }
832
833         ret = kvm_skip_emulated_instruction(vcpu);
834
835         vmcb12 = map.hva;
836
837         if (WARN_ON_ONCE(!svm->nested.initialized))
838                 return -EINVAL;
839
840         nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
841         nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
842
843         if (!nested_vmcb_check_save(vcpu) ||
844             !nested_vmcb_check_controls(vcpu)) {
845                 vmcb12->control.exit_code    = SVM_EXIT_ERR;
846                 vmcb12->control.exit_code_hi = 0;
847                 vmcb12->control.exit_info_1  = 0;
848                 vmcb12->control.exit_info_2  = 0;
849                 goto out;
850         }
851
852         /*
853          * Since vmcb01 is not in use, we can use it to store some of the L1
854          * state.
855          */
856         vmcb01->save.efer   = vcpu->arch.efer;
857         vmcb01->save.cr0    = kvm_read_cr0(vcpu);
858         vmcb01->save.cr4    = vcpu->arch.cr4;
859         vmcb01->save.rflags = kvm_get_rflags(vcpu);
860         vmcb01->save.rip    = kvm_rip_read(vcpu);
861
862         if (!npt_enabled)
863                 vmcb01->save.cr3 = kvm_read_cr3(vcpu);
864
865         svm->nested.nested_run_pending = 1;
866
867         if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
868                 goto out_exit_err;
869
870         if (nested_svm_vmrun_msrpm(svm))
871                 goto out;
872
873 out_exit_err:
874         svm->nested.nested_run_pending = 0;
875         svm->nmi_l1_to_l2 = false;
876         svm->soft_int_injected = false;
877
878         svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
879         svm->vmcb->control.exit_code_hi = 0;
880         svm->vmcb->control.exit_info_1  = 0;
881         svm->vmcb->control.exit_info_2  = 0;
882
883         nested_svm_vmexit(svm);
884
885 out:
886         kvm_vcpu_unmap(vcpu, &map, true);
887
888         return ret;
889 }
890
891 /* Copy state save area fields which are handled by VMRUN */
892 void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
893                           struct vmcb_save_area *from_save)
894 {
895         to_save->es = from_save->es;
896         to_save->cs = from_save->cs;
897         to_save->ss = from_save->ss;
898         to_save->ds = from_save->ds;
899         to_save->gdtr = from_save->gdtr;
900         to_save->idtr = from_save->idtr;
901         to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
902         to_save->efer = from_save->efer;
903         to_save->cr0 = from_save->cr0;
904         to_save->cr3 = from_save->cr3;
905         to_save->cr4 = from_save->cr4;
906         to_save->rax = from_save->rax;
907         to_save->rsp = from_save->rsp;
908         to_save->rip = from_save->rip;
909         to_save->cpl = 0;
910 }
911
912 void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
913 {
914         to_vmcb->save.fs = from_vmcb->save.fs;
915         to_vmcb->save.gs = from_vmcb->save.gs;
916         to_vmcb->save.tr = from_vmcb->save.tr;
917         to_vmcb->save.ldtr = from_vmcb->save.ldtr;
918         to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
919         to_vmcb->save.star = from_vmcb->save.star;
920         to_vmcb->save.lstar = from_vmcb->save.lstar;
921         to_vmcb->save.cstar = from_vmcb->save.cstar;
922         to_vmcb->save.sfmask = from_vmcb->save.sfmask;
923         to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
924         to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
925         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
926 }
927
928 int nested_svm_vmexit(struct vcpu_svm *svm)
929 {
930         struct kvm_vcpu *vcpu = &svm->vcpu;
931         struct vmcb *vmcb01 = svm->vmcb01.ptr;
932         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
933         struct vmcb *vmcb12;
934         struct kvm_host_map map;
935         int rc;
936
937         rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
938         if (rc) {
939                 if (rc == -EINVAL)
940                         kvm_inject_gp(vcpu, 0);
941                 return 1;
942         }
943
944         vmcb12 = map.hva;
945
946         /* Exit Guest-Mode */
947         leave_guest_mode(vcpu);
948         svm->nested.vmcb12_gpa = 0;
949         WARN_ON_ONCE(svm->nested.nested_run_pending);
950
951         kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
952
953         /* in case we halted in L2 */
954         svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
955
956         /* Give the current vmcb to the guest */
957
958         vmcb12->save.es     = vmcb02->save.es;
959         vmcb12->save.cs     = vmcb02->save.cs;
960         vmcb12->save.ss     = vmcb02->save.ss;
961         vmcb12->save.ds     = vmcb02->save.ds;
962         vmcb12->save.gdtr   = vmcb02->save.gdtr;
963         vmcb12->save.idtr   = vmcb02->save.idtr;
964         vmcb12->save.efer   = svm->vcpu.arch.efer;
965         vmcb12->save.cr0    = kvm_read_cr0(vcpu);
966         vmcb12->save.cr3    = kvm_read_cr3(vcpu);
967         vmcb12->save.cr2    = vmcb02->save.cr2;
968         vmcb12->save.cr4    = svm->vcpu.arch.cr4;
969         vmcb12->save.rflags = kvm_get_rflags(vcpu);
970         vmcb12->save.rip    = kvm_rip_read(vcpu);
971         vmcb12->save.rsp    = kvm_rsp_read(vcpu);
972         vmcb12->save.rax    = kvm_rax_read(vcpu);
973         vmcb12->save.dr7    = vmcb02->save.dr7;
974         vmcb12->save.dr6    = svm->vcpu.arch.dr6;
975         vmcb12->save.cpl    = vmcb02->save.cpl;
976
977         vmcb12->control.int_state         = vmcb02->control.int_state;
978         vmcb12->control.exit_code         = vmcb02->control.exit_code;
979         vmcb12->control.exit_code_hi      = vmcb02->control.exit_code_hi;
980         vmcb12->control.exit_info_1       = vmcb02->control.exit_info_1;
981         vmcb12->control.exit_info_2       = vmcb02->control.exit_info_2;
982
983         if (vmcb12->control.exit_code != SVM_EXIT_ERR)
984                 nested_save_pending_event_to_vmcb12(svm, vmcb12);
985
986         if (svm->nrips_enabled)
987                 vmcb12->control.next_rip  = vmcb02->control.next_rip;
988
989         vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
990         vmcb12->control.tlb_ctl           = svm->nested.ctl.tlb_ctl;
991         vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
992         vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
993
994         if (!kvm_pause_in_guest(vcpu->kvm)) {
995                 vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
996                 vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
997
998         }
999
1000         nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
1001
1002         svm_switch_vmcb(svm, &svm->vmcb01);
1003
1004         if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
1005                 svm_copy_lbrs(vmcb12, vmcb02);
1006                 svm_update_lbrv(vcpu);
1007         } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
1008                 svm_copy_lbrs(vmcb01, vmcb02);
1009                 svm_update_lbrv(vcpu);
1010         }
1011
1012         /*
1013          * On vmexit the GIF is set to false and
1014          * no event can be injected in L1.
1015          */
1016         svm_set_gif(svm, false);
1017         vmcb01->control.exit_int_info = 0;
1018
1019         svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
1020         if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
1021                 vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
1022                 vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
1023         }
1024
1025         if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
1026                 WARN_ON(!svm->tsc_scaling_enabled);
1027                 vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
1028                 __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
1029         }
1030
1031         svm->nested.ctl.nested_cr3 = 0;
1032
1033         /*
1034          * Restore processor state that had been saved in vmcb01
1035          */
1036         kvm_set_rflags(vcpu, vmcb01->save.rflags);
1037         svm_set_efer(vcpu, vmcb01->save.efer);
1038         svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
1039         svm_set_cr4(vcpu, vmcb01->save.cr4);
1040         kvm_rax_write(vcpu, vmcb01->save.rax);
1041         kvm_rsp_write(vcpu, vmcb01->save.rsp);
1042         kvm_rip_write(vcpu, vmcb01->save.rip);
1043
1044         svm->vcpu.arch.dr7 = DR7_FIXED_1;
1045         kvm_update_dr7(&svm->vcpu);
1046
1047         trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
1048                                        vmcb12->control.exit_info_1,
1049                                        vmcb12->control.exit_info_2,
1050                                        vmcb12->control.exit_int_info,
1051                                        vmcb12->control.exit_int_info_err,
1052                                        KVM_ISA_SVM);
1053
1054         kvm_vcpu_unmap(vcpu, &map, true);
1055
1056         nested_svm_transition_tlb_flush(vcpu);
1057
1058         nested_svm_uninit_mmu_context(vcpu);
1059
1060         rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
1061         if (rc)
1062                 return 1;
1063
1064         /*
1065          * Drop what we picked up for L2 via svm_complete_interrupts() so it
1066          * doesn't end up in L1.
1067          */
1068         svm->vcpu.arch.nmi_injected = false;
1069         kvm_clear_exception_queue(vcpu);
1070         kvm_clear_interrupt_queue(vcpu);
1071
1072         /*
1073          * If we are here following the completion of a VMRUN that
1074          * is being single-stepped, queue the pending #DB intercept
1075          * right now so that it can be accounted for before we execute
1076          * L1's next instruction.
1077          */
1078         if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
1079                 kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
1080
1081         /*
1082          * Un-inhibit the AVIC right away, so that other vCPUs can start
1083          * to benefit from it immediately.
1084          */
1085         if (kvm_apicv_activated(vcpu->kvm))
1086                 kvm_vcpu_update_apicv(vcpu);
1087
1088         return 0;
1089 }
1090
1091 static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
1092 {
1093         struct vcpu_svm *svm = to_svm(vcpu);
1094
1095         if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
1096                 return;
1097
1098         kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
1099         nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
1100 }
1101
1102 int svm_allocate_nested(struct vcpu_svm *svm)
1103 {
1104         struct page *vmcb02_page;
1105
1106         if (svm->nested.initialized)
1107                 return 0;
1108
1109         vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1110         if (!vmcb02_page)
1111                 return -ENOMEM;
1112         svm->nested.vmcb02.ptr = page_address(vmcb02_page);
1113         svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
1114
1115         svm->nested.msrpm = svm_vcpu_alloc_msrpm();
1116         if (!svm->nested.msrpm)
1117                 goto err_free_vmcb02;
1118         svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
1119
1120         svm->nested.initialized = true;
1121         return 0;
1122
1123 err_free_vmcb02:
1124         __free_page(vmcb02_page);
1125         return -ENOMEM;
1126 }
1127
1128 void svm_free_nested(struct vcpu_svm *svm)
1129 {
1130         if (!svm->nested.initialized)
1131                 return;
1132
1133         if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
1134                 svm_switch_vmcb(svm, &svm->vmcb01);
1135
1136         svm_vcpu_free_msrpm(svm->nested.msrpm);
1137         svm->nested.msrpm = NULL;
1138
1139         __free_page(virt_to_page(svm->nested.vmcb02.ptr));
1140         svm->nested.vmcb02.ptr = NULL;
1141
1142         /*
1143          * When last_vmcb12_gpa matches the current vmcb12 gpa,
1144          * some vmcb12 fields are not loaded if they are marked clean
1145          * in the vmcb12, since in this case they are up to date already.
1146          *
1147          * When the vmcb02 is freed, this optimization becomes invalid.
1148          */
1149         svm->nested.last_vmcb12_gpa = INVALID_GPA;
1150
1151         svm->nested.initialized = false;
1152 }
1153
1154 void svm_leave_nested(struct kvm_vcpu *vcpu)
1155 {
1156         struct vcpu_svm *svm = to_svm(vcpu);
1157
1158         if (is_guest_mode(vcpu)) {
1159                 svm->nested.nested_run_pending = 0;
1160                 svm->nested.vmcb12_gpa = INVALID_GPA;
1161
1162                 leave_guest_mode(vcpu);
1163
1164                 svm_switch_vmcb(svm, &svm->vmcb01);
1165
1166                 nested_svm_uninit_mmu_context(vcpu);
1167                 vmcb_mark_all_dirty(svm->vmcb);
1168         }
1169
1170         kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
1171 }
1172
1173 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1174 {
1175         u32 offset, msr, value;
1176         int write, mask;
1177
1178         if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
1179                 return NESTED_EXIT_HOST;
1180
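        /*
         * Each MSR has two adjacent bits in the MSR permission bitmap: the
         * lower bit intercepts reads, the upper bit intercepts writes.
         * svm_msrpm_offset() returns the offset of the 32-bit chunk holding
         * those bits, hence the 2 * (msr & 0xf) + write bit index below.
         */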
1181         msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1182         offset = svm_msrpm_offset(msr);
1183         write  = svm->vmcb->control.exit_info_1 & 1;
1184         mask   = 1 << ((2 * (msr & 0xf)) + write);
1185
1186         if (offset == MSR_INVALID)
1187                 return NESTED_EXIT_DONE;
1188
1189         /* Offset is in 32 bit units but we need it in 8 bit units */
1190         offset *= 4;
1191
1192         if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
1193                 return NESTED_EXIT_DONE;
1194
1195         return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1196 }
1197
1198 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
1199 {
1200         unsigned port, size, iopm_len;
1201         u16 val, mask;
1202         u8 start_bit;
1203         u64 gpa;
1204
1205         if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
1206                 return NESTED_EXIT_HOST;
1207
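        /*
         * The I/O permission bitmap has one bit per port; an access of @size
         * bytes covers @size consecutive bits starting at bit (port % 8) of
         * byte (port / 8), so it may straddle a byte boundary (iopm_len == 2).
         */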
1208         port = svm->vmcb->control.exit_info_1 >> 16;
1209         size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
1210                 SVM_IOIO_SIZE_SHIFT;
1211         gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
1212         start_bit = port % 8;
1213         iopm_len = (start_bit + size > 8) ? 2 : 1;
1214         mask = (0xf >> (4 - size)) << start_bit;
1215         val = 0;
1216
1217         if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
1218                 return NESTED_EXIT_DONE;
1219
1220         return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1221 }
1222
1223 static int nested_svm_intercept(struct vcpu_svm *svm)
1224 {
1225         u32 exit_code = svm->vmcb->control.exit_code;
1226         int vmexit = NESTED_EXIT_HOST;
1227
1228         switch (exit_code) {
1229         case SVM_EXIT_MSR:
1230                 vmexit = nested_svm_exit_handled_msr(svm);
1231                 break;
1232         case SVM_EXIT_IOIO:
1233                 vmexit = nested_svm_intercept_ioio(svm);
1234                 break;
1235         case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
1236                 if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1237                         vmexit = NESTED_EXIT_DONE;
1238                 break;
1239         }
1240         case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
1241                 if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1242                         vmexit = NESTED_EXIT_DONE;
1243                 break;
1244         }
1245         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1246                 /*
1247                  * Host-intercepted exceptions have been checked already in
1248                  * nested_svm_exit_special.  There is nothing to do here,
1249                  * the vmexit is injected by svm_check_nested_events.
1250                  */
1251                 vmexit = NESTED_EXIT_DONE;
1252                 break;
1253         }
1254         case SVM_EXIT_ERR: {
1255                 vmexit = NESTED_EXIT_DONE;
1256                 break;
1257         }
1258         default: {
1259                 if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1260                         vmexit = NESTED_EXIT_DONE;
1261         }
1262         }
1263
1264         return vmexit;
1265 }
1266
1267 int nested_svm_exit_handled(struct vcpu_svm *svm)
1268 {
1269         int vmexit;
1270
1271         vmexit = nested_svm_intercept(svm);
1272
1273         if (vmexit == NESTED_EXIT_DONE)
1274                 nested_svm_vmexit(svm);
1275
1276         return vmexit;
1277 }
1278
1279 int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
1280 {
1281         if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
1282                 kvm_queue_exception(vcpu, UD_VECTOR);
1283                 return 1;
1284         }
1285
1286         if (to_svm(vcpu)->vmcb->save.cpl) {
1287                 kvm_inject_gp(vcpu, 0);
1288                 return 1;
1289         }
1290
1291         return 0;
1292 }
1293
1294 static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
1295                                            u32 error_code)
1296 {
1297         struct vcpu_svm *svm = to_svm(vcpu);
1298
1299         return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
1300 }
1301
1302 static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
1303 {
1304         struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
1305         struct vcpu_svm *svm = to_svm(vcpu);
1306         struct vmcb *vmcb = svm->vmcb;
1307
1308         vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
1309         vmcb->control.exit_code_hi = 0;
1310
1311         if (ex->has_error_code)
1312                 vmcb->control.exit_info_1 = ex->error_code;
1313
1314         /*
1315          * EXITINFO2 is undefined for all exception intercepts other
1316          * than #PF.
1317          */
1318         if (ex->vector == PF_VECTOR) {
1319                 if (ex->has_payload)
1320                         vmcb->control.exit_info_2 = ex->payload;
1321                 else
1322                         vmcb->control.exit_info_2 = vcpu->arch.cr2;
1323         } else if (ex->vector == DB_VECTOR) {
1324                 /* See kvm_check_and_inject_events().  */
1325                 kvm_deliver_exception_payload(vcpu, ex);
1326
1327                 if (vcpu->arch.dr7 & DR7_GD) {
1328                         vcpu->arch.dr7 &= ~DR7_GD;
1329                         kvm_update_dr7(vcpu);
1330                 }
1331         } else {
1332                 WARN_ON(ex->has_payload);
1333         }
1334
1335         nested_svm_vmexit(svm);
1336 }
1337
1338 static inline bool nested_exit_on_init(struct vcpu_svm *svm)
1339 {
1340         return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
1341 }
1342
1343 static int svm_check_nested_events(struct kvm_vcpu *vcpu)
1344 {
1345         struct kvm_lapic *apic = vcpu->arch.apic;
1346         struct vcpu_svm *svm = to_svm(vcpu);
1347         /*
1348          * Only a pending nested run blocks a pending exception.  If there is a
1349          * previously injected event, the pending exception occurred while said
1350          * event was being delivered and thus needs to be handled.
1351          */
1352         bool block_nested_exceptions = svm->nested.nested_run_pending;
1353         /*
1354          * New events (not exceptions) are only recognized at instruction
1355          * boundaries.  If an event needs reinjection, then KVM is handling a
1356          * VM-Exit that occurred _during_ instruction execution; new events are
1357          * blocked until the instruction completes.
1358          */
1359         bool block_nested_events = block_nested_exceptions ||
1360                                    kvm_event_needs_reinjection(vcpu);
1361
1362         if (lapic_in_kernel(vcpu) &&
1363             test_bit(KVM_APIC_INIT, &apic->pending_events)) {
1364                 if (block_nested_events)
1365                         return -EBUSY;
1366                 if (!nested_exit_on_init(svm))
1367                         return 0;
1368                 nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
1369                 return 0;
1370         }
1371
1372         if (vcpu->arch.exception_vmexit.pending) {
1373                 if (block_nested_exceptions)
1374                         return -EBUSY;
1375                 nested_svm_inject_exception_vmexit(vcpu);
1376                 return 0;
1377         }
1378
1379         if (vcpu->arch.exception.pending) {
1380                 if (block_nested_exceptions)
1381                         return -EBUSY;
1382                 return 0;
1383         }
1384
1385         if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
1386                 if (block_nested_events)
1387                         return -EBUSY;
1388                 if (!nested_exit_on_smi(svm))
1389                         return 0;
1390                 nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
1391                 return 0;
1392         }
1393
1394         if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
1395                 if (block_nested_events)
1396                         return -EBUSY;
1397                 if (!nested_exit_on_nmi(svm))
1398                         return 0;
1399                 nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
1400                 return 0;
1401         }
1402
1403         if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
1404                 if (block_nested_events)
1405                         return -EBUSY;
1406                 if (!nested_exit_on_intr(svm))
1407                         return 0;
1408                 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1409                 nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
1410                 return 0;
1411         }
1412
1413         return 0;
1414 }
1415
1416 int nested_svm_exit_special(struct vcpu_svm *svm)
1417 {
1418         u32 exit_code = svm->vmcb->control.exit_code;
1419
1420         switch (exit_code) {
1421         case SVM_EXIT_INTR:
1422         case SVM_EXIT_NMI:
1423         case SVM_EXIT_NPF:
1424                 return NESTED_EXIT_HOST;
1425         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1426                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1427
1428                 if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
1429                     excp_bits)
1430                         return NESTED_EXIT_HOST;
1431                 else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
1432                          svm->vcpu.arch.apf.host_apf_flags)
1433                         /* Trap async PF even if not shadowing */
1434                         return NESTED_EXIT_HOST;
1435                 break;
1436         }
1437         default:
1438                 break;
1439         }
1440
1441         return NESTED_EXIT_CONTINUE;
1442 }
1443
1444 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
1445 {
1446         struct vcpu_svm *svm = to_svm(vcpu);
1447
1448         vcpu->arch.tsc_scaling_ratio =
1449                 kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
1450                                                svm->tsc_ratio_msr);
1451         __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
1452 }
1453
1454 /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
1455 static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
1456                                               struct vmcb_ctrl_area_cached *from)
1457 {
1458         unsigned int i;
1459
1460         memset(dst, 0, sizeof(struct vmcb_control_area));
1461
1462         for (i = 0; i < MAX_INTERCEPT; i++)
1463                 dst->intercepts[i] = from->intercepts[i];
1464
1465         dst->iopm_base_pa         = from->iopm_base_pa;
1466         dst->msrpm_base_pa        = from->msrpm_base_pa;
1467         dst->tsc_offset           = from->tsc_offset;
1468         dst->asid                 = from->asid;
1469         dst->tlb_ctl              = from->tlb_ctl;
1470         dst->int_ctl              = from->int_ctl;
1471         dst->int_vector           = from->int_vector;
1472         dst->int_state            = from->int_state;
1473         dst->exit_code            = from->exit_code;
1474         dst->exit_code_hi         = from->exit_code_hi;
1475         dst->exit_info_1          = from->exit_info_1;
1476         dst->exit_info_2          = from->exit_info_2;
1477         dst->exit_int_info        = from->exit_int_info;
1478         dst->exit_int_info_err    = from->exit_int_info_err;
1479         dst->nested_ctl           = from->nested_ctl;
1480         dst->event_inj            = from->event_inj;
1481         dst->event_inj_err        = from->event_inj_err;
1482         dst->next_rip             = from->next_rip;
1483         dst->nested_cr3           = from->nested_cr3;
1484         dst->virt_ext             = from->virt_ext;
1485         dst->pause_filter_count   = from->pause_filter_count;
1486         dst->pause_filter_thresh  = from->pause_filter_thresh;
1487         /* 'clean' and 'hv_enlightenments' are not changed by KVM */
1488 }
1489
1490 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
1491                                 struct kvm_nested_state __user *user_kvm_nested_state,
1492                                 u32 user_data_size)
1493 {
1494         struct vcpu_svm *svm;
1495         struct vmcb_control_area *ctl;
1496         unsigned long r;
1497         struct kvm_nested_state kvm_state = {
1498                 .flags = 0,
1499                 .format = KVM_STATE_NESTED_FORMAT_SVM,
1500                 .size = sizeof(kvm_state),
1501         };
1502         struct vmcb __user *user_vmcb = (struct vmcb __user *)
1503                 &user_kvm_nested_state->data.svm[0];
1504
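        /*
         * A NULL vCPU is a query for the maximum buffer size needed to hold
         * the nested state: the header plus one full VMCB.
         */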
1505         if (!vcpu)
1506                 return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
1507
1508         svm = to_svm(vcpu);
1509
1510         if (user_data_size < kvm_state.size)
1511                 goto out;
1512
1513         /* First fill in the header and copy it out.  */
1514         if (is_guest_mode(vcpu)) {
1515                 kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
1516                 kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
1517                 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
1518
1519                 if (svm->nested.nested_run_pending)
1520                         kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
1521         }
1522
1523         if (gif_set(svm))
1524                 kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
1525
1526         if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
1527                 return -EFAULT;
1528
1529         if (!is_guest_mode(vcpu))
1530                 goto out;
1531
1532         /*
1533          * Copy over the full size of the VMCB rather than just the size
1534          * of the structs.
1535          */
1536         if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
1537                 return -EFAULT;
1538
1539         ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
1540         if (!ctl)
1541                 return -ENOMEM;
1542
1543         nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
1544         r = copy_to_user(&user_vmcb->control, ctl,
1545                          sizeof(user_vmcb->control));
1546         kfree(ctl);
1547         if (r)
1548                 return -EFAULT;
1549
1550         if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
1551                          sizeof(user_vmcb->save)))
1552                 return -EFAULT;
1553 out:
1554         return kvm_state.size;
1555 }
1556
1557 static int svm_set_nested_state(struct kvm_vcpu *vcpu,
1558                                 struct kvm_nested_state __user *user_kvm_nested_state,
1559                                 struct kvm_nested_state *kvm_state)
1560 {
1561         struct vcpu_svm *svm = to_svm(vcpu);
1562         struct vmcb __user *user_vmcb = (struct vmcb __user *)
1563                 &user_kvm_nested_state->data.svm[0];
1564         struct vmcb_control_area *ctl;
1565         struct vmcb_save_area *save;
1566         struct vmcb_save_area_cached save_cached;
1567         struct vmcb_ctrl_area_cached ctl_cached;
1568         unsigned long cr0;
1569         int ret;
1570
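        /* The control and save areas must fit in the fixed-size VMCB blob. */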
1571         BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
1572                      KVM_STATE_NESTED_SVM_VMCB_SIZE);
1573
1574         if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
1575                 return -EINVAL;
1576
1577         if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
1578                                  KVM_STATE_NESTED_RUN_PENDING |
1579                                  KVM_STATE_NESTED_GIF_SET))
1580                 return -EINVAL;
1581
1582         /*
1583          * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
1584          * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
1585          */
1586         if (!(vcpu->arch.efer & EFER_SVME)) {
1587                 /* GIF=1 and no guest mode are required if SVME=0.  */
1588                 if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
1589                         return -EINVAL;
1590         }
1591
1592         /* SMM temporarily disables SVM, so we cannot be in guest mode.  */
1593         if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
1594                 return -EINVAL;
1595
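        /*
         * If userspace isn't restoring guest mode, drop any existing nested
         * state and simply sync GIF.
         */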
1596         if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
1597                 svm_leave_nested(vcpu);
1598                 svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1599                 return 0;
1600         }
1601
1602         if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
1603                 return -EINVAL;
1604         if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
1605                 return -EINVAL;
1606
1607         ret  = -ENOMEM;
1608         ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL_ACCOUNT);
1609         save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
1610         if (!ctl || !save)
1611                 goto out_free;
1612
1613         ret = -EFAULT;
1614         if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
1615                 goto out_free;
1616         if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
1617                 goto out_free;
1618
1619         ret = -EINVAL;
1620         __nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
1621         if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
1622                 goto out_free;
1623
1624         /*
1625          * Processor state contains L2 state.  Check that it is
1626          * valid for guest mode (see nested_vmcb_check_save).
1627          */
1628         cr0 = kvm_read_cr0(vcpu);
1629         if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
1630                 goto out_free;
1631
1632         /*
1633          * Validate host state saved from before VMRUN (see
1634          * nested_svm_check_permissions).
1635          */
1636         __nested_copy_vmcb_save_to_cache(&save_cached, save);
1637         if (!(save->cr0 & X86_CR0_PG) ||
1638             !(save->cr0 & X86_CR0_PE) ||
1639             (save->rflags & X86_EFLAGS_VM) ||
1640             !__nested_vmcb_check_save(vcpu, &save_cached))
1641                 goto out_free;
1642
1644         /*
1645          * All checks done, we can enter guest mode. Userspace provides
1646          * vmcb12.control, which will be combined with L1's controls and stored
1647          * into vmcb02, and the L1 save state, which we store in vmcb01.
1648          * If needed, L2 registers are moved from the current VMCB to vmcb02.
1649          */
1650
1651         if (is_guest_mode(vcpu))
1652                 svm_leave_nested(vcpu);
1653         else
1654                 svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
1655
1656         svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1657
1658         svm->nested.nested_run_pending =
1659                 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
1660
1661         svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
1662
1663         svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
1664         nested_copy_vmcb_control_to_cache(svm, ctl);
1665
1666         svm_switch_vmcb(svm, &svm->nested.vmcb02);
1667         nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
1668
1669         /*
1670          * While the nested guest CR3 is already checked and set by
1671          * KVM_SET_SREGS, it was set before the nested state was loaded, so
1672          * the MMU might not have been initialized correctly.
1673          * Set it again here to fix this.
1674          */
1675
1676         ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
1677                                   nested_npt_enabled(svm), false);
1678         if (WARN_ON_ONCE(ret))
1679                 goto out_free;
1680
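        /*
         * Force the merged MSR permission bitmap to be rebuilt on the next
         * nested VM-Enter; the cached version no longer matches the new state.
         */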
1681         svm->nested.force_msr_bitmap_recalc = true;
1682
1683         kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
1684         ret = 0;
1685 out_free:
1686         kfree(save);
1687         kfree(ctl);
1688
1689         return ret;
1690 }
1691
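/*
 * Finish loading nested state that depends on guest memory, deferred via
 * KVM_REQ_GET_NESTED_STATE_PAGES: the guest's PDPTRs when NPT is not in use
 * (PAE paging), and the merged MSR permission bitmap.
 */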
1692 static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
1693 {
1694         struct vcpu_svm *svm = to_svm(vcpu);
1695
1696         if (WARN_ON(!is_guest_mode(vcpu)))
1697                 return true;
1698
1699         if (!vcpu->arch.pdptrs_from_userspace &&
1700             !nested_npt_enabled(svm) && is_pae_paging(vcpu))
1701                 /*
1702                  * Reload the guest's PDPTRs: after a migration, the guest
1703                  * CR3 might be restored prior to setting the nested state,
1704                  * which can lead to loading the wrong PDPTRs.
1705                  */
1706                 if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
1707                         return false;
1708
1709         if (!nested_svm_vmrun_msrpm(svm)) {
1710                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1711                 vcpu->run->internal.suberror =
1712                         KVM_INTERNAL_ERROR_EMULATION;
1713                 vcpu->run->internal.ndata = 0;
1714                 return false;
1715         }
1716
1717         return true;
1718 }
1719
1720 struct kvm_x86_nested_ops svm_nested_ops = {
1721         .leave_nested = svm_leave_nested,
1722         .is_exception_vmexit = nested_svm_is_exception_vmexit,
1723         .check_events = svm_check_nested_events,
1724         .triple_fault = nested_svm_triple_fault,
1725         .get_nested_state_pages = svm_get_nested_state_pages,
1726         .get_state = svm_get_nested_state,
1727         .set_state = svm_set_nested_state,
1728 };