Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 16f9373..1519f03 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -189,6 +189,150 @@ module_param(ple_window_max, uint, 0444);
 
 extern const ulong vmx_return;
 
+static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
+static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
+static DEFINE_MUTEX(vmx_l1d_flush_mutex);
+
+/* Storage for pre-module-init parameter parsing */
+static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
+
+static const struct {
+       const char *option;
+       enum vmx_l1d_flush_state cmd;
+} vmentry_l1d_param[] = {
+       {"auto",        VMENTER_L1D_FLUSH_AUTO},
+       {"never",       VMENTER_L1D_FLUSH_NEVER},
+       {"cond",        VMENTER_L1D_FLUSH_COND},
+       {"always",      VMENTER_L1D_FLUSH_ALWAYS},
+};
+
+#define L1D_CACHE_ORDER 4
+static void *vmx_l1d_flush_pages;
+
+static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
+{
+       struct page *page;
+       unsigned int i;
+
+       if (!enable_ept) {
+               l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
+               return 0;
+       }
+
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+               u64 msr;
+
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+               if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+                       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+                       return 0;
+               }
+       }
+
+       /* If set to auto, use the default l1tf mitigation method */
+       if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
+               switch (l1tf_mitigation) {
+               case L1TF_MITIGATION_OFF:
+                       l1tf = VMENTER_L1D_FLUSH_NEVER;
+                       break;
+               case L1TF_MITIGATION_FLUSH_NOWARN:
+               case L1TF_MITIGATION_FLUSH:
+               case L1TF_MITIGATION_FLUSH_NOSMT:
+                       l1tf = VMENTER_L1D_FLUSH_COND;
+                       break;
+               case L1TF_MITIGATION_FULL:
+               case L1TF_MITIGATION_FULL_FORCE:
+                       l1tf = VMENTER_L1D_FLUSH_ALWAYS;
+                       break;
+               }
+       } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
+               l1tf = VMENTER_L1D_FLUSH_ALWAYS;
+       }
+
+       if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
+           !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+               page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+               if (!page)
+                       return -ENOMEM;
+               vmx_l1d_flush_pages = page_address(page);
+
+               /*
+                * Initialize each page with a different pattern in
+                * order to protect against KSM in the nested
+                * virtualization case.
+                */
+               for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
+                       memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
+                              PAGE_SIZE);
+               }
+       }
+
+       l1tf_vmx_mitigation = l1tf;
+
+       if (l1tf != VMENTER_L1D_FLUSH_NEVER)
+               static_branch_enable(&vmx_l1d_should_flush);
+       else
+               static_branch_disable(&vmx_l1d_should_flush);
+
+       if (l1tf == VMENTER_L1D_FLUSH_COND)
+               static_branch_enable(&vmx_l1d_flush_cond);
+       else
+               static_branch_disable(&vmx_l1d_flush_cond);
+       return 0;
+}
+
+static int vmentry_l1d_flush_parse(const char *s)
+{
+       unsigned int i;
+
+       if (s) {
+               for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
+                       if (sysfs_streq(s, vmentry_l1d_param[i].option))
+                               return vmentry_l1d_param[i].cmd;
+               }
+       }
+       return -EINVAL;
+}
+
+static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
+{
+       int l1tf, ret;
+
+       if (!boot_cpu_has(X86_BUG_L1TF))
+               return 0;
+
+       l1tf = vmentry_l1d_flush_parse(s);
+       if (l1tf < 0)
+               return l1tf;
+
+       /*
+        * Has vmx_init() run already? If not, this is the pre-init
+        * parameter parsing. In that case just store the value and let
+        * vmx_init() do the proper setup after enable_ept has been
+        * established.
+        */
+       if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
+               vmentry_l1d_flush_param = l1tf;
+               return 0;
+       }
+
+       mutex_lock(&vmx_l1d_flush_mutex);
+       ret = vmx_setup_l1d_flush(l1tf);
+       mutex_unlock(&vmx_l1d_flush_mutex);
+       return ret;
+}
+
+static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
+{
+       /*
+        * l1tf_vmx_mitigation can hold states (EPT_DISABLED, NOT_REQUIRED)
+        * beyond the option table; report those as "never" since no flush
+        * is performed, instead of indexing out of bounds.
+        */
+       if (l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))
+               return sprintf(s, "never\n");
+       return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
+}
+
+static const struct kernel_param_ops vmentry_l1d_flush_ops = {
+       .set = vmentry_l1d_flush_set,
+       .get = vmentry_l1d_flush_get,
+};
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
+
 enum ept_pointers_status {
        EPT_POINTERS_CHECK = 0,
        EPT_POINTERS_MATCH = 1,
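The .set handler registered through module_param_cb() can already run during
early command-line parsing, before vmx_init() has fixed up enable_ept, which
is why the value is stashed in vmentry_l1d_flush_param and only applied once
init has run. A minimal self-contained sketch of this two-phase parameter
pattern, with hypothetical names (mymod_*, apply_feature) standing in for the
VMX specifics:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static int mymod_param = -1;		/* -1 plays the role of "auto" */
static bool mymod_initialized;		/* set once module init has run */

static int apply_feature(int val)
{
	pr_info("mymod: applying mode %d\n", val);
	return 0;
}

static int mymod_param_set(const char *s, const struct kernel_param *kp)
{
	int val, ret;

	ret = kstrtoint(s, 0, &val);
	if (ret)
		return ret;

	mymod_param = val;
	if (!mymod_initialized)
		return 0;		/* pre-init: stash only, apply later */
	return apply_feature(val);
}

static const struct kernel_param_ops mymod_param_ops = {
	.set = mymod_param_set,
	.get = param_get_int,
};
module_param_cb(mode, &mymod_param_ops, &mymod_param, 0644);

static int __init mymod_init(void)
{
	mymod_initialized = true;
	return apply_feature(mymod_param);
}
module_init(mymod_init);
MODULE_LICENSE("GPL");

Runtime writes through /sys/module after load take the "initialized" path; in
the VMX code that is the mutex-protected vmx_setup_l1d_flush() call above.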
@@ -794,6 +938,11 @@ static inline int pi_test_sn(struct pi_desc *pi_desc)
                        (unsigned long *)&pi_desc->control);
 }
 
+struct vmx_msrs {
+       unsigned int            nr;
+       struct vmx_msr_entry    val[NR_AUTOLOAD_MSRS];
+};
+
 struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        unsigned long         host_rsp;
@@ -831,9 +980,8 @@ struct vcpu_vmx {
        struct loaded_vmcs   *loaded_cpu_state;
        bool                  __launched; /* temporary, used in vmx_vcpu_run */
        struct msr_autoload {
-               unsigned nr;
-               struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
-               struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
+               struct vmx_msrs guest;
+               struct vmx_msrs host;
        } msr_autoload;
 
        struct {
@@ -2483,9 +2631,20 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
        vm_exit_controls_clearbit(vmx, exit);
 }
 
+static int find_msr(struct vmx_msrs *m, unsigned int msr)
+{
+       unsigned int i;
+
+       for (i = 0; i < m->nr; ++i) {
+               if (m->val[i].index == msr)
+                       return i;
+       }
+       return -ENOENT;
+}
+
 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 {
-       unsigned i;
+       int i;
        struct msr_autoload *m = &vmx->msr_autoload;
 
        switch (msr) {
@@ -2506,18 +2665,21 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
                }
                break;
        }
+       i = find_msr(&m->guest, msr);
+       if (i < 0)
+               goto skip_guest;
+       --m->guest.nr;
+       m->guest.val[i] = m->guest.val[m->guest.nr];
+       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
 
-       for (i = 0; i < m->nr; ++i)
-               if (m->guest[i].index == msr)
-                       break;
-
-       if (i == m->nr)
+skip_guest:
+       i = find_msr(&m->host, msr);
+       if (i < 0)
                return;
-       --m->nr;
-       m->guest[i] = m->guest[m->nr];
-       m->host[i] = m->host[m->nr];
-       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
-       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+
+       --m->host.nr;
+       m->host.val[i] = m->host.val[m->host.nr];
+       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 }
 
 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
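find_msr() returns a slot index or -ENOENT, and removal swaps the last
element into the vacated slot instead of shifting, which keeps each array
dense so the VM_ENTRY/VM_EXIT MSR load counts in the VMCS can simply mirror
nr. A standalone userspace sketch of the same unordered-array idiom (all
names illustrative):

#include <errno.h>
#include <stdio.h>

struct entry { unsigned int index; unsigned long long value; };
struct list  { unsigned int nr; struct entry val[8]; };

static int find_entry(const struct list *m, unsigned int index)
{
	for (unsigned int i = 0; i < m->nr; i++)
		if (m->val[i].index == index)
			return (int)i;
	return -ENOENT;
}

static void remove_entry(struct list *m, unsigned int index)
{
	int i = find_entry(m, index);

	if (i < 0)
		return;
	/* Swap-with-last keeps the array packed; entry order is irrelevant. */
	m->val[i] = m->val[--m->nr];
}

int main(void)
{
	struct list l = { .nr = 3, .val = { {10, 1}, {20, 2}, {30, 3} } };

	remove_entry(&l, 20);
	printf("nr=%u slot1=%u\n", l.nr, l.val[1].index);	/* nr=2 slot1=30 */
	return 0;
}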
@@ -2532,9 +2694,9 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 }
 
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
-                                 u64 guest_val, u64 host_val)
+                                 u64 guest_val, u64 host_val, bool entry_only)
 {
-       unsigned i;
+       int i, j = 0;
        struct msr_autoload *m = &vmx->msr_autoload;
 
        switch (msr) {
@@ -2569,24 +2731,31 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
        }
 
-       for (i = 0; i < m->nr; ++i)
-               if (m->guest[i].index == msr)
-                       break;
+       i = find_msr(&m->guest, msr);
+       if (!entry_only)
+               j = find_msr(&m->host, msr);
 
-       if (i == NR_AUTOLOAD_MSRS) {
+       if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
+           (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) {
                printk_once(KERN_WARNING "Not enough msr switch entries. "
                                "Can't add msr %x\n", msr);
                return;
-       } else if (i == m->nr) {
-               ++m->nr;
-               vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
-               vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
        }
+       if (i < 0) {
+               i = m->guest.nr++;
+               vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
+       }
+       m->guest.val[i].index = msr;
+       m->guest.val[i].value = guest_val;
 
-       m->guest[i].index = msr;
-       m->guest[i].value = guest_val;
-       m->host[i].index = msr;
-       m->host[i].value = host_val;
+       if (entry_only)
+               return;
+
+       if (j < 0) {
+               j = m->host.nr++;
+               vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
+       }
+       m->host.val[j].index = msr;
+       m->host.val[j].value = host_val;
 }
 
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
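The new entry_only flag lets a guest value be auto-loaded on VM entry with no
matching host-restore entry on VM exit, which is why the guest and host lists
now carry independent nr counts (and why the capacity check above tests list
fullness rather than comparing find_msr()'s -ENOENT result against
NR_AUTOLOAD_MSRS). A toy model of the split-list bookkeeping (standalone C;
the MSR numbers are only for illustration: 0xc0000080 is EFER, 0x48 is
IA32_SPEC_CTRL):

#include <stdbool.h>
#include <stdio.h>

struct entry { unsigned int index; unsigned long long value; };
struct list  { unsigned int nr; struct entry val[8]; };

static void add_switch_msr(struct list *guest, struct list *host,
			   unsigned int msr, unsigned long long gval,
			   unsigned long long hval, bool entry_only)
{
	guest->val[guest->nr++] = (struct entry){ msr, gval };
	if (entry_only)
		return;		/* no host entry: nothing reloaded on exit */
	host->val[host->nr++] = (struct entry){ msr, hval };
}

int main(void)
{
	struct list guest = { 0 }, host = { 0 };

	add_switch_msr(&guest, &host, 0xc0000080, 1, 2, false);
	add_switch_msr(&guest, &host, 0x48, 3, 0, true);
	printf("guest.nr=%u host.nr=%u\n", guest.nr, host.nr);	/* 2 and 1 */
	return 0;
}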
@@ -2630,7 +2799,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
                        guest_efer &= ~EFER_LME;
                if (guest_efer != host_efer)
                        add_atomic_switch_msr(vmx, MSR_EFER,
-                                             guest_efer, host_efer);
+                                             guest_efer, host_efer, false);
                return false;
        } else {
                guest_efer &= ~ignore_bits;
@@ -4126,7 +4295,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                vcpu->arch.ia32_xss = data;
                if (vcpu->arch.ia32_xss != host_xss)
                        add_atomic_switch_msr(vmx, MSR_IA32_XSS,
-                               vcpu->arch.ia32_xss, host_xss);
+                               vcpu->arch.ia32_xss, host_xss, false);
                else
                        clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
                break;
@@ -6437,9 +6606,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
-       vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+       vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
-       vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+       vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
 
        if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
                vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
@@ -6459,8 +6628,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
                ++vmx->nmsrs;
        }
 
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+       vmx->arch_capabilities = kvm_get_arch_capabilities();
 
        vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
@@ -8094,6 +8262,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
                     HRTIMER_MODE_REL_PINNED);
        vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
+       vmx->nested.vpid02 = allocate_vpid();
+
        vmx->nested.vmxon = true;
        return 0;
 
@@ -8725,21 +8895,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 /* Emulate the VMPTRST instruction */
 static int handle_vmptrst(struct kvm_vcpu *vcpu)
 {
-       unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-       u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-       gva_t vmcs_gva;
+       unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
+       u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+       gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
        struct x86_exception e;
+       gva_t gva;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (get_vmx_mem_address(vcpu, exit_qualification,
-                       vmx_instruction_info, true, &vmcs_gva))
+       if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
                return 1;
        /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
-       if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
-                                       (void *)&to_vmx(vcpu)->nested.current_vmptr,
-                                       sizeof(u64), &e)) {
+       if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
+                                       sizeof(gpa_t), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@ -9921,6 +10090,79 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
        }
 }
 
+/*
+ * Software based L1D cache flush which is used when microcode providing
+ * the cache control MSR is not loaded.
+ *
+ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but
+ * flushing it requires reading in 64 KiB because the replacement algorithm
+ * is not exactly LRU. This could be sized at runtime via topology
+ * information but as all relevant affected CPUs have 32 KiB L1D cache size
+ * there is no point in doing so.
+ */
+
+static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
+{
+       int size = PAGE_SIZE << L1D_CACHE_ORDER;
+
+       /*
+        * This code is only executed when the flush mode is 'cond' or
+        * 'always'.
+        */
+       if (static_branch_likely(&vmx_l1d_flush_cond)) {
+               bool flush_l1d;
+
+               /*
+                * Clear the per-vcpu flush bit; it gets set again
+                * either from vcpu_run() or from one of the unsafe
+                * VMEXIT handlers.
+                */
+               flush_l1d = vcpu->arch.l1tf_flush_l1d;
+               vcpu->arch.l1tf_flush_l1d = false;
+
+               /*
+                * Clear the per-cpu flush bit; it gets set again from
+                * the interrupt handlers.
+                */
+               flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
+               kvm_clear_cpu_l1tf_flush_l1d();
+
+               if (!flush_l1d)
+                       return;
+       }
+
+       vcpu->stat.l1d_flush++;
+
+       if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+               wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+               return;
+       }
+
+       asm volatile(
+               /* First ensure the pages are in the TLB */
+               "xorl   %%eax, %%eax\n"
+               ".Lpopulate_tlb:\n\t"
+               "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+               "addl   $4096, %%eax\n\t"
+               "cmpl   %%eax, %[size]\n\t"
+               "jne    .Lpopulate_tlb\n\t"
+               "xorl   %%eax, %%eax\n\t"
+               "cpuid\n\t"
+               /* Now fill the cache */
+               "xorl   %%eax, %%eax\n"
+               ".Lfill_cache:\n"
+               "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+               "addl   $64, %%eax\n\t"
+               "cmpl   %%eax, %[size]\n\t"
+               "jne    .Lfill_cache\n\t"
+               "lfence\n"
+               :: [flush_pages] "r" (vmx_l1d_flush_pages),
+                   [size] "r" (size)
+               : "eax", "ebx", "ecx", "edx");
+}
+
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
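With L1D_CACHE_ORDER 4 the buffer is PAGE_SIZE << 4 = 64 KiB, twice the
32 KiB L1D, to defeat the not-exactly-LRU replacement. The asm touches one
byte per 4 KiB page to populate the TLB, serializes with cpuid, then reads
one byte per 64-byte cache line; it presumably stays in asm so the compiler
cannot elide or reorder the loads. A plain-C approximation of the fill logic
(sketch only, assuming 4 KiB pages and 64-byte lines; buf stands in for
vmx_l1d_flush_pages):

static void l1d_fill_sketch(const volatile unsigned char *buf, int size)
{
	unsigned char sink = 0;
	int off;

	for (off = 0; off < size; off += 4096)	/* populate the TLB */
		sink ^= buf[off];
	/* the real code executes cpuid here as a serializing barrier */
	for (off = 0; off < size; off += 64)	/* touch every cache line */
		sink ^= buf[off];
	(void)sink;
}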
@@ -10322,7 +10564,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                        clear_atomic_switch_msr(vmx, msrs[i].msr);
                else
                        add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
-                                       msrs[i].host);
+                                       msrs[i].host, false);
 }
 
 static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
@@ -10417,6 +10659,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
                (unsigned long)&current_evmcs->host_rsp : 0;
 
+       if (static_branch_unlikely(&vmx_l1d_should_flush))
+               vmx_l1d_flush(vcpu);
+
        asm(
                /* Store host registers */
                "push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -10744,11 +10989,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
                        goto free_vmcs;
        }
 
-       if (nested) {
+       if (nested)
                nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
                                           kvm_vcpu_apicv_active(&vmx->vcpu));
-               vmx->nested.vpid02 = allocate_vpid();
-       }
 
        vmx->nested.posted_intr_nv = -1;
        vmx->nested.current_vmptr = -1ull;
@@ -10765,7 +11008,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        return &vmx->vcpu;
 
 free_vmcs:
-       free_vpid(vmx->nested.vpid02);
        free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
        kfree(vmx->guest_msrs);
@@ -10779,12 +11021,39 @@ free_vcpu:
        return ERR_PTR(err);
 }
 
+#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
+#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
+
 static int vmx_vm_init(struct kvm *kvm)
 {
        spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
 
        if (!ple_gap)
                kvm->arch.pause_in_guest = true;
+
+       if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
+               switch (l1tf_mitigation) {
+               case L1TF_MITIGATION_OFF:
+               case L1TF_MITIGATION_FLUSH_NOWARN:
+                       /* 'I explicitly don't care' is set */
+                       break;
+               case L1TF_MITIGATION_FLUSH:
+               case L1TF_MITIGATION_FLUSH_NOSMT:
+               case L1TF_MITIGATION_FULL:
+                       /*
+                        * Warn upon starting the first VM in a potentially
+                        * insecure environment.
+                        */
+                       if (cpu_smt_control == CPU_SMT_ENABLED)
+                               pr_warn_once(L1TF_MSG_SMT);
+                       if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
+                               pr_warn_once(L1TF_MSG_L1D);
+                       break;
+               case L1TF_MITIGATION_FULL_FORCE:
+                       /* Flush is enforced */
+                       break;
+               }
+       }
        return 0;
 }
 
@@ -11689,10 +11958,10 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         * Set the MSR load/store lists to match L0's settings.
         */
        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-       vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
-       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-       vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+       vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
+       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+       vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
 
        set_cr4_guest_host_mask(vmx);
 
@@ -12382,6 +12651,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
+       /* Hide L1D cache contents from the nested guest. */
+       vmx->vcpu.arch.l1tf_flush_l1d = true;
+
        /*
         * Must happen outside of enter_vmx_non_root_mode() as it will
         * also be used as part of restoring nVMX state for
@@ -12925,8 +13197,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        vmx_segment_cache_clear(vmx);
 
        /* Update any VMCS fields that might have changed while L2 ran */
-       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
        vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
        if (vmx->hv_deadline_tsc == -1)
                vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
@@ -13841,6 +14113,51 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .enable_smi_window = enable_smi_window,
 };
 
+static void vmx_cleanup_l1d_flush(void)
+{
+       if (vmx_l1d_flush_pages) {
+               free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
+               vmx_l1d_flush_pages = NULL;
+       }
+       /* Restore state so sysfs ignores VMX */
+       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
+}
+
+static void vmx_exit(void)
+{
+#ifdef CONFIG_KEXEC_CORE
+       RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
+       synchronize_rcu();
+#endif
+
+       kvm_exit();
+
+#if IS_ENABLED(CONFIG_HYPERV)
+       if (static_branch_unlikely(&enable_evmcs)) {
+               int cpu;
+               struct hv_vp_assist_page *vp_ap;
+               /*
+                * Reset everything to support using non-enlightened VMCS
+                * access later (e.g. when we reload the module with
+                * enlightened_vmcs=0)
+                */
+               for_each_online_cpu(cpu) {
+                       vp_ap = hv_get_vp_assist_page(cpu);
+
+                       if (!vp_ap)
+                               continue;
+
+                       vp_ap->current_nested_vmcs = 0;
+                       vp_ap->enlighten_vmentry = 0;
+               }
+
+               static_branch_disable(&enable_evmcs);
+       }
+#endif
+       vmx_cleanup_l1d_flush();
+}
+module_exit(vmx_exit);
+
 static int __init vmx_init(void)
 {
        int r;
@@ -13875,10 +14192,25 @@ static int __init vmx_init(void)
 #endif
 
        r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-                     __alignof__(struct vcpu_vmx), THIS_MODULE);
+                    __alignof__(struct vcpu_vmx), THIS_MODULE);
        if (r)
                return r;
 
+       /*
+        * Must be called after kvm_init() so enable_ept is properly set
+        * up. Hand in the mitigation parameter value that was stored by
+        * the pre-module-init parser. If no parameter was given, it will
+        * contain 'auto', which will be turned into the default 'cond'
+        * mitigation mode.
+        */
+       if (boot_cpu_has(X86_BUG_L1TF)) {
+               r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
+               if (r) {
+                       vmx_exit();
+                       return r;
+               }
+       }
+
 #ifdef CONFIG_KEXEC_CORE
        rcu_assign_pointer(crash_vmclear_loaded_vmcss,
                           crash_vmclear_local_loaded_vmcss);
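vmx_exit() moves ahead of vmx_init() and loses its __exit annotation because
the init error path above now calls it: __exit code may be discarded from the
image and must never be reachable from init. A generic sketch of the pattern
under hypothetical names:

#include <linux/init.h>
#include <linux/module.h>

static int  core_init(void)        { return 0; }	/* stand-in bodies */
static void core_exit(void)        { }
static int  setup_feature(void)    { return 0; }
static void teardown_feature(void) { }

static void mymod_exit(void)		/* deliberately not __exit */
{
	teardown_feature();
	core_exit();
}
module_exit(mymod_exit);

static int __init mymod_init(void)
{
	int r = core_init();		/* finalizes runtime knobs */

	if (r)
		return r;

	r = setup_feature();		/* depends on those knobs */
	if (r) {
		mymod_exit();		/* reuse the full teardown on failure */
		return r;
	}
	return 0;
}
module_init(mymod_init);
MODULE_LICENSE("GPL");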
@@ -13887,39 +14219,4 @@ static int __init vmx_init(void)
 
        return 0;
 }
-
-static void __exit vmx_exit(void)
-{
-#ifdef CONFIG_KEXEC_CORE
-       RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
-       synchronize_rcu();
-#endif
-
-       kvm_exit();
-
-#if IS_ENABLED(CONFIG_HYPERV)
-       if (static_branch_unlikely(&enable_evmcs)) {
-               int cpu;
-               struct hv_vp_assist_page *vp_ap;
-               /*
-                * Reset everything to support using non-enlightened VMCS
-                * access later (e.g. when we reload the module with
-                * enlightened_vmcs=0)
-                */
-               for_each_online_cpu(cpu) {
-                       vp_ap = hv_get_vp_assist_page(cpu);
-
-                       if (!vp_ap)
-                               continue;
-
-                       vp_ap->current_nested_vmcs = 0;
-                       vp_ap->enlighten_vmentry = 0;
-               }
-
-               static_branch_disable(&enable_evmcs);
-       }
-#endif
-}
-
-module_init(vmx_init)
-module_exit(vmx_exit)
+module_init(vmx_init);