KVM: x86: Gracefully handle __vmalloc() failure during VM allocation
[platform/kernel/linux-rpi.git] / arch/x86/kvm/vmx/vmx.c
index ee74789..e0d16ba 100644
@@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
 module_param(enable_apicv, bool, S_IRUGO);
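Dropping static from enable_apicv only works together with a visible declaration; the later hunks remove the apicv argument from nested_vmx_setup_ctls_msrs(), so the nested code presumably tests the flag directly. A minimal sketch of the assumed companion declaration (the exact header, e.g. arch/x86/kvm/vmx/capabilities.h next to the other enable_* knobs, is an assumption and not part of this hunk):

    /* Assumed companion declaration so nested.c can read the flag directly. */
    extern bool __read_mostly enable_apicv;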
 
 /*
@@ -648,43 +648,15 @@ void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 }
 
 #ifdef CONFIG_KEXEC_CORE
-/*
- * This bitmap is used to indicate whether the vmclear
- * operation is enabled on all cpus. All disabled by
- * default.
- */
-static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
-
-static inline void crash_enable_local_vmclear(int cpu)
-{
-       cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static inline void crash_disable_local_vmclear(int cpu)
-{
-       cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static inline int crash_local_vmclear_enabled(int cpu)
-{
-       return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
 static void crash_vmclear_local_loaded_vmcss(void)
 {
        int cpu = raw_smp_processor_id();
        struct loaded_vmcs *v;
 
-       if (!crash_local_vmclear_enabled(cpu))
-               return;
-
        list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
                            loaded_vmcss_on_cpu_link)
                vmcs_clear(v->vmcs);
 }
-#else
-static inline void crash_enable_local_vmclear(int cpu) { }
-static inline void crash_disable_local_vmclear(int cpu) { }
 #endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
@@ -696,19 +668,24 @@ static void __loaded_vmcs_clear(void *arg)
                return; /* vcpu migration can race with cpu offline */
        if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
                per_cpu(current_vmcs, cpu) = NULL;
-       crash_disable_local_vmclear(cpu);
+
+       vmcs_clear(loaded_vmcs->vmcs);
+       if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
+               vmcs_clear(loaded_vmcs->shadow_vmcs);
+
        list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
 
        /*
-        * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
-        * is before setting loaded_vmcs->vcpu to -1 which is done in
-        * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
-        * then adds the vmcs into percpu list before it is deleted.
+        * Ensure all writes to loaded_vmcs, including deleting it from its
+        * current percpu list, complete before setting loaded_vmcs->vcpu to
+        * -1, otherwise a different cpu can see vcpu == -1 first and add
+        * loaded_vmcs to its percpu list before it's deleted from this cpu's
+        * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
         */
        smp_wmb();
 
-       loaded_vmcs_init(loaded_vmcs);
-       crash_enable_local_vmclear(cpu);
+       loaded_vmcs->cpu = -1;
+       loaded_vmcs->launched = 0;
 }
 
 void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
@@ -1317,18 +1294,17 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
        if (!already_loaded) {
                loaded_vmcs_clear(vmx->loaded_vmcs);
                local_irq_disable();
-               crash_disable_local_vmclear(cpu);
 
                /*
-                * Read loaded_vmcs->cpu should be before fetching
-                * loaded_vmcs->loaded_vmcss_on_cpu_link.
-                * See the comments in __loaded_vmcs_clear().
+                * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to
+                * this cpu's percpu list, otherwise it may not yet be deleted
+                * from its previous cpu's percpu list.  Pairs with the
+                * smp_wmb() in __loaded_vmcs_clear().
                 */
                smp_rmb();
 
                list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
                         &per_cpu(loaded_vmcss_on_cpu, cpu));
-               crash_enable_local_vmclear(cpu);
                local_irq_enable();
        }
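The two rewritten comments describe a publish/consume pairing: the clearing side must complete the list deletion before publishing cpu == -1, and the loading side must read cpu before touching its percpu list. A stripped-down sketch of the same pattern with generic names (illustrative only, not the real KVM structures; the real code additionally serializes via an IPI and disabled interrupts):

    #include <linux/list.h>
    #include <asm/barrier.h>

    struct item {
            int cpu;                    /* -1 means "on no percpu list" */
            struct list_head link;
    };

    /* Clearing side: runs on the cpu whose list currently holds the item. */
    static void clear_item(struct item *it)
    {
            list_del(&it->link);
            /*
             * Publish the deletion before advertising cpu == -1; pairs with
             * the smp_rmb() in load_item().
             */
            smp_wmb();
            it->cpu = -1;
    }

    /* Loading side: a different cpu adopting the item. */
    static void load_item(struct item *it, struct list_head *my_list, int my_cpu)
    {
            if (it->cpu == -1) {
                    /* Order the read of cpu before the list_add() below. */
                    smp_rmb();
                    list_add(&it->link, my_list);
                    it->cpu = my_cpu;
            }
    }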
 
@@ -2256,17 +2232,6 @@ static int hardware_enable(void)
        INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
        spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
-       /*
-        * Now we can enable the vmclear operation in kdump
-        * since the loaded_vmcss_on_cpu list on this cpu
-        * has been initialized.
-        *
-        * Though the cpu is not in VMX operation now, there
-        * is no problem to enable the vmclear operation
-        * for the loaded_vmcss_on_cpu list is empty!
-        */
-       crash_enable_local_vmclear(cpu);
-
        rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 
        test_bits = FEATURE_CONTROL_LOCKED;
@@ -3853,24 +3818,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
  * 2. If the target vcpu isn't running (root mode), kick it to pick up the
  * interrupt from PIR in the next vmentry.
  */
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        int r;
 
        r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
        if (!r)
-               return;
+               return 0;
+
+       if (!vcpu->arch.apicv_active)
+               return -1;
 
        if (pi_test_and_set_pir(vector, &vmx->pi_desc))
-               return;
+               return 0;
 
        /* If a previous notification has sent the IPI, nothing to do.  */
        if (pi_test_and_set_on(&vmx->pi_desc))
-               return;
+               return 0;
 
        if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
                kvm_vcpu_kick(vcpu);
+
+       return 0;
 }
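Returning int lets the caller fall back to ordinary event injection when posted delivery is refused (apicv inactive for L1, while a nested posted interrupt still goes through). A rough sketch of the assumed caller-side fallback in the local APIC delivery path (shape inferred from __apic_accept_irq()-style code, not part of this hunk):

    /* Assumed caller-side handling; exact call site is not in this diff. */
    if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
            /* Posted delivery refused: fall back to normal injection. */
            kvm_lapic_set_irr(vector, apic);
            kvm_make_request(KVM_REQ_EVENT, vcpu);
            kvm_vcpu_kick(vcpu);
    }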
 
 /*
@@ -4500,8 +4470,13 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-       return (!to_vmx(vcpu)->nested.nested_run_pending &&
-               vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+       if (to_vmx(vcpu)->nested.nested_run_pending)
+               return false;
+
+       if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+               return true;
+
+       return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
                !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
                        (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
@@ -6679,6 +6654,10 @@ static struct kvm *vmx_vm_alloc(void)
        struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
                                            GFP_KERNEL_ACCOUNT | __GFP_ZERO,
                                            PAGE_KERNEL);
+
+       if (!kvm_vmx)
+               return NULL;
+
        return &kvm_vmx->kvm;
 }
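Without the check, the function relies on &kvm_vmx->kvm being computed from a NULL pointer, which is undefined behaviour even though kvm happens to be the first member of struct kvm_vmx. Returning NULL explicitly lets the failure propagate cleanly to the generic code; a simplified sketch of the consuming path (condensed from the arch and generic KVM code of this era, not part of the hunk):

    /* arch/x86/include/asm/kvm_host.h (simplified) */
    static inline struct kvm *kvm_arch_alloc_vm(void)
    {
            return kvm_x86_ops->vm_alloc();     /* may now return NULL */
    }

    /* virt/kvm/kvm_main.c, kvm_create_vm() (simplified) */
    struct kvm *kvm = kvm_arch_alloc_vm();

    if (!kvm)
            return ERR_PTR(-ENOMEM);            /* graceful failure */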
 
@@ -6802,8 +6781,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
        if (nested)
                nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
-                                          vmx_capability.ept,
-                                          kvm_vcpu_apicv_active(&vmx->vcpu));
+                                          vmx_capability.ept);
        else
                memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
 
@@ -6885,8 +6863,7 @@ static int __init vmx_check_processor_compat(void)
        if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
                return -EIO;
        if (nested)
-               nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
-                                          enable_apicv);
+               nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
        if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
                printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
                                smp_processor_id());
@@ -7132,6 +7109,40 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
        to_vmx(vcpu)->req_immediate_exit = true;
 }
 
+static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
+                                 struct x86_instruction_info *info)
+{
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       unsigned short port;
+       bool intercept;
+       int size;
+
+       if (info->intercept == x86_intercept_in ||
+           info->intercept == x86_intercept_ins) {
+               port = info->src_val;
+               size = info->dst_bytes;
+       } else {
+               port = info->dst_val;
+               size = info->src_bytes;
+       }
+
+       /*
+        * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
+        * VM-exits depend on the 'unconditional IO exiting' VM-execution
+        * control.
+        *
+        * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
+        */
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+               intercept = nested_cpu_has(vmcs12,
+                                          CPU_BASED_UNCOND_IO_EXITING);
+       else
+               intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
+
+       /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED.  */
+       return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
+}
+
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
                               struct x86_instruction_info *info,
                               enum x86_intercept_stage stage)
@@ -7139,18 +7150,44 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 
+       switch (info->intercept) {
        /*
         * RDPID causes #UD if disabled through secondary execution controls.
         * Because it is marked as EmulateOnUD, we need to intercept it here.
         */
-       if (info->intercept == x86_intercept_rdtscp &&
-           !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
-               ctxt->exception.vector = UD_VECTOR;
-               ctxt->exception.error_code_valid = false;
-               return X86EMUL_PROPAGATE_FAULT;
-       }
+       case x86_intercept_rdtscp:
+               if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+                       ctxt->exception.vector = UD_VECTOR;
+                       ctxt->exception.error_code_valid = false;
+                       return X86EMUL_PROPAGATE_FAULT;
+               }
+               break;
+
+       case x86_intercept_in:
+       case x86_intercept_ins:
+       case x86_intercept_out:
+       case x86_intercept_outs:
+               return vmx_check_intercept_io(vcpu, info);
+
+       case x86_intercept_lgdt:
+       case x86_intercept_lidt:
+       case x86_intercept_lldt:
+       case x86_intercept_ltr:
+       case x86_intercept_sgdt:
+       case x86_intercept_sidt:
+       case x86_intercept_sldt:
+       case x86_intercept_str:
+               if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
+                       return X86EMUL_CONTINUE;
+
+               /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED.  */
+               break;
 
        /* TODO: check more intercepts... */
+       default:
+               break;
+       }
+
        return X86EMUL_UNHANDLEABLE;
 }
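vmx_check_intercept_io() defers the bitmap case to nested_vmx_check_io_bitmaps(), which lives in nested.c and is outside this diff. The check amounts to testing one bit per byte of the access against vmcs12's IO bitmaps A and B; a condensed sketch of the assumed shape (mirroring the existing nested_vmx_exit_handled_io() logic, not the exact implementation):

    /* Condensed sketch; ports 0x0-0x7fff live in bitmap A, 0x8000-0xffff in B. */
    bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
                                     int size)
    {
            struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
            gpa_t bitmap;
            u8 b;

            while (size > 0) {
                    if (port < 0x8000)
                            bitmap = vmcs12->io_bitmap_a;
                    else if (port < 0x10000)
                            bitmap = vmcs12->io_bitmap_b;
                    else
                            return true;            /* out of range: intercept */

                    bitmap += (port & 0x7fff) / 8;
                    if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
                            return true;            /* unreadable bitmap: intercept */
                    if (b & (1 << (port & 7)))
                            return true;

                    port++;
                    size--;
            }
            return false;
    }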
 
@@ -7736,7 +7773,7 @@ static __init int hardware_setup(void)
 
        if (nested) {
                nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
-                                          vmx_capability.ept, enable_apicv);
+                                          vmx_capability.ept);
 
                r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
                if (r)