KVM: X86: Reduce the overhead when lapic_timer_advance is disabled
[platform/kernel/linux-rpi.git] arch/x86/kvm/x86.c
index 34c85aa..54d66f2 100644
@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 static bool __read_mostly ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+static bool __read_mostly report_ignored_msrs = true;
+module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+
 unsigned int min_timer_period_us = 500;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
 
@@ -791,6 +794,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
                return 1;
 
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+               return 1;
+
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@ -1033,6 +1039,7 @@ static u32 emulated_msrs[] = {
        MSR_IA32_MCG_CTL,
        MSR_IA32_MCG_EXT_CTL,
        MSR_IA32_SMBASE,
+       MSR_SMI_COUNT,
        MSR_PLATFORM_INFO,
        MSR_MISC_FEATURES_ENABLES,
 };
@@ -1795,10 +1802,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
 
-       kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
-                          &hv_clock.tsc_shift,
-                          &hv_clock.tsc_to_system_mul);
-       ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+       if (__this_cpu_read(cpu_tsc_khz)) {
+               kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+                                  &hv_clock.tsc_shift,
+                                  &hv_clock.tsc_to_system_mul);
+               ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+       } else
+               ret = ktime_get_boot_ns() + ka->kvmclock_offset;
 
        put_cpu();
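
When the per-cpu cpu_tsc_khz is still zero (the TSC frequency for this CPU has not been recorded yet), there is no valid frequency to feed kvm_get_time_scale(), so get_kvmclock_ns() now falls back to the host boot clock plus the per-VM kvmclock_offset instead of scaling a raw TSC read. For reference, the scaling on the normal path is the usual pvclock read: pre-shift the TSC delta by tsc_shift, multiply by the 32.32 fixed-point tsc_to_system_mul and keep the upper bits. A self-contained userspace sketch of that arithmetic (struct and function names invented for illustration):

    #include <stdint.h>

    struct pvclock_sample {
        uint64_t tsc_timestamp;     /* TSC value captured at system_time */
        uint64_t system_time;       /* nanoseconds corresponding to tsc_timestamp */
        uint32_t tsc_to_system_mul; /* 32.32 fixed-point cycles-to-ns factor */
        int8_t   tsc_shift;         /* pre-shift applied to the TSC delta */
    };

    static uint64_t pvclock_ns(const struct pvclock_sample *s, uint64_t tsc)
    {
        uint64_t delta = tsc - s->tsc_timestamp;

        if (s->tsc_shift >= 0)
            delta <<= s->tsc_shift;
        else
            delta >>= -s->tsc_shift;

        /* upper half of the 96-bit product, i.e. delta * mul / 2^32 */
        return s->system_time +
               (uint64_t)(((unsigned __int128)delta * s->tsc_to_system_mul) >> 32);
    }
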
 
@@ -1830,6 +1840,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
         */
        BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
 
+       if (guest_hv_clock.version & 1)
+               ++guest_hv_clock.version;  /* first time write, random junk */
+
        vcpu->hv_clock.version = guest_hv_clock.version + 1;
        kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
                                &vcpu->hv_clock,
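
hv_clock.version implements a seqlock-style handshake with the guest: KVM keeps the version odd while the pvclock payload is being rewritten and even once it is consistent, and the guest retries any read during which the version was odd or changed. On the very first update the guest-visible version may be uninitialized junk; if that junk happens to be odd, adding 1 would publish an even version while the update is still in flight, so the stray odd value is bumped to even first. A minimal userspace sketch of the protocol (types, names and barriers simplified for illustration):

    #include <stdint.h>

    struct shared_clock {
        volatile uint32_t version;
        volatile uint64_t system_time;    /* stands in for the pvclock payload */
    };

    static void writer_update(struct shared_clock *c, uint64_t new_time)
    {
        if (c->version & 1)               /* never start from an odd (junk) value */
            c->version++;

        c->version++;                     /* odd: update in progress */
        __sync_synchronize();
        c->system_time = new_time;
        __sync_synchronize();
        c->version++;                     /* even: payload consistent again */
    }

    static uint64_t reader_read(const struct shared_clock *c)
    {
        uint32_t v;
        uint64_t t;

        do {
            v = c->version;
            __sync_synchronize();
            t = c->system_time;
            __sync_synchronize();
        } while ((v & 1) || v != c->version);

        return t;
    }
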
@@ -2219,6 +2232,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                vcpu->arch.smbase = data;
                break;
+       case MSR_SMI_COUNT:
+               if (!msr_info->host_initiated)
+                       return 1;
+               vcpu->arch.smi_count = data;
+               break;
        case MSR_KVM_WALL_CLOCK_NEW:
        case MSR_KVM_WALL_CLOCK:
                vcpu->kvm->arch.wall_clock = data;
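
MSR_SMI_COUNT (0x34) is emulated as read-only from the guest's point of view: a wrmsr from inside the guest hits the !host_initiated check and is rejected (the caller turns the error into an injected #GP), while host-initiated accesses through KVM_GET_MSRS/KVM_SET_MSRS can still save and restore the count, e.g. across live migration. A hypothetical host-side helper for reading it from an already-created vCPU fd (sketch only, minimal error handling):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define MSR_SMI_COUNT_IDX 0x34

    static int read_smi_count(int vcpu_fd, uint64_t *count)
    {
        struct {
            struct kvm_msrs hdr;
            struct kvm_msr_entry entry;
        } req;

        memset(&req, 0, sizeof(req));
        req.hdr.nmsrs = 1;
        req.entry.index = MSR_SMI_COUNT_IDX;

        /* KVM_GET_MSRS returns the number of MSRs successfully read */
        if (ioctl(vcpu_fd, KVM_GET_MSRS, &req) != 1)
            return -1;

        *count = req.entry.data;
        return 0;
    }
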
@@ -2322,7 +2340,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                /* Drop writes to this legacy MSR -- see rdmsr
                 * counterpart for further detail.
                 */
-               vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
+               if (report_ignored_msrs)
+                       vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
+                               msr, data);
                break;
        case MSR_AMD64_OSVW_ID_LENGTH:
                if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -2359,8 +2379,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                                    msr, data);
                        return 1;
                } else {
-                       vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
-                                   msr, data);
+                       if (report_ignored_msrs)
+                               vcpu_unimpl(vcpu,
+                                       "ignored wrmsr: 0x%x data 0x%llx\n",
+                                       msr, data);
                        break;
                }
        }
@@ -2489,6 +2511,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                msr_info->data = vcpu->arch.smbase;
                break;
+       case MSR_SMI_COUNT:
+               msr_info->data = vcpu->arch.smi_count;
+               break;
        case MSR_IA32_PERF_STATUS:
                /* TSC increment by tick */
                msr_info->data = 1000ULL;
@@ -2578,7 +2603,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                                               msr_info->index);
                        return 1;
                } else {
-                       vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+                       if (report_ignored_msrs)
+                               vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
+                                       msr_info->index);
                        msr_info->data = 0;
                }
                break;
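
With report_ignored_msrs=0, these guards in kvm_set_msr_common() and kvm_get_msr_common() silence the per-access "ignored rdmsr/wrmsr" messages that ignore_msrs=1 would otherwise emit through vcpu_unimpl(), which matters for guests that poke unimplemented MSRs in a hot path. Since the parameter is registered above with S_IRUGO | S_IWUSR, it should be readable and togglable at runtime via /sys/module/kvm/parameters/report_ignored_msrs.
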
@@ -2922,7 +2949,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        pagefault_enable();
        kvm_x86_ops->vcpu_put(vcpu);
-       kvm_put_guest_fpu(vcpu);
        vcpu->arch.last_host_tsc = rdtsc();
 }
 
@@ -5237,17 +5263,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
        emul_to_vcpu(ctxt)->arch.halt_request = 1;
 }
 
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
-       preempt_disable();
-       kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
-       preempt_enable();
-}
-
 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
                              struct x86_instruction_info *info,
                              enum x86_intercept_stage stage)
@@ -5325,8 +5340,6 @@ static const struct x86_emulate_ops emulate_ops = {
        .halt                = emulator_halt,
        .wbinvd              = emulator_wbinvd,
        .fix_hypercall       = emulator_fix_hypercall,
-       .get_fpu             = emulator_get_fpu,
-       .put_fpu             = emulator_put_fpu,
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
        .set_nmi_mask        = emulator_set_nmi_mask,
@@ -5430,7 +5443,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               r = EMULATE_FAIL;
+               r = EMULATE_USER_EXIT;
        }
        kvm_queue_exception(vcpu, UD_VECTOR);
 
@@ -5722,6 +5735,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
                                return EMULATE_DONE;
+                       if (ctxt->have_exception && inject_emulated_exception(vcpu))
+                               return EMULATE_DONE;
                        if (emulation_type & EMULTYPE_SKIP)
                                return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu);
@@ -6445,6 +6460,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                kvm_x86_ops->queue_exception(vcpu);
        } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
                vcpu->arch.smi_pending = false;
+               ++vcpu->arch.smi_count;
                enter_smm(vcpu);
        } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
                --vcpu->arch.nmi_pending;
@@ -6761,6 +6777,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
        kvm_x86_ops->tlb_flush(vcpu);
 }
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+               unsigned long start, unsigned long end)
+{
+       unsigned long apic_address;
+
+       /*
+        * The physical address of apic access page is stored in the VMCS.
+        * Update it when it becomes invalid.
+        */
+       apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+       if (start <= apic_address && apic_address < end)
+               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
        struct page *page = NULL;
@@ -6935,7 +6965,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        preempt_disable();
 
        kvm_x86_ops->prepare_guest_switch(vcpu);
-       kvm_load_guest_fpu(vcpu);
 
        /*
         * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@ -6989,7 +7018,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
-       wait_lapic_expire(vcpu);
+       if (lapic_timer_advance_ns)
+               wait_lapic_expire(vcpu);
        guest_enter_irqoff();
 
        if (unlikely(vcpu->arch.switch_db_regs)) {
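
This hunk is the change the subject line describes. When lapic_timer_advance_ns is non-zero, the LAPIC timer is programmed to fire that many nanoseconds early and wait_lapic_expire() busy-waits on the guest TSC right before VM entry so the injected timer interrupt lands close to the guest's programmed deadline; when the knob is zero the function has nothing to do, so gating the call removes it from the hot path entirely. A rough userspace illustration of the "fire early, spin out the remainder" idea (names and the clock source are invented for the sketch):

    #include <stdint.h>
    #include <time.h>

    static uint64_t now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    /*
     * The timer backing the guest's deadline was armed advance_ns early;
     * spin until deadline_ns so delivery happens right on time. With
     * advance_ns == 0 there is nothing to wait for.
     */
    static void wait_for_timer_deadline(uint64_t deadline_ns, uint64_t advance_ns)
    {
        if (!advance_ns)
            return;

        while (now_ns() < deadline_ns)
            ;    /* busy-wait, analogous to wait_lapic_expire() */
    }
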
@@ -7248,14 +7278,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       struct fpu *fpu = &current->thread.fpu;
        int r;
-       sigset_t sigsaved;
 
-       fpu__initialize(fpu);
+       kvm_sigset_activate(vcpu);
 
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+       kvm_load_guest_fpu(vcpu);
 
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
@@ -7297,9 +7324,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                r = vcpu_run(vcpu);
 
 out:
+       kvm_put_guest_fpu(vcpu);
        post_kvm_run_save(vcpu);
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+       kvm_sigset_deactivate(vcpu);
 
        return r;
 }
@@ -7367,7 +7394,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_set_rflags(vcpu, regs->rflags);
+       kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
 
        vcpu->arch.exception.pending = false;
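
X86_EFLAGS_FIXED is bit 1 of RFLAGS, which the architecture defines as always set; ORing it in means a KVM_SET_REGS call whose rflags has bit 1 clear can no longer install an architecturally invalid flags value (on VMX such a guest RFLAGS fails the VM-entry guest-state checks).
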
 
@@ -7690,32 +7717,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
        vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
+/* Swap (qemu) user FPU context for the guest FPU context. */
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->guest_fpu_loaded)
-               return;
-
-       /*
-        * Restore all possible states in the guest,
-        * and assume host would use all available bits.
-        * Guest xcr0 would be loaded later.
-        */
-       vcpu->guest_fpu_loaded = 1;
-       __kernel_fpu_begin();
+       preempt_disable();
+       copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
        /* PKRU is separately restored in kvm_x86_ops->run.  */
        __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
                                ~XFEATURE_MASK_PKRU);
+       preempt_enable();
        trace_kvm_fpu(1);
 }
 
+/* When vcpu_run ends, restore user space FPU context. */
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       if (!vcpu->guest_fpu_loaded)
-               return;
-
-       vcpu->guest_fpu_loaded = 0;
+       preempt_disable();
        copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
-       __kernel_fpu_end();
+       copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+       preempt_enable();
        ++vcpu->stat.fpu_reload;
        trace_kvm_fpu(0);
 }
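
Together with the removals above in kvm_arch_vcpu_put(), the emulator_get_fpu()/emulator_put_fpu() hooks and vcpu_enter_guest(), this moves the user/guest FPU switch out to kvm_arch_vcpu_ioctl_run(): the userspace (e.g. QEMU) register image is parked in vcpu->arch.user_fpu for the whole KVM_RUN call rather than being swapped around every VM entry, and preemption is disabled only around the actual register copies so a context switch cannot interleave with a half-finished swap. The mechanics are a plain save-one-image/load-the-other exchange; a standalone userspace illustration using FXSAVE/FXRSTOR (which cover only the x87/SSE part of what the kernel's XSAVE-based helpers handle) might look like:

    #include <stdint.h>

    /* 512-byte FXSAVE area; the instruction requires 16-byte alignment. */
    struct fx_image {
        uint8_t area[512] __attribute__((aligned(16)));
    };

    static inline void fx_save(struct fx_image *img)
    {
        __asm__ volatile("fxsave %0" : "=m" (img->area));
    }

    static inline void fx_restore(const struct fx_image *img)
    {
        __asm__ volatile("fxrstor %0" : : "m" (img->area));
    }

    /*
     * Analogue of kvm_load_guest_fpu()/kvm_put_guest_fpu(): stash the live
     * register file in 'save_to', then install 'load_from' (which must hold
     * a previously saved image). In the kernel both copies run with
     * preemption disabled.
     */
    static void fpu_swap(struct fx_image *save_to, const struct fx_image *load_from)
    {
        fx_save(save_to);
        fx_restore(load_from);
    }
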
@@ -7799,6 +7819,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        vcpu->arch.hflags = 0;
 
        vcpu->arch.smi_pending = 0;
+       vcpu->arch.smi_count = 0;
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
@@ -7832,7 +7853,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                 * Avoid having the INIT path from kvm_apic_has_events() run with
                 * the FPU loaded, which would not let userspace fix the state.
                 */
-               kvm_put_guest_fpu(vcpu);
+               if (init_event)
+                       kvm_put_guest_fpu(vcpu);
                mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
                                        XFEATURE_MASK_BNDREGS);
                if (mpx_state_buffer)
@@ -7841,6 +7863,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                                        XFEATURE_MASK_BNDCSR);
                if (mpx_state_buffer)
                        memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+               if (init_event)
+                       kvm_load_guest_fpu(vcpu);
        }
 
        if (!init_event) {