Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2015 16:36:49 +0000 (09:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2015 16:36:49 +0000 (09:36 -0700)
Pull first batch of KVM updates from Paolo Bonzini:
 "The bulk of the changes here is for x86.  And for once it's not for
  silicon that no one owns: these are really new features for everyone.

  Details:

   - ARM:
        several features are in progress but missed the 4.2 deadline.
        So here is just a smattering of bug fixes, plus enabling the
        VFIO integration.

   - s390:
        Some fixes/refactorings/optimizations, plus support for 2GB
        pages.

   - x86:
        * host and guest support for marking kvmclock as a stable
          scheduler clock.
        * support for write combining.
        * support for system management mode, needed for secure boot in
          guests.
        * a bunch of cleanups required for the above
        * support for virtualized performance counters on AMD
        * legacy PCI device assignment is deprecated and defaults to "n"
          in Kconfig; VFIO replaces it

        On top of this there are also bug fixes and eager FPU context
        loading for FPU-heavy guests.

   - Common code:
        Support for multiple address spaces; for now it is used only for
        x86 SMM but the s390 folks also have plans"
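
A minimal stand-alone sketch of the "multiple address spaces" item above, assuming
only the HF_SMM_MASK and kvm_arch_vcpu_memslots_id() definitions that appear in the
arch/x86/include/asm/kvm_host.h hunk further down; the fake_vcpu struct and main()
are illustrative user-space code, not part of the patch:

    /* sketch: a vCPU in SMM is routed to the second of the two memslot sets */
    #include <stdio.h>

    #define HF_SMM_MASK           (1 << 6)  /* hflags bit set while in SMM */
    #define KVM_ADDRESS_SPACE_NUM 2         /* 0 = normal view, 1 = SMM view */

    struct fake_vcpu { unsigned hflags; };

    /* mirrors kvm_arch_vcpu_memslots_id(): SMM vCPUs use memslot set 1 */
    static int memslots_id(const struct fake_vcpu *vcpu)
    {
            return (vcpu->hflags & HF_SMM_MASK) ? 1 : 0;
    }

    int main(void)
    {
            struct fake_vcpu normal = { .hflags = 0 };
            struct fake_vcpu smm    = { .hflags = HF_SMM_MASK };

            printf("normal vCPU -> address space %d/%d\n",
                   memslots_id(&normal), KVM_ADDRESS_SPACE_NUM);
            printf("SMM vCPU    -> address space %d/%d\n",
                   memslots_id(&smm), KVM_ADDRESS_SPACE_NUM);
            return 0;
    }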

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  KVM: s390: clear floating interrupt bitmap and parameters
  KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
  KVM: x86/vPMU: Implement AMD vPMU code for KVM
  KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
  KVM: x86/vPMU: introduce kvm_pmu_msr_idx_to_pmc
  KVM: x86/vPMU: reorder PMU functions
  KVM: x86/vPMU: whitespace and stylistic adjustments in PMU code
  KVM: x86/vPMU: use the new macros to go between PMC, PMU and VCPU
  KVM: x86/vPMU: introduce pmu.h header
  KVM: x86/vPMU: rename a few PMU functions
  KVM: MTRR: do not map huge page for non-consistent range
  KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type
  KVM: MTRR: introduce mtrr_for_each_mem_type
  KVM: MTRR: introduce fixed_mtrr_addr_* functions
  KVM: MTRR: sort variable MTRRs
  KVM: MTRR: introduce var_mtrr_range
  KVM: MTRR: introduce fixed_mtrr_segment table
  KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  KVM: MTRR: do not split 64 bits MSR content
  KVM: MTRR: clean up mtrr default type
  ...

arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/kvm.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

diff --combined arch/s390/kvm/interrupt.c
@@@ -134,6 -134,8 +134,8 @@@ static unsigned long deliverable_irqs(s
  
        active_mask = pending_local_irqs(vcpu);
        active_mask |= pending_floating_irqs(vcpu);
+       if (!active_mask)
+               return 0;
  
        if (psw_extint_disabled(vcpu))
                active_mask &= ~IRQ_PEND_EXT_MASK;
@@@ -799,7 -801,7 +801,7 @@@ int kvm_s390_ext_call_pending(struct kv
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
  
 -      if (!sclp_has_sigpif())
 +      if (!sclp.has_sigpif)
                return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
  
        return (sigp_ctrl & SIGP_CTRL_C) &&
@@@ -941,12 -943,9 +943,9 @@@ int __must_check kvm_s390_deliver_pendi
        if (cpu_timer_irq_pending(vcpu))
                set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
  
-       do {
-               irqs = deliverable_irqs(vcpu);
+       while ((irqs = deliverable_irqs(vcpu)) && !rc) {
                /* bits are in the order of interrupt priority */
                irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
-               if (irq_type == IRQ_PEND_COUNT)
-                       break;
                if (is_ioirq(irq_type)) {
                        rc = __deliver_io(vcpu, irq_type);
                } else {
                        }
                        rc = func(vcpu);
                }
-               if (rc)
-                       break;
-       } while (!rc);
+       }
  
        set_intercept_indicators(vcpu);
  
@@@ -1058,10 -1055,10 +1055,10 @@@ static int __inject_extcall(struct kvm_
            kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
                return -EINVAL;
  
 -      if (sclp_has_sigpif())
 +      if (sclp.has_sigpif)
                return __inject_extcall_sigpif(vcpu, src_id);
  
-       if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+       if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
                return -EBUSY;
        *extcall = irq->u.extcall;
        atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@@ -1340,12 -1337,54 +1337,54 @@@ static int __inject_io(struct kvm *kvm
        return 0;
  }
  
- static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+ /*
+  * Find a destination VCPU for a floating irq and kick it.
+  */
+ static void __floating_irq_kick(struct kvm *kvm, u64 type)
  {
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
        struct kvm_s390_local_interrupt *li;
+       struct kvm_vcpu *dst_vcpu;
+       int sigcpu, online_vcpus, nr_tries = 0;
+       online_vcpus = atomic_read(&kvm->online_vcpus);
+       if (!online_vcpus)
+               return;
+       /* find idle VCPUs first, then round robin */
+       sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+       if (sigcpu == online_vcpus) {
+               do {
+                       sigcpu = fi->next_rr_cpu;
+                       fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+                       /* avoid endless loops if all vcpus are stopped */
+                       if (nr_tries++ >= online_vcpus)
+                               return;
+               } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+       }
+       dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+       /* make the VCPU drop out of the SIE, or wake it up if sleeping */
+       li = &dst_vcpu->arch.local_int;
+       spin_lock(&li->lock);
+       switch (type) {
+       case KVM_S390_MCHK:
+               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+               break;
+       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+               break;
+       default:
+               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+               break;
+       }
+       spin_unlock(&li->lock);
+       kvm_s390_vcpu_wakeup(dst_vcpu);
+ }
+ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+ {
        struct kvm_s390_float_interrupt *fi;
-       struct kvm_vcpu *dst_vcpu = NULL;
-       int sigcpu;
        u64 type = READ_ONCE(inti->type);
        int rc;
  
        if (rc)
                return rc;
  
-       sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
-       if (sigcpu == KVM_MAX_VCPUS) {
-               do {
-                       sigcpu = fi->next_rr_cpu++;
-                       if (sigcpu == KVM_MAX_VCPUS)
-                               sigcpu = fi->next_rr_cpu = 0;
-               } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
-       }
-       dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
-       li = &dst_vcpu->arch.local_int;
-       spin_lock(&li->lock);
-       switch (type) {
-       case KVM_S390_MCHK:
-               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
-               break;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
-               break;
-       default:
-               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-               break;
-       }
-       spin_unlock(&li->lock);
-       kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
+       __floating_irq_kick(kvm, type);
        return 0;
  }
  
  int kvm_s390_inject_vm(struct kvm *kvm,
@@@ -1606,6 -1621,9 +1621,9 @@@ void kvm_s390_clear_float_irqs(struct k
        int i;
  
        spin_lock(&fi->lock);
+       fi->pending_irqs = 0;
+       memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
+       memset(&fi->mchk, 0, sizeof(fi->mchk));
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                clear_irq_list(&fi->lists[i]);
        for (i = 0; i < FIRQ_MAX_COUNT; i++)
diff --combined arch/s390/kvm/kvm-s390.c
  #include "kvm-s390.h"
  #include "gaccess.h"
  
+ #define KMSG_COMPONENT "kvm-s390"
+ #undef pr_fmt
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  #define CREATE_TRACE_POINTS
  #include "trace.h"
  #include "trace-s390.h"
@@@ -110,7 -114,7 +114,7 @@@ struct kvm_stats_debugfs_item debugfs_e
  /* upper facilities limit for kvm */
  unsigned long kvm_s390_fac_list_mask[] = {
        0xffe6fffbfcfdfc40UL,
-       0x005c800000000000UL,
+       0x005e800000000000UL,
  };
  
  unsigned long kvm_s390_fac_list_mask_size(void)
@@@ -236,6 -240,7 +240,7 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
  {
        int r;
        unsigned long n;
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;
  
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;
  
-       memslot = id_to_memslot(kvm->memslots, log->slot);
+       slots = kvm_memslots(kvm);
+       memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;
@@@ -454,10 -460,10 +460,10 @@@ static int kvm_s390_set_tod_low(struct 
  
        mutex_lock(&kvm->lock);
        kvm->arch.epoch = gtod - host_tod;
-       kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
+       kvm_s390_vcpu_block_all(kvm);
+       kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
                cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-               exit_sie(cur_vcpu);
-       }
+       kvm_s390_vcpu_unblock_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
  }
@@@ -604,7 -610,7 +610,7 @@@ static int kvm_s390_get_machine(struct 
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
 -      mach->ibc = sclp_get_ibc();
 +      mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
@@@ -1068,7 -1074,7 +1074,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
               S390_ARCH_FAC_LIST_SIZE_BYTE);
  
        kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
 -      kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
 +      kvm->arch.model.ibc = sclp.ibc & 0x0fff;
  
        if (kvm_s390_crypto_init(kvm) < 0)
                goto out_err;
@@@ -1311,8 -1317,13 +1317,13 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
  
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
-                                                   CPUSTAT_STOPPED |
-                                                   CPUSTAT_GED);
+                                                   CPUSTAT_STOPPED);
+       if (test_kvm_facility(vcpu->kvm, 78))
+               atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+       else if (test_kvm_facility(vcpu->kvm, 8))
+               atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
        kvm_s390_vcpu_setup_model(vcpu);
  
        vcpu->arch.sie_block->ecb   = 6;
  
        vcpu->arch.sie_block->ecb2  = 8;
        vcpu->arch.sie_block->eca   = 0xC1002000U;
 -      if (sclp_has_siif())
 +      if (sclp.has_siif)
                vcpu->arch.sie_block->eca |= 1;
 -      if (sclp_has_sigpif())
 +      if (sclp.has_sigpif)
                vcpu->arch.sie_block->eca |= 0x10000000U;
        if (test_kvm_facility(vcpu->kvm, 129)) {
                vcpu->arch.sie_block->eca |= 0x00020000;
@@@ -1409,16 -1420,28 +1420,28 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
        return kvm_s390_vcpu_has_irq(vcpu, 0);
  }
  
- void s390_vcpu_block(struct kvm_vcpu *vcpu)
+ void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
  {
        atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+       exit_sie(vcpu);
  }
  
- void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+ void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
  {
        atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
  }
  
+ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+ {
+       atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+       exit_sie(vcpu);
+ }
+ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+ {
+       atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+ }
  /*
   * Kick a guest cpu out of SIE and wait until SIE is not running.
   * If the CPU is not running (e.g. waiting as idle) the function will
@@@ -1430,11 -1453,11 +1453,11 @@@ void exit_sie(struct kvm_vcpu *vcpu
                cpu_relax();
  }
  
- /* Kick a guest cpu out of SIE and prevent SIE-reentry */
- void exit_sie_sync(struct kvm_vcpu *vcpu)
+ /* Kick a guest cpu out of SIE to process a request synchronously */
+ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
  {
-       s390_vcpu_block(vcpu);
-       exit_sie(vcpu);
+       kvm_make_request(req, vcpu);
+       kvm_s390_vcpu_request(vcpu);
  }
  
  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
                /* match against both prefix pages */
                if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
-                       kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
-                       exit_sie_sync(vcpu);
+                       kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
                }
        }
  }
@@@ -1720,8 -1742,10 +1742,10 @@@ static bool ibs_enabled(struct kvm_vcp
  
  static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
  {
+       if (!vcpu->requests)
+               return 0;
  retry:
-       s390_vcpu_unblock(vcpu);
+       kvm_s390_vcpu_request_handled(vcpu);
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
         * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@@ -1993,12 -2017,14 +2017,14 @@@ static int __vcpu_run(struct kvm_vcpu *
                 * As PF_VCPU will be used in fault handler, between
                 * guest_enter and guest_exit should be no uaccess.
                 */
-               preempt_disable();
-               kvm_guest_enter();
-               preempt_enable();
+               local_irq_disable();
+               __kvm_guest_enter();
+               local_irq_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
-               kvm_guest_exit();
+               local_irq_disable();
+               __kvm_guest_exit();
+               local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
  
                rc = vcpu_post_run(vcpu, exit_reason);
@@@ -2068,7 -2094,7 +2094,7 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
                kvm_s390_vcpu_start(vcpu);
        } else if (is_vcpu_stopped(vcpu)) {
-               pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+               pr_err_ratelimited("can't run stopped vcpu %d\n",
                                   vcpu->vcpu_id);
                return -EINVAL;
        }
@@@ -2206,8 -2232,7 +2232,7 @@@ int kvm_s390_vcpu_store_adtl_status(str
  static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
  {
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
-       kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
-       exit_sie_sync(vcpu);
+       kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
  }
  
  static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
  static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
  {
        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
-       kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
-       exit_sie_sync(vcpu);
+       kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
  }
  
  void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
@@@ -2563,7 -2587,7 +2587,7 @@@ int kvm_arch_create_memslot(struct kvm 
  /* Section: memory related */
  int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
-                                  struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
  {
        /* A few sanity checks. We can have memory slots which have to be
  }
  
  void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_userspace_memory_region *mem,
                                const struct kvm_memory_slot *old,
+                               const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
  {
        int rc;
        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                mem->guest_phys_addr, mem->memory_size);
        if (rc)
-               printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+               pr_warn("failed to commit memory region\n");
        return;
  }
  
diff --combined arch/x86/include/asm/kvm_host.h
@@@ -184,23 -184,12 +184,12 @@@ struct kvm_mmu_memory_cache 
        void *objects[KVM_NR_MEM_OBJS];
  };
  
- /*
-  * kvm_mmu_page_role, below, is defined as:
-  *
-  *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
-  *   bits 4:7 - page table level for this shadow (1-4)
-  *   bits 8:9 - page table quadrant for 2-level guests
-  *   bit   16 - direct mapping of virtual to physical mapping at gfn
-  *              used for real mode and two-dimensional paging
-  *   bits 17:19 - common access permissions for all ptes in this shadow page
-  */
  union kvm_mmu_page_role {
        unsigned word;
        struct {
                unsigned level:4;
                unsigned cr4_pae:1;
                unsigned quadrant:2;
-               unsigned pad_for_nice_hex_output:6;
                unsigned direct:1;
                unsigned access:3;
                unsigned invalid:1;
                unsigned cr0_wp:1;
                unsigned smep_andnot_wp:1;
                unsigned smap_andnot_wp:1;
+               unsigned :8;
+               /*
+                * This is left at the top of the word so that
+                * kvm_memslots_for_spte_role can extract it with a
+                * simple shift.  While there is room, give it a whole
+                * byte so it is also faster to load it from memory.
+                */
+               unsigned smm:8;
        };
  };
  
@@@ -338,12 -336,28 +336,28 @@@ struct kvm_pmu 
        u64 reprogram_pmi;
  };
  
+ struct kvm_pmu_ops;
  enum {
        KVM_DEBUGREG_BP_ENABLED = 1,
        KVM_DEBUGREG_WONT_EXIT = 2,
        KVM_DEBUGREG_RELOAD = 4,
  };
  
+ struct kvm_mtrr_range {
+       u64 base;
+       u64 mask;
+       struct list_head node;
+ };
+ struct kvm_mtrr {
+       struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
+       mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+       u64 deftype;
+       struct list_head head;
+ };
  struct kvm_vcpu_arch {
        /*
         * rip and regs accesses must go through
        int32_t apic_arb_prio;
        int mp_state;
        u64 ia32_misc_enable_msr;
+       u64 smbase;
        bool tpr_access_reporting;
        u64 ia32_xss;
  
        atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
        unsigned nmi_pending; /* NMI queued after currently running handler */
        bool nmi_injected;    /* Trying to inject an NMI this entry */
+       bool smi_pending;    /* SMI queued after currently running handler */
  
-       struct mtrr_state_type mtrr_state;
+       struct kvm_mtrr mtrr_state;
        u64 pat;
  
        unsigned switch_db_regs;
@@@ -637,6 -653,8 +653,8 @@@ struct kvm_arch 
        #endif
  
        bool boot_vcpu_runs_old_kvmclock;
+       u64 disabled_quirks;
  };
  
  struct kvm_vm_stat {
@@@ -689,12 -707,13 +707,13 @@@ struct msr_data 
  
  struct kvm_lapic_irq {
        u32 vector;
-       u32 delivery_mode;
-       u32 dest_mode;
-       u32 level;
-       u32 trig_mode;
+       u16 delivery_mode;
+       u16 dest_mode;
+       bool level;
+       u16 trig_mode;
        u32 shorthand;
        u32 dest_id;
+       bool msi_redir_hint;
  };
  
  struct kvm_x86_ops {
        int (*hardware_setup)(void);               /* __init */
        void (*hardware_unsetup)(void);            /* __exit */
        bool (*cpu_has_accelerated_tpr)(void);
+       bool (*cpu_has_high_real_mode_segbase)(void);
        void (*cpuid_update)(struct kvm_vcpu *vcpu);
  
        /* Create, but do not attach this VCPU */
        struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
        void (*vcpu_free)(struct kvm_vcpu *vcpu);
-       void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+       void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
  
        void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
        void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
        void (*vcpu_put)(struct kvm_vcpu *vcpu);
  
        void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
-       int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+       int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
        int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
        u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
        void (*get_segment)(struct kvm_vcpu *vcpu,
        void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
                                           struct kvm_memory_slot *slot,
                                           gfn_t offset, unsigned long mask);
+       /* pmu operations of sub-arch */
+       const struct kvm_pmu_ops *pmu_ops;
  };
  
  struct kvm_arch_async_pf {
@@@ -871,7 -893,7 +893,7 @@@ void kvm_mmu_reset_context(struct kvm_v
  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                                      struct kvm_memory_slot *memslot);
  void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                                       struct kvm_memory_slot *memslot);
+                                  const struct kvm_memory_slot *memslot);
  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot);
  void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@@ -882,7 -904,7 +904,7 @@@ void kvm_mmu_clear_dirty_pt_masked(stru
                                   struct kvm_memory_slot *slot,
                                   gfn_t gfn_offset, unsigned long mask);
  void kvm_mmu_zap_all(struct kvm *kvm);
- void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
+ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
  
@@@ -890,7 -912,6 +912,6 @@@ int load_pdptrs(struct kvm_vcpu *vcpu, 
  
  int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                          const void *val, int bytes);
- u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
  
  struct kvm_irq_mask_notifier {
        void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
@@@ -938,7 -959,7 +959,7 @@@ static inline int emulate_instruction(s
  
  void kvm_enable_efer_bits(u64);
  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
- int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
+ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
  int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
  
  struct x86_emulate_ctxt;
@@@ -967,7 -988,7 +988,7 @@@ void kvm_lmsw(struct kvm_vcpu *vcpu, un
  void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
  int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
  
- int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
  
  unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
@@@ -1002,6 -1023,8 +1023,6 @@@ void kvm_pic_clear_all(struct kvm_pic *
  
  void kvm_inject_nmi(struct kvm_vcpu *vcpu);
  
 -int fx_init(struct kvm_vcpu *vcpu, bool init_event);
 -
  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                       const u8 *new, int bytes);
  int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
@@@ -1110,6 -1133,14 +1131,14 @@@ enum 
  #define HF_NMI_MASK           (1 << 3)
  #define HF_IRET_MASK          (1 << 4)
  #define HF_GUEST_MASK         (1 << 5) /* VCPU is in guest-mode */
+ #define HF_SMM_MASK           (1 << 6)
+ #define HF_SMM_INSIDE_NMI_MASK        (1 << 7)
+ #define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+ #define KVM_ADDRESS_SPACE_NUM 2
+ #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+ #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
  
  /*
   * Hardware virtualization extension instructions may fault if a
@@@ -1144,7 -1175,7 +1173,7 @@@ int kvm_cpu_has_injectable_intr(struct 
  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
  int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
- void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
  void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                           unsigned long address);
@@@ -1168,16 -1199,9 +1197,9 @@@ void kvm_complete_insn_gp(struct kvm_vc
  
  int kvm_is_in_guest(void);
  
- void kvm_pmu_init(struct kvm_vcpu *vcpu);
- void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
- void kvm_pmu_reset(struct kvm_vcpu *vcpu);
- void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
- bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
- int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
- int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
- int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc);
- int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
- void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
- void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+ int __x86_set_memory_region(struct kvm *kvm,
+                           const struct kvm_userspace_memory_region *mem);
+ int x86_set_memory_region(struct kvm *kvm,
+                         const struct kvm_userspace_memory_region *mem);
  
  #endif /* _ASM_X86_KVM_HOST_H */
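
A stand-alone sketch of the point made by the comment on the new smm field above:
because smm occupies the top byte of the packed kvm_mmu_page_role word, the value
that kvm_memslots_for_spte_role() needs can be recovered with a plain shift.  The
toy_page_role union below is illustrative only; the remaining role bits are
collapsed into a single field, and the layout assumes GCC's usual x86 bitfield
ordering:

    #include <assert.h>
    #include <stdio.h>

    union toy_page_role {
            unsigned word;
            struct {
                    unsigned other:16;  /* level, quadrant, access bits, ... */
                    unsigned pad:8;
                    unsigned smm:8;     /* deliberately kept in the top byte */
            };
    };

    int main(void)
    {
            union toy_page_role role = { .word = 0 };

            role.smm = 1;
            /* top-byte placement: the SMM role bit falls out of a shift */
            assert((role.word >> 24) == (unsigned)role.smm);
            printf("word=0x%08x smm=%u\n", role.word, (unsigned)role.smm);
            return 0;
    }
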
diff --combined arch/x86/kernel/kvm.c
@@@ -331,7 -331,7 +331,7 @@@ static void kvm_guest_apic_eoi_write(u3
        apic_write(APIC_EOI, APIC_EOI_ACK);
  }
  
- void kvm_guest_cpu_init(void)
+ static void kvm_guest_cpu_init(void)
  {
        if (!kvm_para_available())
                return;
@@@ -584,39 -584,6 +584,39 @@@ static void kvm_kick_cpu(int cpu
        kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
  }
  
 +
 +#ifdef CONFIG_QUEUED_SPINLOCKS
 +
 +#include <asm/qspinlock.h>
 +
 +static void kvm_wait(u8 *ptr, u8 val)
 +{
 +      unsigned long flags;
 +
 +      if (in_nmi())
 +              return;
 +
 +      local_irq_save(flags);
 +
 +      if (READ_ONCE(*ptr) != val)
 +              goto out;
 +
 +      /*
 +       * halt until it's our turn and kicked. Note that we do safe halt
 +       * for irq enabled case to avoid hang when lock info is overwritten
 +       * in irq spinlock slowpath and no spurious interrupt occur to save us.
 +       */
 +      if (arch_irqs_disabled_flags(flags))
 +              halt();
 +      else
 +              safe_halt();
 +
 +out:
 +      local_irq_restore(flags);
 +}
 +
 +#else /* !CONFIG_QUEUED_SPINLOCKS */
 +
  enum kvm_contention_stat {
        TAKEN_SLOW,
        TAKEN_SLOW_PICKUP,
@@@ -688,7 -655,7 +688,7 @@@ static inline void spin_time_accum_bloc
  static struct dentry *d_spin_debug;
  static struct dentry *d_kvm_debug;
  
- struct dentry *kvm_init_debugfs(void)
+ static struct dentry *kvm_init_debugfs(void)
  {
        d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
        if (!d_kvm_debug)
@@@ -850,8 -817,6 +850,8 @@@ static void kvm_unlock_kick(struct arch
        }
  }
  
 +#endif /* !CONFIG_QUEUED_SPINLOCKS */
 +
  /*
   * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
   */
@@@ -863,16 -828,8 +863,16 @@@ void __init kvm_spinlock_init(void
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                return;
  
 +#ifdef CONFIG_QUEUED_SPINLOCKS
 +      __pv_init_lock_hash();
 +      pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 +      pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 +      pv_lock_ops.wait = kvm_wait;
 +      pv_lock_ops.kick = kvm_kick_cpu;
 +#else /* !CONFIG_QUEUED_SPINLOCKS */
        pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
        pv_lock_ops.unlock_kick = kvm_unlock_kick;
 +#endif
  }
  
  static __init int kvm_spinlock_init_jump(void)
diff --combined arch/x86/kvm/cpuid.c
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/uaccess.h>
 -#include <asm/i387.h> /* For use_eager_fpu.  Ugh! */
 -#include <asm/fpu-internal.h> /* For use_eager_fpu.  Ugh! */
++#include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
  #include <asm/user.h>
 -#include <asm/xsave.h>
 +#include <asm/fpu/xstate.h>
  #include "cpuid.h"
  #include "lapic.h"
  #include "mmu.h"
  #include "trace.h"
+ #include "pmu.h"
  
  static u32 xstate_required_size(u64 xstate_bv, bool compacted)
  {
@@@ -95,7 -98,7 +97,7 @@@ int kvm_update_cpuid(struct kvm_vcpu *v
        if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
  
-       vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+       vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
  
        /*
         * The existing code assumes virtual address is 48-bit in the canonical
        /* Update physical-address width */
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
  
-       kvm_pmu_cpuid_update(vcpu);
+       kvm_pmu_refresh(vcpu);
        return 0;
  }
  
@@@ -413,6 -416,12 +415,12 @@@ static inline int __do_cpuid_ent(struc
                }
                break;
        }
+       case 6: /* Thermal management */
+               entry->eax = 0x4; /* allow ARAT */
+               entry->ebx = 0;
+               entry->ecx = 0;
+               entry->edx = 0;
+               break;
        case 7: {
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* Mask ebx against host capability word 9 */
                break;
        case 3: /* Processor serial number */
        case 5: /* MONITOR/MWAIT */
-       case 6: /* Thermal management */
        case 0xC0000002:
        case 0xC0000003:
        case 0xC0000004:
diff --combined arch/x86/kvm/lapic.c
@@@ -240,6 -240,15 +240,15 @@@ static inline void kvm_apic_set_ldr(str
        recalculate_apic_map(apic->vcpu->kvm);
  }
  
+ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+ {
+       u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+       apic_set_reg(apic, APIC_ID, id << 24);
+       apic_set_reg(apic, APIC_LDR, ldr);
+       recalculate_apic_map(apic->vcpu->kvm);
+ }
  static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
  {
        return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@@ -728,7 -737,7 +737,7 @@@ bool kvm_irq_delivery_to_apic_fast(stru
  
                dst = map->logical_map[cid];
  
-               if (irq->delivery_mode == APIC_DM_LOWEST) {
+               if (kvm_lowest_prio_delivery(irq)) {
                        int l = -1;
                        for_each_set_bit(i, &bitmap, 16) {
                                if (!dst[i])
@@@ -799,7 -808,9 +808,9 @@@ static int __apic_accept_irq(struct kvm
                break;
  
        case APIC_DM_SMI:
-               apic_debug("Ignoring guest SMI\n");
+               result = 1;
+               kvm_make_request(KVM_REQ_SMI, vcpu);
+               kvm_vcpu_kick(vcpu);
                break;
  
        case APIC_DM_NMI:
@@@ -914,9 -925,10 +925,10 @@@ static void apic_send_ipi(struct kvm_la
        irq.vector = icr_low & APIC_VECTOR_MASK;
        irq.delivery_mode = icr_low & APIC_MODE_MASK;
        irq.dest_mode = icr_low & APIC_DEST_MASK;
-       irq.level = icr_low & APIC_INT_ASSERT;
+       irq.level = (icr_low & APIC_INT_ASSERT) != 0;
        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
        irq.shorthand = icr_low & APIC_SHORT_MASK;
+       irq.msi_redir_hint = false;
        if (apic_x2apic_mode(apic))
                irq.dest_id = icr_high;
        else
  
        apic_debug("icr_high 0x%x, icr_low 0x%x, "
                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
-                  "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
+                  "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
+                  "msi_redir_hint 0x%x\n",
                   icr_high, icr_low, irq.shorthand, irq.dest_id,
                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
-                  irq.vector);
+                  irq.vector, irq.msi_redir_hint);
  
        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
  }
@@@ -1090,17 -1103,6 +1103,17 @@@ static void update_divide_count(struct 
                                   apic->divide_count);
  }
  
 +static void apic_update_lvtt(struct kvm_lapic *apic)
 +{
 +      u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
 +                      apic->lapic_timer.timer_mode_mask;
 +
 +      if (apic->lapic_timer.timer_mode != timer_mode) {
 +              apic->lapic_timer.timer_mode = timer_mode;
 +              hrtimer_cancel(&apic->lapic_timer.timer);
 +      }
 +}
 +
  static void apic_timer_expired(struct kvm_lapic *apic)
  {
        struct kvm_vcpu *vcpu = apic->vcpu;
@@@ -1309,7 -1311,6 +1322,7 @@@ static int apic_reg_write(struct kvm_la
                                apic_set_reg(apic, APIC_LVTT + 0x10 * i,
                                             lvt_val | APIC_LVT_MASKED);
                        }
 +                      apic_update_lvtt(apic);
                        atomic_set(&apic->lapic_timer.pending, 0);
  
                }
  
                break;
  
 -      case APIC_LVTT: {
 -              u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
 -
 -              if (apic->lapic_timer.timer_mode != timer_mode) {
 -                      apic->lapic_timer.timer_mode = timer_mode;
 -                      hrtimer_cancel(&apic->lapic_timer.timer);
 -              }
 -
 +      case APIC_LVTT:
                if (!kvm_apic_sw_enabled(apic))
                        val |= APIC_LVT_MASKED;
                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
                apic_set_reg(apic, APIC_LVTT, val);
 +              apic_update_lvtt(apic);
                break;
 -      }
  
        case APIC_TMICT:
                if (apic_lvtt_tscdeadline(apic))
@@@ -1541,9 -1549,7 +1554,7 @@@ void kvm_lapic_set_base(struct kvm_vcp
  
        if ((old_value ^ value) & X2APIC_ENABLE) {
                if (value & X2APIC_ENABLE) {
-                       u32 id = kvm_apic_id(apic);
-                       u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
-                       kvm_apic_set_ldr(apic, ldr);
+                       kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
                } else
                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
  
  }
  
- void kvm_lapic_reset(struct kvm_vcpu *vcpu)
+ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
        struct kvm_lapic *apic;
        int i;
        /* Stop the timer in case it's a reset to an active apic */
        hrtimer_cancel(&apic->lapic_timer.timer);
  
-       kvm_apic_set_id(apic, vcpu->vcpu_id);
+       if (!init_event)
+               kvm_apic_set_id(apic, vcpu->vcpu_id);
        kvm_apic_set_version(apic->vcpu);
  
        for (i = 0; i < APIC_LVT_NUM; i++)
                apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 -      apic->lapic_timer.timer_mode = 0;
 +      apic_update_lvtt(apic);
-       apic_set_reg(apic, APIC_LVT0,
-                    SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
+       if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+               apic_set_reg(apic, APIC_LVT0,
+                            SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
  
        apic_set_reg(apic, APIC_DFR, 0xffffffffU);
        apic_set_spiv(apic, 0xff);
        apic_set_reg(apic, APIC_TASKPRI, 0);
-       kvm_apic_set_ldr(apic, 0);
+       if (!apic_x2apic_mode(apic))
+               kvm_apic_set_ldr(apic, 0);
        apic_set_reg(apic, APIC_ESR, 0);
        apic_set_reg(apic, APIC_ICR, 0);
        apic_set_reg(apic, APIC_ICR2, 0);
@@@ -1717,7 -1726,7 +1731,7 @@@ int kvm_create_lapic(struct kvm_vcpu *v
                        APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
  
        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
-       kvm_lapic_reset(vcpu);
+       kvm_lapic_reset(vcpu, false);
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
  
        return 0;
@@@ -1807,7 -1816,6 +1821,7 @@@ void kvm_apic_post_state_restore(struc
  
        apic_update_ppr(apic);
        hrtimer_cancel(&apic->lapic_timer.timer);
 +      apic_update_lvtt(apic);
        update_divide_count(apic);
        start_apic_timer(apic);
        apic->irr_pending = true;
@@@ -2049,11 -2057,22 +2063,22 @@@ void kvm_apic_accept_events(struct kvm_
        if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
                return;
  
-       pe = xchg(&apic->pending_events, 0);
+       /*
+        * INITs are latched while in SMM.  Because an SMM CPU cannot
+        * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
+        * and delay processing of INIT until the next RSM.
+        */
+       if (is_smm(vcpu)) {
+               WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
+               if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
+                       clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+               return;
+       }
  
+       pe = xchg(&apic->pending_events, 0);
        if (test_bit(KVM_APIC_INIT, &pe)) {
-               kvm_lapic_reset(vcpu);
-               kvm_vcpu_reset(vcpu);
+               kvm_lapic_reset(vcpu, true);
+               kvm_vcpu_reset(vcpu, true);
                if (kvm_vcpu_is_bsp(apic->vcpu))
                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
                else
diff --combined arch/x86/kvm/vmx.c
  #include <asm/vmx.h>
  #include <asm/virtext.h>
  #include <asm/mce.h>
 -#include <asm/i387.h>
 -#include <asm/xcr.h>
 +#include <asm/fpu/internal.h>
  #include <asm/perf_event.h>
  #include <asm/debugreg.h>
  #include <asm/kexec.h>
  #include <asm/apic.h>
  
  #include "trace.h"
+ #include "pmu.h"
  
  #define __ex(x) __kvm_handle_fault_on_reboot(x)
  #define __ex_clear(x, reg) \
@@@ -785,7 -787,7 +786,7 @@@ static inline struct vmcs12 *get_vmcs12
  
  static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
  {
-       struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
+       struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
        if (is_error_page(page))
                return NULL;
  
@@@ -1882,7 -1884,7 +1883,7 @@@ static void __vmx_load_host_state(struc
         * If the FPU is not active (through the host task or
         * the guest vcpu), then restore the cr0.TS bit.
         */
 -      if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
 +      if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
                stts();
        load_gdt(this_cpu_ptr(&host_gdt));
  }
@@@ -2169,8 -2171,7 +2170,7 @@@ static void vmx_set_msr_bitmap(struct k
  
        if (is_guest_mode(vcpu))
                msr_bitmap = vmx_msr_bitmap_nested;
-       else if (irqchip_in_kernel(vcpu->kvm) &&
-               apic_x2apic_mode(vcpu->arch.apic)) {
+       else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
                if (is_long_mode(vcpu))
                        msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
                else
@@@ -2622,76 -2623,69 +2622,69 @@@ static int vmx_get_vmx_msr(struct kvm_v
   * Returns 0 on success, non-0 otherwise.
   * Assumes vcpu_load() was already called.
   */
- static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
-       u64 data;
        struct shared_msr_entry *msr;
  
-       if (!pdata) {
-               printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
-               return -EINVAL;
-       }
-       switch (msr_index) {
+       switch (msr_info->index) {
  #ifdef CONFIG_X86_64
        case MSR_FS_BASE:
-               data = vmcs_readl(GUEST_FS_BASE);
+               msr_info->data = vmcs_readl(GUEST_FS_BASE);
                break;
        case MSR_GS_BASE:
-               data = vmcs_readl(GUEST_GS_BASE);
+               msr_info->data = vmcs_readl(GUEST_GS_BASE);
                break;
        case MSR_KERNEL_GS_BASE:
                vmx_load_host_state(to_vmx(vcpu));
-               data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+               msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
                break;
  #endif
        case MSR_EFER:
-               return kvm_get_msr_common(vcpu, msr_index, pdata);
+               return kvm_get_msr_common(vcpu, msr_info);
        case MSR_IA32_TSC:
-               data = guest_read_tsc();
+               msr_info->data = guest_read_tsc();
                break;
        case MSR_IA32_SYSENTER_CS:
-               data = vmcs_read32(GUEST_SYSENTER_CS);
+               msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
                break;
        case MSR_IA32_SYSENTER_EIP:
-               data = vmcs_readl(GUEST_SYSENTER_EIP);
+               msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
                break;
        case MSR_IA32_SYSENTER_ESP:
-               data = vmcs_readl(GUEST_SYSENTER_ESP);
+               msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        case MSR_IA32_BNDCFGS:
                if (!vmx_mpx_supported())
                        return 1;
-               data = vmcs_read64(GUEST_BNDCFGS);
+               msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
        case MSR_IA32_FEATURE_CONTROL:
                if (!nested_vmx_allowed(vcpu))
                        return 1;
-               data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+               msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
                break;
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                if (!nested_vmx_allowed(vcpu))
                        return 1;
-               return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+               return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
        case MSR_IA32_XSS:
                if (!vmx_xsaves_supported())
                        return 1;
-               data = vcpu->arch.ia32_xss;
+               msr_info->data = vcpu->arch.ia32_xss;
                break;
        case MSR_TSC_AUX:
                if (!to_vmx(vcpu)->rdtscp_enabled)
                        return 1;
                /* Otherwise falls through */
        default:
-               msr = find_msr_entry(to_vmx(vcpu), msr_index);
+               msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
                if (msr) {
-                       data = msr->data;
+                       msr_info->data = msr->data;
                        break;
                }
-               return kvm_get_msr_common(vcpu, msr_index, pdata);
+               return kvm_get_msr_common(vcpu, msr_info);
        }
  
-       *pdata = data;
        return 0;
  }
  
@@@ -4122,7 -4116,7 +4115,7 @@@ static int alloc_apic_access_page(struc
        kvm_userspace_mem.flags = 0;
        kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
        kvm_userspace_mem.memory_size = PAGE_SIZE;
-       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+       r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
        if (r)
                goto out;
  
@@@ -4157,7 -4151,7 +4150,7 @@@ static int alloc_identity_pagetable(str
        kvm_userspace_mem.guest_phys_addr =
                kvm->arch.ept_identity_map_addr;
        kvm_userspace_mem.memory_size = PAGE_SIZE;
-       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+       r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
  
        return r;
  }
@@@ -4666,16 -4660,8 +4659,8 @@@ static int vmx_vcpu_setup(struct vcpu_v
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
        vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
  
-       if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-               u32 msr_low, msr_high;
-               u64 host_pat;
-               rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
-               host_pat = msr_low | ((u64) msr_high << 32);
-               /* Write the default value follow host pat */
-               vmcs_write64(GUEST_IA32_PAT, host_pat);
-               /* Keep arch.pat sync with GUEST_IA32_PAT */
-               vmx->vcpu.arch.pat = host_pat;
-       }
+       if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+               vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
  
        for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
                u32 index = vmx_msr_index[i];
        return 0;
  }
  
- static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct msr_data apic_base_msr;
+       u64 cr0;
  
        vmx->rmode.vm86_active = 0;
  
        vmx->soft_vnmi_blocked = 0;
  
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
-       kvm_set_cr8(&vmx->vcpu, 0);
-       apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
-       if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
-               apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
-       apic_base_msr.host_initiated = true;
-       kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
+       kvm_set_cr8(vcpu, 0);
+       if (!init_event) {
+               apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
+                                    MSR_IA32_APICBASE_ENABLE;
+               if (kvm_vcpu_is_reset_bsp(vcpu))
+                       apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+               apic_base_msr.host_initiated = true;
+               kvm_set_apic_base(vcpu, &apic_base_msr);
+       }
  
        vmx_segment_cache_clear(vmx);
  
        vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
        vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
  
-       vmcs_write32(GUEST_SYSENTER_CS, 0);
-       vmcs_writel(GUEST_SYSENTER_ESP, 0);
-       vmcs_writel(GUEST_SYSENTER_EIP, 0);
+       if (!init_event) {
+               vmcs_write32(GUEST_SYSENTER_CS, 0);
+               vmcs_writel(GUEST_SYSENTER_ESP, 0);
+               vmcs_writel(GUEST_SYSENTER_EIP, 0);
+               vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+       }
  
        vmcs_writel(GUEST_RFLAGS, 0x02);
        kvm_rip_write(vcpu, 0xfff0);
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
        vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
  
-       /* Special registers */
-       vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
        setup_msrs(vmx);
  
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
  
-       if (cpu_has_vmx_tpr_shadow()) {
+       if (cpu_has_vmx_tpr_shadow() && !init_event) {
                vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
-               if (vm_need_tpr_shadow(vmx->vcpu.kvm))
+               if (vm_need_tpr_shadow(vcpu->kvm))
                        vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-                                    __pa(vmx->vcpu.arch.apic->regs));
+                                    __pa(vcpu->arch.apic->regs));
                vmcs_write32(TPR_THRESHOLD, 0);
        }
  
        if (vmx->vpid != 0)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
  
-       vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-       vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
-       vmx_set_cr4(&vmx->vcpu, 0);
-       vmx_set_efer(&vmx->vcpu, 0);
-       vmx_fpu_activate(&vmx->vcpu);
-       update_exception_bitmap(&vmx->vcpu);
+       cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+       vmx_set_cr0(vcpu, cr0); /* enter rmode */
+       vmx->vcpu.arch.cr0 = cr0;
+       vmx_set_cr4(vcpu, 0);
+       if (!init_event)
+               vmx_set_efer(vcpu, 0);
+       vmx_fpu_activate(vcpu);
+       update_exception_bitmap(vcpu);
  
        vpid_sync_context(vmx);
  }
@@@ -4964,7 -4957,7 +4956,7 @@@ static int vmx_set_tss_addr(struct kvm 
                .flags = 0,
        };
  
-       ret = kvm_set_memory_region(kvm, &tss_mem);
+       ret = x86_set_memory_region(kvm, &tss_mem);
        if (ret)
                return ret;
        kvm->arch.tss_addr = addr;
@@@ -5474,19 -5467,21 +5466,21 @@@ static int handle_cpuid(struct kvm_vcp
  static int handle_rdmsr(struct kvm_vcpu *vcpu)
  {
        u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
-       u64 data;
+       struct msr_data msr_info;
  
-       if (vmx_get_msr(vcpu, ecx, &data)) {
+       msr_info.index = ecx;
+       msr_info.host_initiated = false;
+       if (vmx_get_msr(vcpu, &msr_info)) {
                trace_kvm_msr_read_ex(ecx);
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
  
-       trace_kvm_msr_read(ecx, data);
+       trace_kvm_msr_read(ecx, msr_info.data);
  
        /* FIXME: handling of bits 32:63 of rax, rdx */
-       vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
-       vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
+       vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
+       vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
        skip_emulated_instruction(vcpu);
        return 1;
  }
@@@ -5709,9 -5704,6 +5703,6 @@@ static int handle_task_switch(struct kv
                return 0;
        }
  
-       /* clear all local breakpoint enable flags */
-       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
        /*
         * TODO: What about debug traps on tss switch?
         *       Are we supposed to inject them and update dr6?
@@@ -7332,7 -7324,7 +7323,7 @@@ static bool nested_vmx_exit_handled_io(
                bitmap += (port & 0x7fff) / 8;
  
                if (last_bitmap != bitmap)
-                       if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
+                       if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
                                return true;
                if (b & (1 << (port & 7)))
                        return true;
@@@ -7376,7 -7368,7 +7367,7 @@@ static bool nested_vmx_exit_handled_msr
        /* Then read the msr_index'th bit from this bitmap: */
        if (msr_index < 1024*8) {
                unsigned char b;
-               if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
+               if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
                        return true;
                return 1 & (b >> (msr_index & 7));
        } else
@@@ -7641,9 -7633,9 +7632,9 @@@ static void vmx_disable_pml(struct vcpu
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
  }
  
- static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
+ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
  {
-       struct kvm *kvm = vmx->vcpu.kvm;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 *pml_buf;
        u16 pml_idx;
  
  
                gpa = pml_buf[pml_idx];
                WARN_ON(gpa & (PAGE_SIZE - 1));
-               mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+               kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
        }
  
        /* reset PML index */
@@@ -7690,6 -7682,158 +7681,158 @@@ static void kvm_flush_pml_buffers(struc
                kvm_vcpu_kick(vcpu);
  }
  
+ static void vmx_dump_sel(char *name, uint32_t sel)
+ {
+       pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
+              name, vmcs_read32(sel),
+              vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
+              vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
+              vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
+ }
+ static void vmx_dump_dtsel(char *name, uint32_t limit)
+ {
+       pr_err("%s                           limit=0x%08x, base=0x%016lx\n",
+              name, vmcs_read32(limit),
+              vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
+ }
+ static void dump_vmcs(void)
+ {
+       u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
+       u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
+       u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+       u32 secondary_exec_control = 0;
+       unsigned long cr4 = vmcs_readl(GUEST_CR4);
+       u64 efer = vmcs_readl(GUEST_IA32_EFER);
+       int i, n;
+       if (cpu_has_secondary_exec_ctrls())
+               secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+       pr_err("*** Guest State ***\n");
+       pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
+              vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
+              vmcs_readl(CR0_GUEST_HOST_MASK));
+       pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
+              cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
+       pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
+       if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
+           (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
+       {
+               pr_err("PDPTR0 = 0x%016lx  PDPTR1 = 0x%016lx\n",
+                      vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1));
+               pr_err("PDPTR2 = 0x%016lx  PDPTR3 = 0x%016lx\n",
+                      vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3));
+       }
+       pr_err("RSP = 0x%016lx  RIP = 0x%016lx\n",
+              vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
+       pr_err("RFLAGS=0x%08lx         DR7 = 0x%016lx\n",
+              vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
+       pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
+              vmcs_readl(GUEST_SYSENTER_ESP),
+              vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
+       vmx_dump_sel("CS:  ", GUEST_CS_SELECTOR);
+       vmx_dump_sel("DS:  ", GUEST_DS_SELECTOR);
+       vmx_dump_sel("SS:  ", GUEST_SS_SELECTOR);
+       vmx_dump_sel("ES:  ", GUEST_ES_SELECTOR);
+       vmx_dump_sel("FS:  ", GUEST_FS_SELECTOR);
+       vmx_dump_sel("GS:  ", GUEST_GS_SELECTOR);
+       vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
+       vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
+       vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
+       vmx_dump_sel("TR:  ", GUEST_TR_SELECTOR);
+       if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
+           (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
+               pr_err("EFER =     0x%016llx  PAT = 0x%016lx\n",
+                      efer, vmcs_readl(GUEST_IA32_PAT));
+       pr_err("DebugCtl = 0x%016lx  DebugExceptions = 0x%016lx\n",
+              vmcs_readl(GUEST_IA32_DEBUGCTL),
+              vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
+       if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+               pr_err("PerfGlobCtl = 0x%016lx\n",
+                      vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL));
+       if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
+               pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS));
+       pr_err("Interruptibility = %08x  ActivityState = %08x\n",
+              vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
+              vmcs_read32(GUEST_ACTIVITY_STATE));
+       if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
+               pr_err("InterruptStatus = %04x\n",
+                      vmcs_read16(GUEST_INTR_STATUS));
+       pr_err("*** Host State ***\n");
+       pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
+              vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
+       pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
+              vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
+              vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
+              vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
+              vmcs_read16(HOST_TR_SELECTOR));
+       pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
+              vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
+              vmcs_readl(HOST_TR_BASE));
+       pr_err("GDTBase=%016lx IDTBase=%016lx\n",
+              vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
+       pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
+              vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
+              vmcs_readl(HOST_CR4));
+       pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
+              vmcs_readl(HOST_IA32_SYSENTER_ESP),
+              vmcs_read32(HOST_IA32_SYSENTER_CS),
+              vmcs_readl(HOST_IA32_SYSENTER_EIP));
+       if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
+               pr_err("EFER = 0x%016lx  PAT = 0x%016lx\n",
+                      vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT));
+       if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+               pr_err("PerfGlobCtl = 0x%016lx\n",
+                      vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL));
+       pr_err("*** Control State ***\n");
+       pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
+              pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
+       pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
+       pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
+              vmcs_read32(EXCEPTION_BITMAP),
+              vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
+              vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
+       pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
+              vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
+              vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
+              vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
+       pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
+              vmcs_read32(VM_EXIT_INTR_INFO),
+              vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+              vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
+       pr_err("        reason=%08x qualification=%016lx\n",
+              vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
+       pr_err("IDTVectoring: info=%08x errcode=%08x\n",
+              vmcs_read32(IDT_VECTORING_INFO_FIELD),
+              vmcs_read32(IDT_VECTORING_ERROR_CODE));
+       pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
+       if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
+               pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+       if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
+               pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
+       if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
+               pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER));
+       n = vmcs_read32(CR3_TARGET_COUNT);
+       for (i = 0; i + 1 < n; i += 2)
+               pr_err("CR3 target%u=%016lx target%u=%016lx\n",
+                      i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
+                      i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
+       if (i < n)
+               pr_err("CR3 target%u=%016lx\n",
+                      i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
+       if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+               pr_err("PLE Gap=%08x Window=%08x\n",
+                      vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
+       if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+               pr_err("Virtual processor ID = 0x%04x\n",
+                      vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ }
  /*
   * The guest has exited.  See if we can fix it or if we need userspace
   * assistance.
@@@ -7708,7 -7852,7 +7851,7 @@@ static int vmx_handle_exit(struct kvm_v
         * flushed already.
         */
        if (enable_pml)
-               vmx_flush_pml_buffer(vmx);
+               vmx_flush_pml_buffer(vcpu);
  
        /* If guest state is invalid, start emulating */
        if (vmx->emulation_required)
        }
  
        if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+               dump_vmcs();
                vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                vcpu->run->fail_entry.hardware_entry_failure_reason
                        = exit_reason;
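
dump_vmcs() writes to the kernel log, so on a failed VM entry userspace only sees the KVM_EXIT_FAIL_ENTRY exit with the raw hardware reason. A minimal sketch of the userspace side, assuming an mmap'ed struct kvm_run for the vcpu (the helper name is illustrative):

    #include <stdio.h>
    #include <linux/kvm.h>

    /* Inspect a completed KVM_RUN.  On a failed VM entry the kernel has
     * already dumped the VMCS to the kernel log; userspace only gets the
     * raw hardware reason (the VM-exit reason with the failed-entry bit,
     * e.g. 0x80000021 for invalid guest state). */
    static int check_run(struct kvm_run *run)
    {
            if (run->exit_reason == KVM_EXIT_FAIL_ENTRY) {
                    fprintf(stderr,
                            "vmentry failed, hw reason 0x%llx (VMCS dump is in dmesg)\n",
                            (unsigned long long)
                            run->fail_entry.hardware_entry_failure_reason);
                    return -1;
            }
            return 0;
    }
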
@@@ -7995,6 -8140,11 +8139,11 @@@ static void vmx_handle_external_intr(st
                local_irq_enable();
  }
  
+ static bool vmx_has_high_real_mode_segbase(void)
+ {
+       return enable_unrestricted_guest || emulate_invalid_guest_state;
+ }
  static bool vmx_mpx_supported(void)
  {
        return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
@@@ -8479,7 -8629,8 +8628,8 @@@ static int get_ept_level(void
  
  static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
  {
-       u64 ret;
+       u8 cache;
+       u64 ipat = 0;
  
        /* For VT-d and EPT combination
         * 1. MMIO: always map as UC
         * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
         *    consistent with host MTRR
         */
-       if (is_mmio)
-               ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
-       else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
-               ret = kvm_get_guest_memory_type(vcpu, gfn) <<
-                     VMX_EPT_MT_EPTE_SHIFT;
-       else
-               ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
-                       | VMX_EPT_IPAT_BIT;
+       if (is_mmio) {
+               cache = MTRR_TYPE_UNCACHABLE;
+               goto exit;
+       }
  
-       return ret;
+       if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+               ipat = VMX_EPT_IPAT_BIT;
+               cache = MTRR_TYPE_WRBACK;
+               goto exit;
+       }
+       if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+               ipat = VMX_EPT_IPAT_BIT;
+               cache = MTRR_TYPE_UNCACHABLE;
+               goto exit;
+       }
+       cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+ exit:
+       return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
  }
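
The value returned above is just the chosen cache type shifted into bits 5:3 of the EPT PTE, optionally OR'ed with the ignore-PAT bit (bit 6). A standalone sketch of the same decision tree; the constant names are local stand-ins for VMX_EPT_MT_EPTE_SHIFT and VMX_EPT_IPAT_BIT:

    #include <stdbool.h>
    #include <stdint.h>

    #define EPT_MT_SHIFT  3            /* memory type: EPT PTE bits 5:3 */
    #define EPT_IPAT_BIT  (1ull << 6)  /* "ignore guest PAT"            */
    #define MTRR_UC       0
    #define MTRR_WB       6

    /* Mirror of the decision tree above: MMIO is always UC; without
     * noncoherent DMA force WB and ignore the guest PAT; with CR0.CD set
     * force UC; otherwise honour the guest MTRR type for the gfn. */
    static uint64_t ept_mem_type(bool is_mmio, bool noncoherent_dma,
                                 bool cr0_cd, uint8_t guest_mtrr_type)
    {
            uint64_t ipat = 0;
            uint8_t cache;

            if (is_mmio) {
                    cache = MTRR_UC;
            } else if (!noncoherent_dma) {
                    ipat = EPT_IPAT_BIT;
                    cache = MTRR_WB;
            } else if (cr0_cd) {
                    ipat = EPT_IPAT_BIT;
                    cache = MTRR_UC;
            } else {
                    cache = guest_mtrr_type;
            }
            return ((uint64_t)cache << EPT_MT_SHIFT) | ipat;
    }
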
  
  static int vmx_get_lpage_level(void)
@@@ -8923,7 -9085,7 +9084,7 @@@ static int nested_vmx_msr_check_common(
                                       struct vmx_msr_entry *e)
  {
        /* x2APIC MSR accesses are not allowed */
-       if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8)
+       if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
                return -EINVAL;
        if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
            e->index == MSR_IA32_UCODE_REV)
@@@ -8965,8 -9127,8 +9126,8 @@@ static u32 nested_vmx_load_msr(struct k
  
        msr.host_initiated = false;
        for (i = 0; i < count; i++) {
-               if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e),
-                                  &e, sizeof(e))) {
+               if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
+                                       &e, sizeof(e))) {
                        pr_warn_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
@@@ -8998,9 -9160,10 +9159,10 @@@ static int nested_vmx_store_msr(struct 
        struct vmx_msr_entry e;
  
        for (i = 0; i < count; i++) {
-               if (kvm_read_guest(vcpu->kvm,
-                                  gpa + i * sizeof(e),
-                                  &e, 2 * sizeof(u32))) {
+               struct msr_data msr_info;
+               if (kvm_vcpu_read_guest(vcpu,
+                                       gpa + i * sizeof(e),
+                                       &e, 2 * sizeof(u32))) {
                        pr_warn_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                                __func__, i, e.index, e.reserved);
                        return -EINVAL;
                }
-               if (kvm_get_msr(vcpu, e.index, &e.value)) {
+               msr_info.host_initiated = false;
+               msr_info.index = e.index;
+               if (kvm_get_msr(vcpu, &msr_info)) {
                        pr_warn_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
                                __func__, i, e.index);
                        return -EINVAL;
                }
-               if (kvm_write_guest(vcpu->kvm,
-                                   gpa + i * sizeof(e) +
-                                       offsetof(struct vmx_msr_entry, value),
-                                   &e.value, sizeof(e.value))) {
+               if (kvm_vcpu_write_guest(vcpu,
+                                        gpa + i * sizeof(e) +
+                                            offsetof(struct vmx_msr_entry, value),
+                                        &msr_info.data, sizeof(msr_info.data))) {
                        pr_warn_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
-                               __func__, i, e.index, e.value);
+                               __func__, i, e.index, msr_info.data);
                        return -EINVAL;
                }
        }
@@@ -10149,6 -10314,7 +10313,7 @@@ static struct kvm_x86_ops vmx_x86_ops 
        .hardware_enable = hardware_enable,
        .hardware_disable = hardware_disable,
        .cpu_has_accelerated_tpr = report_flexpriority,
+       .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
  
        .vcpu_create = vmx_create_vcpu,
        .vcpu_free = vmx_free_vcpu,
        .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
        .flush_log_dirty = vmx_flush_log_dirty,
        .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+       .pmu_ops = &intel_pmu_ops,
  };
  
  static int __init vmx_init(void)
diff --combined arch/x86/kvm/x86.c
@@@ -28,6 -28,7 +28,7 @@@
  #include "x86.h"
  #include "cpuid.h"
  #include "assigned-dev.h"
+ #include "pmu.h"
  
  #include <linux/clocksource.h>
  #include <linux/interrupt.h>
  #include <asm/debugreg.h>
  #include <asm/msr.h>
  #include <asm/desc.h>
- #include <asm/mtrr.h>
  #include <asm/mce.h>
 -#include <asm/i387.h>
 -#include <asm/fpu-internal.h> /* Ugh! */
 -#include <asm/xcr.h>
 +#include <linux/kernel_stat.h>
 +#include <asm/fpu/internal.h> /* Ugh! */
  #include <asm/pvclock.h>
  #include <asm/div64.h>
  
@@@ -98,6 -99,9 +98,9 @@@ module_param(ignore_msrs, bool, S_IRUG
  unsigned int min_timer_period_us = 500;
  module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
  
+ static bool __read_mostly kvmclock_periodic_sync = true;
+ module_param(kvmclock_periodic_sync, bool, S_IRUGO);
  bool kvm_has_tsc_control;
  EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
  u32  kvm_max_guest_tsc_khz;
@@@ -474,7 -478,7 +477,7 @@@ EXPORT_SYMBOL_GPL(kvm_require_dr)
  
  /*
   * This function will be used to read from the physical memory of the currently
-  * running guest. The difference to kvm_read_guest_page is that this function
+  * running guest. The difference from kvm_vcpu_read_guest_page is that this function
   * can read from guest physical or from the guest's guest physical memory.
   */
  int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
  
        real_gfn = gpa_to_gfn(real_gfn);
  
-       return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
+       return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
  }
  EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
  
@@@ -571,8 -575,7 +574,7 @@@ out
  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
  {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
-       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
-                                   X86_CR0_CD | X86_CR0_NW;
+       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
  
        cr0 |= X86_CR0_ET;
  
  
        if ((cr0 ^ old_cr0) & update_bits)
                kvm_mmu_reset_context(vcpu);
+       if ((cr0 ^ old_cr0) & X86_CR0_CD)
+               kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
        return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_set_cr0);
@@@ -907,7 -914,7 +913,7 @@@ bool kvm_rdpmc(struct kvm_vcpu *vcpu
        u64 data;
        int err;
  
-       err = kvm_pmu_read_pmc(vcpu, ecx, &data);
+       err = kvm_pmu_rdpmc(vcpu, ecx, &data);
        if (err)
                return err;
        kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
@@@ -922,17 -929,11 +928,11 @@@ EXPORT_SYMBOL_GPL(kvm_rdpmc)
   *
   * This list is modified at module load time to reflect the
   * capabilities of the host cpu. This capabilities test skips MSRs that are
-  * kvm-specific. Those are put in the beginning of the list.
+  * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+  * may depend on host virtualization features rather than host cpu features.
   */
  
- #define KVM_SAVE_MSRS_BEGIN   12
  static u32 msrs_to_save[] = {
-       MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-       MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-       HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
-       HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
-       HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-       MSR_KVM_PV_EOI_EN,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
  #ifdef CONFIG_X86_64
  
  static unsigned num_msrs_to_save;
  
- static const u32 emulated_msrs[] = {
+ static u32 emulated_msrs[] = {
+       MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
+       HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+       HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+       HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+       MSR_KVM_PV_EOI_EN,
        MSR_IA32_TSC_ADJUST,
        MSR_IA32_TSCDEADLINE,
        MSR_IA32_MISC_ENABLE,
        MSR_IA32_MCG_STATUS,
        MSR_IA32_MCG_CTL,
+       MSR_IA32_SMBASE,
  };
  
+ static unsigned num_emulated_msrs;
  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
  {
        if (efer & efer_reserved_bits)
@@@ -1045,6 -1056,21 +1055,21 @@@ EXPORT_SYMBOL_GPL(kvm_set_msr)
  /*
   * Adapt set_msr() to msr_io()'s calling convention
   */
+ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+ {
+       struct msr_data msr;
+       int r;
+       msr.index = index;
+       msr.host_initiated = true;
+       r = kvm_get_msr(vcpu, &msr);
+       if (r)
+               return r;
+       *data = msr.data;
+       return 0;
+ }
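
do_get_msr()/do_set_msr() are the kernel-side adapters behind the KVM_GET_MSRS/KVM_SET_MSRS vcpu ioctls. A minimal userspace sketch of the read side, assuming an already-created vcpu fd; the helper name and the choice of MSR_IA32_SYSENTER_CS (0x174) are purely illustrative:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define MSR_IA32_SYSENTER_CS 0x174   /* architectural MSR number */

    /* Read one MSR from a vcpu.  KVM_GET_MSRS returns the number of
     * entries it processed, so 1 means the read went through do_get_msr()
     * in the kernel. */
    static int read_msr(int vcpu_fd, uint32_t index, uint64_t *value)
    {
            struct {
                    struct kvm_msrs hdr;
                    struct kvm_msr_entry entry;
            } req = { .hdr.nmsrs = 1, .entry.index = index };

            if (ioctl(vcpu_fd, KVM_GET_MSRS, &req) != 1)
                    return -1;
            *value = req.entry.data;
            return 0;
    }
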
  static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
  {
        struct msr_data msr;
@@@ -1697,6 -1723,8 +1722,8 @@@ static int kvm_guest_time_update(struc
                vcpu->pvclock_set_guest_stopped_request = false;
        }
  
+       pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
        /* If the host uses TSC clocksource, then it is stable */
        if (use_master_clock)
                pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@@ -1767,127 -1795,14 +1794,14 @@@ static void kvmclock_sync_fn(struct wor
                                           kvmclock_sync_work);
        struct kvm *kvm = container_of(ka, struct kvm, arch);
  
+       if (!kvmclock_periodic_sync)
+               return;
        schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
        schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
                                        KVMCLOCK_SYNC_PERIOD);
  }
  
- static bool msr_mtrr_valid(unsigned msr)
- {
-       switch (msr) {
-       case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
-       case MSR_MTRRfix64K_00000:
-       case MSR_MTRRfix16K_80000:
-       case MSR_MTRRfix16K_A0000:
-       case MSR_MTRRfix4K_C0000:
-       case MSR_MTRRfix4K_C8000:
-       case MSR_MTRRfix4K_D0000:
-       case MSR_MTRRfix4K_D8000:
-       case MSR_MTRRfix4K_E0000:
-       case MSR_MTRRfix4K_E8000:
-       case MSR_MTRRfix4K_F0000:
-       case MSR_MTRRfix4K_F8000:
-       case MSR_MTRRdefType:
-       case MSR_IA32_CR_PAT:
-               return true;
-       case 0x2f8:
-               return true;
-       }
-       return false;
- }
- static bool valid_pat_type(unsigned t)
- {
-       return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
- }
- static bool valid_mtrr_type(unsigned t)
- {
-       return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
- }
- bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
- {
-       int i;
-       u64 mask;
-       if (!msr_mtrr_valid(msr))
-               return false;
-       if (msr == MSR_IA32_CR_PAT) {
-               for (i = 0; i < 8; i++)
-                       if (!valid_pat_type((data >> (i * 8)) & 0xff))
-                               return false;
-               return true;
-       } else if (msr == MSR_MTRRdefType) {
-               if (data & ~0xcff)
-                       return false;
-               return valid_mtrr_type(data & 0xff);
-       } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
-               for (i = 0; i < 8 ; i++)
-                       if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
-                               return false;
-               return true;
-       }
-       /* variable MTRRs */
-       WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
-       mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
-       if ((msr & 1) == 0) {
-               /* MTRR base */
-               if (!valid_mtrr_type(data & 0xff))
-                       return false;
-               mask |= 0xf00;
-       } else
-               /* MTRR mask */
-               mask |= 0x7ff;
-       if (data & mask) {
-               kvm_inject_gp(vcpu, 0);
-               return false;
-       }
-       return true;
- }
- EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
- static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
- {
-       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-       if (!kvm_mtrr_valid(vcpu, msr, data))
-               return 1;
-       if (msr == MSR_MTRRdefType) {
-               vcpu->arch.mtrr_state.def_type = data;
-               vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
-       } else if (msr == MSR_MTRRfix64K_00000)
-               p[0] = data;
-       else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-               p[1 + msr - MSR_MTRRfix16K_80000] = data;
-       else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-               p[3 + msr - MSR_MTRRfix4K_C0000] = data;
-       else if (msr == MSR_IA32_CR_PAT)
-               vcpu->arch.pat = data;
-       else {  /* Variable MTRRs */
-               int idx, is_mtrr_mask;
-               u64 *pt;
-               idx = (msr - 0x200) / 2;
-               is_mtrr_mask = msr - 0x200 - 2 * idx;
-               if (!is_mtrr_mask)
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
-               else
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-               *pt = data;
-       }
-       kvm_mmu_reset_context(vcpu);
-       return 0;
- }
  static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
  {
        u64 mcg_cap = vcpu->arch.mcg_cap;
@@@ -1946,7 -1861,7 +1860,7 @@@ static int xen_hvm_config(struct kvm_vc
                r = PTR_ERR(page);
                goto out;
        }
-       if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
+       if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
                goto out_free;
        r = 0;
  out_free:
@@@ -2046,13 -1961,13 +1960,13 @@@ static int set_msr_hyperv(struct kvm_vc
                        break;
                }
                gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
-               addr = gfn_to_hva(vcpu->kvm, gfn);
+               addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
                if (kvm_is_error_hva(addr))
                        return 1;
                if (__clear_user((void __user *)addr, PAGE_SIZE))
                        return 1;
                vcpu->arch.hv_vapic = data;
-               mark_page_dirty(vcpu->kvm, gfn);
+               kvm_vcpu_mark_page_dirty(vcpu, gfn);
                if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
                        return 1;
                break;
@@@ -2179,7 -2094,7 +2093,7 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                            __func__, data);
                break;
        case 0x200 ... 0x2ff:
-               return set_msr_mtrr(vcpu, msr, data);
+               return kvm_mtrr_set_msr(vcpu, msr, data);
        case MSR_IA32_APICBASE:
                return kvm_set_apic_base(vcpu, msr_info);
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
        case MSR_IA32_MISC_ENABLE:
                vcpu->arch.ia32_misc_enable_msr = data;
                break;
+       case MSR_IA32_SMBASE:
+               if (!msr_info->host_initiated)
+                       return 1;
+               vcpu->arch.smbase = data;
+               break;
        case MSR_KVM_WALL_CLOCK_NEW:
        case MSR_KVM_WALL_CLOCK:
                vcpu->kvm->arch.wall_clock = data;
                                        &vcpu->requests);
  
                        ka->boot_vcpu_runs_old_kvmclock = tmp;
+                       ka->kvmclock_offset = -get_kernel_ns();
                }
  
                vcpu->arch.time = data;
        case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
                return set_msr_mce(vcpu, msr, data);
  
-       /* Performance counters are not protected by a CPUID bit,
-        * so we should check all of them in the generic path for the sake of
-        * cross vendor migration.
-        * Writing a zero into the event select MSRs disables them,
-        * which we perfectly emulate ;-). Any other value should be at least
-        * reported, some guests depend on them.
-        */
-       case MSR_K7_EVNTSEL0:
-       case MSR_K7_EVNTSEL1:
-       case MSR_K7_EVNTSEL2:
-       case MSR_K7_EVNTSEL3:
-               if (data != 0)
-                       vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
-                                   "0x%x data 0x%llx\n", msr, data);
-               break;
-       /* at least RHEL 4 unconditionally writes to the perfctr registers,
-        * so we ignore writes to make it happy.
-        */
-       case MSR_K7_PERFCTR0:
-       case MSR_K7_PERFCTR1:
-       case MSR_K7_PERFCTR2:
-       case MSR_K7_PERFCTR3:
-               vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
-                           "0x%x data 0x%llx\n", msr, data);
-               break;
-       case MSR_P6_PERFCTR0:
-       case MSR_P6_PERFCTR1:
-               pr = true;
-       case MSR_P6_EVNTSEL0:
-       case MSR_P6_EVNTSEL1:
-               if (kvm_pmu_msr(vcpu, msr))
+       case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+       case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
+               pr = true; /* fall through */
+       case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+       case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
+               if (kvm_pmu_is_valid_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr_info);
  
                if (pr || data != 0)
        default:
                if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                        return xen_hvm_config(vcpu, data);
-               if (kvm_pmu_msr(vcpu, msr))
+               if (kvm_pmu_is_valid_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr_info);
                if (!ignore_msrs) {
                        vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
@@@ -2378,48 -2275,12 +2274,12 @@@ EXPORT_SYMBOL_GPL(kvm_set_msr_common)
   * Returns 0 on success, non-0 otherwise.
   * Assumes vcpu_load() was already called.
   */
- int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
  {
-       return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
+       return kvm_x86_ops->get_msr(vcpu, msr);
  }
  EXPORT_SYMBOL_GPL(kvm_get_msr);
  
- static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
- {
-       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-       if (!msr_mtrr_valid(msr))
-               return 1;
-       if (msr == MSR_MTRRdefType)
-               *pdata = vcpu->arch.mtrr_state.def_type +
-                        (vcpu->arch.mtrr_state.enabled << 10);
-       else if (msr == MSR_MTRRfix64K_00000)
-               *pdata = p[0];
-       else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-               *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
-       else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-               *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
-       else if (msr == MSR_IA32_CR_PAT)
-               *pdata = vcpu->arch.pat;
-       else {  /* Variable MTRRs */
-               int idx, is_mtrr_mask;
-               u64 *pt;
-               idx = (msr - 0x200) / 2;
-               is_mtrr_mask = msr - 0x200 - 2 * idx;
-               if (!is_mtrr_mask)
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
-               else
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-               *pdata = *pt;
-       }
-       return 0;
- }
  static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
  {
        u64 data;
@@@ -2517,11 -2378,11 +2377,11 @@@ static int get_msr_hyperv(struct kvm_vc
        return 0;
  }
  
- int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
        u64 data;
  
-       switch (msr) {
+       switch (msr_info->index) {
        case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_EBL_CR_POWERON:
        case MSR_IA32_DEBUGCTLMSR:
        case MSR_K8_SYSCFG:
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
-       case MSR_K7_EVNTSEL0:
-       case MSR_K7_EVNTSEL1:
-       case MSR_K7_EVNTSEL2:
-       case MSR_K7_EVNTSEL3:
-       case MSR_K7_PERFCTR0:
-       case MSR_K7_PERFCTR1:
-       case MSR_K7_PERFCTR2:
-       case MSR_K7_PERFCTR3:
        case MSR_K8_INT_PENDING_MSG:
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
        case MSR_AMD64_BU_CFG2:
-               data = 0;
+               msr_info->data = 0;
                break;
-       case MSR_P6_PERFCTR0:
-       case MSR_P6_PERFCTR1:
-       case MSR_P6_EVNTSEL0:
-       case MSR_P6_EVNTSEL1:
-               if (kvm_pmu_msr(vcpu, msr))
-                       return kvm_pmu_get_msr(vcpu, msr, pdata);
-               data = 0;
+       case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+       case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+       case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
+       case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
+               if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+                       return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+               msr_info->data = 0;
                break;
        case MSR_IA32_UCODE_REV:
-               data = 0x100000000ULL;
+               msr_info->data = 0x100000000ULL;
                break;
        case MSR_MTRRcap:
-               data = 0x500 | KVM_NR_VAR_MTRR;
-               break;
        case 0x200 ... 0x2ff:
-               return get_msr_mtrr(vcpu, msr, pdata);
+               return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
        case 0xcd: /* fsb frequency */
-               data = 3;
+               msr_info->data = 3;
                break;
                /*
                 * MSR_EBC_FREQUENCY_ID
                 * multiplying by zero otherwise.
                 */
        case MSR_EBC_FREQUENCY_ID:
-               data = 1 << 24;
+               msr_info->data = 1 << 24;
                break;
        case MSR_IA32_APICBASE:
-               data = kvm_get_apic_base(vcpu);
+               msr_info->data = kvm_get_apic_base(vcpu);
                break;
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
-               return kvm_x2apic_msr_read(vcpu, msr, pdata);
+               return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
                break;
        case MSR_IA32_TSCDEADLINE:
-               data = kvm_get_lapic_tscdeadline_msr(vcpu);
+               msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
                break;
        case MSR_IA32_TSC_ADJUST:
-               data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+               msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
                break;
        case MSR_IA32_MISC_ENABLE:
-               data = vcpu->arch.ia32_misc_enable_msr;
+               msr_info->data = vcpu->arch.ia32_misc_enable_msr;
+               break;
+       case MSR_IA32_SMBASE:
+               if (!msr_info->host_initiated)
+                       return 1;
+               msr_info->data = vcpu->arch.smbase;
                break;
        case MSR_IA32_PERF_STATUS:
                /* TSC increment by tick */
-               data = 1000ULL;
+               msr_info->data = 1000ULL;
                /* CPU multiplier */
               msr_info->data |= (((uint64_t)4ULL) << 40);
                break;
        case MSR_EFER:
-               data = vcpu->arch.efer;
+               msr_info->data = vcpu->arch.efer;
                break;
        case MSR_KVM_WALL_CLOCK:
        case MSR_KVM_WALL_CLOCK_NEW:
-               data = vcpu->kvm->arch.wall_clock;
+               msr_info->data = vcpu->kvm->arch.wall_clock;
                break;
        case MSR_KVM_SYSTEM_TIME:
        case MSR_KVM_SYSTEM_TIME_NEW:
-               data = vcpu->arch.time;
+               msr_info->data = vcpu->arch.time;
                break;
        case MSR_KVM_ASYNC_PF_EN:
-               data = vcpu->arch.apf.msr_val;
+               msr_info->data = vcpu->arch.apf.msr_val;
                break;
        case MSR_KVM_STEAL_TIME:
-               data = vcpu->arch.st.msr_val;
+               msr_info->data = vcpu->arch.st.msr_val;
                break;
        case MSR_KVM_PV_EOI_EN:
-               data = vcpu->arch.pv_eoi.msr_val;
+               msr_info->data = vcpu->arch.pv_eoi.msr_val;
                break;
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
        case MSR_IA32_MCG_CTL:
        case MSR_IA32_MCG_STATUS:
        case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
-               return get_msr_mce(vcpu, msr, pdata);
+               return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
        case MSR_K7_CLK_CTL:
                /*
                 * Provide expected ramp-up count for K7. All other
                 * type 6, model 8 and higher from exploding due to
                 * the rdmsr failing.
                 */
-               data = 0x20000000;
+               msr_info->data = 0x20000000;
                break;
        case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-               if (kvm_hv_msr_partition_wide(msr)) {
+               if (kvm_hv_msr_partition_wide(msr_info->index)) {
                        int r;
                        mutex_lock(&vcpu->kvm->lock);
-                       r = get_msr_hyperv_pw(vcpu, msr, pdata);
+                       r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
                        mutex_unlock(&vcpu->kvm->lock);
                        return r;
                } else
-                       return get_msr_hyperv(vcpu, msr, pdata);
+                       return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
                break;
        case MSR_IA32_BBL_CR_CTL3:
                /* This legacy MSR exists but isn't fully documented in current
                 * L2 cache control register 3: 64GB range, 256KB size,
                 * enabled, latency 0x1, configured
                 */
-               data = 0xbe702111;
+               msr_info->data = 0xbe702111;
                break;
        case MSR_AMD64_OSVW_ID_LENGTH:
                if (!guest_cpuid_has_osvw(vcpu))
                        return 1;
-               data = vcpu->arch.osvw.length;
+               msr_info->data = vcpu->arch.osvw.length;
                break;
        case MSR_AMD64_OSVW_STATUS:
                if (!guest_cpuid_has_osvw(vcpu))
                        return 1;
-               data = vcpu->arch.osvw.status;
+               msr_info->data = vcpu->arch.osvw.status;
                break;
        default:
-               if (kvm_pmu_msr(vcpu, msr))
-                       return kvm_pmu_get_msr(vcpu, msr, pdata);
+               if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+                       return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
                if (!ignore_msrs) {
-                       vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+                       vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
                        return 1;
                } else {
-                       vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
-                       data = 0;
+                       vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+                       msr_info->data = 0;
                }
                break;
        }
-       *pdata = data;
        return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_get_msr_common);
@@@ -2797,12 -2652,25 +2651,25 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_HYPERV_TIME:
        case KVM_CAP_IOAPIC_POLARITY_IGNORED:
        case KVM_CAP_TSC_DEADLINE_TIMER:
+       case KVM_CAP_ENABLE_CAP_VM:
+       case KVM_CAP_DISABLE_QUIRKS:
  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
        case KVM_CAP_ASSIGN_DEV_IRQ:
        case KVM_CAP_PCI_2_3:
  #endif
                r = 1;
                break;
+       case KVM_CAP_X86_SMM:
+               /* SMBASE is usually relocated above 1M on modern chipsets,
+                * and SMM handlers might indeed rely on 4G segment limits,
+                * so do not report SMM as available if real mode is
+                * emulated via vm86 mode.  Still, do not go to great lengths
+                * to keep userspace from using the feature anyway: the
+                * problematic configuration is a fringe case, reached only
+                * through specific module parameter settings.
+                */
+               r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+               break;
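
A short userspace sketch of probing the new capability on the /dev/kvm fd; it assumes uapi headers from a kernel that defines KVM_CAP_X86_SMM (4.2 or later):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int smm;

            if (kvm < 0)
                    return 1;
            /* 0 when real mode is emulated via vm86 (see the comment above),
             * 1 when SMM can be reported to the guest. */
            smm = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM);
            printf("KVM_CAP_X86_SMM: %d\n", smm);
            close(kvm);
            return 0;
    }
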
        case KVM_CAP_COALESCED_MMIO:
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
@@@ -2859,7 -2727,7 +2726,7 @@@ long kvm_arch_dev_ioctl(struct file *fi
                if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
                        goto out;
                n = msr_list.nmsrs;
-               msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
+               msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
                if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
                        goto out;
                r = -E2BIG;
                        goto out;
                if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
                                 &emulated_msrs,
-                                ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
+                                num_emulated_msrs * sizeof(u32)))
                        goto out;
                r = 0;
                break;
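
Userspace drives the matching KVM_GET_MSR_INDEX_LIST ioctl with the usual two-call pattern: the first call fails with E2BIG but reports the count, the second copies out the indices. A sketch, assuming an open /dev/kvm fd (the helper name is illustrative):

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Two-call pattern for KVM_GET_MSR_INDEX_LIST: the first call fails
     * with E2BIG but fills in nmsrs (num_msrs_to_save + num_emulated_msrs
     * after this change), the second call copies the indices, with the
     * emulated MSRs placed after the saved ones. */
    static struct kvm_msr_list *get_msr_index_list(int kvm_fd)
    {
            struct kvm_msr_list probe = { .nmsrs = 0 };
            struct kvm_msr_list *list;

            if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &probe) == 0 ||
                errno != E2BIG)
                    return NULL;
            list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
            if (!list)
                    return NULL;
            list->nmsrs = probe.nmsrs;
            if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list) < 0) {
                    free(list);
                    return NULL;
            }
            return list;
    }
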
@@@ -3015,6 -2883,13 +2882,13 @@@ static int kvm_vcpu_ioctl_nmi(struct kv
        return 0;
  }
  
+ static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
+ {
+       kvm_make_request(KVM_REQ_SMI, vcpu);
+       return 0;
+ }
  static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
                                           struct kvm_tpr_access_ctl *tac)
  {
@@@ -3120,8 -2995,15 +2994,15 @@@ static void kvm_vcpu_ioctl_x86_get_vcpu
  
        events->sipi_vector = 0; /* never valid when reporting to user space */
  
+       events->smi.smm = is_smm(vcpu);
+       events->smi.pending = vcpu->arch.smi_pending;
+       events->smi.smm_inside_nmi =
+               !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
+       events->smi.latched_init = kvm_lapic_latched_init(vcpu);
        events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
-                        | KVM_VCPUEVENT_VALID_SHADOW);
+                        | KVM_VCPUEVENT_VALID_SHADOW
+                        | KVM_VCPUEVENT_VALID_SMM);
        memset(&events->reserved, 0, sizeof(events->reserved));
  }
  
@@@ -3130,7 -3012,8 +3011,8 @@@ static int kvm_vcpu_ioctl_x86_set_vcpu_
  {
        if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
                              | KVM_VCPUEVENT_VALID_SIPI_VECTOR
-                             | KVM_VCPUEVENT_VALID_SHADOW))
+                             | KVM_VCPUEVENT_VALID_SHADOW
+                             | KVM_VCPUEVENT_VALID_SMM))
                return -EINVAL;
  
        process_nmi(vcpu);
            kvm_vcpu_has_lapic(vcpu))
                vcpu->arch.apic->sipi_vector = events->sipi_vector;
  
+       if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+               if (events->smi.smm)
+                       vcpu->arch.hflags |= HF_SMM_MASK;
+               else
+                       vcpu->arch.hflags &= ~HF_SMM_MASK;
+               vcpu->arch.smi_pending = events->smi.pending;
+               if (events->smi.smm_inside_nmi)
+                       vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+               else
+                       vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
+               if (kvm_vcpu_has_lapic(vcpu)) {
+                       if (events->smi.latched_init)
+                               set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+                       else
+                               clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+               }
+       }
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  
        return 0;
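
A hedged userspace sketch of the new SMM fields in the events interface: read the current events, clear the smi block, and write it back with KVM_VCPUEVENT_VALID_SMM set so the kernel applies it. It assumes a vcpu fd and 4.2+ uapi headers; the helper name is illustrative:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Take a vcpu out of SMM through the save/restore interface: read the
     * current events, clear the smi block, and write it back with
     * KVM_VCPUEVENT_VALID_SMM so the kernel actually applies it. */
    static int clear_smm_state(int vcpu_fd)
    {
            struct kvm_vcpu_events events;

            if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
                    return -1;
            memset(&events.smi, 0, sizeof(events.smi));
            events.flags = KVM_VCPUEVENT_VALID_SMM;
            return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }
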
@@@ -3193,8 -3094,8 +3093,8 @@@ static int kvm_vcpu_ioctl_x86_set_debug
  
  static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
  {
 -      struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
 -      u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
 +      struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
 +      u64 xstate_bv = xsave->header.xfeatures;
        u64 valid;
  
        /*
  
  static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
  {
 -      struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
 +      struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
        u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
        u64 valid;
  
        memcpy(xsave, src, XSAVE_HDR_OFFSET);
  
        /* Set XSTATE_BV and possibly XCOMP_BV.  */
 -      xsave->xsave_hdr.xstate_bv = xstate_bv;
 +      xsave->header.xfeatures = xstate_bv;
        if (cpu_has_xsaves)
 -              xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
 +              xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
  
        /*
         * Copy each region from the non-compacted offset to the
@@@ -3274,8 -3175,8 +3174,8 @@@ static void kvm_vcpu_ioctl_x86_get_xsav
                fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
                memcpy(guest_xsave->region,
 -                      &vcpu->arch.guest_fpu.state->fxsave,
 -                      sizeof(struct i387_fxsave_struct));
 +                      &vcpu->arch.guest_fpu.state.fxsave,
 +                      sizeof(struct fxregs_state));
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
                        XSTATE_FPSSE;
        }
@@@ -3299,8 -3200,8 +3199,8 @@@ static int kvm_vcpu_ioctl_x86_set_xsave
        } else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
 -              memcpy(&vcpu->arch.guest_fpu.state->fxsave,
 -                      guest_xsave->region, sizeof(struct i387_fxsave_struct));
 +              memcpy(&vcpu->arch.guest_fpu.state.fxsave,
 +                      guest_xsave->region, sizeof(struct fxregs_state));
        }
        return 0;
  }
@@@ -3414,6 -3315,10 +3314,10 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                r = kvm_vcpu_ioctl_nmi(vcpu);
                break;
        }
+       case KVM_SMI: {
+               r = kvm_vcpu_ioctl_smi(vcpu);
+               break;
+       }
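
A minimal userspace sketch of the new ioctl, assuming an existing vcpu fd; once the SMI is delivered, KVM_RUN reports SMM via run->flags & KVM_RUN_X86_SMM:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Inject a System Management Interrupt.  The request is latched and
     * delivered on the next vcpu entry; afterwards KVM_RUN reports SMM
     * via run->flags & KVM_RUN_X86_SMM. */
    static int inject_smi(int vcpu_fd)
    {
            return ioctl(vcpu_fd, KVM_SMI);
    }
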
        case KVM_SET_CPUID: {
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;
                break;
        }
        case KVM_GET_MSRS:
-               r = msr_io(vcpu, argp, kvm_get_msr, 1);
+               r = msr_io(vcpu, argp, do_get_msr, 1);
                break;
        case KVM_SET_MSRS:
                r = msr_io(vcpu, argp, do_set_msr, 0);
@@@ -3844,6 -3749,26 +3748,26 @@@ int kvm_vm_ioctl_irq_line(struct kvm *k
        return 0;
  }
  
+ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+                                  struct kvm_enable_cap *cap)
+ {
+       int r;
+       if (cap->flags)
+               return -EINVAL;
+       switch (cap->cap) {
+       case KVM_CAP_DISABLE_QUIRKS:
+               kvm->arch.disabled_quirks = cap->args[0];
+               r = 0;
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+       return r;
+ }
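
Userspace enables this per-VM via KVM_ENABLE_CAP on the VM fd. A sketch, assuming a VM fd; the quirk bit values themselves are x86-specific and come from the running kernel's asm/kvm.h, so they are passed in rather than named here:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Disable a set of legacy KVM quirks for a whole VM.  The bit
     * meanings of args[0] are x86-specific and defined in the kernel's
     * asm/kvm.h, so the caller passes them in. */
    static int disable_quirks(int vm_fd, __u64 quirk_bits)
    {
            struct kvm_enable_cap cap;

            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_DISABLE_QUIRKS;
            cap.args[0] = quirk_bits;
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }
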
  long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
  {
                r = 0;
                break;
        }
+       case KVM_ENABLE_CAP: {
+               struct kvm_enable_cap cap;
  
+               r = -EFAULT;
+               if (copy_from_user(&cap, argp, sizeof(cap)))
+                       goto out;
+               r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+               break;
+       }
        default:
                r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
        }
@@@ -4109,8 -4042,7 +4041,7 @@@ static void kvm_init_msr_list(void
        u32 dummy[2];
        unsigned i, j;
  
-       /* skip the first msrs in the list. KVM-specific */
-       for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
+       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
  
                j++;
        }
        num_msrs_to_save = j;
+       for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
+               switch (emulated_msrs[i]) {
+               case MSR_IA32_SMBASE:
+                       if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
+                               continue;
+                       break;
+               default:
+                       break;
+               }
+               if (j < i)
+                       emulated_msrs[j] = emulated_msrs[i];
+               j++;
+       }
+       num_emulated_msrs = j;
  }
  
  static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@@ -4252,8 -4200,8 +4199,8 @@@ static int kvm_read_guest_virt_helper(g
  
                if (gpa == UNMAPPED_GVA)
                        return X86EMUL_PROPAGATE_FAULT;
-               ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
-                                         offset, toread);
+               ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
+                                              offset, toread);
                if (ret < 0) {
                        r = X86EMUL_IO_NEEDED;
                        goto out;
@@@ -4286,8 -4234,8 +4233,8 @@@ static int kvm_fetch_guest_virt(struct 
        offset = addr & (PAGE_SIZE-1);
        if (WARN_ON(offset + bytes > PAGE_SIZE))
                bytes = (unsigned)PAGE_SIZE - offset;
-       ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
-                                 offset, bytes);
+       ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
+                                      offset, bytes);
        if (unlikely(ret < 0))
                return X86EMUL_IO_NEEDED;
  
@@@ -4333,7 -4281,7 +4280,7 @@@ int kvm_write_guest_virt_system(struct 
  
                if (gpa == UNMAPPED_GVA)
                        return X86EMUL_PROPAGATE_FAULT;
-               ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
+               ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
                if (ret < 0) {
                        r = X86EMUL_IO_NEEDED;
                        goto out;
@@@ -4386,7 -4334,7 +4333,7 @@@ int emulator_write_phys(struct kvm_vcp
  {
        int ret;
  
-       ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
+       ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
        if (ret < 0)
                return 0;
        kvm_mmu_pte_write(vcpu, gpa, val, bytes);
@@@ -4420,7 -4368,7 +4367,7 @@@ static int read_prepare(struct kvm_vcp
  static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
                        void *val, int bytes)
  {
-       return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
+       return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
  }
  
  static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
@@@ -4618,7 -4566,7 +4565,7 @@@ static int emulator_cmpxchg_emulated(st
        if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
                goto emul_write;
  
-       page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
        if (is_error_page(page))
                goto emul_write;
  
        if (!exchanged)
                return X86EMUL_CMPXCHG_FAILED;
  
-       mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
+       kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
        kvm_mmu_pte_write(vcpu, gpa, new, bytes);
  
        return X86EMUL_CONTINUE;
@@@ -4945,7 -4893,17 +4892,17 @@@ static void emulator_set_segment(struc
  static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
                            u32 msr_index, u64 *pdata)
  {
-       return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+       struct msr_data msr;
+       int r;
+       msr.index = msr_index;
+       msr.host_initiated = false;
+       r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
+       if (r)
+               return r;
+       *pdata = msr.data;
+       return 0;
  }
  
  static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
        return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
  }
  
+ static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       return vcpu->arch.smbase;
+ }
+ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       vcpu->arch.smbase = smbase;
+ }
  static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
                              u32 pmc)
  {
-       return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
+       return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
  }
  
  static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
                             u32 pmc, u64 *pdata)
  {
-       return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
+       return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
  }
  
  static void emulator_halt(struct x86_emulate_ctxt *ctxt)
@@@ -5044,6 -5016,8 +5015,8 @@@ static const struct x86_emulate_ops emu
        .cpl                 = emulator_get_cpl,
        .get_dr              = emulator_get_dr,
        .set_dr              = emulator_set_dr,
+       .get_smbase          = emulator_get_smbase,
+       .set_smbase          = emulator_set_smbase,
        .set_msr             = emulator_set_msr,
        .get_msr             = emulator_get_msr,
        .check_pmc           = emulator_check_pmc,
@@@ -5105,7 -5079,10 +5078,10 @@@ static void init_emulate_ctxt(struct kv
                     (cs_l && is_long_mode(vcpu))       ? X86EMUL_MODE_PROT64 :
                     cs_db                              ? X86EMUL_MODE_PROT32 :
                                                          X86EMUL_MODE_PROT16;
-       ctxt->guest_mode = is_guest_mode(vcpu);
+       BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
+       BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
+       BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+       ctxt->emul_flags = vcpu->arch.hflags;
  
        init_decode_cache(ctxt);
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@@ -5274,6 -5251,34 +5250,34 @@@ static bool retry_instruction(struct x8
  static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
  static int complete_emulated_pio(struct kvm_vcpu *vcpu);
  
+ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
+ {
+       if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
+               /* This is a good place to trace that we are exiting SMM.  */
+               trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
+               if (unlikely(vcpu->arch.smi_pending)) {
+                       kvm_make_request(KVM_REQ_SMI, vcpu);
+                       vcpu->arch.smi_pending = 0;
+               } else {
+                       /* Process a latched INIT, if any.  */
+                       kvm_make_request(KVM_REQ_EVENT, vcpu);
+               }
+       }
+       kvm_mmu_reset_context(vcpu);
+ }
+ static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
+ {
+       unsigned changed = vcpu->arch.hflags ^ emul_flags;
+       vcpu->arch.hflags = emul_flags;
+       if (changed & HF_SMM_MASK)
+               kvm_smm_changed(vcpu);
+ }
  static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
                                unsigned long *db)
  {
@@@ -5473,6 -5478,8 +5477,8 @@@ restart
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+               if (vcpu->arch.hflags != ctxt->emul_flags)
+                       kvm_set_hflags(vcpu, ctxt->emul_flags);
                kvm_rip_write(vcpu, ctxt->eip);
                if (r == EMULATE_DONE)
                        kvm_vcpu_check_singlestep(vcpu, rflags, &r);
@@@ -5951,6 -5958,7 +5957,7 @@@ static void kvm_pv_kick_cpu_op(struct k
        lapic_irq.shorthand = 0;
        lapic_irq.dest_mode = 0;
        lapic_irq.dest_id = apicid;
+       lapic_irq.msi_redir_hint = false;
  
        lapic_irq.delivery_mode = APIC_DM_REMRD;
        kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
@@@ -6038,6 -6046,7 +6045,7 @@@ static void post_kvm_run_save(struct kv
        struct kvm_run *kvm_run = vcpu->run;
  
        kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
        if (irqchip_in_kernel(vcpu->kvm))
@@@ -6161,6 -6170,233 +6169,233 @@@ static void process_nmi(struct kvm_vcp
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  }
  
+ #define put_smstate(type, buf, offset, val)                     \
+       *(type *)((buf) + (offset) - 0x7e00) = val
+ static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+ {
+       u32 flags = 0;
+       flags |= seg->g       << 23;
+       flags |= seg->db      << 22;
+       flags |= seg->l       << 21;
+       flags |= seg->avl     << 20;
+       flags |= seg->present << 15;
+       flags |= seg->dpl     << 13;
+       flags |= seg->s       << 12;
+       flags |= seg->type    << 8;
+       return flags;
+ }
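
The put_smstate() arithmetic and the flag packing are easier to check in isolation: the offsets are architectural save-area offsets inside the 0x7e00..0x7fff window of a 512-byte buffer, and the packed flags follow the descriptor attribute layout, with ">> 8" giving the 16-bit form used by the 64-bit save area. A standalone sketch; the struct and constants are local stand-ins, not kernel types:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Same mapping as put_smstate(): an architectural offset in the
     * 0x7e00..0x7fff window indexes a 512-byte buffer that is later
     * written to guest memory at smbase + 0xfe00. */
    #define SMSTATE(type, buf, off) (*(type *)((buf) + (off) - 0x7e00))

    struct seg {
            uint8_t type, s, dpl, present, avl, l, db, g;
    };

    /* Mirrors process_smi_get_segment_flags(): bits 8-15 carry the access
     * byte, bits 20-23 the AVL/L/DB/G attributes, i.e. the usual
     * descriptor layout with the limit bits left out. */
    static uint32_t seg_flags(const struct seg *s)
    {
            return s->g << 23 | s->db << 22 | s->l << 21 | s->avl << 20 |
                   s->present << 15 | s->dpl << 13 | s->s << 12 |
                   s->type << 8;
    }

    int main(void)
    {
            char buf[512];
            struct seg cs = { .type = 0x3, .s = 1, .present = 1, .g = 1 };

            memset(buf, 0, sizeof(buf));
            /* CR0 lives at architectural offset 0x7ffc in the 32-bit save
             * area, i.e. the last dword of the buffer. */
            SMSTATE(uint32_t, buf, 0x7ffc) = 0x60000010;
            assert(*(uint32_t *)(buf + 0x1fc) == 0x60000010);
            /* ">> 8" is the 16-bit attribute form stored by the 64-bit
             * save code above. */
            assert((seg_flags(&cs) >> 8) == 0x8093);
            return 0;
    }
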
+ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+ {
+       struct kvm_segment seg;
+       int offset;
+       kvm_get_segment(vcpu, &seg, n);
+       put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+       if (n < 3)
+               offset = 0x7f84 + n * 12;
+       else
+               offset = 0x7f2c + (n - 3) * 12;
+       put_smstate(u32, buf, offset + 8, seg.base);
+       put_smstate(u32, buf, offset + 4, seg.limit);
+       put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+ }
+ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+ {
+       struct kvm_segment seg;
+       int offset;
+       u16 flags;
+       kvm_get_segment(vcpu, &seg, n);
+       offset = 0x7e00 + n * 16;
+       flags = process_smi_get_segment_flags(&seg) >> 8;
+       put_smstate(u16, buf, offset, seg.selector);
+       put_smstate(u16, buf, offset + 2, flags);
+       put_smstate(u32, buf, offset + 4, seg.limit);
+       put_smstate(u64, buf, offset + 8, seg.base);
+ }
+ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+ {
+       struct desc_ptr dt;
+       struct kvm_segment seg;
+       unsigned long val;
+       int i;
+       put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+       put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+       put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+       put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+       for (i = 0; i < 8; i++)
+               put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
+       kvm_get_dr(vcpu, 6, &val);
+       put_smstate(u32, buf, 0x7fcc, (u32)val);
+       kvm_get_dr(vcpu, 7, &val);
+       put_smstate(u32, buf, 0x7fc8, (u32)val);
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+       put_smstate(u32, buf, 0x7fc4, seg.selector);
+       put_smstate(u32, buf, 0x7f64, seg.base);
+       put_smstate(u32, buf, 0x7f60, seg.limit);
+       put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+       put_smstate(u32, buf, 0x7fc0, seg.selector);
+       put_smstate(u32, buf, 0x7f80, seg.base);
+       put_smstate(u32, buf, 0x7f7c, seg.limit);
+       put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7f74, dt.address);
+       put_smstate(u32, buf, 0x7f70, dt.size);
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7f58, dt.address);
+       put_smstate(u32, buf, 0x7f54, dt.size);
+       for (i = 0; i < 6; i++)
+               process_smi_save_seg_32(vcpu, buf, i);
+       put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+       /* revision id */
+       put_smstate(u32, buf, 0x7efc, 0x00020000);
+       put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ }
+ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+ {
+ #ifdef CONFIG_X86_64
+       struct desc_ptr dt;
+       struct kvm_segment seg;
+       unsigned long val;
+       int i;
+       for (i = 0; i < 16; i++)
+               put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
+       put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+       put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+       kvm_get_dr(vcpu, 6, &val);
+       put_smstate(u64, buf, 0x7f68, val);
+       kvm_get_dr(vcpu, 7, &val);
+       put_smstate(u64, buf, 0x7f60, val);
+       put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+       put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+       put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+       put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+       /* revision id */
+       put_smstate(u32, buf, 0x7efc, 0x00020064);
+       put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+       put_smstate(u16, buf, 0x7e90, seg.selector);
+       put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u32, buf, 0x7e94, seg.limit);
+       put_smstate(u64, buf, 0x7e98, seg.base);
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7e84, dt.size);
+       put_smstate(u64, buf, 0x7e88, dt.address);
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+       put_smstate(u16, buf, 0x7e70, seg.selector);
+       put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u32, buf, 0x7e74, seg.limit);
+       put_smstate(u64, buf, 0x7e78, seg.base);
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7e64, dt.size);
+       put_smstate(u64, buf, 0x7e68, dt.address);
+       for (i = 0; i < 6; i++)
+               process_smi_save_seg_64(vcpu, buf, i);
+ #else
+       WARN_ON_ONCE(1);
+ #endif
+ }
+
+ static void process_smi(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_segment cs, ds;
+       char buf[512];
+       u32 cr0;
+
+       if (is_smm(vcpu)) {
+               vcpu->arch.smi_pending = true;
+               return;
+       }
+       trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
+       vcpu->arch.hflags |= HF_SMM_MASK;
+       memset(buf, 0, 512);
+       if (guest_cpuid_has_longmode(vcpu))
+               process_smi_save_state_64(vcpu, buf);
+       else
+               process_smi_save_state_32(vcpu, buf);
+       kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+       if (kvm_x86_ops->get_nmi_mask(vcpu))
+               vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+       else
+               kvm_x86_ops->set_nmi_mask(vcpu, true);
+       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+       kvm_rip_write(vcpu, 0x8000);
+       cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+       kvm_x86_ops->set_cr0(vcpu, cr0);
+       vcpu->arch.cr0 = cr0;
+       kvm_x86_ops->set_cr4(vcpu, 0);
+       __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+       cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+       cs.base = vcpu->arch.smbase;
+       ds.selector = 0;
+       ds.base = 0;
+       cs.limit    = ds.limit = 0xffffffff;
+       cs.type     = ds.type = 0x3;
+       cs.dpl      = ds.dpl = 0;
+       cs.db       = ds.db = 0;
+       cs.s        = ds.s = 1;
+       cs.l        = ds.l = 0;
+       cs.g        = ds.g = 1;
+       cs.avl      = ds.avl = 0;
+       cs.present  = ds.present = 1;
+       cs.unusable = ds.unusable = 0;
+       cs.padding  = ds.padding = 0;
+       kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+       if (guest_cpuid_has_longmode(vcpu))
+               kvm_x86_ops->set_efer(vcpu, 0);
+       kvm_update_cpuid(vcpu);
+       kvm_mmu_reset_context(vcpu);
+ }
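All of the 0x7exx/0x7fxx offsets above follow the SDM convention of locating state-save fields relative to SMBASE + 0x8000, while process_smi() only builds the top 512 bytes of SMRAM in buf and writes them to smbase + 0xfe00. The put_smstate() helper that bridges the two is defined earlier in x86.c and is not visible in this hunk; a minimal sketch of what it presumably does, with the rebase constant being an assumption:

/* Presumed shape of the helper used above: store a value of the given type
 * at an SDM-style state-save offset, rebased into the 512-byte buffer whose
 * first byte corresponds to offset 0x7e00 (guest physical smbase + 0xfe00). */
#define put_smstate(type, buf, offset, val) \
	(*(type *)((buf) + (offset) - 0x7e00) = (val))

With the default smbase of 0x30000 (restored by kvm_vcpu_reset() further down in this diff), the entry state set up above lands the SMI handler at 0x38000: CS base 0x30000 plus RIP 0x8000.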
  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
  {
        u64 eoi_exit_bitmap[4];
@@@ -6269,12 -6505,14 +6504,14 @@@ static int vcpu_enter_guest(struct kvm_
                }
                if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
                        record_steal_time(vcpu);
+               if (kvm_check_request(KVM_REQ_SMI, vcpu))
+                       process_smi(vcpu);
                if (kvm_check_request(KVM_REQ_NMI, vcpu))
                        process_nmi(vcpu);
                if (kvm_check_request(KVM_REQ_PMU, vcpu))
-                       kvm_handle_pmu_event(vcpu);
+                       kvm_pmu_handle_event(vcpu);
                if (kvm_check_request(KVM_REQ_PMI, vcpu))
-                       kvm_deliver_pmi(vcpu);
+                       kvm_pmu_deliver_pmi(vcpu);
                if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
                        vcpu_scan_ioapic(vcpu);
                if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
  
-       kvm_guest_enter();
+       __kvm_guest_enter();
  
        if (unlikely(vcpu->arch.switch_db_regs)) {
                set_debugreg(0, 7);
@@@ -6596,11 -6834,11 +6833,11 @@@ static int complete_emulated_mmio(struc
  
  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
  {
 +      struct fpu *fpu = &current->thread.fpu;
        int r;
        sigset_t sigsaved;
  
 -      if (!tsk_used_math(current) && init_fpu(current))
 -              return -ENOMEM;
 +      fpu__activate_curr(fpu);
  
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@@ -6970,8 -7208,8 +7207,8 @@@ int kvm_arch_vcpu_ioctl_translate(struc
  
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct i387_fxsave_struct *fxsave =
 -                      &vcpu->arch.guest_fpu.state->fxsave;
 +      struct fxregs_state *fxsave =
 +                      &vcpu->arch.guest_fpu.state.fxsave;
  
        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
  
  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct i387_fxsave_struct *fxsave =
 -                      &vcpu->arch.guest_fpu.state->fxsave;
 +      struct fxregs_state *fxsave =
 +                      &vcpu->arch.guest_fpu.state.fxsave;
  
        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
        return 0;
  }
  
 -int fx_init(struct kvm_vcpu *vcpu, bool init_event)
 +static void fx_init(struct kvm_vcpu *vcpu)
  {
 -      int err;
 -
 -      err = fpu_alloc(&vcpu->arch.guest_fpu);
 -      if (err)
 -              return err;
 -
 -      if (!init_event)
 -              fpu_finit(&vcpu->arch.guest_fpu);
 -
 +      fpstate_init(&vcpu->arch.guest_fpu.state);
        if (cpu_has_xsaves)
 -              vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
 +              vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
                        host_xcr0 | XSTATE_COMPACTION_ENABLED;
  
        /*
        vcpu->arch.xcr0 = XSTATE_FP;
  
        vcpu->arch.cr0 |= X86_CR0_ET;
 -
 -      return 0;
 -}
 -EXPORT_SYMBOL_GPL(fx_init);
 -
 -static void fx_free(struct kvm_vcpu *vcpu)
 -{
 -      fpu_free(&vcpu->arch.guest_fpu);
  }
  
  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
        kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        __kernel_fpu_begin();
 -      fpu_restore_checking(&vcpu->arch.guest_fpu);
 +      __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
        trace_kvm_fpu(1);
  }
  
@@@ -7038,16 -7292,25 +7275,25 @@@ void kvm_put_guest_fpu(struct kvm_vcpu 
  {
        kvm_put_guest_xcr0(vcpu);
  
-       if (!vcpu->guest_fpu_loaded)
+       if (!vcpu->guest_fpu_loaded) {
+               vcpu->fpu_counter = 0;
                return;
+       }
  
        vcpu->guest_fpu_loaded = 0;
 -      fpu_save_init(&vcpu->arch.guest_fpu);
 +      copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
        __kernel_fpu_end();
        ++vcpu->stat.fpu_reload;
-       if (!vcpu->arch.eager_fpu)
-               kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+       /*
+        * If using eager FPU mode, or if the guest is a frequent user
+        * of the FPU, just leave the FPU active for next time.
+        * fpu_counter wraps back to 0 after 255 increments, so a guest
+        * that uses the FPU only in occasional bursts will eventually
+        * revert to loading it on demand.
+        */
+       if (!vcpu->arch.eager_fpu) {
+               if (++vcpu->fpu_counter < 5)
+                       kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+       }
        trace_kvm_fpu(0);
  }
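The hunk above replaces unconditional lazy deactivation with a small hysteresis: the FPU is deactivated between exits only until the vCPU has reloaded it five times in a row, after which it stays resident, and an idle exit resets the count. The fragment below is purely an illustration of that policy in isolation; the names and standalone structure are invented for the example and are not KVM code.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the per-vCPU fields used above. */
struct lazy_fpu_policy {
	uint8_t counter;	/* like vcpu->fpu_counter: wraps after 255 */
	bool    eager;		/* like vcpu->arch.eager_fpu */
};

/* Returns true if the FPU should be deactivated after this guest exit. */
static bool lazy_fpu_should_deactivate(struct lazy_fpu_policy *p,
				       bool fpu_was_loaded)
{
	if (!fpu_was_loaded) {
		p->counter = 0;		/* idle exit: restart the burst count */
		return false;
	}
	if (p->eager)
		return false;		/* eager mode keeps the FPU resident */
	/* Deactivate only until five consecutive reloads have been seen. */
	return ++p->counter < 5;
}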
  
@@@ -7056,6 -7319,7 +7302,6 @@@ void kvm_arch_vcpu_free(struct kvm_vcp
        kvmclock_reset(vcpu);
  
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 -      fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
  }
  
@@@ -7083,14 -7347,13 +7329,13 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
  {
        int r;
  
-       vcpu->arch.mtrr_state.have_fixed = 1;
+       kvm_vcpu_mtrr_init(vcpu);
        r = vcpu_load(vcpu);
        if (r)
                return r;
-       kvm_vcpu_reset(vcpu);
+       kvm_vcpu_reset(vcpu, false);
        kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
        return r;
  }
  
@@@ -7107,6 -7370,9 +7352,9 @@@ void kvm_arch_vcpu_postcreate(struct kv
        kvm_write_tsc(vcpu, &msr);
        vcpu_put(vcpu);
  
+       if (!kvmclock_periodic_sync)
+               return;
        schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
                                        KVMCLOCK_SYNC_PERIOD);
  }
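The early return added above consults kvmclock_periodic_sync, a knob introduced elsewhere in this series; its definition is not part of this hunk. It is presumably a read-mostly module parameter along the following lines, with the default value and permission bits being assumptions:

/* Presumed declaration near the top of arch/x86/kvm/x86.c (not shown here). */
static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO);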
@@@ -7121,11 -7387,14 +7369,13 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
  
 -      fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
  }
  
- void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
+       vcpu->arch.hflags = 0;
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
        kvm_async_pf_hash_reset(vcpu);
        vcpu->arch.apf.halted = false;
  
-       kvm_pmu_reset(vcpu);
+       if (!init_event) {
+               kvm_pmu_reset(vcpu);
+               vcpu->arch.smbase = 0x30000;
+       }
  
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
        vcpu->arch.regs_avail = ~0;
        vcpu->arch.regs_dirty = ~0;
  
-       kvm_x86_ops->vcpu_reset(vcpu);
+       kvm_x86_ops->vcpu_reset(vcpu, init_event);
  }
  
  void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@@ -7346,7 -7618,9 +7599,7 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
                goto fail_free_mce_banks;
        }
  
 -      r = fx_init(vcpu, false);
 -      if (r)
 -              goto fail_free_wbinvd_dirty_mask;
 +      fx_init(vcpu);
  
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        vcpu->arch.pv_time_enabled = false;
  
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
  
+       vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
  
        return 0;
 -fail_free_wbinvd_dirty_mask:
 -      free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 +
  fail_free_mce_banks:
        kfree(vcpu->arch.mce_banks);
  fail_free_lapic:
@@@ -7462,6 -7739,40 +7717,40 @@@ void kvm_arch_sync_events(struct kvm *k
        kvm_free_pit(kvm);
  }
  
+ int __x86_set_memory_region(struct kvm *kvm,
+                           const struct kvm_userspace_memory_region *mem)
+ {
+       int i, r;
+
+       /* Called with kvm->slots_lock held.  */
+       BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               struct kvm_userspace_memory_region m = *mem;
+
+               m.slot |= i << 16;
+               r = __kvm_set_memory_region(kvm, &m);
+               if (r < 0)
+                       return r;
+       }
+
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
+
+ int x86_set_memory_region(struct kvm *kvm,
+                         const struct kvm_userspace_memory_region *mem)
+ {
+       int r;
+
+       mutex_lock(&kvm->slots_lock);
+       r = __x86_set_memory_region(kvm, mem);
+       mutex_unlock(&kvm->slots_lock);
+       return r;
+ }
+ EXPORT_SYMBOL_GPL(x86_set_memory_region);
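__x86_set_memory_region() is where the new multiple-address-space plumbing meets x86's internal memslots: the address-space index is folded into bits 16 and up of the slot number, so the same region is installed once per address space (the SMM work elsewhere in this merge gives x86 a second one). A hedged usage sketch follows; the function name, slot id and guest physical address are placeholders, and .userspace_addr is omitted, so a real caller would map host memory first. Passing memory_size == 0, as kvm_arch_destroy_vm() does just below, deletes the slot instead.

/* Illustrative caller, not taken from this diff: install a one-page internal
 * memslot in every address space.  slot_id and gpa are placeholders. */
static int install_internal_slot(struct kvm *kvm, u32 slot_id, gpa_t gpa)
{
	struct kvm_userspace_memory_region mem = {
		.slot            = slot_id,	/* low 16 bits: slot number */
		.flags           = 0,
		.guest_phys_addr = gpa,
		.memory_size     = PAGE_SIZE,	/* 0 would delete the slot */
	};

	/* Takes kvm->slots_lock; __x86_set_memory_region() then replicates
	 * the region with m.slot |= i << 16 for each address space i. */
	return x86_set_memory_region(kvm, &mem);
}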
  void kvm_arch_destroy_vm(struct kvm *kvm)
  {
        if (current->mm == kvm->mm) {
                struct kvm_userspace_memory_region mem;
                memset(&mem, 0, sizeof(mem));
                mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
-               kvm_set_memory_region(kvm, &mem);
+               x86_set_memory_region(kvm, &mem);
  
                mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-               kvm_set_memory_region(kvm, &mem);
+               x86_set_memory_region(kvm, &mem);
  
                mem.slot = TSS_PRIVATE_MEMSLOT;
-               kvm_set_memory_region(kvm, &mem);
+               x86_set_memory_region(kvm, &mem);
        }
        kvm_iommu_unmap_guest(kvm);
        kfree(kvm->arch.vpic);
@@@ -7568,18 -7879,18 +7857,18 @@@ out_free
        return -ENOMEM;
  }
  
- void kvm_arch_memslots_updated(struct kvm *kvm)
+ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
  {
        /*
         * memslots->generation has been incremented.
         * mmio generation may have reached its maximum value.
         */
-       kvm_mmu_invalidate_mmio_sptes(kvm);
+       kvm_mmu_invalidate_mmio_sptes(kvm, slots);
  }
  
  int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                struct kvm_memory_slot *memslot,
-                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_userspace_memory_region *mem,
                                enum kvm_mr_change change)
  {
        /*
@@@ -7657,14 -7968,14 +7946,14 @@@ static void kvm_mmu_slot_apply_flags(st
  }
  
  void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_userspace_memory_region *mem,
                                const struct kvm_memory_slot *old,
+                               const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
  {
-       struct kvm_memory_slot *new;
        int nr_mmu_pages = 0;
  
-       if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
+       if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
                int ret;
  
                ret = vm_munmap(old->userspace_addr,
        if (nr_mmu_pages)
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
  
-       /* It's OK to get 'new' slot here as it has already been installed */
-       new = id_to_memslot(kvm->memslots, mem->slot);
        /*
         * Dirty logging tracks sptes in 4k granularity, meaning that large
         * sptes have to be split.  If live migration is successful, the guest
         * been zapped so no dirty logging stuff is needed for the old slot. For
         * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
         * new and it's also covered when dealing with the new slot.
+        *
+        * FIXME: const-ify all uses of struct kvm_memory_slot.
         */
        if (change != KVM_MR_DELETE)
-               kvm_mmu_slot_apply_flags(kvm, new);
+               kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
  }
  
  void kvm_arch_flush_shadow_all(struct kvm *kvm)