Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2015 16:36:49 +0000 (09:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2015 16:36:49 +0000 (09:36 -0700)
Pull first batch of KVM updates from Paolo Bonzini:
 "The bulk of the changes here is for x86.  And for once it's not for
  silicon that no one owns: these are really new features for everyone.

  Details:

   - ARM:
        several features are in progress but missed the 4.2 deadline.
        So here is just a smattering of bug fixes, plus enabling the
        VFIO integration.

   - s390:
        Some fixes/refactorings/optimizations, plus support for 2GB
        pages.

   - x86:
        * host and guest support for marking kvmclock as a stable
          scheduler clock.
        * support for write combining.
        * support for system management mode, needed for secure boot in
          guests.
        * a bunch of cleanups required for the above
        * support for virtualized performance counters on AMD
        * legacy PCI device assignment is deprecated and defaults to "n"
          in Kconfig; VFIO replaces it

        On top of this there are also bug fixes and eager FPU context
        loading for FPU-heavy guests.

   - Common code:
        Support for multiple address spaces; for now it is used only for
        x86 SMM but the s390 folks also have plans"
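
A minimal stand-alone sketch of the "multiple address spaces" item above, assuming
only the HF_SMM_MASK and kvm_arch_vcpu_memslots_id() definitions that appear in the
arch/x86/include/asm/kvm_host.h hunk further down; the fake_vcpu struct and main()
are illustrative user-space code, not part of the patch:

    /* sketch: a vCPU in SMM is routed to the second of the two memslot sets */
    #include <stdio.h>

    #define HF_SMM_MASK           (1 << 6)  /* hflags bit set while in SMM */
    #define KVM_ADDRESS_SPACE_NUM 2         /* 0 = normal view, 1 = SMM view */

    struct fake_vcpu { unsigned hflags; };

    /* mirrors kvm_arch_vcpu_memslots_id(): SMM vCPUs use memslot set 1 */
    static int memslots_id(const struct fake_vcpu *vcpu)
    {
            return (vcpu->hflags & HF_SMM_MASK) ? 1 : 0;
    }

    int main(void)
    {
            struct fake_vcpu normal = { .hflags = 0 };
            struct fake_vcpu smm    = { .hflags = HF_SMM_MASK };

            printf("normal vCPU -> address space %d/%d\n",
                   memslots_id(&normal), KVM_ADDRESS_SPACE_NUM);
            printf("SMM vCPU    -> address space %d/%d\n",
                   memslots_id(&smm), KVM_ADDRESS_SPACE_NUM);
            return 0;
    }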

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  KVM: s390: clear floating interrupt bitmap and parameters
  KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
  KVM: x86/vPMU: Implement AMD vPMU code for KVM
  KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
  KVM: x86/vPMU: introduce kvm_pmu_msr_idx_to_pmc
  KVM: x86/vPMU: reorder PMU functions
  KVM: x86/vPMU: whitespace and stylistic adjustments in PMU code
  KVM: x86/vPMU: use the new macros to go between PMC, PMU and VCPU
  KVM: x86/vPMU: introduce pmu.h header
  KVM: x86/vPMU: rename a few PMU functions
  KVM: MTRR: do not map huge page for non-consistent range
  KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type
  KVM: MTRR: introduce mtrr_for_each_mem_type
  KVM: MTRR: introduce fixed_mtrr_addr_* functions
  KVM: MTRR: sort variable MTRRs
  KVM: MTRR: introduce var_mtrr_range
  KVM: MTRR: introduce fixed_mtrr_segment table
  KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  KVM: MTRR: do not split 64 bits MSR content
  KVM: MTRR: clean up mtrr default type
  ...

arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/kvm.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

diff --combined arch/s390/kvm/interrupt.c
@@@ -134,6 -134,8 +134,8 @@@ static unsigned long deliverable_irqs(s
  
        active_mask = pending_local_irqs(vcpu);
        active_mask |= pending_floating_irqs(vcpu);
+       if (!active_mask)
+               return 0;
  
        if (psw_extint_disabled(vcpu))
                active_mask &= ~IRQ_PEND_EXT_MASK;
@@@ -799,7 -801,7 +801,7 @@@ int kvm_s390_ext_call_pending(struct kv
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
  
 -      if (!sclp_has_sigpif())
 +      if (!sclp.has_sigpif)
                return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
  
        return (sigp_ctrl & SIGP_CTRL_C) &&
@@@ -941,12 -943,9 +943,9 @@@ int __must_check kvm_s390_deliver_pendi
        if (cpu_timer_irq_pending(vcpu))
                set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
  
-       do {
-               irqs = deliverable_irqs(vcpu);
+       while ((irqs = deliverable_irqs(vcpu)) && !rc) {
                /* bits are in the order of interrupt priority */
                irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
-               if (irq_type == IRQ_PEND_COUNT)
-                       break;
                if (is_ioirq(irq_type)) {
                        rc = __deliver_io(vcpu, irq_type);
                } else {
                        }
                        rc = func(vcpu);
                }
-               if (rc)
-                       break;
-       } while (!rc);
+       }
  
        set_intercept_indicators(vcpu);
  
@@@ -1058,10 -1055,10 +1055,10 @@@ static int __inject_extcall(struct kvm_
            kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
                return -EINVAL;
  
 -      if (sclp_has_sigpif())
 +      if (sclp.has_sigpif)
                return __inject_extcall_sigpif(vcpu, src_id);
  
-       if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+       if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
                return -EBUSY;
        *extcall = irq->u.extcall;
        atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@@ -1340,12 -1337,54 +1337,54 @@@ static int __inject_io(struct kvm *kvm
        return 0;
  }
  
- static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+ /*
+  * Find a destination VCPU for a floating irq and kick it.
+  */
+ static void __floating_irq_kick(struct kvm *kvm, u64 type)
  {
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
        struct kvm_s390_local_interrupt *li;
+       struct kvm_vcpu *dst_vcpu;
+       int sigcpu, online_vcpus, nr_tries = 0;
+       online_vcpus = atomic_read(&kvm->online_vcpus);
+       if (!online_vcpus)
+               return;
+       /* find idle VCPUs first, then round robin */
+       sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+       if (sigcpu == online_vcpus) {
+               do {
+                       sigcpu = fi->next_rr_cpu;
+                       fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+                       /* avoid endless loops if all vcpus are stopped */
+                       if (nr_tries++ >= online_vcpus)
+                               return;
+               } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+       }
+       dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+       /* make the VCPU drop out of the SIE, or wake it up if sleeping */
+       li = &dst_vcpu->arch.local_int;
+       spin_lock(&li->lock);
+       switch (type) {
+       case KVM_S390_MCHK:
+               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+               break;
+       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+               break;
+       default:
+               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+               break;
+       }
+       spin_unlock(&li->lock);
+       kvm_s390_vcpu_wakeup(dst_vcpu);
+ }
+ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+ {
        struct kvm_s390_float_interrupt *fi;
-       struct kvm_vcpu *dst_vcpu = NULL;
-       int sigcpu;
        u64 type = READ_ONCE(inti->type);
        int rc;
  
        if (rc)
                return rc;
  
-       sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
-       if (sigcpu == KVM_MAX_VCPUS) {
-               do {
-                       sigcpu = fi->next_rr_cpu++;
-                       if (sigcpu == KVM_MAX_VCPUS)
-                               sigcpu = fi->next_rr_cpu = 0;
-               } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
-       }
-       dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
-       li = &dst_vcpu->arch.local_int;
-       spin_lock(&li->lock);
-       switch (type) {
-       case KVM_S390_MCHK:
-               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
-               break;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
-               break;
-       default:
-               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-               break;
-       }
-       spin_unlock(&li->lock);
-       kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
+       __floating_irq_kick(kvm, type);
        return 0;
  }
  
  int kvm_s390_inject_vm(struct kvm *kvm,
@@@ -1606,6 -1621,9 +1621,9 @@@ void kvm_s390_clear_float_irqs(struct k
        int i;
  
        spin_lock(&fi->lock);
+       fi->pending_irqs = 0;
+       memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
+       memset(&fi->mchk, 0, sizeof(fi->mchk));
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                clear_irq_list(&fi->lists[i]);
        for (i = 0; i < FIRQ_MAX_COUNT; i++)
diff --combined arch/s390/kvm/kvm-s390.c
  #include "kvm-s390.h"
  #include "gaccess.h"
  
+ #define KMSG_COMPONENT "kvm-s390"
+ #undef pr_fmt
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  #define CREATE_TRACE_POINTS
  #include "trace.h"
  #include "trace-s390.h"
@@@ -110,7 -114,7 +114,7 @@@ struct kvm_stats_debugfs_item debugfs_e
  /* upper facilities limit for kvm */
  unsigned long kvm_s390_fac_list_mask[] = {
        0xffe6fffbfcfdfc40UL,
-       0x005c800000000000UL,
+       0x005e800000000000UL,
  };
  
  unsigned long kvm_s390_fac_list_mask_size(void)
@@@ -236,6 -240,7 +240,7 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
  {
        int r;
        unsigned long n;
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;
  
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;
  
-       memslot = id_to_memslot(kvm->memslots, log->slot);
+       slots = kvm_memslots(kvm);
+       memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;
@@@ -454,10 -460,10 +460,10 @@@ static int kvm_s390_set_tod_low(struct 
  
        mutex_lock(&kvm->lock);
        kvm->arch.epoch = gtod - host_tod;
-       kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
+       kvm_s390_vcpu_block_all(kvm);
+       kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
                cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-               exit_sie(cur_vcpu);
-       }
+       kvm_s390_vcpu_unblock_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
  }
@@@ -604,7 -610,7 +610,7 @@@ static int kvm_s390_get_machine(struct 
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
 -      mach->ibc = sclp_get_ibc();
 +      mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
@@@ -1068,7 -1074,7 +1074,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
               S390_ARCH_FAC_LIST_SIZE_BYTE);
  
        kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
 -      kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
 +      kvm->arch.model.ibc = sclp.ibc & 0x0fff;
  
        if (kvm_s390_crypto_init(kvm) < 0)
                goto out_err;
@@@ -1311,8 -1317,13 +1317,13 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
  
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
-                                                   CPUSTAT_STOPPED |
-                                                   CPUSTAT_GED);
+                                                   CPUSTAT_STOPPED);
+       if (test_kvm_facility(vcpu->kvm, 78))
+               atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+       else if (test_kvm_facility(vcpu->kvm, 8))
+               atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
        kvm_s390_vcpu_setup_model(vcpu);
  
        vcpu->arch.sie_block->ecb   = 6;
  
        vcpu->arch.sie_block->ecb2  = 8;
        vcpu->arch.sie_block->eca   = 0xC1002000U;
 -      if (sclp_has_siif())
 +      if (sclp.has_siif)
                vcpu->arch.sie_block->eca |= 1;
 -      if (sclp_has_sigpif())
 +      if (sclp.has_sigpif)
                vcpu->arch.sie_block->eca |= 0x10000000U;
        if (test_kvm_facility(vcpu->kvm, 129)) {
                vcpu->arch.sie_block->eca |= 0x00020000;
@@@ -1409,16 -1420,28 +1420,28 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
        return kvm_s390_vcpu_has_irq(vcpu, 0);
  }
  
- void s390_vcpu_block(struct kvm_vcpu *vcpu)
+ void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
  {
        atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+       exit_sie(vcpu);
  }
  
- void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+ void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
  {
        atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
  }
  
+ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+ {
+       atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+       exit_sie(vcpu);
+ }
+ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+ {
+       atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+ }
  /*
   * Kick a guest cpu out of SIE and wait until SIE is not running.
   * If the CPU is not running (e.g. waiting as idle) the function will
@@@ -1430,11 -1453,11 +1453,11 @@@ void exit_sie(struct kvm_vcpu *vcpu
                cpu_relax();
  }
  
- /* Kick a guest cpu out of SIE and prevent SIE-reentry */
- void exit_sie_sync(struct kvm_vcpu *vcpu)
+ /* Kick a guest cpu out of SIE to process a request synchronously */
+ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
  {
-       s390_vcpu_block(vcpu);
-       exit_sie(vcpu);
+       kvm_make_request(req, vcpu);
+       kvm_s390_vcpu_request(vcpu);
  }
  
  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
                /* match against both prefix pages */
                if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
-                       kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
-                       exit_sie_sync(vcpu);
+                       kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
                }
        }
  }
@@@ -1720,8 -1742,10 +1742,10 @@@ static bool ibs_enabled(struct kvm_vcp
  
  static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
  {
+       if (!vcpu->requests)
+               return 0;
  retry:
-       s390_vcpu_unblock(vcpu);
+       kvm_s390_vcpu_request_handled(vcpu);
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
         * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@@ -1993,12 -2017,14 +2017,14 @@@ static int __vcpu_run(struct kvm_vcpu *
                 * As PF_VCPU will be used in fault handler, between
                 * guest_enter and guest_exit should be no uaccess.
                 */
-               preempt_disable();
-               kvm_guest_enter();
-               preempt_enable();
+               local_irq_disable();
+               __kvm_guest_enter();
+               local_irq_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
-               kvm_guest_exit();
+               local_irq_disable();
+               __kvm_guest_exit();
+               local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
  
                rc = vcpu_post_run(vcpu, exit_reason);
@@@ -2068,7 -2094,7 +2094,7 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
                kvm_s390_vcpu_start(vcpu);
        } else if (is_vcpu_stopped(vcpu)) {
-               pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+               pr_err_ratelimited("can't run stopped vcpu %d\n",
                                   vcpu->vcpu_id);
                return -EINVAL;
        }
@@@ -2206,8 -2232,7 +2232,7 @@@ int kvm_s390_vcpu_store_adtl_status(str
  static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
  {
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
-       kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
-       exit_sie_sync(vcpu);
+       kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
  }
  
  static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
  static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
  {
        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
-       kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
-       exit_sie_sync(vcpu);
+       kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
  }
  
  void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
@@@ -2563,7 -2587,7 +2587,7 @@@ int kvm_arch_create_memslot(struct kvm 
  /* Section: memory related */
  int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
-                                  struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
  {
        /* A few sanity checks. We can have memory slots which have to be
  }
  
  void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_userspace_memory_region *mem,
                                const struct kvm_memory_slot *old,
+                               const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
  {
        int rc;
        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                mem->guest_phys_addr, mem->memory_size);
        if (rc)
-               printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+               pr_warn("failed to commit memory region\n");
        return;
  }
  
diff --combined arch/x86/include/asm/kvm_host.h
@@@ -184,23 -184,12 +184,12 @@@ struct kvm_mmu_memory_cache 
        void *objects[KVM_NR_MEM_OBJS];
  };
  
- /*
-  * kvm_mmu_page_role, below, is defined as:
-  *
-  *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
-  *   bits 4:7 - page table level for this shadow (1-4)
-  *   bits 8:9 - page table quadrant for 2-level guests
-  *   bit   16 - direct mapping of virtual to physical mapping at gfn
-  *              used for real mode and two-dimensional paging
-  *   bits 17:19 - common access permissions for all ptes in this shadow page
-  */
  union kvm_mmu_page_role {
        unsigned word;
        struct {
                unsigned level:4;
                unsigned cr4_pae:1;
                unsigned quadrant:2;
-               unsigned pad_for_nice_hex_output:6;
                unsigned direct:1;
                unsigned access:3;
                unsigned invalid:1;
                unsigned cr0_wp:1;
                unsigned smep_andnot_wp:1;
                unsigned smap_andnot_wp:1;
+               unsigned :8;
+               /*
+                * This is left at the top of the word so that
+                * kvm_memslots_for_spte_role can extract it with a
+                * simple shift.  While there is room, give it a whole
+                * byte so it is also faster to load it from memory.
+                */
+               unsigned smm:8;
        };
  };
  
@@@ -338,12 -336,28 +336,28 @@@ struct kvm_pmu 
        u64 reprogram_pmi;
  };
  
+ struct kvm_pmu_ops;
  enum {
        KVM_DEBUGREG_BP_ENABLED = 1,
        KVM_DEBUGREG_WONT_EXIT = 2,
        KVM_DEBUGREG_RELOAD = 4,
  };
  
+ struct kvm_mtrr_range {
+       u64 base;
+       u64 mask;
+       struct list_head node;
+ };
+ struct kvm_mtrr {
+       struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
+       mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+       u64 deftype;
+       struct list_head head;
+ };
  struct kvm_vcpu_arch {
        /*
         * rip and regs accesses must go through
        int32_t apic_arb_prio;
        int mp_state;
        u64 ia32_misc_enable_msr;
+       u64 smbase;
        bool tpr_access_reporting;
        u64 ia32_xss;
  
        atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
        unsigned nmi_pending; /* NMI queued after currently running handler */
        bool nmi_injected;    /* Trying to inject an NMI this entry */
+       bool smi_pending;    /* SMI queued after currently running handler */
  
-       struct mtrr_state_type mtrr_state;
+       struct kvm_mtrr mtrr_state;
        u64 pat;
  
        unsigned switch_db_regs;
@@@ -637,6 -653,8 +653,8 @@@ struct kvm_arch 
        #endif
  
        bool boot_vcpu_runs_old_kvmclock;
+       u64 disabled_quirks;
  };
  
  struct kvm_vm_stat {
@@@ -689,12 -707,13 +707,13 @@@ struct msr_data 
  
  struct kvm_lapic_irq {
        u32 vector;
-       u32 delivery_mode;
-       u32 dest_mode;
-       u32 level;
-       u32 trig_mode;
+       u16 delivery_mode;
+       u16 dest_mode;
+       bool level;
+       u16 trig_mode;
        u32 shorthand;
        u32 dest_id;
+       bool msi_redir_hint;
  };
  
  struct kvm_x86_ops {
        int (*hardware_setup)(void);               /* __init */
        void (*hardware_unsetup)(void);            /* __exit */
        bool (*cpu_has_accelerated_tpr)(void);
+       bool (*cpu_has_high_real_mode_segbase)(void);
        void (*cpuid_update)(struct kvm_vcpu *vcpu);
  
        /* Create, but do not attach this VCPU */
        struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
        void (*vcpu_free)(struct kvm_vcpu *vcpu);
-       void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+       void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
  
        void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
        void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
        void (*vcpu_put)(struct kvm_vcpu *vcpu);
  
        void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
-       int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+       int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
        int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
        u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
        void (*get_segment)(struct kvm_vcpu *vcpu,
        void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
                                           struct kvm_memory_slot *slot,
                                           gfn_t offset, unsigned long mask);
+       /* pmu operations of sub-arch */
+       const struct kvm_pmu_ops *pmu_ops;
  };
  
  struct kvm_arch_async_pf {
@@@ -871,7 -893,7 +893,7 @@@ void kvm_mmu_reset_context(struct kvm_v
  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                                      struct kvm_memory_slot *memslot);
  void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                                       struct kvm_memory_slot *memslot);
+                                  const struct kvm_memory_slot *memslot);
  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot);
  void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@@ -882,7 -904,7 +904,7 @@@ void kvm_mmu_clear_dirty_pt_masked(stru
                                   struct kvm_memory_slot *slot,
                                   gfn_t gfn_offset, unsigned long mask);
  void kvm_mmu_zap_all(struct kvm *kvm);
- void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
+ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
  
@@@ -890,7 -912,6 +912,6 @@@ int load_pdptrs(struct kvm_vcpu *vcpu, 
  
  int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                          const void *val, int bytes);
- u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
  
  struct kvm_irq_mask_notifier {
        void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
@@@ -938,7 -959,7 +959,7 @@@ static inline int emulate_instruction(s
  
  void kvm_enable_efer_bits(u64);
  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
- int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
+ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
  int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
  
  struct x86_emulate_ctxt;
@@@ -967,7 -988,7 +988,7 @@@ void kvm_lmsw(struct kvm_vcpu *vcpu, un
  void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
  int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
  
- int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
  
  unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
@@@ -1002,6 -1023,8 +1023,6 @@@ void kvm_pic_clear_all(struct kvm_pic *
  
  void kvm_inject_nmi(struct kvm_vcpu *vcpu);
  
 -int fx_init(struct kvm_vcpu *vcpu, bool init_event);
 -
  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                       const u8 *new, int bytes);
  int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
@@@ -1110,6 -1133,14 +1131,14 @@@ enum 
  #define HF_NMI_MASK           (1 << 3)
  #define HF_IRET_MASK          (1 << 4)
  #define HF_GUEST_MASK         (1 << 5) /* VCPU is in guest-mode */
+ #define HF_SMM_MASK           (1 << 6)
+ #define HF_SMM_INSIDE_NMI_MASK        (1 << 7)
+ #define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+ #define KVM_ADDRESS_SPACE_NUM 2
+ #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+ #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
  
  /*
   * Hardware virtualization extension instructions may fault if a
@@@ -1144,7 -1175,7 +1173,7 @@@ int kvm_cpu_has_injectable_intr(struct 
  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
  int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
- void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
  void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                           unsigned long address);
@@@ -1168,16 -1199,9 +1197,9 @@@ void kvm_complete_insn_gp(struct kvm_vc
  
  int kvm_is_in_guest(void);
  
- void kvm_pmu_init(struct kvm_vcpu *vcpu);
- void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
- void kvm_pmu_reset(struct kvm_vcpu *vcpu);
- void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
- bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
- int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
- int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
- int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc);
- int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
- void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
- void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+ int __x86_set_memory_region(struct kvm *kvm,
+                           const struct kvm_userspace_memory_region *mem);
+ int x86_set_memory_region(struct kvm *kvm,
+                         const struct kvm_userspace_memory_region *mem);
  
  #endif /* _ASM_X86_KVM_HOST_H */
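
A stand-alone sketch of the point made by the comment on the new smm field above:
because smm occupies the top byte of the packed kvm_mmu_page_role word, the value
that kvm_memslots_for_spte_role() needs can be recovered with a plain shift.  The
toy_page_role union below is illustrative only; the remaining role bits are
collapsed into a single field, and the layout assumes GCC's usual x86 bitfield
ordering:

    #include <assert.h>
    #include <stdio.h>

    union toy_page_role {
            unsigned word;
            struct {
                    unsigned other:16;  /* level, quadrant, access bits, ... */
                    unsigned pad:8;
                    unsigned smm:8;     /* deliberately kept in the top byte */
            };
    };

    int main(void)
    {
            union toy_page_role role = { .word = 0 };

            role.smm = 1;
            /* top-byte placement: the SMM role bit falls out of a shift */
            assert((role.word >> 24) == (unsigned)role.smm);
            printf("word=0x%08x smm=%u\n", role.word, (unsigned)role.smm);
            return 0;
    }
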
diff --combined arch/x86/kernel/kvm.c
@@@ -331,7 -331,7 +331,7 @@@ static void kvm_guest_apic_eoi_write(u3
        apic_write(APIC_EOI, APIC_EOI_ACK);
  }
  
- void kvm_guest_cpu_init(void)
+ static void kvm_guest_cpu_init(void)
  {
        if (!kvm_para_available())
                return;
@@@ -584,39 -584,6 +584,39 @@@ static void kvm_kick_cpu(int cpu
        kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
  }
  
 +
 +#ifdef CONFIG_QUEUED_SPINLOCKS
 +
 +#include <asm/qspinlock.h>
 +
 +static void kvm_wait(u8 *ptr, u8 val)
 +{
 +      unsigned long flags;
 +
 +      if (in_nmi())
 +              return;
 +
 +      local_irq_save(flags);
 +
 +      if (READ_ONCE(*ptr) != val)
 +              goto out;
 +
 +      /*
 +       * halt until it's our turn and kicked. Note that we do safe halt
 +       * for irq enabled case to avoid hang when lock info is overwritten
 +       * in irq spinlock slowpath and no spurious interrupt occur to save us.
 +       */
 +      if (arch_irqs_disabled_flags(flags))
 +              halt();
 +      else
 +              safe_halt();
 +
 +out:
 +      local_irq_restore(flags);
 +}
 +
 +#else /* !CONFIG_QUEUED_SPINLOCKS */
 +
  enum kvm_contention_stat {
        TAKEN_SLOW,
        TAKEN_SLOW_PICKUP,
@@@ -688,7 -655,7 +688,7 @@@ static inline void spin_time_accum_bloc
  static struct dentry *d_spin_debug;
  static struct dentry *d_kvm_debug;
  
- struct dentry *kvm_init_debugfs(void)
+ static struct dentry *kvm_init_debugfs(void)
  {
        d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
        if (!d_kvm_debug)
@@@ -850,8 -817,6 +850,8 @@@ static void kvm_unlock_kick(struct arch
        }
  }
  
 +#endif /* !CONFIG_QUEUED_SPINLOCKS */
 +
  /*
   * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
   */
@@@ -863,16 -828,8 +863,16 @@@ void __init kvm_spinlock_init(void
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                return;
  
 +#ifdef CONFIG_QUEUED_SPINLOCKS
 +      __pv_init_lock_hash();
 +      pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 +      pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 +      pv_lock_ops.wait = kvm_wait;
 +      pv_lock_ops.kick = kvm_kick_cpu;
 +#else /* !CONFIG_QUEUED_SPINLOCKS */
        pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
        pv_lock_ops.unlock_kick = kvm_unlock_kick;
 +#endif
  }
  
  static __init int kvm_spinlock_init_jump(void)
diff --combined arch/x86/kvm/cpuid.c
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/uaccess.h>
 -#include <asm/i387.h> /* For use_eager_fpu.  Ugh! */
 -#include <asm/fpu-internal.h> /* For use_eager_fpu.  Ugh! */
++#include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
  #include <asm/user.h>
 -#include <asm/xsave.h>
 +#include <asm/fpu/xstate.h>
  #include "cpuid.h"
  #include "lapic.h"
  #include "mmu.h"
  #include "trace.h"
+ #include "pmu.h"
  
  static u32 xstate_required_size(u64 xstate_bv, bool compacted)
  {
@@@ -95,7 -98,7 +97,7 @@@ int kvm_update_cpuid(struct kvm_vcpu *v
        if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
  
-       vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+       vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
  
        /*
         * The existing code assumes virtual address is 48-bit in the canonical
        /* Update physical-address width */
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
  
-       kvm_pmu_cpuid_update(vcpu);
+       kvm_pmu_refresh(vcpu);
        return 0;
  }
  
@@@ -413,6 -416,12 +415,12 @@@ static inline int __do_cpuid_ent(struc
                }
                break;
        }
+       case 6: /* Thermal management */
+               entry->eax = 0x4; /* allow ARAT */
+               entry->ebx = 0;
+               entry->ecx = 0;
+               entry->edx = 0;
+               break;
        case 7: {
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* Mask ebx against host capability word 9 */
                break;
        case 3: /* Processor serial number */
        case 5: /* MONITOR/MWAIT */
-       case 6: /* Thermal management */
        case 0xC0000002:
        case 0xC0000003:
        case 0xC0000004:
diff --combined arch/x86/kvm/lapic.c
@@@ -240,6 -240,15 +240,15 @@@ static inline void kvm_apic_set_ldr(str
        recalculate_apic_map(apic->vcpu->kvm);
  }
  
+ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+ {
+       u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+       apic_set_reg(apic, APIC_ID, id << 24);
+       apic_set_reg(apic, APIC_LDR, ldr);
+       recalculate_apic_map(apic->vcpu->kvm);
+ }
  static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
  {
        return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@@ -728,7 -737,7 +737,7 @@@ bool kvm_irq_delivery_to_apic_fast(stru
  
                dst = map->logical_map[cid];
  
-               if (irq->delivery_mode == APIC_DM_LOWEST) {
+               if (kvm_lowest_prio_delivery(irq)) {
                        int l = -1;
                        for_each_set_bit(i, &bitmap, 16) {
                                if (!dst[i])
@@@ -799,7 -808,9 +808,9 @@@ static int __apic_accept_irq(struct kvm
                break;
  
        case APIC_DM_SMI:
-               apic_debug("Ignoring guest SMI\n");
+               result = 1;
+               kvm_make_request(KVM_REQ_SMI, vcpu);
+               kvm_vcpu_kick(vcpu);
                break;
  
        case APIC_DM_NMI:
@@@ -914,9 -925,10 +925,10 @@@ static void apic_send_ipi(struct kvm_la
        irq.vector = icr_low & APIC_VECTOR_MASK;
        irq.delivery_mode = icr_low & APIC_MODE_MASK;
        irq.dest_mode = icr_low & APIC_DEST_MASK;
-       irq.level = icr_low & APIC_INT_ASSERT;
+       irq.level = (icr_low & APIC_INT_ASSERT) != 0;
        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
        irq.shorthand = icr_low & APIC_SHORT_MASK;
+       irq.msi_redir_hint = false;
        if (apic_x2apic_mode(apic))
                irq.dest_id = icr_high;
        else
  
        apic_debug("icr_high 0x%x, icr_low 0x%x, "
                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
-                  "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
+                  "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
+                  "msi_redir_hint 0x%x\n",
                   icr_high, icr_low, irq.shorthand, irq.dest_id,
                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
-                  irq.vector);
+                  irq.vector, irq.msi_redir_hint);
  
        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
  }
@@@ -1090,17 -1103,6 +1103,17 @@@ static void update_divide_count(struct 
                                   apic->divide_count);
  }
  
 +static void apic_update_lvtt(struct kvm_lapic *apic)
 +{
 +      u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
 +                      apic->lapic_timer.timer_mode_mask;
 +
 +      if (apic->lapic_timer.timer_mode != timer_mode) {
 +              apic->lapic_timer.timer_mode = timer_mode;
 +              hrtimer_cancel(&apic->lapic_timer.timer);
 +      }
 +}
 +
  static void apic_timer_expired(struct kvm_lapic *apic)
  {
        struct kvm_vcpu *vcpu = apic->vcpu;
@@@ -1309,7 -1311,6 +1322,7 @@@ static int apic_reg_write(struct kvm_la
                                apic_set_reg(apic, APIC_LVTT + 0x10 * i,
                                             lvt_val | APIC_LVT_MASKED);
                        }
 +                      apic_update_lvtt(apic);
                        atomic_set(&apic->lapic_timer.pending, 0);
  
                }
  
                break;
  
 -      case APIC_LVTT: {
 -              u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
 -
 -              if (apic->lapic_timer.timer_mode != timer_mode) {
 -                      apic->lapic_timer.timer_mode = timer_mode;
 -                      hrtimer_cancel(&apic->lapic_timer.timer);
 -              }
 -
 +      case APIC_LVTT:
                if (!kvm_apic_sw_enabled(apic))
                        val |= APIC_LVT_MASKED;
                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
                apic_set_reg(apic, APIC_LVTT, val);
 +              apic_update_lvtt(apic);
                break;
 -      }
  
        case APIC_TMICT:
                if (apic_lvtt_tscdeadline(apic))
@@@ -1541,9 -1549,7 +1554,7 @@@ void kvm_lapic_set_base(struct kvm_vcp
  
        if ((old_value ^ value) & X2APIC_ENABLE) {
                if (value & X2APIC_ENABLE) {
-                       u32 id = kvm_apic_id(apic);
-                       u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
-                       kvm_apic_set_ldr(apic, ldr);
+                       kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
                } else
                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
  
  }
  
- void kvm_lapic_reset(struct kvm_vcpu *vcpu)
+ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
        struct kvm_lapic *apic;
        int i;
        /* Stop the timer in case it's a reset to an active apic */
        hrtimer_cancel(&apic->lapic_timer.timer);
  
-       kvm_apic_set_id(apic, vcpu->vcpu_id);
+       if (!init_event)
+               kvm_apic_set_id(apic, vcpu->vcpu_id);
        kvm_apic_set_version(apic->vcpu);
  
        for (i = 0; i < APIC_LVT_NUM; i++)
                apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 -      apic->lapic_timer.timer_mode = 0;
 +      apic_update_lvtt(apic);
-       apic_set_reg(apic, APIC_LVT0,
-                    SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
+       if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+               apic_set_reg(apic, APIC_LVT0,
+                            SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
  
        apic_set_reg(apic, APIC_DFR, 0xffffffffU);
        apic_set_spiv(apic, 0xff);
        apic_set_reg(apic, APIC_TASKPRI, 0);
-       kvm_apic_set_ldr(apic, 0);
+       if (!apic_x2apic_mode(apic))
+               kvm_apic_set_ldr(apic, 0);
        apic_set_reg(apic, APIC_ESR, 0);
        apic_set_reg(apic, APIC_ICR, 0);
        apic_set_reg(apic, APIC_ICR2, 0);
@@@ -1717,7 -1726,7 +1731,7 @@@ int kvm_create_lapic(struct kvm_vcpu *v
                        APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
  
        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
-       kvm_lapic_reset(vcpu);
+       kvm_lapic_reset(vcpu, false);
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
  
        return 0;
@@@ -1807,7 -1816,6 +1821,7 @@@ void kvm_apic_post_state_restore(struc
  
        apic_update_ppr(apic);
        hrtimer_cancel(&apic->lapic_timer.timer);
 +      apic_update_lvtt(apic);
        update_divide_count(apic);
        start_apic_timer(apic);
        apic->irr_pending = true;
@@@ -2049,11 -2057,22 +2063,22 @@@ void kvm_apic_accept_events(struct kvm_
        if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
                return;
  
-       pe = xchg(&apic->pending_events, 0);
+       /*
+        * INITs are latched while in SMM.  Because an SMM CPU cannot
+        * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
+        * and delay processing of INIT until the next RSM.
+        */
+       if (is_smm(vcpu)) {
+               WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
+               if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
+                       clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+               return;
+       }
  
+       pe = xchg(&apic->pending_events, 0);
        if (test_bit(KVM_APIC_INIT, &pe)) {
-               kvm_lapic_reset(vcpu);
-               kvm_vcpu_reset(vcpu);
+               kvm_lapic_reset(vcpu, true);
+               kvm_vcpu_reset(vcpu, true);
                if (kvm_vcpu_is_bsp(apic->vcpu))
                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
                else
diff --combined arch/x86/kvm/vmx.c
  #include <asm/vmx.h>
  #include <asm/virtext.h>
  #include <asm/mce.h>
 -#include <asm/i387.h>
 -#include <asm/xcr.h>
 +#include <asm/fpu/internal.h>
  #include <asm/perf_event.h>
  #include <asm/debugreg.h>
  #include <asm/kexec.h>
  #include <asm/apic.h>
  
  #include "trace.h"
+ #include "pmu.h"
  
  #define __ex(x) __kvm_handle_fault_on_reboot(x)
  #define __ex_clear(x, reg) \
@@@ -785,7 -787,7 +786,7 @@@ static inline struct vmcs12 *get_vmcs12
  
  static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
  {
-       struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
+       struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
        if (is_error_page(page))
                return NULL;
  
@@@ -1882,7 -1884,7 +1883,7 @@@ static void __vmx_load_host_state(struc
         * If the FPU is not active (through the host task or
         * the guest vcpu), then restore the cr0.TS bit.
         */
 -      if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
 +      if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
                stts();
        load_gdt(this_cpu_ptr(&host_gdt));
  }
@@@ -2169,8 -2171,7 +2170,7 @@@ static void vmx_set_msr_bitmap(struct k
  
        if (is_guest_mode(vcpu))
                msr_bitmap = vmx_msr_bitmap_nested;
-       else if (irqchip_in_kernel(vcpu->kvm) &&
-               apic_x2apic_mode(vcpu->arch.apic)) {
+       else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
                if (is_long_mode(vcpu))
                        msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
                else
@@@ -2622,76 -2623,69 +2622,69 @@@ static int vmx_get_vmx_msr(struct kvm_v
   * Returns 0 on success, non-0 otherwise.
   * Assumes vcpu_load() was already called.
   */
- static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
-       u64 data;
        struct shared_msr_entry *msr;
  
-       if (!pdata) {
-               printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
-               return -EINVAL;
-       }
-       switch (msr_index) {
+       switch (msr_info->index) {
  #ifdef CONFIG_X86_64
        case MSR_FS_BASE:
-               data = vmcs_readl(GUEST_FS_BASE);
+               msr_info->data = vmcs_readl(GUEST_FS_BASE);
                break;
        case MSR_GS_BASE:
-               data = vmcs_readl(GUEST_GS_BASE);
+               msr_info->data = vmcs_readl(GUEST_GS_BASE);
                break;
        case MSR_KERNEL_GS_BASE:
                vmx_load_host_state(to_vmx(vcpu));
-               data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+               msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
                break;
  #endif
        case MSR_EFER:
-               return kvm_get_msr_common(vcpu, msr_index, pdata);
+               return kvm_get_msr_common(vcpu, msr_info);
        case MSR_IA32_TSC:
-               data = guest_read_tsc();
+               msr_info->data = guest_read_tsc();
                break;
        case MSR_IA32_SYSENTER_CS:
-               data = vmcs_read32(GUEST_SYSENTER_CS);
+               msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
                break;
        case MSR_IA32_SYSENTER_EIP:
-               data = vmcs_readl(GUEST_SYSENTER_EIP);
+               msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
                break;
        case MSR_IA32_SYSENTER_ESP:
-               data = vmcs_readl(GUEST_SYSENTER_ESP);
+               msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        case MSR_IA32_BNDCFGS:
                if (!vmx_mpx_supported())
                        return 1;
-               data = vmcs_read64(GUEST_BNDCFGS);
+               msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
        case MSR_IA32_FEATURE_CONTROL:
                if (!nested_vmx_allowed(vcpu))
                        return 1;
-               data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+               msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
                break;
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                if (!nested_vmx_allowed(vcpu))
                        return 1;
-               return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+               return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
        case MSR_IA32_XSS:
                if (!vmx_xsaves_supported())
                        return 1;
-               data = vcpu->arch.ia32_xss;
+               msr_info->data = vcpu->arch.ia32_xss;
                break;
        case MSR_TSC_AUX:
                if (!to_vmx(vcpu)->rdtscp_enabled)
                        return 1;
                /* Otherwise falls through */
        default:
-               msr = find_msr_entry(to_vmx(vcpu), msr_index);
+               msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
                if (msr) {
-                       data = msr->data;
+                       msr_info->data = msr->data;
                        break;
                }
-               return kvm_get_msr_common(vcpu, msr_index, pdata);
+               return kvm_get_msr_common(vcpu, msr_info);
        }
  
-       *pdata = data;
        return 0;
  }
  
@@@ -4122,7 -4116,7 +4115,7 @@@ static int alloc_apic_access_page(struc
        kvm_userspace_mem.flags = 0;
        kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
        kvm_userspace_mem.memory_size = PAGE_SIZE;
-       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+       r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
        if (r)
                goto out;
  
@@@ -4157,7 -4151,7 +4150,7 @@@ static int alloc_identity_pagetable(str
        kvm_userspace_mem.guest_phys_addr =
                kvm->arch.ept_identity_map_addr;
        kvm_userspace_mem.memory_size = PAGE_SIZE;
-       r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+       r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
  
        return r;
  }
@@@ -4666,16 -4660,8 +4659,8 @@@ static int vmx_vcpu_setup(struct vcpu_v
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
        vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
  
-       if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-               u32 msr_low, msr_high;
-               u64 host_pat;
-               rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
-               host_pat = msr_low | ((u64) msr_high << 32);
-               /* Write the default value follow host pat */
-               vmcs_write64(GUEST_IA32_PAT, host_pat);
-               /* Keep arch.pat sync with GUEST_IA32_PAT */
-               vmx->vcpu.arch.pat = host_pat;
-       }
+       if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+               vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
  
        for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
                u32 index = vmx_msr_index[i];
        return 0;
  }
  
- static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct msr_data apic_base_msr;
+       u64 cr0;
  
        vmx->rmode.vm86_active = 0;
  
        vmx->soft_vnmi_blocked = 0;
  
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
-       kvm_set_cr8(&vmx->vcpu, 0);
-       apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
-       if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
-               apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
-       apic_base_msr.host_initiated = true;
-       kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
+       kvm_set_cr8(vcpu, 0);
+       if (!init_event) {
+               apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
+                                    MSR_IA32_APICBASE_ENABLE;
+               if (kvm_vcpu_is_reset_bsp(vcpu))
+                       apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+               apic_base_msr.host_initiated = true;
+               kvm_set_apic_base(vcpu, &apic_base_msr);
+       }
  
        vmx_segment_cache_clear(vmx);
  
        vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
        vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
  
-       vmcs_write32(GUEST_SYSENTER_CS, 0);
-       vmcs_writel(GUEST_SYSENTER_ESP, 0);
-       vmcs_writel(GUEST_SYSENTER_EIP, 0);
+       if (!init_event) {
+               vmcs_write32(GUEST_SYSENTER_CS, 0);
+               vmcs_writel(GUEST_SYSENTER_ESP, 0);
+               vmcs_writel(GUEST_SYSENTER_EIP, 0);
+               vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+       }
  
        vmcs_writel(GUEST_RFLAGS, 0x02);
        kvm_rip_write(vcpu, 0xfff0);
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
        vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
  
-       /* Special registers */
-       vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
        setup_msrs(vmx);
  
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
  
-       if (cpu_has_vmx_tpr_shadow()) {
+       if (cpu_has_vmx_tpr_shadow() && !init_event) {
                vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
-               if (vm_need_tpr_shadow(vmx->vcpu.kvm))
+               if (vm_need_tpr_shadow(vcpu->kvm))
                        vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-                                    __pa(vmx->vcpu.arch.apic->regs));
+                                    __pa(vcpu->arch.apic->regs));
                vmcs_write32(TPR_THRESHOLD, 0);
        }
  
        if (vmx->vpid != 0)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
  
-       vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-       vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
-       vmx_set_cr4(&vmx->vcpu, 0);
-       vmx_set_efer(&vmx->vcpu, 0);
-       vmx_fpu_activate(&vmx->vcpu);
-       update_exception_bitmap(&vmx->vcpu);
+       cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+       vmx_set_cr0(vcpu, cr0); /* enter rmode */
+       vmx->vcpu.arch.cr0 = cr0;
+       vmx_set_cr4(vcpu, 0);
+       if (!init_event)
+               vmx_set_efer(vcpu, 0);
+       vmx_fpu_activate(vcpu);
+       update_exception_bitmap(vcpu);
  
        vpid_sync_context(vmx);
  }
@@@ -4964,7 -4957,7 +4956,7 @@@ static int vmx_set_tss_addr(struct kvm 
                .flags = 0,
        };
  
-       ret = kvm_set_memory_region(kvm, &tss_mem);
+       ret = x86_set_memory_region(kvm, &tss_mem);
        if (ret)
                return ret;
        kvm->arch.tss_addr = addr;
@@@ -5474,19 -5467,21 +5466,21 @@@ static int handle_cpuid(struct kvm_vcp
  static int handle_rdmsr(struct kvm_vcpu *vcpu)
  {
        u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
-       u64 data;
+       struct msr_data msr_info;
  
-       if (vmx_get_msr(vcpu, ecx, &data)) {
+       msr_info.index = ecx;
+       msr_info.host_initiated = false;
+       if (vmx_get_msr(vcpu, &msr_info)) {
                trace_kvm_msr_read_ex(ecx);
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
  
-       trace_kvm_msr_read(ecx, data);
+       trace_kvm_msr_read(ecx, msr_info.data);
  
        /* FIXME: handling of bits 32:63 of rax, rdx */
-       vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
-       vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
+       vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
+       vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
        skip_emulated_instruction(vcpu);
        return 1;
  }
@@@ -5709,9 -5704,6 +5703,6 @@@ static int handle_task_switch(struct kv
                return 0;
        }
  
-       /* clear all local breakpoint enable flags */
-       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
        /*
         * TODO: What about debug traps on tss switch?
         *       Are we supposed to inject them and update dr6?
@@@ -7332,7 -7324,7 +7323,7 @@@ static bool nested_vmx_exit_handled_io(
                bitmap += (port & 0x7fff) / 8;
  
                if (last_bitmap != bitmap)
-                       if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
+                       if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
                                return true;
                if (b & (1 << (port & 7)))
                        return true;
@@@ -7376,7 -7368,7 +7367,7 @@@ static bool nested_vmx_exit_handled_msr
        /* Then read the msr_index'th bit from this bitmap: */
        if (msr_index < 1024*8) {
                unsigned char b;
-               if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
+               if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
                        return true;
                return 1 & (b >> (msr_index & 7));
        } else
@@@ -7641,9 -7633,9 +7632,9 @@@ static void vmx_disable_pml(struct vcpu
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
  }
  
- static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
+ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
  {
-       struct kvm *kvm = vmx->vcpu.kvm;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 *pml_buf;
        u16 pml_idx;
  
  
                gpa = pml_buf[pml_idx];
                WARN_ON(gpa & (PAGE_SIZE - 1));
-               mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+               kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
        }
  
        /* reset PML index */
@@@ -7690,6 -7682,158 +7681,158 @@@ static void kvm_flush_pml_buffers(struc
                kvm_vcpu_kick(vcpu);
  }
  
+ static void vmx_dump_sel(char *name, uint32_t sel)
+ {
+       pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
+              name, vmcs_read32(sel),
+              vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
+              vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
+              vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
+ }
+ static void vmx_dump_dtsel(char *name, uint32_t limit)
+ {
+       pr_err("%s                           limit=0x%08x, base=0x%016lx\n",
+              name, vmcs_read32(limit),
+              vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
+ }
+ static void dump_vmcs(void)
+ {
+       u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
+       u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
+       u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+       u32 secondary_exec_control = 0;
+       unsigned long cr4 = vmcs_readl(GUEST_CR4);
+       u64 efer = vmcs_readl(GUEST_IA32_EFER);
+       int i, n;
+       if (cpu_has_secondary_exec_ctrls())
+               secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+       pr_err("*** Guest State ***\n");
+       pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
+              vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
+              vmcs_readl(CR0_GUEST_HOST_MASK));
+       pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
+              cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
+       pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
+       if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
+           (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
+       {
+               pr_err("PDPTR0 = 0x%016lx  PDPTR1 = 0x%016lx\n",
+                      vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1));
+               pr_err("PDPTR2 = 0x%016lx  PDPTR3 = 0x%016lx\n",
+                      vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3));
+       }
+       pr_err("RSP = 0x%016lx  RIP = 0x%016lx\n",
+              vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
+       pr_err("RFLAGS=0x%08lx         DR7 = 0x%016lx\n",
+              vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
+       pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
+              vmcs_readl(GUEST_SYSENTER_ESP),
+              vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
+       vmx_dump_sel("CS:  ", GUEST_CS_SELECTOR);
+       vmx_dump_sel("DS:  ", GUEST_DS_SELECTOR);
+       vmx_dump_sel("SS:  ", GUEST_SS_SELECTOR);
+       vmx_dump_sel("ES:  ", GUEST_ES_SELECTOR);
+       vmx_dump_sel("FS:  ", GUEST_FS_SELECTOR);
+       vmx_dump_sel("GS:  ", GUEST_GS_SELECTOR);
+       vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
+       vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
+       vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
+       vmx_dump_sel("TR:  ", GUEST_TR_SELECTOR);
+       if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
+           (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
+               pr_err("EFER =     0x%016llx  PAT = 0x%016lx\n",
+                      efer, vmcs_readl(GUEST_IA32_PAT));
+       pr_err("DebugCtl = 0x%016lx  DebugExceptions = 0x%016lx\n",
+              vmcs_readl(GUEST_IA32_DEBUGCTL),
+              vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
+       if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+               pr_err("PerfGlobCtl = 0x%016lx\n",
+                      vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL));
+       if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
+               pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS));
+       pr_err("Interruptibility = %08x  ActivityState = %08x\n",
+              vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
+              vmcs_read32(GUEST_ACTIVITY_STATE));
+       if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
+               pr_err("InterruptStatus = %04x\n",
+                      vmcs_read16(GUEST_INTR_STATUS));
+       pr_err("*** Host State ***\n");
+       pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
+              vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
+       pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
+              vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
+              vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
+              vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
+              vmcs_read16(HOST_TR_SELECTOR));
+       pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
+              vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
+              vmcs_readl(HOST_TR_BASE));
+       pr_err("GDTBase=%016lx IDTBase=%016lx\n",
+              vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
+       pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
+              vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
+              vmcs_readl(HOST_CR4));
+       pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
+              vmcs_readl(HOST_IA32_SYSENTER_ESP),
+              vmcs_read32(HOST_IA32_SYSENTER_CS),
+              vmcs_readl(HOST_IA32_SYSENTER_EIP));
+       if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
+               pr_err("EFER = 0x%016lx  PAT = 0x%016lx\n",
+                      vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT));
+       if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+               pr_err("PerfGlobCtl = 0x%016lx\n",
+                      vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL));
+       pr_err("*** Control State ***\n");
+       pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
+              pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
+       pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
+       pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
+              vmcs_read32(EXCEPTION_BITMAP),
+              vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
+              vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
+       pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
+              vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
+              vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
+              vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
+       pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
+              vmcs_read32(VM_EXIT_INTR_INFO),
+              vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+              vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
+       pr_err("        reason=%08x qualification=%016lx\n",
+              vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
+       pr_err("IDTVectoring: info=%08x errcode=%08x\n",
+              vmcs_read32(IDT_VECTORING_INFO_FIELD),
+              vmcs_read32(IDT_VECTORING_ERROR_CODE));
+       pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
+       if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
+               pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+       if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
+               pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
+       if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
+               pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER));
+       n = vmcs_read32(CR3_TARGET_COUNT);
+       for (i = 0; i + 1 < n; i += 2)
+               pr_err("CR3 target%u=%016lx target%u=%016lx\n",
+                      i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
+                      i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
+       if (i < n)
+               pr_err("CR3 target%u=%016lx\n",
+                      i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
+       if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+               pr_err("PLE Gap=%08x Window=%08x\n",
+                      vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
+       if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+               pr_err("Virtual processor ID = 0x%04x\n",
+                      vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ }
  /*
   * The guest has exited.  See if we can fix it or if we need userspace
   * assistance.
@@@ -7708,7 -7852,7 +7851,7 @@@ static int vmx_handle_exit(struct kvm_v
         * flushed already.
         */
        if (enable_pml)
-               vmx_flush_pml_buffer(vmx);
+               vmx_flush_pml_buffer(vcpu);
  
        /* If guest state is invalid, start emulating */
        if (vmx->emulation_required)
        }
  
        if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+               dump_vmcs();
                vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                vcpu->run->fail_entry.hardware_entry_failure_reason
                        = exit_reason;
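
dump_vmcs() writes to the kernel log, so on a failed VM entry userspace only sees the KVM_EXIT_FAIL_ENTRY exit with the raw hardware reason. A minimal sketch of the userspace side, assuming an mmap'ed struct kvm_run for the vcpu (the helper name is illustrative):

    #include <stdio.h>
    #include <linux/kvm.h>

    /* Inspect a completed KVM_RUN.  On a failed VM entry the kernel has
     * already dumped the VMCS to the kernel log; userspace only gets the
     * raw hardware reason (the VM-exit reason with the failed-entry bit,
     * e.g. 0x80000021 for invalid guest state). */
    static int check_run(struct kvm_run *run)
    {
            if (run->exit_reason == KVM_EXIT_FAIL_ENTRY) {
                    fprintf(stderr,
                            "vmentry failed, hw reason 0x%llx (VMCS dump is in dmesg)\n",
                            (unsigned long long)
                            run->fail_entry.hardware_entry_failure_reason);
                    return -1;
            }
            return 0;
    }
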
@@@ -7995,6 -8140,11 +8139,11 @@@ static void vmx_handle_external_intr(st
                local_irq_enable();
  }
  
+ static bool vmx_has_high_real_mode_segbase(void)
+ {
+       return enable_unrestricted_guest || emulate_invalid_guest_state;
+ }
  static bool vmx_mpx_supported(void)
  {
        return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
@@@ -8479,7 -8629,8 +8628,8 @@@ static int get_ept_level(void
  
  static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
  {
-       u64 ret;
+       u8 cache;
+       u64 ipat = 0;
  
        /* For VT-d and EPT combination
         * 1. MMIO: always map as UC
         * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
         *    consistent with host MTRR
         */
-       if (is_mmio)
-               ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
-       else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
-               ret = kvm_get_guest_memory_type(vcpu, gfn) <<
-                     VMX_EPT_MT_EPTE_SHIFT;
-       else
-               ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
-                       | VMX_EPT_IPAT_BIT;
+       if (is_mmio) {
+               cache = MTRR_TYPE_UNCACHABLE;
+               goto exit;
+       }
  
-       return ret;
+       if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+               ipat = VMX_EPT_IPAT_BIT;
+               cache = MTRR_TYPE_WRBACK;
+               goto exit;
+       }
+       if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+               ipat = VMX_EPT_IPAT_BIT;
+               cache = MTRR_TYPE_UNCACHABLE;
+               goto exit;
+       }
+       cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+ exit:
+       return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
  }
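
The value returned above is just the chosen cache type shifted into bits 5:3 of the EPT PTE, optionally OR'ed with the ignore-PAT bit (bit 6). A standalone sketch of the same decision tree; the constant names are local stand-ins for VMX_EPT_MT_EPTE_SHIFT and VMX_EPT_IPAT_BIT:

    #include <stdbool.h>
    #include <stdint.h>

    #define EPT_MT_SHIFT  3            /* memory type: EPT PTE bits 5:3 */
    #define EPT_IPAT_BIT  (1ull << 6)  /* "ignore guest PAT"            */
    #define MTRR_UC       0
    #define MTRR_WB       6

    /* Mirror of the decision tree above: MMIO is always UC; without
     * noncoherent DMA force WB and ignore the guest PAT; with CR0.CD set
     * force UC; otherwise honour the guest MTRR type for the gfn. */
    static uint64_t ept_mem_type(bool is_mmio, bool noncoherent_dma,
                                 bool cr0_cd, uint8_t guest_mtrr_type)
    {
            uint64_t ipat = 0;
            uint8_t cache;

            if (is_mmio) {
                    cache = MTRR_UC;
            } else if (!noncoherent_dma) {
                    ipat = EPT_IPAT_BIT;
                    cache = MTRR_WB;
            } else if (cr0_cd) {
                    ipat = EPT_IPAT_BIT;
                    cache = MTRR_UC;
            } else {
                    cache = guest_mtrr_type;
            }
            return ((uint64_t)cache << EPT_MT_SHIFT) | ipat;
    }
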
  
  static int vmx_get_lpage_level(void)
@@@ -8923,7 -9085,7 +9084,7 @@@ static int nested_vmx_msr_check_common(
                                       struct vmx_msr_entry *e)
  {
        /* x2APIC MSR accesses are not allowed */
-       if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8)
+       if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
                return -EINVAL;
        if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
            e->index == MSR_IA32_UCODE_REV)
@@@ -8965,8 -9127,8 +9126,8 @@@ static u32 nested_vmx_load_msr(struct k
  
        msr.host_initiated = false;
        for (i = 0; i < count; i++) {
-               if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e),
-                                  &e, sizeof(e))) {
+               if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
+                                       &e, sizeof(e))) {
                        pr_warn_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
@@@ -8998,9 -9160,10 +9159,10 @@@ static int nested_vmx_store_msr(struct 
        struct vmx_msr_entry e;
  
        for (i = 0; i < count; i++) {
-               if (kvm_read_guest(vcpu->kvm,
-                                  gpa + i * sizeof(e),
-                                  &e, 2 * sizeof(u32))) {
+               struct msr_data msr_info;
+               if (kvm_vcpu_read_guest(vcpu,
+                                       gpa + i * sizeof(e),
+                                       &e, 2 * sizeof(u32))) {
                        pr_warn_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                                __func__, i, e.index, e.reserved);
                        return -EINVAL;
                }
-               if (kvm_get_msr(vcpu, e.index, &e.value)) {
+               msr_info.host_initiated = false;
+               msr_info.index = e.index;
+               if (kvm_get_msr(vcpu, &msr_info)) {
                        pr_warn_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
                                __func__, i, e.index);
                        return -EINVAL;
                }
-               if (kvm_write_guest(vcpu->kvm,
-                                   gpa + i * sizeof(e) +
-                                       offsetof(struct vmx_msr_entry, value),
-                                   &e.value, sizeof(e.value))) {
+               if (kvm_vcpu_write_guest(vcpu,
+                                        gpa + i * sizeof(e) +
+                                            offsetof(struct vmx_msr_entry, value),
+                                        &msr_info.data, sizeof(msr_info.data))) {
                        pr_warn_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
-                               __func__, i, e.index, e.value);
+                               __func__, i, e.index, msr_info.data);
                        return -EINVAL;
                }
        }
@@@ -10149,6 -10314,7 +10313,7 @@@ static struct kvm_x86_ops vmx_x86_ops 
        .hardware_enable = hardware_enable,
        .hardware_disable = hardware_disable,
        .cpu_has_accelerated_tpr = report_flexpriority,
+       .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
  
        .vcpu_create = vmx_create_vcpu,
        .vcpu_free = vmx_free_vcpu,
        .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
        .flush_log_dirty = vmx_flush_log_dirty,
        .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+       .pmu_ops = &intel_pmu_ops,
  };
  
  static int __init vmx_init(void)
diff --combined arch/x86/kvm/x86.c
@@@ -28,6 -28,7 +28,7 @@@
  #include "x86.h"
  #include "cpuid.h"
  #include "assigned-dev.h"
+ #include "pmu.h"
  
  #include <linux/clocksource.h>
  #include <linux/interrupt.h>
  #include <asm/debugreg.h>
  #include <asm/msr.h>
  #include <asm/desc.h>
- #include <asm/mtrr.h>
  #include <asm/mce.h>
 -#include <asm/i387.h>
 -#include <asm/fpu-internal.h> /* Ugh! */
 -#include <asm/xcr.h>
 +#include <linux/kernel_stat.h>
 +#include <asm/fpu/internal.h> /* Ugh! */
  #include <asm/pvclock.h>
  #include <asm/div64.h>
  
@@@ -98,6 -99,9 +98,9 @@@ module_param(ignore_msrs, bool, S_IRUG
  unsigned int min_timer_period_us = 500;
  module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
  
+ static bool __read_mostly kvmclock_periodic_sync = true;
+ module_param(kvmclock_periodic_sync, bool, S_IRUGO);
  bool kvm_has_tsc_control;
  EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
  u32  kvm_max_guest_tsc_khz;
@@@ -474,7 -478,7 +477,7 @@@ EXPORT_SYMBOL_GPL(kvm_require_dr)
  
  /*
   * This function will be used to read from the physical memory of the currently
-  * running guest. The difference to kvm_read_guest_page is that this function
+  * running guest. The difference from kvm_vcpu_read_guest_page is that this function
   * can read from guest physical or from the guest's guest physical memory.
   */
  int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
  
        real_gfn = gpa_to_gfn(real_gfn);
  
-       return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
+       return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
  }
  EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
  
@@@ -571,8 -575,7 +574,7 @@@ out
  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
  {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
-       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
-                                   X86_CR0_CD | X86_CR0_NW;
+       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
  
        cr0 |= X86_CR0_ET;
  
  
        if ((cr0 ^ old_cr0) & update_bits)
                kvm_mmu_reset_context(vcpu);
+       if ((cr0 ^ old_cr0) & X86_CR0_CD)
+               kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
        return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_set_cr0);
@@@ -907,7 -914,7 +913,7 @@@ bool kvm_rdpmc(struct kvm_vcpu *vcpu
        u64 data;
        int err;
  
-       err = kvm_pmu_read_pmc(vcpu, ecx, &data);
+       err = kvm_pmu_rdpmc(vcpu, ecx, &data);
        if (err)
                return err;
        kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
@@@ -922,17 -929,11 +928,11 @@@ EXPORT_SYMBOL_GPL(kvm_rdpmc)
   *
   * This list is modified at module load time to reflect the
   * capabilities of the host cpu. This capabilities test skips MSRs that are
-  * kvm-specific. Those are put in the beginning of the list.
+  * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+  * may depend on host virtualization features rather than host cpu features.
   */
  
- #define KVM_SAVE_MSRS_BEGIN   12
  static u32 msrs_to_save[] = {
-       MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-       MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-       HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
-       HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
-       HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-       MSR_KVM_PV_EOI_EN,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
  #ifdef CONFIG_X86_64
  
  static unsigned num_msrs_to_save;
  
- static const u32 emulated_msrs[] = {
+ static u32 emulated_msrs[] = {
+       MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
+       HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+       HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+       HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+       MSR_KVM_PV_EOI_EN,
        MSR_IA32_TSC_ADJUST,
        MSR_IA32_TSCDEADLINE,
        MSR_IA32_MISC_ENABLE,
        MSR_IA32_MCG_STATUS,
        MSR_IA32_MCG_CTL,
+       MSR_IA32_SMBASE,
  };
  
+ static unsigned num_emulated_msrs;
  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
  {
        if (efer & efer_reserved_bits)
@@@ -1045,6 -1056,21 +1055,21 @@@ EXPORT_SYMBOL_GPL(kvm_set_msr)
  /*
   * Adapt set_msr() to msr_io()'s calling convention
   */
+ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+ {
+       struct msr_data msr;
+       int r;
+       msr.index = index;
+       msr.host_initiated = true;
+       r = kvm_get_msr(vcpu, &msr);
+       if (r)
+               return r;
+       *data = msr.data;
+       return 0;
+ }
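
do_get_msr()/do_set_msr() are the kernel-side adapters behind the KVM_GET_MSRS/KVM_SET_MSRS vcpu ioctls. A minimal userspace sketch of the read side, assuming an already-created vcpu fd; the helper name and the choice of MSR_IA32_SYSENTER_CS (0x174) are purely illustrative:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define MSR_IA32_SYSENTER_CS 0x174   /* architectural MSR number */

    /* Read one MSR from a vcpu.  KVM_GET_MSRS returns the number of
     * entries it processed, so 1 means the read went through do_get_msr()
     * in the kernel. */
    static int read_msr(int vcpu_fd, uint32_t index, uint64_t *value)
    {
            struct {
                    struct kvm_msrs hdr;
                    struct kvm_msr_entry entry;
            } req = { .hdr.nmsrs = 1, .entry.index = index };

            if (ioctl(vcpu_fd, KVM_GET_MSRS, &req) != 1)
                    return -1;
            *value = req.entry.data;
            return 0;
    }
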
  static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
  {
        struct msr_data msr;
@@@ -1697,6 -1723,8 +1722,8 @@@ static int kvm_guest_time_update(struc
                vcpu->pvclock_set_guest_stopped_request = false;
        }
  
+       pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
        /* If the host uses TSC clocksource, then it is stable */
        if (use_master_clock)
                pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@@ -1767,127 -1795,14 +1794,14 @@@ static void kvmclock_sync_fn(struct wor
                                           kvmclock_sync_work);
        struct kvm *kvm = container_of(ka, struct kvm, arch);
  
+       if (!kvmclock_periodic_sync)
+               return;
        schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
        schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
                                        KVMCLOCK_SYNC_PERIOD);
  }
  
- static bool msr_mtrr_valid(unsigned msr)
- {
-       switch (msr) {
-       case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
-       case MSR_MTRRfix64K_00000:
-       case MSR_MTRRfix16K_80000:
-       case MSR_MTRRfix16K_A0000:
-       case MSR_MTRRfix4K_C0000:
-       case MSR_MTRRfix4K_C8000:
-       case MSR_MTRRfix4K_D0000:
-       case MSR_MTRRfix4K_D8000:
-       case MSR_MTRRfix4K_E0000:
-       case MSR_MTRRfix4K_E8000:
-       case MSR_MTRRfix4K_F0000:
-       case MSR_MTRRfix4K_F8000:
-       case MSR_MTRRdefType:
-       case MSR_IA32_CR_PAT:
-               return true;
-       case 0x2f8:
-               return true;
-       }
-       return false;
- }
- static bool valid_pat_type(unsigned t)
- {
-       return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
- }
- static bool valid_mtrr_type(unsigned t)
- {
-       return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
- }
- bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
- {
-       int i;
-       u64 mask;
-       if (!msr_mtrr_valid(msr))
-               return false;
-       if (msr == MSR_IA32_CR_PAT) {
-               for (i = 0; i < 8; i++)
-                       if (!valid_pat_type((data >> (i * 8)) & 0xff))
-                               return false;
-               return true;
-       } else if (msr == MSR_MTRRdefType) {
-               if (data & ~0xcff)
-                       return false;
-               return valid_mtrr_type(data & 0xff);
-       } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
-               for (i = 0; i < 8 ; i++)
-                       if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
-                               return false;
-               return true;
-       }
-       /* variable MTRRs */
-       WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
-       mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
-       if ((msr & 1) == 0) {
-               /* MTRR base */
-               if (!valid_mtrr_type(data & 0xff))
-                       return false;
-               mask |= 0xf00;
-       } else
-               /* MTRR mask */
-               mask |= 0x7ff;
-       if (data & mask) {
-               kvm_inject_gp(vcpu, 0);
-               return false;
-       }
-       return true;
- }
- EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
- static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
- {
-       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-       if (!kvm_mtrr_valid(vcpu, msr, data))
-               return 1;
-       if (msr == MSR_MTRRdefType) {
-               vcpu->arch.mtrr_state.def_type = data;
-               vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
-       } else if (msr == MSR_MTRRfix64K_00000)
-               p[0] = data;
-       else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-               p[1 + msr - MSR_MTRRfix16K_80000] = data;
-       else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-               p[3 + msr - MSR_MTRRfix4K_C0000] = data;
-       else if (msr == MSR_IA32_CR_PAT)
-               vcpu->arch.pat = data;
-       else {  /* Variable MTRRs */
-               int idx, is_mtrr_mask;
-               u64 *pt;
-               idx = (msr - 0x200) / 2;
-               is_mtrr_mask = msr - 0x200 - 2 * idx;
-               if (!is_mtrr_mask)
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
-               else
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-               *pt = data;
-       }
-       kvm_mmu_reset_context(vcpu);
-       return 0;
- }
  static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
  {
        u64 mcg_cap = vcpu->arch.mcg_cap;
@@@ -1946,7 -1861,7 +1860,7 @@@ static int xen_hvm_config(struct kvm_vc
                r = PTR_ERR(page);
                goto out;
        }
-       if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
+       if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
                goto out_free;
        r = 0;
  out_free:
@@@ -2046,13 -1961,13 +1960,13 @@@ static int set_msr_hyperv(struct kvm_vc
                        break;
                }
                gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
-               addr = gfn_to_hva(vcpu->kvm, gfn);
+               addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
                if (kvm_is_error_hva(addr))
                        return 1;
                if (__clear_user((void __user *)addr, PAGE_SIZE))
                        return 1;
                vcpu->arch.hv_vapic = data;
-               mark_page_dirty(vcpu->kvm, gfn);
+               kvm_vcpu_mark_page_dirty(vcpu, gfn);
                if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
                        return 1;
                break;
@@@ -2179,7 -2094,7 +2093,7 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                            __func__, data);
                break;
        case 0x200 ... 0x2ff:
-               return set_msr_mtrr(vcpu, msr, data);
+               return kvm_mtrr_set_msr(vcpu, msr, data);
        case MSR_IA32_APICBASE:
                return kvm_set_apic_base(vcpu, msr_info);
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
        case MSR_IA32_MISC_ENABLE:
                vcpu->arch.ia32_misc_enable_msr = data;
                break;
+       case MSR_IA32_SMBASE:
+               if (!msr_info->host_initiated)
+                       return 1;
+               vcpu->arch.smbase = data;
+               break;
        case MSR_KVM_WALL_CLOCK_NEW:
        case MSR_KVM_WALL_CLOCK:
                vcpu->kvm->arch.wall_clock = data;
                                        &vcpu->requests);
  
                        ka->boot_vcpu_runs_old_kvmclock = tmp;
+                       ka->kvmclock_offset = -get_kernel_ns();
                }
  
                vcpu->arch.time = data;
        case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
                return set_msr_mce(vcpu, msr, data);
  
-       /* Performance counters are not protected by a CPUID bit,
-        * so we should check all of them in the generic path for the sake of
-        * cross vendor migration.
-        * Writing a zero into the event select MSRs disables them,
-        * which we perfectly emulate ;-). Any other value should be at least
-        * reported, some guests depend on them.
-        */
-       case MSR_K7_EVNTSEL0:
-       case MSR_K7_EVNTSEL1:
-       case MSR_K7_EVNTSEL2:
-       case MSR_K7_EVNTSEL3:
-               if (data != 0)
-                       vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
-                                   "0x%x data 0x%llx\n", msr, data);
-               break;
-       /* at least RHEL 4 unconditionally writes to the perfctr registers,
-        * so we ignore writes to make it happy.
-        */
-       case MSR_K7_PERFCTR0:
-       case MSR_K7_PERFCTR1:
-       case MSR_K7_PERFCTR2:
-       case MSR_K7_PERFCTR3:
-               vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
-                           "0x%x data 0x%llx\n", msr, data);
-               break;
-       case MSR_P6_PERFCTR0:
-       case MSR_P6_PERFCTR1:
-               pr = true;
-       case MSR_P6_EVNTSEL0:
-       case MSR_P6_EVNTSEL1:
-               if (kvm_pmu_msr(vcpu, msr))
+       case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+       case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
+               pr = true; /* fall through */
+       case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+       case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
+               if (kvm_pmu_is_valid_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr_info);
  
                if (pr || data != 0)
        default:
                if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                        return xen_hvm_config(vcpu, data);
-               if (kvm_pmu_msr(vcpu, msr))
+               if (kvm_pmu_is_valid_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr_info);
                if (!ignore_msrs) {
                        vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
@@@ -2378,48 -2275,12 +2274,12 @@@ EXPORT_SYMBOL_GPL(kvm_set_msr_common)
   * Returns 0 on success, non-0 otherwise.
   * Assumes vcpu_load() was already called.
   */
- int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
  {
-       return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
+       return kvm_x86_ops->get_msr(vcpu, msr);
  }
  EXPORT_SYMBOL_GPL(kvm_get_msr);
  
- static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
- {
-       u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-       if (!msr_mtrr_valid(msr))
-               return 1;
-       if (msr == MSR_MTRRdefType)
-               *pdata = vcpu->arch.mtrr_state.def_type +
-                        (vcpu->arch.mtrr_state.enabled << 10);
-       else if (msr == MSR_MTRRfix64K_00000)
-               *pdata = p[0];
-       else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-               *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
-       else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-               *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
-       else if (msr == MSR_IA32_CR_PAT)
-               *pdata = vcpu->arch.pat;
-       else {  /* Variable MTRRs */
-               int idx, is_mtrr_mask;
-               u64 *pt;
-               idx = (msr - 0x200) / 2;
-               is_mtrr_mask = msr - 0x200 - 2 * idx;
-               if (!is_mtrr_mask)
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
-               else
-                       pt =
-                         (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-               *pdata = *pt;
-       }
-       return 0;
- }
  static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
  {
        u64 data;
@@@ -2517,11 -2378,11 +2377,11 @@@ static int get_msr_hyperv(struct kvm_vc
        return 0;
  }
  
- int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
        u64 data;
  
-       switch (msr) {
+       switch (msr_info->index) {
        case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_EBL_CR_POWERON:
        case MSR_IA32_DEBUGCTLMSR:
        case MSR_K8_SYSCFG:
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
-       case MSR_K7_EVNTSEL0:
-       case MSR_K7_EVNTSEL1:
-       case MSR_K7_EVNTSEL2:
-       case MSR_K7_EVNTSEL3:
-       case MSR_K7_PERFCTR0:
-       case MSR_K7_PERFCTR1:
-       case MSR_K7_PERFCTR2:
-       case MSR_K7_PERFCTR3:
        case MSR_K8_INT_PENDING_MSG:
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
        case MSR_AMD64_BU_CFG2:
-               data = 0;
+               msr_info->data = 0;
                break;
-       case MSR_P6_PERFCTR0:
-       case MSR_P6_PERFCTR1:
-       case MSR_P6_EVNTSEL0:
-       case MSR_P6_EVNTSEL1:
-               if (kvm_pmu_msr(vcpu, msr))
-                       return kvm_pmu_get_msr(vcpu, msr, pdata);
-               data = 0;
+       case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+       case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+       case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
+       case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
+               if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+                       return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+               msr_info->data = 0;
                break;
        case MSR_IA32_UCODE_REV:
-               data = 0x100000000ULL;
+               msr_info->data = 0x100000000ULL;
                break;
        case MSR_MTRRcap:
-               data = 0x500 | KVM_NR_VAR_MTRR;
-               break;
        case 0x200 ... 0x2ff:
-               return get_msr_mtrr(vcpu, msr, pdata);
+               return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
        case 0xcd: /* fsb frequency */
-               data = 3;
+               msr_info->data = 3;
                break;
                /*
                 * MSR_EBC_FREQUENCY_ID
                 * multiplying by zero otherwise.
                 */
        case MSR_EBC_FREQUENCY_ID:
-               data = 1 << 24;
+               msr_info->data = 1 << 24;
                break;
        case MSR_IA32_APICBASE:
-               data = kvm_get_apic_base(vcpu);
+               msr_info->data = kvm_get_apic_base(vcpu);
                break;
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
-               return kvm_x2apic_msr_read(vcpu, msr, pdata);
+               return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
                break;
        case MSR_IA32_TSCDEADLINE:
-               data = kvm_get_lapic_tscdeadline_msr(vcpu);
+               msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
                break;
        case MSR_IA32_TSC_ADJUST:
-               data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+               msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
                break;
        case MSR_IA32_MISC_ENABLE:
-               data = vcpu->arch.ia32_misc_enable_msr;
+               msr_info->data = vcpu->arch.ia32_misc_enable_msr;
+               break;
+       case MSR_IA32_SMBASE:
+               if (!msr_info->host_initiated)
+                       return 1;
+               msr_info->data = vcpu->arch.smbase;
                break;
        case MSR_IA32_PERF_STATUS:
                /* TSC increment by tick */
-               data = 1000ULL;
+               msr_info->data = 1000ULL;
                /* CPU multiplier */
               msr_info->data |= (((uint64_t)4ULL) << 40);
                break;
        case MSR_EFER:
-               data = vcpu->arch.efer;
+               msr_info->data = vcpu->arch.efer;
                break;
        case MSR_KVM_WALL_CLOCK:
        case MSR_KVM_WALL_CLOCK_NEW:
-               data = vcpu->kvm->arch.wall_clock;
+               msr_info->data = vcpu->kvm->arch.wall_clock;
                break;
        case MSR_KVM_SYSTEM_TIME:
        case MSR_KVM_SYSTEM_TIME_NEW:
-               data = vcpu->arch.time;
+               msr_info->data = vcpu->arch.time;
                break;
        case MSR_KVM_ASYNC_PF_EN:
-               data = vcpu->arch.apf.msr_val;
+               msr_info->data = vcpu->arch.apf.msr_val;
                break;
        case MSR_KVM_STEAL_TIME:
-               data = vcpu->arch.st.msr_val;
+               msr_info->data = vcpu->arch.st.msr_val;
                break;
        case MSR_KVM_PV_EOI_EN:
-               data = vcpu->arch.pv_eoi.msr_val;
+               msr_info->data = vcpu->arch.pv_eoi.msr_val;
                break;
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
        case MSR_IA32_MCG_CTL:
        case MSR_IA32_MCG_STATUS:
        case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
-               return get_msr_mce(vcpu, msr, pdata);
+               return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
        case MSR_K7_CLK_CTL:
                /*
                 * Provide expected ramp-up count for K7. All other
                 * type 6, model 8 and higher from exploding due to
                 * the rdmsr failing.
                 */
-               data = 0x20000000;
+               msr_info->data = 0x20000000;
                break;
        case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-               if (kvm_hv_msr_partition_wide(msr)) {
+               if (kvm_hv_msr_partition_wide(msr_info->index)) {
                        int r;
                        mutex_lock(&vcpu->kvm->lock);
-                       r = get_msr_hyperv_pw(vcpu, msr, pdata);
+                       r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
                        mutex_unlock(&vcpu->kvm->lock);
                        return r;
                } else
-                       return get_msr_hyperv(vcpu, msr, pdata);
+                       return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
                break;
        case MSR_IA32_BBL_CR_CTL3:
                /* This legacy MSR exists but isn't fully documented in current
                 * L2 cache control register 3: 64GB range, 256KB size,
                 * enabled, latency 0x1, configured
                 */
-               data = 0xbe702111;
+               msr_info->data = 0xbe702111;
                break;
        case MSR_AMD64_OSVW_ID_LENGTH:
                if (!guest_cpuid_has_osvw(vcpu))
                        return 1;
-               data = vcpu->arch.osvw.length;
+               msr_info->data = vcpu->arch.osvw.length;
                break;
        case MSR_AMD64_OSVW_STATUS:
                if (!guest_cpuid_has_osvw(vcpu))
                        return 1;
-               data = vcpu->arch.osvw.status;
+               msr_info->data = vcpu->arch.osvw.status;
                break;
        default:
-               if (kvm_pmu_msr(vcpu, msr))
-                       return kvm_pmu_get_msr(vcpu, msr, pdata);
+               if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+                       return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
                if (!ignore_msrs) {
-                       vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+                       vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
                        return 1;
                } else {
-                       vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
-                       data = 0;
+                       vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+                       msr_info->data = 0;
                }
                break;
        }
-       *pdata = data;
        return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_get_msr_common);
@@@ -2797,12 -2652,25 +2651,25 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_HYPERV_TIME:
        case KVM_CAP_IOAPIC_POLARITY_IGNORED:
        case KVM_CAP_TSC_DEADLINE_TIMER:
+       case KVM_CAP_ENABLE_CAP_VM:
+       case KVM_CAP_DISABLE_QUIRKS:
  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
        case KVM_CAP_ASSIGN_DEV_IRQ:
        case KVM_CAP_PCI_2_3:
  #endif
                r = 1;
                break;
+       case KVM_CAP_X86_SMM:
+               /* SMBASE is usually relocated above 1M on modern chipsets,
+                * and SMM handlers might indeed rely on 4G segment limits,
+                * so do not report SMM as available if real mode is
+                * emulated via vm86 mode.  Still, do not go to great lengths
+                * to keep userspace from using the feature anyway: the
+                * problematic configuration is a fringe case, reached only
+                * through specific module parameter settings.
+                */
+               r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+               break;
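
A short userspace sketch of probing the new capability on the /dev/kvm fd; it assumes uapi headers from a kernel that defines KVM_CAP_X86_SMM (4.2 or later):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int smm;

            if (kvm < 0)
                    return 1;
            /* 0 when real mode is emulated via vm86 (see the comment above),
             * 1 when SMM can be reported to the guest. */
            smm = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM);
            printf("KVM_CAP_X86_SMM: %d\n", smm);
            close(kvm);
            return 0;
    }
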
        case KVM_CAP_COALESCED_MMIO:
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
@@@ -2859,7 -2727,7 +2726,7 @@@ long kvm_arch_dev_ioctl(struct file *fi
                if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
                        goto out;
                n = msr_list.nmsrs;
-               msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
+               msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
                if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
                        goto out;
                r = -E2BIG;
                        goto out;
                if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
                                 &emulated_msrs,
-                                ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
+                                num_emulated_msrs * sizeof(u32)))
                        goto out;
                r = 0;
                break;
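
Userspace drives the matching KVM_GET_MSR_INDEX_LIST ioctl with the usual two-call pattern: the first call fails with E2BIG but reports the count, the second copies out the indices. A sketch, assuming an open /dev/kvm fd (the helper name is illustrative):

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Two-call pattern for KVM_GET_MSR_INDEX_LIST: the first call fails
     * with E2BIG but fills in nmsrs (num_msrs_to_save + num_emulated_msrs
     * after this change), the second call copies the indices, with the
     * emulated MSRs placed after the saved ones. */
    static struct kvm_msr_list *get_msr_index_list(int kvm_fd)
    {
            struct kvm_msr_list probe = { .nmsrs = 0 };
            struct kvm_msr_list *list;

            if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &probe) == 0 ||
                errno != E2BIG)
                    return NULL;
            list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
            if (!list)
                    return NULL;
            list->nmsrs = probe.nmsrs;
            if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list) < 0) {
                    free(list);
                    return NULL;
            }
            return list;
    }
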
@@@ -3015,6 -2883,13 +2882,13 @@@ static int kvm_vcpu_ioctl_nmi(struct kv
        return 0;
  }
  
+ static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
+ {
+       kvm_make_request(KVM_REQ_SMI, vcpu);
+       return 0;
+ }
  static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
                                           struct kvm_tpr_access_ctl *tac)
  {
@@@ -3120,8 -2995,15 +2994,15 @@@ static void kvm_vcpu_ioctl_x86_get_vcpu
  
        events->sipi_vector = 0; /* never valid when reporting to user space */
  
+       events->smi.smm = is_smm(vcpu);
+       events->smi.pending = vcpu->arch.smi_pending;
+       events->smi.smm_inside_nmi =
+               !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
+       events->smi.latched_init = kvm_lapic_latched_init(vcpu);
        events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
-                        | KVM_VCPUEVENT_VALID_SHADOW);
+                        | KVM_VCPUEVENT_VALID_SHADOW
+                        | KVM_VCPUEVENT_VALID_SMM);
        memset(&events->reserved, 0, sizeof(events->reserved));
  }
  
@@@ -3130,7 -3012,8 +3011,8 @@@ static int kvm_vcpu_ioctl_x86_set_vcpu_
  {
        if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
                              | KVM_VCPUEVENT_VALID_SIPI_VECTOR
-                             | KVM_VCPUEVENT_VALID_SHADOW))
+                             | KVM_VCPUEVENT_VALID_SHADOW
+                             | KVM_VCPUEVENT_VALID_SMM))
                return -EINVAL;
  
        process_nmi(vcpu);
            kvm_vcpu_has_lapic(vcpu))
                vcpu->arch.apic->sipi_vector = events->sipi_vector;
  
+       if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+               if (events->smi.smm)
+                       vcpu->arch.hflags |= HF_SMM_MASK;
+               else
+                       vcpu->arch.hflags &= ~HF_SMM_MASK;
+               vcpu->arch.smi_pending = events->smi.pending;
+               if (events->smi.smm_inside_nmi)
+                       vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+               else
+                       vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
+               if (kvm_vcpu_has_lapic(vcpu)) {
+                       if (events->smi.latched_init)
+                               set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+                       else
+                               clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+               }
+       }
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  
        return 0;
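
A hedged userspace sketch of the new SMM fields in the events interface: read the current events, clear the smi block, and write it back with KVM_VCPUEVENT_VALID_SMM set so the kernel applies it. It assumes a vcpu fd and 4.2+ uapi headers; the helper name is illustrative:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Take a vcpu out of SMM through the save/restore interface: read the
     * current events, clear the smi block, and write it back with
     * KVM_VCPUEVENT_VALID_SMM so the kernel actually applies it. */
    static int clear_smm_state(int vcpu_fd)
    {
            struct kvm_vcpu_events events;

            if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
                    return -1;
            memset(&events.smi, 0, sizeof(events.smi));
            events.flags = KVM_VCPUEVENT_VALID_SMM;
            return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }
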
@@@ -3193,8 -3094,8 +3093,8 @@@ static int kvm_vcpu_ioctl_x86_set_debug
  
  static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
  {
 -      struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
 -      u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
 +      struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
 +      u64 xstate_bv = xsave->header.xfeatures;
        u64 valid;
  
        /*
  
  static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
  {
 -      struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
 +      struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
        u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
        u64 valid;
  
        memcpy(xsave, src, XSAVE_HDR_OFFSET);
  
        /* Set XSTATE_BV and possibly XCOMP_BV.  */
 -      xsave->xsave_hdr.xstate_bv = xstate_bv;
 +      xsave->header.xfeatures = xstate_bv;
        if (cpu_has_xsaves)
 -              xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
 +              xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
  
        /*
         * Copy each region from the non-compacted offset to the
@@@ -3274,8 -3175,8 +3174,8 @@@ static void kvm_vcpu_ioctl_x86_get_xsav
                fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
                memcpy(guest_xsave->region,
 -                      &vcpu->arch.guest_fpu.state->fxsave,
 -                      sizeof(struct i387_fxsave_struct));
 +                      &vcpu->arch.guest_fpu.state.fxsave,
 +                      sizeof(struct fxregs_state));
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
                        XSTATE_FPSSE;
        }
@@@ -3299,8 -3200,8 +3199,8 @@@ static int kvm_vcpu_ioctl_x86_set_xsave
        } else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
 -              memcpy(&vcpu->arch.guest_fpu.state->fxsave,
 -                      guest_xsave->region, sizeof(struct i387_fxsave_struct));
 +              memcpy(&vcpu->arch.guest_fpu.state.fxsave,
 +                      guest_xsave->region, sizeof(struct fxregs_state));
        }
        return 0;
  }
@@@ -3414,6 -3315,10 +3314,10 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                r = kvm_vcpu_ioctl_nmi(vcpu);
                break;
        }
+       case KVM_SMI: {
+               r = kvm_vcpu_ioctl_smi(vcpu);
+               break;
+       }
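
A minimal userspace sketch of the new ioctl, assuming an existing vcpu fd; once the SMI is delivered, KVM_RUN reports SMM via run->flags & KVM_RUN_X86_SMM:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Inject a System Management Interrupt.  The request is latched and
     * delivered on the next vcpu entry; afterwards KVM_RUN reports SMM
     * via run->flags & KVM_RUN_X86_SMM. */
    static int inject_smi(int vcpu_fd)
    {
            return ioctl(vcpu_fd, KVM_SMI);
    }
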
        case KVM_SET_CPUID: {
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;
                break;
        }
        case KVM_GET_MSRS:
-               r = msr_io(vcpu, argp, kvm_get_msr, 1);
+               r = msr_io(vcpu, argp, do_get_msr, 1);
                break;
        case KVM_SET_MSRS:
                r = msr_io(vcpu, argp, do_set_msr, 0);
@@@ -3844,6 -3749,26 +3748,26 @@@ int kvm_vm_ioctl_irq_line(struct kvm *k
        return 0;
  }
  
+ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+                                  struct kvm_enable_cap *cap)
+ {
+       int r;
+       if (cap->flags)
+               return -EINVAL;
+       switch (cap->cap) {
+       case KVM_CAP_DISABLE_QUIRKS:
+               kvm->arch.disabled_quirks = cap->args[0];
+               r = 0;
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+       return r;
+ }
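
Userspace enables this per-VM via KVM_ENABLE_CAP on the VM fd. A sketch, assuming a VM fd; the quirk bit values themselves are x86-specific and come from the running kernel's asm/kvm.h, so they are passed in rather than named here:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Disable a set of legacy KVM quirks for a whole VM.  The bit
     * meanings of args[0] are x86-specific and defined in the kernel's
     * asm/kvm.h, so the caller passes them in. */
    static int disable_quirks(int vm_fd, __u64 quirk_bits)
    {
            struct kvm_enable_cap cap;

            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_DISABLE_QUIRKS;
            cap.args[0] = quirk_bits;
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }
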
  long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
  {
                r = 0;
                break;
        }
+       case KVM_ENABLE_CAP: {
+               struct kvm_enable_cap cap;
  
+               r = -EFAULT;
+               if (copy_from_user(&cap, argp, sizeof(cap)))
+                       goto out;
+               r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+               break;
+       }
        default:
                r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
        }
@@@ -4109,8 -4042,7 +4041,7 @@@ static void kvm_init_msr_list(void
        u32 dummy[2];
        unsigned i, j;
  
-       /* skip the first msrs in the list. KVM-specific */
-       for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
+       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
  
                j++;
        }
        num_msrs_to_save = j;
+       for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
+               switch (emulated_msrs[i]) {
+               case MSR_IA32_SMBASE:
+                       if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
+                               continue;
+                       break;
+               default:
+                       break;
+               }
+               if (j < i)
+                       emulated_msrs[j] = emulated_msrs[i];
+               j++;
+       }
+       num_emulated_msrs = j;
  }
  
  static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@@ -4252,8 -4200,8 +4199,8 @@@ static int kvm_read_guest_virt_helper(g
  
                if (gpa == UNMAPPED_GVA)
                        return X86EMUL_PROPAGATE_FAULT;
-               ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
-                                         offset, toread);
+               ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
+                                              offset, toread);
                if (ret < 0) {
                        r = X86EMUL_IO_NEEDED;
                        goto out;
@@@ -4286,8 -4234,8 +4233,8 @@@ static int kvm_fetch_guest_virt(struct 
        offset = addr & (PAGE_SIZE-1);
        if (WARN_ON(offset + bytes > PAGE_SIZE))
                bytes = (unsigned)PAGE_SIZE - offset;
-       ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
-                                 offset, bytes);
+       ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
+                                      offset, bytes);
        if (unlikely(ret < 0))
                return X86EMUL_IO_NEEDED;
  
@@@ -4333,7 -4281,7 +4280,7 @@@ int kvm_write_guest_virt_system(struct 
  
                if (gpa == UNMAPPED_GVA)
                        return X86EMUL_PROPAGATE_FAULT;
-               ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
+               ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
                if (ret < 0) {
                        r = X86EMUL_IO_NEEDED;
                        goto out;
@@@ -4386,7 -4334,7 +4333,7 @@@ int emulator_write_phys(struct kvm_vcp
  {
        int ret;
  
-       ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
+       ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
        if (ret < 0)
                return 0;
        kvm_mmu_pte_write(vcpu, gpa, val, bytes);
@@@ -4420,7 -4368,7 +4367,7 @@@ static int read_prepare(struct kvm_vcp
  static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
                        void *val, int bytes)
  {
-       return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
+       return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
  }
  
  static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
@@@ -4618,7 -4566,7 +4565,7 @@@ static int emulator_cmpxchg_emulated(st
        if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
                goto emul_write;
  
-       page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
        if (is_error_page(page))
                goto emul_write;
  
        if (!exchanged)
                return X86EMUL_CMPXCHG_FAILED;
  
-       mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
+       kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
        kvm_mmu_pte_write(vcpu, gpa, new, bytes);
  
        return X86EMUL_CONTINUE;
@@@ -4945,7 -4893,17 +4892,17 @@@ static void emulator_set_segment(struc
  static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
                            u32 msr_index, u64 *pdata)
  {
-       return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+       struct msr_data msr;
+       int r;
+       msr.index = msr_index;
+       msr.host_initiated = false;
+       r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
+       if (r)
+               return r;
+       *pdata = msr.data;
+       return 0;
  }
  
  static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
        return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
  }
  
+ static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       return vcpu->arch.smbase;
+ }
+ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       vcpu->arch.smbase = smbase;
+ }
  static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
                              u32 pmc)
  {
-       return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
+       return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
  }
  
  static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
                             u32 pmc, u64 *pdata)
  {
-       return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
+       return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
  }
  
  static void emulator_halt(struct x86_emulate_ctxt *ctxt)
@@@ -5044,6 -5016,8 +5015,8 @@@ static const struct x86_emulate_ops emu
        .cpl                 = emulator_get_cpl,
        .get_dr              = emulator_get_dr,
        .set_dr              = emulator_set_dr,
+       .get_smbase          = emulator_get_smbase,
+       .set_smbase          = emulator_set_smbase,
        .set_msr             = emulator_set_msr,
        .get_msr             = emulator_get_msr,
        .check_pmc           = emulator_check_pmc,
@@@ -5105,7 -5079,10 +5078,10 @@@ static void init_emulate_ctxt(struct kv
                     (cs_l && is_long_mode(vcpu))       ? X86EMUL_MODE_PROT64 :
                     cs_db                              ? X86EMUL_MODE_PROT32 :
                                                          X86EMUL_MODE_PROT16;
-       ctxt->guest_mode = is_guest_mode(vcpu);
+       BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
+       BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
+       BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+       ctxt->emul_flags = vcpu->arch.hflags;
  
        init_decode_cache(ctxt);
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@@ -5274,6 -5251,34 +5250,34 @@@ static bool retry_instruction(struct x8
  static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
  static int complete_emulated_pio(struct kvm_vcpu *vcpu);
  
+ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
+ {
+       if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
+               /* This is a good place to trace that we are exiting SMM.  */
+               trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
+               if (unlikely(vcpu->arch.smi_pending)) {
+                       kvm_make_request(KVM_REQ_SMI, vcpu);
+                       vcpu->arch.smi_pending = 0;
+               } else {
+                       /* Process a latched INIT, if any.  */
+                       kvm_make_request(KVM_REQ_EVENT, vcpu);
+               }
+       }
+       kvm_mmu_reset_context(vcpu);
+ }
+ static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
+ {
+       unsigned changed = vcpu->arch.hflags ^ emul_flags;
+       vcpu->arch.hflags = emul_flags;
+       if (changed & HF_SMM_MASK)
+               kvm_smm_changed(vcpu);
+ }
  static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
                                unsigned long *db)
  {
@@@ -5473,6 -5478,8 +5477,8 @@@ restart
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+               if (vcpu->arch.hflags != ctxt->emul_flags)
+                       kvm_set_hflags(vcpu, ctxt->emul_flags);
                kvm_rip_write(vcpu, ctxt->eip);
                if (r == EMULATE_DONE)
                        kvm_vcpu_check_singlestep(vcpu, rflags, &r);
@@@ -5951,6 -5958,7 +5957,7 @@@ static void kvm_pv_kick_cpu_op(struct k
        lapic_irq.shorthand = 0;
        lapic_irq.dest_mode = 0;
        lapic_irq.dest_id = apicid;
+       lapic_irq.msi_redir_hint = false;
  
        lapic_irq.delivery_mode = APIC_DM_REMRD;
        kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
@@@ -6038,6 -6046,7 +6045,7 @@@ static void post_kvm_run_save(struct kv
        struct kvm_run *kvm_run = vcpu->run;
  
        kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
        if (irqchip_in_kernel(vcpu->kvm))
@@@ -6161,6 -6170,233 +6169,233 @@@ static void process_nmi(struct kvm_vcp
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  }
  
+ #define put_smstate(type, buf, offset, val)                     \
+       *(type *)((buf) + (offset) - 0x7e00) = val
+ static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+ {
+       u32 flags = 0;
+       flags |= seg->g       << 23;
+       flags |= seg->db      << 22;
+       flags |= seg->l       << 21;
+       flags |= seg->avl     << 20;
+       flags |= seg->present << 15;
+       flags |= seg->dpl     << 13;
+       flags |= seg->s       << 12;
+       flags |= seg->type    << 8;
+       return flags;
+ }
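
The put_smstate() arithmetic and the flag packing are easier to check in isolation: the offsets are architectural save-area offsets inside the 0x7e00..0x7fff window of a 512-byte buffer, and the packed flags follow the descriptor attribute layout, with ">> 8" giving the 16-bit form used by the 64-bit save area. A standalone sketch; the struct and constants are local stand-ins, not kernel types:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Same mapping as put_smstate(): an architectural offset in the
     * 0x7e00..0x7fff window indexes a 512-byte buffer that is later
     * written to guest memory at smbase + 0xfe00. */
    #define SMSTATE(type, buf, off) (*(type *)((buf) + (off) - 0x7e00))

    struct seg {
            uint8_t type, s, dpl, present, avl, l, db, g;
    };

    /* Mirrors process_smi_get_segment_flags(): bits 8-15 carry the access
     * byte, bits 20-23 the AVL/L/DB/G attributes, i.e. the usual
     * descriptor layout with the limit bits left out. */
    static uint32_t seg_flags(const struct seg *s)
    {
            return s->g << 23 | s->db << 22 | s->l << 21 | s->avl << 20 |
                   s->present << 15 | s->dpl << 13 | s->s << 12 |
                   s->type << 8;
    }

    int main(void)
    {
            char buf[512];
            struct seg cs = { .type = 0x3, .s = 1, .present = 1, .g = 1 };

            memset(buf, 0, sizeof(buf));
            /* CR0 lives at architectural offset 0x7ffc in the 32-bit save
             * area, i.e. the last dword of the buffer. */
            SMSTATE(uint32_t, buf, 0x7ffc) = 0x60000010;
            assert(*(uint32_t *)(buf + 0x1fc) == 0x60000010);
            /* ">> 8" is the 16-bit attribute form stored by the 64-bit
             * save code above. */
            assert((seg_flags(&cs) >> 8) == 0x8093);
            return 0;
    }
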
+ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+ {
+       struct kvm_segment seg;
+       int offset;
+       kvm_get_segment(vcpu, &seg, n);
+       put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+       if (n < 3)
+               offset = 0x7f84 + n * 12;
+       else
+               offset = 0x7f2c + (n - 3) * 12;
+       put_smstate(u32, buf, offset + 8, seg.base);
+       put_smstate(u32, buf, offset + 4, seg.limit);
+       put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+ }
+ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+ {
+       struct kvm_segment seg;
+       int offset;
+       u16 flags;
+       kvm_get_segment(vcpu, &seg, n);
+       offset = 0x7e00 + n * 16;
+       flags = process_smi_get_segment_flags(&seg) >> 8;
+       put_smstate(u16, buf, offset, seg.selector);
+       put_smstate(u16, buf, offset + 2, flags);
+       put_smstate(u32, buf, offset + 4, seg.limit);
+       put_smstate(u64, buf, offset + 8, seg.base);
+ }
+ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+ {
+       struct desc_ptr dt;
+       struct kvm_segment seg;
+       unsigned long val;
+       int i;
+       put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+       put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+       put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+       put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+       for (i = 0; i < 8; i++)
+               put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
+       kvm_get_dr(vcpu, 6, &val);
+       put_smstate(u32, buf, 0x7fcc, (u32)val);
+       kvm_get_dr(vcpu, 7, &val);
+       put_smstate(u32, buf, 0x7fc8, (u32)val);
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+       put_smstate(u32, buf, 0x7fc4, seg.selector);
+       put_smstate(u32, buf, 0x7f64, seg.base);
+       put_smstate(u32, buf, 0x7f60, seg.limit);
+       put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+       put_smstate(u32, buf, 0x7fc0, seg.selector);
+       put_smstate(u32, buf, 0x7f80, seg.base);
+       put_smstate(u32, buf, 0x7f7c, seg.limit);
+       put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7f74, dt.address);
+       put_smstate(u32, buf, 0x7f70, dt.size);
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7f58, dt.address);
+       put_smstate(u32, buf, 0x7f54, dt.size);
+       for (i = 0; i < 6; i++)
+               process_smi_save_seg_32(vcpu, buf, i);
+       put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+       /* revision id */
+       put_smstate(u32, buf, 0x7efc, 0x00020000);
+       put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ }
+ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+ {
+ #ifdef CONFIG_X86_64
+       struct desc_ptr dt;
+       struct kvm_segment seg;
+       unsigned long val;
+       int i;
+       for (i = 0; i < 16; i++)
+               put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
+       put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+       put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+       kvm_get_dr(vcpu, 6, &val);
+       put_smstate(u64, buf, 0x7f68, val);
+       kvm_get_dr(vcpu, 7, &val);
+       put_smstate(u64, buf, 0x7f60, val);
+       put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+       put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+       put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+       put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+       /* revision id */
+       put_smstate(u32, buf, 0x7efc, 0x00020064);
+       put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+       put_smstate(u16, buf, 0x7e90, seg.selector);
+       put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u32, buf, 0x7e94, seg.limit);
+       put_smstate(u64, buf, 0x7e98, seg.base);
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7e84, dt.size);
+       put_smstate(u64, buf, 0x7e88, dt.address);
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+       put_smstate(u16, buf, 0x7e70, seg.selector);
+       put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u32, buf, 0x7e74, seg.limit);
+       put_smstate(u64, buf, 0x7e78, seg.base);
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7e64, dt.size);
+       put_smstate(u64, buf, 0x7e68, dt.address);
+       for (i = 0; i < 6; i++)
+               process_smi_save_seg_64(vcpu, buf, i);
+ #else
+       WARN_ON_ONCE(1);
+ #endif
+ }
+
+ static void process_smi(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_segment cs, ds;
+       char buf[512];
+       u32 cr0;
+
+       if (is_smm(vcpu)) {
+               vcpu->arch.smi_pending = true;
+               return;
+       }
+       trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
+       vcpu->arch.hflags |= HF_SMM_MASK;
+       memset(buf, 0, 512);
+       if (guest_cpuid_has_longmode(vcpu))
+               process_smi_save_state_64(vcpu, buf);
+       else
+               process_smi_save_state_32(vcpu, buf);
+       kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+       if (kvm_x86_ops->get_nmi_mask(vcpu))
+               vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+       else
+               kvm_x86_ops->set_nmi_mask(vcpu, true);
+       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+       kvm_rip_write(vcpu, 0x8000);
+       cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+       kvm_x86_ops->set_cr0(vcpu, cr0);
+       vcpu->arch.cr0 = cr0;
+       kvm_x86_ops->set_cr4(vcpu, 0);
+       __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+       cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+       cs.base = vcpu->arch.smbase;
+       ds.selector = 0;
+       ds.base = 0;
+       cs.limit    = ds.limit = 0xffffffff;
+       cs.type     = ds.type = 0x3;
+       cs.dpl      = ds.dpl = 0;
+       cs.db       = ds.db = 0;
+       cs.s        = ds.s = 1;
+       cs.l        = ds.l = 0;
+       cs.g        = ds.g = 1;
+       cs.avl      = ds.avl = 0;
+       cs.present  = ds.present = 1;
+       cs.unusable = ds.unusable = 0;
+       cs.padding  = ds.padding = 0;
+       kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+       if (guest_cpuid_has_longmode(vcpu))
+               kvm_x86_ops->set_efer(vcpu, 0);
+       kvm_update_cpuid(vcpu);
+       kvm_mmu_reset_context(vcpu);
+ }
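All of the 0x7exx/0x7fxx offsets above follow the SDM convention of locating state-save fields relative to SMBASE + 0x8000, while process_smi() only builds the top 512 bytes of SMRAM in buf and writes them to smbase + 0xfe00. The put_smstate() helper that bridges the two is defined earlier in x86.c and is not visible in this hunk; a minimal sketch of what it presumably does, with the rebase constant being an assumption:

/* Presumed shape of the helper used above: store a value of the given type
 * at an SDM-style state-save offset, rebased into the 512-byte buffer whose
 * first byte corresponds to offset 0x7e00 (guest physical smbase + 0xfe00). */
#define put_smstate(type, buf, offset, val) \
	(*(type *)((buf) + (offset) - 0x7e00) = (val))

With the default smbase of 0x30000 (restored by kvm_vcpu_reset() further down in this diff), the entry state set up above lands the SMI handler at 0x38000: CS base 0x30000 plus RIP 0x8000.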
  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
  {
        u64 eoi_exit_bitmap[4];
@@@ -6269,12 -6505,14 +6504,14 @@@ static int vcpu_enter_guest(struct kvm_
                }
                if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
                        record_steal_time(vcpu);
+               if (kvm_check_request(KVM_REQ_SMI, vcpu))
+                       process_smi(vcpu);
                if (kvm_check_request(KVM_REQ_NMI, vcpu))
                        process_nmi(vcpu);
                if (kvm_check_request(KVM_REQ_PMU, vcpu))
-                       kvm_handle_pmu_event(vcpu);
+                       kvm_pmu_handle_event(vcpu);
                if (kvm_check_request(KVM_REQ_PMI, vcpu))
-                       kvm_deliver_pmi(vcpu);
+                       kvm_pmu_deliver_pmi(vcpu);
                if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
                        vcpu_scan_ioapic(vcpu);
                if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
  
-       kvm_guest_enter();
+       __kvm_guest_enter();
  
        if (unlikely(vcpu->arch.switch_db_regs)) {
                set_debugreg(0, 7);
@@@ -6596,11 -6834,11 +6833,11 @@@ static int complete_emulated_mmio(struc
  
  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
  {
 +      struct fpu *fpu = &current->thread.fpu;
        int r;
        sigset_t sigsaved;
  
 -      if (!tsk_used_math(current) && init_fpu(current))
 -              return -ENOMEM;
 +      fpu__activate_curr(fpu);
  
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@@ -6970,8 -7208,8 +7207,8 @@@ int kvm_arch_vcpu_ioctl_translate(struc
  
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct i387_fxsave_struct *fxsave =
 -                      &vcpu->arch.guest_fpu.state->fxsave;
 +      struct fxregs_state *fxsave =
 +                      &vcpu->arch.guest_fpu.state.fxsave;
  
        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
  
  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct i387_fxsave_struct *fxsave =
 -                      &vcpu->arch.guest_fpu.state->fxsave;
 +      struct fxregs_state *fxsave =
 +                      &vcpu->arch.guest_fpu.state.fxsave;
  
        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
        return 0;
  }
  
 -int fx_init(struct kvm_vcpu *vcpu, bool init_event)
 +static void fx_init(struct kvm_vcpu *vcpu)
  {
 -      int err;
 -
 -      err = fpu_alloc(&vcpu->arch.guest_fpu);
 -      if (err)
 -              return err;
 -
 -      if (!init_event)
 -              fpu_finit(&vcpu->arch.guest_fpu);
 -
 +      fpstate_init(&vcpu->arch.guest_fpu.state);
        if (cpu_has_xsaves)
 -              vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
 +              vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
                        host_xcr0 | XSTATE_COMPACTION_ENABLED;
  
        /*
        vcpu->arch.xcr0 = XSTATE_FP;
  
        vcpu->arch.cr0 |= X86_CR0_ET;
 -
 -      return 0;
 -}
 -EXPORT_SYMBOL_GPL(fx_init);
 -
 -static void fx_free(struct kvm_vcpu *vcpu)
 -{
 -      fpu_free(&vcpu->arch.guest_fpu);
  }
  
  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
        kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        __kernel_fpu_begin();
 -      fpu_restore_checking(&vcpu->arch.guest_fpu);
 +      __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
        trace_kvm_fpu(1);
  }
  
@@@ -7038,16 -7292,25 +7275,25 @@@ void kvm_put_guest_fpu(struct kvm_vcpu 
  {
        kvm_put_guest_xcr0(vcpu);
  
-       if (!vcpu->guest_fpu_loaded)
+       if (!vcpu->guest_fpu_loaded) {
+               vcpu->fpu_counter = 0;
                return;
+       }
  
        vcpu->guest_fpu_loaded = 0;
 -      fpu_save_init(&vcpu->arch.guest_fpu);
 +      copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
        __kernel_fpu_end();
        ++vcpu->stat.fpu_reload;
-       if (!vcpu->arch.eager_fpu)
-               kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+       /*
+        * If using eager FPU mode, or if the guest is a frequent user
+        * of the FPU, just leave the FPU active for next time.
+        * fpu_counter wraps back to 0 after 255 increments, so a guest
+        * that uses the FPU only in occasional bursts will eventually
+        * revert to loading it on demand.
+        */
+       if (!vcpu->arch.eager_fpu) {
+               if (++vcpu->fpu_counter < 5)
+                       kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+       }
        trace_kvm_fpu(0);
  }
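The hunk above replaces unconditional lazy deactivation with a small hysteresis: the FPU is deactivated between exits only until the vCPU has reloaded it five times in a row, after which it stays resident, and an idle exit resets the count. The fragment below is purely an illustration of that policy in isolation; the names and standalone structure are invented for the example and are not KVM code.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the per-vCPU fields used above. */
struct lazy_fpu_policy {
	uint8_t counter;	/* like vcpu->fpu_counter: wraps after 255 */
	bool    eager;		/* like vcpu->arch.eager_fpu */
};

/* Returns true if the FPU should be deactivated after this guest exit. */
static bool lazy_fpu_should_deactivate(struct lazy_fpu_policy *p,
				       bool fpu_was_loaded)
{
	if (!fpu_was_loaded) {
		p->counter = 0;		/* idle exit: restart the burst count */
		return false;
	}
	if (p->eager)
		return false;		/* eager mode keeps the FPU resident */
	/* Deactivate only until five consecutive reloads have been seen. */
	return ++p->counter < 5;
}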
  
@@@ -7056,6 -7319,7 +7302,6 @@@ void kvm_arch_vcpu_free(struct kvm_vcp
        kvmclock_reset(vcpu);
  
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 -      fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
  }
  
@@@ -7083,14 -7347,13 +7329,13 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
  {
        int r;
  
-       vcpu->arch.mtrr_state.have_fixed = 1;
+       kvm_vcpu_mtrr_init(vcpu);
        r = vcpu_load(vcpu);
        if (r)
                return r;
-       kvm_vcpu_reset(vcpu);
+       kvm_vcpu_reset(vcpu, false);
        kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
        return r;
  }
  
@@@ -7107,6 -7370,9 +7352,9 @@@ void kvm_arch_vcpu_postcreate(struct kv
        kvm_write_tsc(vcpu, &msr);
        vcpu_put(vcpu);
  
+       if (!kvmclock_periodic_sync)
+               return;
        schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
                                        KVMCLOCK_SYNC_PERIOD);
  }
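The early return added above consults kvmclock_periodic_sync, a knob introduced elsewhere in this series; its definition is not part of this hunk. It is presumably a read-mostly module parameter along the following lines, with the default value and permission bits being assumptions:

/* Presumed declaration near the top of arch/x86/kvm/x86.c (not shown here). */
static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO);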
@@@ -7121,11 -7387,14 +7369,13 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
  
 -      fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
  }
  
- void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
+       vcpu->arch.hflags = 0;
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
        kvm_async_pf_hash_reset(vcpu);
        vcpu->arch.apf.halted = false;
  
-       kvm_pmu_reset(vcpu);
+       if (!init_event) {
+               kvm_pmu_reset(vcpu);
+               vcpu->arch.smbase = 0x30000;
+       }
  
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
        vcpu->arch.regs_avail = ~0;
        vcpu->arch.regs_dirty = ~0;
  
-       kvm_x86_ops->vcpu_reset(vcpu);
+       kvm_x86_ops->vcpu_reset(vcpu, init_event);
  }
  
  void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@@ -7346,7 -7618,9 +7599,7 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
                goto fail_free_mce_banks;
        }
  
 -      r = fx_init(vcpu, false);
 -      if (r)
 -              goto fail_free_wbinvd_dirty_mask;
 +      fx_init(vcpu);
  
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        vcpu->arch.pv_time_enabled = false;
  
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
  
+       vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
  
        return 0;
 -fail_free_wbinvd_dirty_mask:
 -      free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 +
  fail_free_mce_banks:
        kfree(vcpu->arch.mce_banks);
  fail_free_lapic:
@@@ -7462,6 -7739,40 +7717,40 @@@ void kvm_arch_sync_events(struct kvm *k
        kvm_free_pit(kvm);
  }
  
+ int __x86_set_memory_region(struct kvm *kvm,
+                           const struct kvm_userspace_memory_region *mem)
+ {
+       int i, r;
+
+       /* Called with kvm->slots_lock held.  */
+       BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               struct kvm_userspace_memory_region m = *mem;
+
+               m.slot |= i << 16;
+               r = __kvm_set_memory_region(kvm, &m);
+               if (r < 0)
+                       return r;
+       }
+
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
+
+ int x86_set_memory_region(struct kvm *kvm,
+                         const struct kvm_userspace_memory_region *mem)
+ {
+       int r;
+
+       mutex_lock(&kvm->slots_lock);
+       r = __x86_set_memory_region(kvm, mem);
+       mutex_unlock(&kvm->slots_lock);
+       return r;
+ }
+ EXPORT_SYMBOL_GPL(x86_set_memory_region);
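__x86_set_memory_region() is where the new multiple-address-space plumbing meets x86's internal memslots: the address-space index is folded into bits 16 and up of the slot number, so the same region is installed once per address space (the SMM work elsewhere in this merge gives x86 a second one). A hedged usage sketch follows; the function name, slot id and guest physical address are placeholders, and .userspace_addr is omitted, so a real caller would map host memory first. Passing memory_size == 0, as kvm_arch_destroy_vm() does just below, deletes the slot instead.

/* Illustrative caller, not taken from this diff: install a one-page internal
 * memslot in every address space.  slot_id and gpa are placeholders. */
static int install_internal_slot(struct kvm *kvm, u32 slot_id, gpa_t gpa)
{
	struct kvm_userspace_memory_region mem = {
		.slot            = slot_id,	/* low 16 bits: slot number */
		.flags           = 0,
		.guest_phys_addr = gpa,
		.memory_size     = PAGE_SIZE,	/* 0 would delete the slot */
	};

	/* Takes kvm->slots_lock; __x86_set_memory_region() then replicates
	 * the region with m.slot |= i << 16 for each address space i. */
	return x86_set_memory_region(kvm, &mem);
}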
  void kvm_arch_destroy_vm(struct kvm *kvm)
  {
        if (current->mm == kvm->mm) {
                struct kvm_userspace_memory_region mem;
                memset(&mem, 0, sizeof(mem));
                mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
-               kvm_set_memory_region(kvm, &mem);
+               x86_set_memory_region(kvm, &mem);
  
                mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-               kvm_set_memory_region(kvm, &mem);
+               x86_set_memory_region(kvm, &mem);
  
                mem.slot = TSS_PRIVATE_MEMSLOT;
-               kvm_set_memory_region(kvm, &mem);
+               x86_set_memory_region(kvm, &mem);
        }
        kvm_iommu_unmap_guest(kvm);
        kfree(kvm->arch.vpic);
@@@ -7568,18 -7879,18 +7857,18 @@@ out_free
        return -ENOMEM;
  }
  
- void kvm_arch_memslots_updated(struct kvm *kvm)
+ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
  {
        /*
         * memslots->generation has been incremented.
         * mmio generation may have reached its maximum value.
         */
-       kvm_mmu_invalidate_mmio_sptes(kvm);
+       kvm_mmu_invalidate_mmio_sptes(kvm, slots);
  }
  
  int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                struct kvm_memory_slot *memslot,
-                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_userspace_memory_region *mem,
                                enum kvm_mr_change change)
  {
        /*
@@@ -7657,14 -7968,14 +7946,14 @@@ static void kvm_mmu_slot_apply_flags(st
  }
  
  void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_userspace_memory_region *mem,
                                const struct kvm_memory_slot *old,
+                               const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
  {
-       struct kvm_memory_slot *new;
        int nr_mmu_pages = 0;
  
-       if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
+       if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
                int ret;
  
                ret = vm_munmap(old->userspace_addr,
        if (nr_mmu_pages)
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
  
-       /* It's OK to get 'new' slot here as it has already been installed */
-       new = id_to_memslot(kvm->memslots, mem->slot);
        /*
         * Dirty logging tracks sptes in 4k granularity, meaning that large
         * sptes have to be split.  If live migration is successful, the guest
         * been zapped so no dirty logging stuff is needed for the old slot. For
         * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
         * new and it's also covered when dealing with the new slot.
+        *
+        * FIXME: const-ify all uses of struct kvm_memory_slot.
         */
        if (change != KVM_MR_DELETE)
-               kvm_mmu_slot_apply_flags(kvm, new);
+               kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
  }
  
  void kvm_arch_flush_shadow_all(struct kvm *kvm)