Merge tag 'kvm-s390-master-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Paolo Bonzini <pbonzini@redhat.com>
Thu, 8 Jul 2021 17:15:57 +0000 (13:15 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 14 Jul 2021 16:14:27 +0000 (12:14 -0400)
diff --combined MAINTAINERS

index b9d5999,8c5ee00..fcd3ba0
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -3877,6 -3877,7 +3877,7 @@@ L:      linux-btrfs@vger.kernel.or
   S:    Maintained
   W:    http://btrfs.wiki.kernel.org/
   Q:    http://patchwork.kernel.org/project/linux-btrfs/list/
+ C:    irc://irc.libera.chat/btrfs
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
   F:    Documentation/filesystems/btrfs.rst
   F:    fs/btrfs/
@@@ -6945,6 -6946,7 +6946,7 @@@ F:      net/core/failover.
   FANOTIFY
   M:    Jan Kara <jack@suse.cz>
   R:    Amir Goldstein <amir73il@gmail.com>
+ R:    Matthew Bobrowski <repnop@google.com>
   L:    linux-fsdevel@vger.kernel.org
   S:    Maintained
   F:    fs/notify/fanotify/
@@@ -9993,8 -9995,6 +9995,8 @@@ F:      arch/arm64/include/asm/kvm
   F:    arch/arm64/include/uapi/asm/kvm*
   F:    arch/arm64/kvm/
   F:    include/kvm/arm_*
+ +F:    tools/testing/selftests/kvm/*/aarch64/
+ +F:    tools/testing/selftests/kvm/aarch64/
   
   KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
   M:    Huacai Chen <chenhuacai@kernel.org>
@@@ -12905,7 -12905,7 +12907,7 @@@ F:   net/ipv4/nexthop.
   
   NFC SUBSYSTEM
   M:    Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
- L:    linux-nfc@lists.01.org (moderated for non-subscribers)
+ L:    linux-nfc@lists.01.org (subscribers-only)
   L:    netdev@vger.kernel.org
   S:    Maintained
   F:    Documentation/devicetree/bindings/net/nfc/
@@@ -12918,7 -12918,7 +12920,7 @@@ F:   net/nfc
   NFC VIRTUAL NCI DEVICE DRIVER
   M:    Bongsu Jeon <bongsu.jeon@samsung.com>
   L:    netdev@vger.kernel.org
- L:    linux-nfc@lists.01.org (moderated for non-subscribers)
+ L:    linux-nfc@lists.01.org (subscribers-only)
   S:    Supported
   F:    drivers/nfc/virtual_ncidev.c
   F:    tools/testing/selftests/nci/
@@@ -13216,7 -13216,7 +13218,7 @@@ F:   sound/soc/codecs/tfa9879
   
   NXP-NCI NFC DRIVER
   R:    Charles Gorand <charles.gorand@effinnov.com>
- L:    linux-nfc@lists.01.org (moderated for non-subscribers)
+ L:    linux-nfc@lists.01.org (subscribers-only)
   S:    Supported
   F:    drivers/nfc/nxp-nci
   
@@@ -14119,6 -14119,7 +14121,7 @@@ F:   drivers/pci/controller/pci-v3-semi.
   PCI ENDPOINT SUBSYSTEM
   M:    Kishon Vijay Abraham I <kishon@ti.com>
   M:    Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ R:    Krzysztof Wilczyński <kw@linux.com>
   L:    linux-pci@vger.kernel.org
   S:    Supported
   F:    Documentation/PCI/endpoint/*
@@@ -14167,6 -14168,7 +14170,7 @@@ F:   drivers/pci/controller/pci-xgene-msi
   PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS
   M:    Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
   R:    Rob Herring <robh@kernel.org>
+ R:    Krzysztof Wilczyński <kw@linux.com>
   L:    linux-pci@vger.kernel.org
   S:    Supported
   Q:    http://patchwork.ozlabs.org/project/linux-pci/list/
@@@ -16145,7 -16147,7 +16149,7 @@@ F:   include/media/drv-intf/s3c_camif.
   SAMSUNG S3FWRN5 NFC DRIVER
   M:    Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
   M:    Krzysztof Opasiak <k.opasiak@samsung.com>
- L:    linux-nfc@lists.01.org (moderated for non-subscribers)
+ L:    linux-nfc@lists.01.org (subscribers-only)
   S:    Maintained
   F:    Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
   F:    drivers/nfc/s3fwrn5
@@@ -16558,6 -16560,7 +16562,7 @@@ F:   drivers/misc/sgi-xp
   
   SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
   M:    Karsten Graul <kgraul@linux.ibm.com>
+ M:    Guvenc Gulce <guvenc@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   S:    Supported
   W:    http://www.ibm.com/developerworks/linux/linux390/
@@@ -18335,7 -18338,7 +18340,7 @@@ F:   sound/soc/codecs/tas571x
   TI TRF7970A NFC DRIVER
   M:    Mark Greer <mgreer@animalcreek.com>
   L:    linux-wireless@vger.kernel.org
- L:    linux-nfc@lists.01.org (moderated for non-subscribers)
+ L:    linux-nfc@lists.01.org (subscribers-only)
   S:    Supported
   F:    Documentation/devicetree/bindings/net/nfc/trf7970a.txt
   F:    drivers/nfc/trf7970a.c
@@@ -18871,6 -18874,13 +18876,13 @@@ S: Maintaine
   F:    drivers/usb/host/isp116x*
   F:    include/linux/usb/isp116x.h
   
+ USB ISP1760 DRIVER
+ M:    Rui Miguel Silva <rui.silva@linaro.org>
+ L:    linux-usb@vger.kernel.org
+ S:    Maintained
+ F:    drivers/usb/isp1760/*
+ F:    Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
+ 
   USB LAN78XX ETHERNET DRIVER
   M:    Woojung Huh <woojung.huh@microchip.com>
   M:    UNGLinuxDriver@microchip.com
diff --combined arch/powerpc/kvm/book3s_hv_rm_mmu.c

index 8b70de4,7a0f124..632b254
--- 1/arch/powerpc/kvm/book3s_hv_rm_mmu.c
--- 2/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@@ -23,20 -23,9 +23,9 @@@
   #include <asm/pte-walk.h>
   
   /* Translate address of a vmalloc'd thing to a linear map address */
- static void *real_vmalloc_addr(void *x)
+ static void *real_vmalloc_addr(void *addr)
   {
-       unsigned long addr = (unsigned long) x;
-       pte_t *p;
-       /*
-        * assume we don't have huge pages in vmalloc space...
-        * So don't worry about THP collapse/split. Called
-        * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
-        */
-       p = find_init_mm_pte(addr, NULL);
-       if (!p || !pte_present(*p))
-               return NULL;
-       addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
-       return __va(addr);
+       return __va(ppc_find_vmap_phys((unsigned long)addr));
   }
   
   /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
@@@ -57,10 -46,6 +46,10 @@@ static int global_invalidates(struct kv
         else
                 global = 1;
   
+ +      /* LPID has been switched to host if in virt mode so can't do local */
+ +      if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+ +              global = 1;
+ +
         if (!global) {
                 /* any other core might now have stale TLB entries... */
                 smp_wmb();
@@@ -71,7 -56,7 +60,7 @@@
                  * so use the bit for the first thread to represent the core.
                  */
                 if (cpu_has_feature(CPU_FTR_ARCH_300))
- -                      cpu = cpu_first_thread_sibling(cpu);
+ +                      cpu = cpu_first_tlb_thread_sibling(cpu);
                 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
         }
   
@@@ -413,7 -398,6 +402,7 @@@ long kvmppc_h_enter(struct kvm_vcpu *vc
                                  vcpu->arch.pgdir, true,
                                  &vcpu->arch.regs.gpr[4]);
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_enter);
   
   #ifdef __BIG_ENDIAN__
   #define LOCK_TOKEN    (*(u32 *)(&get_paca()->lock_token))
@@@ -558,7 -542,6 +547,7 @@@ long kvmppc_h_remove(struct kvm_vcpu *v
         return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
                                   &vcpu->arch.regs.gpr[4]);
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_remove);
   
   long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
   {
@@@ -677,7 -660,6 +666,7 @@@
   
         return ret;
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
   
   long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                       unsigned long pte_index, unsigned long avpn)
@@@ -748,7 -730,6 +737,7 @@@
   
         return H_SUCCESS;
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_protect);
   
   long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                    unsigned long pte_index)
@@@ -789,7 -770,6 +778,7 @@@
         }
         return H_SUCCESS;
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_read);
   
   long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
                         unsigned long pte_index)
@@@ -838,7 -818,6 +827,7 @@@
         unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
         return ret;
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
   
   long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
                         unsigned long pte_index)
@@@ -886,7 -865,6 +875,7 @@@
         unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
         return ret;
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
   
   static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
                           unsigned long gpa, int writing, unsigned long *hpa,
@@@ -1305,4 -1283,3 +1294,4 @@@ long kvmppc_hpte_hv_fault(struct kvm_vc
   
         return -1;              /* send fault up to host kernel mode */
   }
+ +EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
diff --combined drivers/irqchip/irq-gic-v3.c

index 453fc42,66d623f..e0f4deb
--- 1/drivers/irqchip/irq-gic-v3.c
--- 2/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@@ -103,7 -103,7 +103,7 @@@ EXPORT_SYMBOL(gic_nonsecure_priorities)
   /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
   static refcount_t *ppi_nmi_refs;
   
- -static struct gic_kvm_info gic_v3_kvm_info;
+ +static struct gic_kvm_info gic_v3_kvm_info __initdata;
   static DEFINE_PER_CPU(bool, has_rss);
   
   #define MPIDR_RS(mpidr)                       (((mpidr) & 0xF0UL) >> 4)
@@@ -642,11 -642,45 +642,45 @@@ static inline void gic_handle_nmi(u32 i
                 nmi_exit();
   }
   
+ static u32 do_read_iar(struct pt_regs *regs)
+ {
+       u32 iar;
+ 
+       if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
+               u64 pmr;
+ 
+               /*
+                * We were in a context with IRQs disabled. However, the
+                * entry code has set PMR to a value that allows any
+                * interrupt to be acknowledged, and not just NMIs. This can
+                * lead to surprising effects if the NMI has been retired in
+                * the meantime, and that there is an IRQ pending. The IRQ
+                * would then be taken in NMI context, something that nobody
+                * wants to debug twice.
+                *
+                * Until we sort this, drop PMR again to a level that will
+                * actually only allow NMIs before reading IAR, and then
+                * restore it to what it was.
+                */
+               pmr = gic_read_pmr();
+               gic_pmr_mask_irqs();
+               isb();
+ 
+               iar = gic_read_iar();
+ 
+               gic_write_pmr(pmr);
+       } else {
+               iar = gic_read_iar();
+       }
+ 
+       return iar;
+ }
+ 
   static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
   {
         u32 irqnr;
   
-       irqnr = gic_read_iar();
+       irqnr = do_read_iar(regs);
   
         /* Check for special IDs first */
         if ((irqnr >= 1020 && irqnr <= 1023))
@@@ -1852,7 -1886,7 +1886,7 @@@ static void __init gic_of_setup_kvm_inf
   
         gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
         gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
- -      gic_set_kvm_info(&gic_v3_kvm_info);
+ +      vgic_set_kvm_info(&gic_v3_kvm_info);
   }
   
   static int __init gic_of_init(struct device_node *node, struct device_node *parent)
@@@ -2168,7 -2202,7 +2202,7 @@@ static void __init gic_acpi_setup_kvm_i
   
         gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
         gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
- -      gic_set_kvm_info(&gic_v3_kvm_info);
+ +      vgic_set_kvm_info(&gic_v3_kvm_info);
   }
   
   static int __init
diff --combined tools/testing/selftests/kvm/include/kvm_util.h

index 615ab25,74d7353..010b59b
--- 1/tools/testing/selftests/kvm/include/kvm_util.h
--- 2/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@@ -30,7 -30,6 +30,7 @@@ typedef uint64_t vm_vaddr_t; /* Virtua
   
   /* Minimum allocated guest virtual and physical addresses */
   #define KVM_UTIL_MIN_VADDR            0x2000
+ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR        0x180000
   
   #define DEFAULT_GUEST_PHY_PAGES               512
   #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
@@@ -45,6 -44,7 +45,7 @@@ enum vm_guest_mode 
         VM_MODE_P40V48_64K,
         VM_MODE_PXXV48_4K,      /* For 48bits VA but ANY bits PA */
         VM_MODE_P47V64_4K,
+       VM_MODE_P44V64_4K,
         NUM_VM_MODES,
   };
   
@@@ -62,7 -62,7 +63,7 @@@
   
   #elif defined(__s390x__)
   
- #define VM_MODE_DEFAULT                       VM_MODE_P47V64_4K
+ #define VM_MODE_DEFAULT                       VM_MODE_P44V64_4K
   #define MIN_PAGE_SHIFT                        12U
   #define ptes_per_page(page_size)      ((page_size) / 16)
   
@@@ -99,7 -99,8 +100,7 @@@ uint32_t kvm_vm_reset_dirty_ring(struc
   int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
                        size_t len);
   
- -void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- -                   uint32_t data_memslot, uint32_t pgd_memslot);
+ +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
   
   void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
   
@@@ -141,12 -142,10 +142,12 @@@ void vm_mem_region_set_flags(struct kvm
   void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
   void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
   void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
- -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- -                        uint32_t data_memslot, uint32_t pgd_memslot);
+ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+ +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+ +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+ +
   void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- -            unsigned int npages, uint32_t pgd_memslot);
+ +            unsigned int npages);
   void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
   void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
   vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
@@@ -239,7 -238,7 +240,7 @@@ int kvm_device_access(int dev_fd, uint3
   
   const char *exit_reason_str(unsigned int exit_reason);
   
- -void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+ +void virt_pgd_alloc(struct kvm_vm *vm);
   
   /*
    * VM Virtual Page Map
@@@ -257,13 -256,13 +258,13 @@@
    * Within @vm, creates a virtual translation for the page starting
    * at @vaddr to the page starting at @paddr.
    */
- -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- -               uint32_t memslot);
+ +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
   
   vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
                              uint32_t memslot);
   vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
                               vm_paddr_t paddr_min, uint32_t memslot);
+ +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
   
   /*
    * Create a VM with reasonable defaults
@@@ -353,7 -352,6 +354,7 @@@ enum 
         UCALL_SYNC,
         UCALL_ABORT,
         UCALL_DONE,
+ +      UCALL_UNHANDLED,
   };
   
   #define UCALL_MAX_ARGS 6
@@@ -372,31 -370,26 +373,31 @@@ uint64_t get_ucall(struct kvm_vm *vm, u
                                 ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
   #define GUEST_SYNC(stage)     ucall(UCALL_SYNC, 2, "hello", stage)
   #define GUEST_DONE()          ucall(UCALL_DONE, 0)
- -#define __GUEST_ASSERT(_condition, _nargs, _args...) do {     \
- -      if (!(_condition))                                      \
- -              ucall(UCALL_ABORT, 2 + _nargs,                  \
- -                      "Failed guest assert: "                 \
- -                      #_condition, __LINE__, _args);          \
+ +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do {    \
+ +      if (!(_condition))                                              \
+ +              ucall(UCALL_ABORT, 2 + _nargs,                          \
+ +                      "Failed guest assert: "                         \
+ +                      _condstr, __LINE__, _args);                     \
   } while (0)
   
   #define GUEST_ASSERT(_condition) \
- -      __GUEST_ASSERT((_condition), 0, 0)
+ +      __GUEST_ASSERT(_condition, #_condition, 0, 0)
   
   #define GUEST_ASSERT_1(_condition, arg1) \
- -      __GUEST_ASSERT((_condition), 1, (arg1))
+ +      __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
   
   #define GUEST_ASSERT_2(_condition, arg1, arg2) \
- -      __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+ +      __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
   
   #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- -      __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+ +      __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
   
   #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- -      __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+ +      __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+ +
+ +#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+ +
+ +int vm_get_stats_fd(struct kvm_vm *vm);
+ +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
   
   #endif /* SELFTEST_KVM_UTIL_H */
diff --combined tools/testing/selftests/kvm/lib/kvm_util.c

index 5b56b57,8606000..10a8ed6
--- 1/tools/testing/selftests/kvm/lib/kvm_util.c
--- 2/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@@ -176,6 -176,7 +176,7 @@@ const char *vm_guest_mode_string(uint32
                 [VM_MODE_P40V48_64K]    = "PA-bits:40,  VA-bits:48, 64K pages",
                 [VM_MODE_PXXV48_4K]     = "PA-bits:ANY, VA-bits:48,  4K pages",
                 [VM_MODE_P47V64_4K]     = "PA-bits:47,  VA-bits:64,  4K pages",
+               [VM_MODE_P44V64_4K]     = "PA-bits:44,  VA-bits:64,  4K pages",
         };
         _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
                        "Missing new mode strings?");
@@@ -194,6 -195,7 +195,7 @@@ const struct vm_guest_mode_params vm_gu
         { 40, 48, 0x10000, 16 },
         {  0,  0,  0x1000, 12 },
         { 47, 64,  0x1000, 12 },
+       { 44, 64,  0x1000, 12 },
   };
   _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
                "Missing new mode params?");
@@@ -282,6 -284,9 +284,9 @@@ struct kvm_vm *vm_create(enum vm_guest_
         case VM_MODE_P47V64_4K:
                 vm->pgtable_levels = 5;
                 break;
+       case VM_MODE_P44V64_4K:
+               vm->pgtable_levels = 5;
+               break;
         default:
                 TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
         }
@@@ -365,7 -370,7 +370,7 @@@ struct kvm_vm *vm_create_with_vcpus(enu
         pages = vm_adjust_num_guest_pages(mode, pages);
         vm = vm_create(mode, pages, O_RDWR);
   
- -      kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ +      kvm_vm_elf_load(vm, program_invocation_name);
   
   #ifdef __x86_64__
         vm_create_irqchip(vm);
@@@ -375,6 -380,10 +380,6 @@@
                 uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
   
                 vm_vcpu_add_default(vm, vcpuid, guest_code);
- -
- -#ifdef __x86_64__
- -              vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
- -#endif
         }
   
         return vm;
@@@ -1247,13 -1256,15 +1252,13 @@@ va_found
    * a unique set of pages, with the minimum real allocation being at least
    * a page.
    */
- -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- -                        uint32_t data_memslot, uint32_t pgd_memslot)
+ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
   {
         uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
   
- -      virt_pgd_alloc(vm, pgd_memslot);
+ +      virt_pgd_alloc(vm);
         vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
- -                                            KVM_UTIL_MIN_PFN * vm->page_size,
- -                                            data_memslot);
+ +                                            KVM_UTIL_MIN_PFN * vm->page_size, 0);
   
         /*
          * Find an unused range of virtual page addresses of at least
@@@ -1265,7 -1276,7 +1270,7 @@@
         for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
                 pages--, vaddr += vm->page_size, paddr += vm->page_size) {
   
- -              virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ +              virt_pg_map(vm, vaddr, paddr);
   
                 sparsebit_set(vm->vpages_mapped,
                         vaddr >> vm->page_shift);
@@@ -1275,44 -1286,6 +1280,44 @@@
   }
   
   /*
+ + * VM Virtual Address Allocate Pages
+ + *
+ + * Input Args:
+ + *   vm - Virtual Machine
+ + *
+ + * Output Args: None
+ + *
+ + * Return:
+ + *   Starting guest virtual address
+ + *
+ + * Allocates at least N system pages worth of bytes within the virtual address
+ + * space of the vm.
+ + */
+ +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+ +{
+ +      return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+ +}
+ +
+ +/*
+ + * VM Virtual Address Allocate Page
+ + *
+ + * Input Args:
+ + *   vm - Virtual Machine
+ + *
+ + * Output Args: None
+ + *
+ + * Return:
+ + *   Starting guest virtual address
+ + *
+ + * Allocates at least one system page worth of bytes within the virtual address
+ + * space of the vm.
+ + */
+ +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+ +{
+ +      return vm_vaddr_alloc_pages(vm, 1);
+ +}
+ +
+ +/*
    * Map a range of VM virtual address to the VM's physical address
    *
    * Input Args:
@@@ -1330,7 -1303,7 +1335,7 @@@
    * @npages starting at @vaddr to the page range starting at @paddr.
    */
   void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- -            unsigned int npages, uint32_t pgd_memslot)
+ +            unsigned int npages)
   {
         size_t page_size = vm->page_size;
         size_t size = npages * page_size;
@@@ -1339,7 -1312,7 +1344,7 @@@
         TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
   
         while (npages--) {
- -              virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ +              virt_pg_map(vm, vaddr, paddr);
                 vaddr += page_size;
                 paddr += page_size;
         }
@@@ -2209,14 -2182,6 +2214,14 @@@ vm_paddr_t vm_phy_page_alloc(struct kvm
         return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
   }
   
+ +/* Arbitrary minimum physical address used for virtual translation tables. */
+ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+ +
+ +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+ +{
+ +      return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+ +}
+ +
   /*
    * Address Guest Virtual to Host Virtual
    *
@@@ -2326,15 -2291,3 +2331,15 @@@ unsigned int vm_calc_num_guest_pages(en
         n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
         return vm_adjust_num_guest_pages(mode, n);
   }
+ +
+ +int vm_get_stats_fd(struct kvm_vm *vm)
+ +{
+ +      return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
+ +}
+ +
+ +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
+ +{
+ +      struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ +
+ +      return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
+ +}
diff --combined tools/testing/selftests/kvm/set_memory_region_test.c

index d79d58e,d31f54a..72a1c9b
--- 1/tools/testing/selftests/kvm/set_memory_region_test.c
--- 2/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@@ -132,7 -132,7 +132,7 @@@ static struct kvm_vm *spawn_vm(pthread_
         gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
         TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
   
- -      virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+ +      virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
   
         /* Ditto for the host mapping so that both pages can be zeroed. */
         hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@@ -376,8 -376,9 +376,9 @@@ static void test_add_max_memory_regions
         pr_info("Adding slots 0..%i, each memory region with %dK size\n",
                 (max_mem_slots - 1), MEM_REGION_SIZE >> 10);
   
-       mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment,
-                  PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+                  PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
         TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
         mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
   
@@@ -401,7 -402,7 +402,7 @@@
         TEST_ASSERT(ret == -1 && errno == EINVAL,
                     "Adding one more memory slot should fail with EINVAL");
   
-       munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment);
+       munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
         munmap(mem_extra, MEM_REGION_SIZE);
         kvm_vm_free(vm);
   }
diff --combined virt/kvm/kvm_main.c

index 3dcc2ab,46fb042..f7445c3
--- 1/virt/kvm/kvm_main.c
--- 2/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@@ -51,7 -51,6 +51,7 @@@
   #include <linux/io.h>
   #include <linux/lockdep.h>
   #include <linux/kthread.h>
+ +#include <linux/suspend.h>
   
   #include <asm/processor.h>
   #include <asm/ioctl.h>
@@@ -115,6 -114,7 +115,6 @@@ static DEFINE_PER_CPU(struct kvm_vcpu *
   struct dentry *kvm_debugfs_dir;
   EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
   
- -static int kvm_debugfs_num_entries;
   static const struct file_operations stat_fops_per_vm;
   
   static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
@@@ -331,7 -331,7 +331,7 @@@ void kvm_flush_remote_tlbs(struct kvm *
          */
         if (!kvm_arch_flush_remote_tlb(kvm)
             || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
- -              ++kvm->stat.remote_tlb_flush;
+ +              ++kvm->stat.generic.remote_tlb_flush;
         cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
   }
   EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
@@@ -780,38 -780,6 +780,38 @@@ static int kvm_init_mmu_notifier(struc
   
   #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
   
+ +#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+ +static int kvm_pm_notifier_call(struct notifier_block *bl,
+ +                              unsigned long state,
+ +                              void *unused)
+ +{
+ +      struct kvm *kvm = container_of(bl, struct kvm, pm_notifier);
+ +
+ +      return kvm_arch_pm_notifier(kvm, state);
+ +}
+ +
+ +static void kvm_init_pm_notifier(struct kvm *kvm)
+ +{
+ +      kvm->pm_notifier.notifier_call = kvm_pm_notifier_call;
+ +      /* Suspend KVM before we suspend ftrace, RCU, etc. */
+ +      kvm->pm_notifier.priority = INT_MAX;
+ +      register_pm_notifier(&kvm->pm_notifier);
+ +}
+ +
+ +static void kvm_destroy_pm_notifier(struct kvm *kvm)
+ +{
+ +      unregister_pm_notifier(&kvm->pm_notifier);
+ +}
+ +#else /* !CONFIG_HAVE_KVM_PM_NOTIFIER */
+ +static void kvm_init_pm_notifier(struct kvm *kvm)
+ +{
+ +}
+ +
+ +static void kvm_destroy_pm_notifier(struct kvm *kvm)
+ +{
+ +}
+ +#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */
+ +
   static struct kvm_memslots *kvm_alloc_memslots(void)
   {
         int i;
@@@ -859,24 -827,9 +859,24 @@@ static void kvm_free_memslots(struct kv
         kvfree(slots);
   }
   
+ +static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
+ +{
+ +      switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) {
+ +      case KVM_STATS_TYPE_INSTANT:
+ +              return 0444;
+ +      case KVM_STATS_TYPE_CUMULATIVE:
+ +      case KVM_STATS_TYPE_PEAK:
+ +      default:
+ +              return 0644;
+ +      }
+ +}
+ +
+ +
   static void kvm_destroy_vm_debugfs(struct kvm *kvm)
   {
         int i;
+ +      int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+ +                                    kvm_vcpu_stats_header.num_desc;
   
         if (!kvm->debugfs_dentry)
                 return;
@@@ -894,10 -847,7 +894,10 @@@ static int kvm_create_vm_debugfs(struc
   {
         char dir_name[ITOA_MAX_LEN * 2];
         struct kvm_stat_data *stat_data;
- -      struct kvm_stats_debugfs_item *p;
+ +      const struct _kvm_stats_desc *pdesc;
+ +      int i;
+ +      int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+ +                                    kvm_vcpu_stats_header.num_desc;
   
         if (!debugfs_initialized())
                 return 0;
@@@ -911,32 -861,15 +911,32 @@@
         if (!kvm->debugfs_stat_data)
                 return -ENOMEM;
   
- -      for (p = debugfs_entries; p->name; p++) {
+ +      for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
+ +              pdesc = &kvm_vm_stats_desc[i];
+ +              stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
+ +              if (!stat_data)
+ +                      return -ENOMEM;
+ +
+ +              stat_data->kvm = kvm;
+ +              stat_data->desc = pdesc;
+ +              stat_data->kind = KVM_STAT_VM;
+ +              kvm->debugfs_stat_data[i] = stat_data;
+ +              debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+ +                                  kvm->debugfs_dentry, stat_data,
+ +                                  &stat_fops_per_vm);
+ +      }
+ +
+ +      for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
+ +              pdesc = &kvm_vcpu_stats_desc[i];
                 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
                 if (!stat_data)
                         return -ENOMEM;
   
                 stat_data->kvm = kvm;
- -              stat_data->dbgfs_item = p;
- -              kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- -              debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
+ +              stat_data->desc = pdesc;
+ +              stat_data->kind = KVM_STAT_VCPU;
+ +              kvm->debugfs_stat_data[i] = stat_data;
+ +              debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
                                     kvm->debugfs_dentry, stat_data,
                                     &stat_fops_per_vm);
         }
@@@ -976,7 -909,6 +976,7 @@@ static struct kvm *kvm_create_vm(unsign
         mutex_init(&kvm->lock);
         mutex_init(&kvm->irq_lock);
         mutex_init(&kvm->slots_lock);
+ +      mutex_init(&kvm->slots_arch_lock);
         INIT_LIST_HEAD(&kvm->devices);
   
         BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@@ -1031,7 -963,6 +1031,7 @@@
         mutex_unlock(&kvm_lock);
   
         preempt_notifier_inc();
+ +      kvm_init_pm_notifier(kvm);
   
         return kvm;
   
@@@ -1079,7 -1010,6 +1079,7 @@@ static void kvm_destroy_vm(struct kvm *
         int i;
         struct mm_struct *mm = kvm->mm;
   
+ +      kvm_destroy_pm_notifier(kvm);
         kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
         kvm_destroy_vm_debugfs(kvm);
         kvm_arch_sync_events(kvm);
@@@ -1351,14 -1281,6 +1351,14 @@@ static struct kvm_memslots *install_new
         slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
   
         rcu_assign_pointer(kvm->memslots[as_id], slots);
+ +
+ +      /*
+ +       * Acquired in kvm_set_memslot. Must be released before synchronize
+ +       * SRCU below in order to avoid deadlock with another thread
+ +       * acquiring the slots_arch_lock in an srcu critical section.
+ +       */
+ +      mutex_unlock(&kvm->slots_arch_lock);
+ +
         synchronize_srcu_expedited(&kvm->srcu);
   
         /*
@@@ -1385,18 -1307,6 +1385,18 @@@
         return old_memslots;
   }
   
+ +static size_t kvm_memslots_size(int slots)
+ +{
+ +      return sizeof(struct kvm_memslots) +
+ +             (sizeof(struct kvm_memory_slot) * slots);
+ +}
+ +
+ +static void kvm_copy_memslots(struct kvm_memslots *to,
+ +                            struct kvm_memslots *from)
+ +{
+ +      memcpy(to, from, kvm_memslots_size(from->used_slots));
+ +}
+ +
   /*
    * Note, at a minimum, the current number of used slots must be allocated, even
    * when deleting a memslot, as we need a complete duplicate of the memslots for
@@@ -1406,16 -1316,19 +1406,16 @@@ static struct kvm_memslots *kvm_dup_mem
                                              enum kvm_mr_change change)
   {
         struct kvm_memslots *slots;
- -      size_t old_size, new_size;
- -
- -      old_size = sizeof(struct kvm_memslots) +
- -                 (sizeof(struct kvm_memory_slot) * old->used_slots);
+ +      size_t new_size;
   
         if (change == KVM_MR_CREATE)
- -              new_size = old_size + sizeof(struct kvm_memory_slot);
+ +              new_size = kvm_memslots_size(old->used_slots + 1);
         else
- -              new_size = old_size;
+ +              new_size = kvm_memslots_size(old->used_slots);
   
         slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
         if (likely(slots))
- -              memcpy(slots, old, old_size);
+ +              kvm_copy_memslots(slots, old);
   
         return slots;
   }
@@@ -1430,27 -1343,9 +1430,27 @@@ static int kvm_set_memslot(struct kvm *
         struct kvm_memslots *slots;
         int r;
   
+ +      /*
+ +       * Released in install_new_memslots.
+ +       *
+ +       * Must be held from before the current memslots are copied until
+ +       * after the new memslots are installed with rcu_assign_pointer,
+ +       * then released before the synchronize srcu in install_new_memslots.
+ +       *
+ +       * When modifying memslots outside of the slots_lock, must be held
+ +       * before reading the pointer to the current memslots until after all
+ +       * changes to those memslots are complete.
+ +       *
+ +       * These rules ensure that installing new memslots does not lose
+ +       * changes made to the previous memslots.
+ +       */
+ +      mutex_lock(&kvm->slots_arch_lock);
+ +
         slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
- -      if (!slots)
+ +      if (!slots) {
+ +              mutex_unlock(&kvm->slots_arch_lock);
                 return -ENOMEM;
+ +      }
   
         if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
                 /*
@@@ -1461,9 -1356,10 +1461,9 @@@
                 slot->flags |= KVM_MEMSLOT_INVALID;
   
                 /*
- -               * We can re-use the old memslots, the only difference from the
- -               * newly installed memslots is the invalid flag, which will get
- -               * dropped by update_memslots anyway.  We'll also revert to the
- -               * old memslots if preparing the new memory region fails.
+ +               * We can re-use the memory from the old memslots.
+ +               * It will be overwritten with a copy of the new memslots
+ +               * after reacquiring the slots_arch_lock below.
                  */
                 slots = install_new_memslots(kvm, as_id, slots);
   
@@@ -1475,17 -1371,6 +1475,17 @@@
                  *      - kvm_is_visible_gfn (mmu_check_root)
                  */
                 kvm_arch_flush_shadow_memslot(kvm, slot);
+ +
+ +              /* Released in install_new_memslots. */
+ +              mutex_lock(&kvm->slots_arch_lock);
+ +
+ +              /*
+ +               * The arch-specific fields of the memslots could have changed
+ +               * between releasing the slots_arch_lock in
+ +               * install_new_memslots and here, so get a fresh copy of the
+ +               * slots.
+ +               */
+ +              kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
         }
   
         r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
@@@ -1501,13 -1386,8 +1501,13 @@@
         return 0;
   
   out_slots:
- -      if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+ +      if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+ +              slot = id_to_memslot(slots, old->id);
+ +              slot->flags &= ~KVM_MEMSLOT_INVALID;
                 slots = install_new_memslots(kvm, as_id, slots);
+ +      } else {
+ +              mutex_unlock(&kvm->slots_arch_lock);
+ +      }
         kvfree(slots);
         return r;
   }
@@@ -2175,6 -2055,13 +2175,13 @@@ static bool vma_is_valid(struct vm_area
         return true;
   }
   
+ static int kvm_try_get_pfn(kvm_pfn_t pfn)
+ {
+       if (kvm_is_reserved_pfn(pfn))
+               return 1;
+       return get_page_unless_zero(pfn_to_page(pfn));
+ }
+ 
   static int hva_to_pfn_remapped(struct vm_area_struct *vma,
                                unsigned long addr, bool *async,
                                bool write_fault, bool *writable,
@@@ -2224,13 -2111,21 +2231,21 @@@
          * Whoever called remap_pfn_range is also going to call e.g.
          * unmap_mapping_range before the underlying pages are freed,
          * causing a call to our MMU notifier.
+        *
+        * Certain IO or PFNMAP mappings can be backed with valid
+        * struct pages, but be allocated without refcounting e.g.,
+        * tail pages of non-compound higher order allocations, which
+        * would then underflow the refcount when the caller does the
+        * required put_page. Don't allow those pages here.
          */ 
-       kvm_get_pfn(pfn);
+       if (!kvm_try_get_pfn(pfn))
+               r = -EFAULT;
   
   out:
         pte_unmap_unlock(ptep, ptl);
         *p_pfn = pfn;
-       return 0;
+ 
+       return r;
   }
   
   /*
@@@ -3063,9 -2958,9 +3078,9 @@@ static inline voi
   update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
   {
         if (waited)
- -              vcpu->stat.halt_poll_fail_ns += poll_ns;
+ +              vcpu->stat.generic.halt_poll_fail_ns += poll_ns;
         else
- -              vcpu->stat.halt_poll_success_ns += poll_ns;
+ +              vcpu->stat.generic.halt_poll_success_ns += poll_ns;
   }
   
   /*
@@@ -3083,16 -2978,16 +3098,16 @@@ void kvm_vcpu_block(struct kvm_vcpu *vc
         if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
                 ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
   
- -              ++vcpu->stat.halt_attempted_poll;
+ +              ++vcpu->stat.generic.halt_attempted_poll;
                 do {
                         /*
                          * This sets KVM_REQ_UNHALT if an interrupt
                          * arrives.
                          */
                         if (kvm_vcpu_check_block(vcpu) < 0) {
- -                              ++vcpu->stat.halt_successful_poll;
+ +                              ++vcpu->stat.generic.halt_successful_poll;
                                 if (!vcpu_valid_wakeup(vcpu))
- -                                      ++vcpu->stat.halt_poll_invalid;
+ +                                      ++vcpu->stat.generic.halt_poll_invalid;
                                 goto out;
                         }
                         poll_end = cur = ktime_get();
@@@ -3149,7 -3044,7 +3164,7 @@@ bool kvm_vcpu_wake_up(struct kvm_vcpu *
         waitp = kvm_arch_vcpu_get_wait(vcpu);
         if (rcuwait_wake_up(waitp)) {
                 WRITE_ONCE(vcpu->ready, true);
- -              ++vcpu->stat.halt_wakeup;
+ +              ++vcpu->stat.generic.halt_wakeup;
                 return true;
         }
   
@@@ -3482,10 -3377,6 +3497,10 @@@ static int kvm_vm_ioctl_create_vcpu(str
         vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
         BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
   
+ +      /* Fill the stats id string for the vcpu */
+ +      snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+ +               task_pid_nr(current), id);
+ +
         /* Now it's all set up, let userspace reach it */
         kvm_get_kvm(kvm);
         r = create_vcpu_fd(vcpu);
@@@ -3535,44 -3426,6 +3550,44 @@@ static int kvm_vcpu_ioctl_set_sigmask(s
         return 0;
   }
   
+ +static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
+ +                            size_t size, loff_t *offset)
+ +{
+ +      struct kvm_vcpu *vcpu = file->private_data;
+ +
+ +      return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header,
+ +                      &kvm_vcpu_stats_desc[0], &vcpu->stat,
+ +                      sizeof(vcpu->stat), user_buffer, size, offset);
+ +}
+ +
+ +static const struct file_operations kvm_vcpu_stats_fops = {
+ +      .read = kvm_vcpu_stats_read,
+ +      .llseek = noop_llseek,
+ +};
+ +
+ +static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
+ +{
+ +      int fd;
+ +      struct file *file;
+ +      char name[15 + ITOA_MAX_LEN + 1];
+ +
+ +      snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id);
+ +
+ +      fd = get_unused_fd_flags(O_CLOEXEC);
+ +      if (fd < 0)
+ +              return fd;
+ +
+ +      file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY);
+ +      if (IS_ERR(file)) {
+ +              put_unused_fd(fd);
+ +              return PTR_ERR(file);
+ +      }
+ +      file->f_mode |= FMODE_PREAD;
+ +      fd_install(fd, file);
+ +
+ +      return fd;
+ +}
+ +
   static long kvm_vcpu_ioctl(struct file *filp,
                            unsigned int ioctl, unsigned long arg)
   {
@@@ -3770,10 -3623,6 +3785,10 @@@ out_free1
                 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
                 break;
         }
+ +      case KVM_GET_STATS_FD: {
+ +              r = kvm_vcpu_ioctl_get_stats_fd(vcpu);
+ +              break;
+ +      }
         default:
                 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
         }
@@@ -4032,8 -3881,6 +4047,8 @@@ static long kvm_vm_ioctl_check_extensio
   #else
                 return 0;
   #endif
+ +      case KVM_CAP_BINARY_STATS_FD:
+ +              return 1;
         default:
                 break;
         }
@@@ -4137,42 -3984,6 +4152,42 @@@ static int kvm_vm_ioctl_enable_cap_gene
         }
   }
   
+ +static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
+ +                            size_t size, loff_t *offset)
+ +{
+ +      struct kvm *kvm = file->private_data;
+ +
+ +      return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header,
+ +                              &kvm_vm_stats_desc[0], &kvm->stat,
+ +                              sizeof(kvm->stat), user_buffer, size, offset);
+ +}
+ +
+ +static const struct file_operations kvm_vm_stats_fops = {
+ +      .read = kvm_vm_stats_read,
+ +      .llseek = noop_llseek,
+ +};
+ +
+ +static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
+ +{
+ +      int fd;
+ +      struct file *file;
+ +
+ +      fd = get_unused_fd_flags(O_CLOEXEC);
+ +      if (fd < 0)
+ +              return fd;
+ +
+ +      file = anon_inode_getfile("kvm-vm-stats",
+ +                      &kvm_vm_stats_fops, kvm, O_RDONLY);
+ +      if (IS_ERR(file)) {
+ +              put_unused_fd(fd);
+ +              return PTR_ERR(file);
+ +      }
+ +      file->f_mode |= FMODE_PREAD;
+ +      fd_install(fd, file);
+ +
+ +      return fd;
+ +}
+ +
   static long kvm_vm_ioctl(struct file *filp,
                            unsigned int ioctl, unsigned long arg)
   {
@@@ -4355,9 -4166,6 +4370,9 @@@
         case KVM_RESET_DIRTY_RINGS:
                 r = kvm_vm_ioctl_reset_dirty_pages(kvm);
                 break;
+ +      case KVM_GET_STATS_FD:
+ +              r = kvm_vm_ioctl_get_stats_fd(kvm);
+ +              break;
         default:
                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
         }
@@@ -4437,9 -4245,6 +4452,9 @@@ static int kvm_dev_ioctl_create_vm(unsi
         if (r < 0)
                 goto put_kvm;
   
+ +      snprintf(kvm->stats_id, sizeof(kvm->stats_id),
+ +                      "kvm-%d", task_pid_nr(current));
+ +
         file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
         if (IS_ERR(file)) {
                 put_unused_fd(r);
@@@ -4934,7 -4739,7 +4949,7 @@@ static int kvm_debugfs_open(struct inod
                 return -ENOENT;
   
         if (simple_attr_open(inode, file, get,
- -                  KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222
+ +                  kvm_stats_debugfs_mode(stat_data->desc) & 0222
                     ? set : NULL,
                     fmt)) {
                 kvm_put_kvm(stat_data->kvm);
@@@ -4957,14 -4762,14 +4972,14 @@@ static int kvm_debugfs_release(struct i
   
   static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val)
   {
- -      *val = *(ulong *)((void *)kvm + offset);
+ +      *val = *(u64 *)((void *)(&kvm->stat) + offset);
   
         return 0;
   }
   
   static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
   {
- -      *(ulong *)((void *)kvm + offset) = 0;
+ +      *(u64 *)((void *)(&kvm->stat) + offset) = 0;
   
         return 0;
   }
@@@ -4977,7 -4782,7 +4992,7 @@@ static int kvm_get_stat_per_vcpu(struc
         *val = 0;
   
         kvm_for_each_vcpu(i, vcpu, kvm)
- -              *val += *(u64 *)((void *)vcpu + offset);
+ +              *val += *(u64 *)((void *)(&vcpu->stat) + offset);
   
         return 0;
   }
@@@ -4988,7 -4793,7 +5003,7 @@@ static int kvm_clear_stat_per_vcpu(stru
         struct kvm_vcpu *vcpu;
   
         kvm_for_each_vcpu(i, vcpu, kvm)
- -              *(u64 *)((void *)vcpu + offset) = 0;
+ +              *(u64 *)((void *)(&vcpu->stat) + offset) = 0;
   
         return 0;
   }
@@@ -4998,14 -4803,14 +5013,14 @@@ static int kvm_stat_data_get(void *data
         int r = -EFAULT;
         struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
   
- -      switch (stat_data->dbgfs_item->kind) {
+ +      switch (stat_data->kind) {
         case KVM_STAT_VM:
                 r = kvm_get_stat_per_vm(stat_data->kvm,
- -                                      stat_data->dbgfs_item->offset, val);
+ +                                      stat_data->desc->desc.offset, val);
                 break;
         case KVM_STAT_VCPU:
                 r = kvm_get_stat_per_vcpu(stat_data->kvm,
- -                                        stat_data->dbgfs_item->offset, val);
+ +                                        stat_data->desc->desc.offset, val);
                 break;
         }
   
@@@ -5020,14 -4825,14 +5035,14 @@@ static int kvm_stat_data_clear(void *da
         if (val)
                 return -EINVAL;
   
- -      switch (stat_data->dbgfs_item->kind) {
+ +      switch (stat_data->kind) {
         case KVM_STAT_VM:
                 r = kvm_clear_stat_per_vm(stat_data->kvm,
- -                                        stat_data->dbgfs_item->offset);
+ +                                        stat_data->desc->desc.offset);
                 break;
         case KVM_STAT_VCPU:
                 r = kvm_clear_stat_per_vcpu(stat_data->kvm,
- -                                          stat_data->dbgfs_item->offset);
+ +                                          stat_data->desc->desc.offset);
                 break;
         }
   
@@@ -5084,7 -4889,6 +5099,7 @@@ static int vm_stat_clear(void *_offset
   }
   
   DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
+ +DEFINE_SIMPLE_ATTRIBUTE(vm_stat_readonly_fops, vm_stat_get, NULL, "%llu\n");
   
   static int vcpu_stat_get(void *_offset, u64 *val)
   {
@@@ -5121,7 -4925,11 +5136,7 @@@ static int vcpu_stat_clear(void *_offse
   
   DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
                         "%llu\n");
- -
- -static const struct file_operations *stat_fops[] = {
- -      [KVM_STAT_VCPU] = &vcpu_stat_fops,
- -      [KVM_STAT_VM]   = &vm_stat_fops,
- -};
+ +DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL, "%llu\n");
   
   static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
   {
@@@ -5175,32 -4983,15 +5190,32 @@@
   
   static void kvm_init_debug(void)
   {
- -      struct kvm_stats_debugfs_item *p;
+ +      const struct file_operations *fops;
+ +      const struct _kvm_stats_desc *pdesc;
+ +      int i;
   
         kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
   
- -      kvm_debugfs_num_entries = 0;
- -      for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- -              debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
- -                                  kvm_debugfs_dir, (void *)(long)p->offset,
- -                                  stat_fops[p->kind]);
+ +      for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
+ +              pdesc = &kvm_vm_stats_desc[i];
+ +              if (kvm_stats_debugfs_mode(pdesc) & 0222)
+ +                      fops = &vm_stat_fops;
+ +              else
+ +                      fops = &vm_stat_readonly_fops;
+ +              debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+ +                              kvm_debugfs_dir,
+ +                              (void *)(long)pdesc->desc.offset, fops);
+ +      }
+ +
+ +      for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
+ +              pdesc = &kvm_vcpu_stats_desc[i];
+ +              if (kvm_stats_debugfs_mode(pdesc) & 0222)
+ +                      fops = &vcpu_stat_fops;
+ +              else
+ +                      fops = &vcpu_stat_readonly_fops;
+ +              debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+ +                              kvm_debugfs_dir,
+ +                              (void *)(long)pdesc->desc.offset, fops);
         }
   }
   
@@@ -5350,8 -5141,7 +5365,8 @@@ int kvm_init(void *opaque, unsigned vcp
                 kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
                                            SLAB_ACCOUNT,
                                            offsetof(struct kvm_vcpu, arch),
- -                                         sizeof_field(struct kvm_vcpu, arch),
+ +                                         offsetofend(struct kvm_vcpu, stats_id)
+ +                                         - offsetof(struct kvm_vcpu, arch),
                                            NULL);
         if (!kvm_vcpu_cache) {
                 r = -ENOMEM;
author	Paolo Bonzini <pbonzini@redhat.com>
	Thu, 8 Jul 2021 17:15:57 +0000 (13:15 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 14 Jul 2021 16:14:27 +0000 (12:14 -0400)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kvm/book3s_hv_rm_mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/irqchip/irq-gic-v3.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/include/kvm_util.h	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/lib/kvm_util.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/set_memory_region_test.c	patch \|	diff1 \|	diff2 \|	blob \| history
virt/kvm/kvm_main.c	patch \|	diff1 \|	diff2 \|	blob \| history