Merge remote-tracking branch 'kvm/next' into kvm-next-5.20
author Paolo Bonzini <pbonzini@redhat.com>
Fri, 29 Jul 2022 13:46:01 +0000 (09:46 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 1 Aug 2022 07:21:00 +0000 (03:21 -0400)
KVM/s390, KVM/x86 and common infrastructure changes for 5.20

x86:

* Permit guests to ignore single-bit ECC errors

* Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache

* Intel IPI virtualization

* Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS

* PEBS virtualization

* Simplify PMU emulation by just using PERF_TYPE_RAW events

* More accurate event reinjection on SVM (avoid retrying instructions)

* Allow getting/setting the state of the speaker port data bit

* Refuse to load the kvm-intel module if VM-Entry/VM-Exit controls are inconsistent

* "Notify" VM exit (detect microarchitectural hangs) for Intel

* Cleanups for MCE MSR emulation

s390:

* Add an interface to provide a hypervisor dump for secure guests

* Improve selftests to use the TAP interface

* Enable interpretive execution of zPCI instructions (for PCI passthrough)

* First part of deferred teardown

* CPU topology

* PV attestation

* Minor fixes

Generic:

* New selftests API using struct kvm_vcpu instead of a (vm, id) tuple

x86:

* Use try_cmpxchg64 instead of cmpxchg64

* Bugfixes

* Ignore benign host accesses to PMU MSRs when PMU is disabled

* Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior

* x86/MMU: Allow NX huge pages to be disabled on a per-VM basis

* Port eager page splitting to shadow MMU as well

* Enable CMCI capability by default and handle injected UCNA errors

* Expose the PID of vCPU threads in debugfs

* x2AVIC support for AMD

* Clean up PIO emulation

* Fixes for LLDT/LTR emulation

* Don't require refcounted "struct page" to create huge SPTEs

x86 cleanups:

* Use separate namespaces for guest PTEs and shadow PTEs bitmasks

* PIO emulation

* Reorganize rmap API, mostly around rmap destruction

* Do not work around very old KVM bugs for L0 that runs with nesting enabled

* New selftests API for CPUID

23 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/virt/kvm/api.rst
MAINTAINERS
arch/riscv/kvm/mmu.c
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/msr-index.h
arch/x86/kvm/emulate.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
drivers/s390/crypto/ap_bus.c
drivers/s390/virtio/virtio_ccw.c
include/linux/kvm_host.h
include/linux/kvm_types.h
include/uapi/linux/kvm.h
tools/testing/selftests/kvm/lib/aarch64/ucall.c
tools/testing/selftests/kvm/rseq_test.c
virt/kvm/kvm_main.c

Simple merge
diff --cc MAINTAINERS
Simple merge
@@@ -351,11 -350,7 +351,10 @@@ int kvm_riscv_gstage_ioremap(struct kv
        int ret = 0;
        unsigned long pfn;
        phys_addr_t addr, end;
-       struct kvm_mmu_memory_cache pcache;
-       memset(&pcache, 0, sizeof(pcache));
-       pcache.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0;
-       pcache.gfp_zero = __GFP_ZERO;
 -      struct kvm_mmu_memory_cache pcache = { .gfp_zero = __GFP_ZERO };
++      struct kvm_mmu_memory_cache pcache = {
++              .gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0,
++              .gfp_zero = __GFP_ZERO,
++      };
  
        end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
        pfn = __phys_to_pfn(hpa);
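
For context, a minimal sketch of the usual top-up/consume pattern around such a cache, assuming the helpers declared in include/linux/kvm_host.h; the actual call sites in arch/riscv/kvm/mmu.c may differ in detail.

	/*
	 * Sketch only: with .gfp_custom set to GFP_ATOMIC, the top-up
	 * itself may run in atomic context, and allocating from an
	 * already-filled cache never sleeps.
	 */
	struct kvm_mmu_memory_cache pcache = {
		.gfp_custom = GFP_ATOMIC | __GFP_ACCOUNT,
		.gfp_zero = __GFP_ZERO,
	};
	void *pte_page;
	int ret;

	/* Pre-fill the cache with at least 'min' objects. */
	ret = kvm_mmu_topup_memory_cache(&pcache, min);
	if (ret)
		return ret;

	/* Later, under a lock where sleeping is forbidden: */
	pte_page = kvm_mmu_memory_cache_alloc(&pcache);

	/* Drop whatever was left unused. */
	kvm_mmu_free_memory_cache(&pcache);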
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -4,8 -4,10 +4,10 @@@
  
  #include <asm/vmx.h>
  
 -#include "lapic.h"
 -#include "x86.h"
 -#include "pmu.h"
 -#include "cpuid.h"
 +#include "../lapic.h"
 +#include "../x86.h"
++#include "../pmu.h"
++#include "../cpuid.h"
  
  extern bool __read_mostly enable_vpid;
  extern bool __read_mostly flexpriority_enabled;
Simple merge
Simple merge
Simple merge
@@@ -298,7 -286,8 +286,8 @@@ const struct _kvm_stats_desc kvm_vcpu_s
        STATS_DESC_COUNTER(VCPU, directed_yield_successful),
        STATS_DESC_COUNTER(VCPU, preemption_reported),
        STATS_DESC_COUNTER(VCPU, preemption_other),
-       STATS_DESC_IBOOLEAN(VCPU, guest_mode)
 -      STATS_DESC_ICOUNTER(VCPU, guest_mode),
++      STATS_DESC_IBOOLEAN(VCPU, guest_mode),
+       STATS_DESC_COUNTER(VCPU, notify_window_exits),
  };
  
  const struct kvm_stats_header kvm_vcpu_stats_header = {
@@@ -6028,12 -6172,11 +6175,16 @@@ split_irqchip_unlock
                kvm->arch.exception_payload_enabled = cap->args[0];
                r = 0;
                break;
+       case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
+               kvm->arch.triple_fault_event = cap->args[0];
+               r = 0;
+               break;
        case KVM_CAP_X86_USER_SPACE_MSR:
 +              r = -EINVAL;
 +              if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
 +                                   KVM_MSR_EXIT_REASON_UNKNOWN |
 +                                   KVM_MSR_EXIT_REASON_FILTER))
 +                      break;
                kvm->arch.user_space_msr_mask = cap->args[0];
                r = 0;
                break;
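
A hedged userspace sketch of the capability path merged above: enable KVM_CAP_X86_TRIPLE_FAULT_EVENT on the VM, then save/restore the pending triple fault through KVM_GET/SET_VCPU_EVENTS. The vm_fd/vcpu_fd descriptors and the error handling are assumptions; the capability, flag, and field names come from the uapi added by this series.

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int save_restore_triple_fault(int vm_fd, int vcpu_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_X86_TRIPLE_FAULT_EVENT,
			.args[0] = 1,	/* turn the event reporting on */
		};
		struct kvm_vcpu_events events;

		if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
			return -1;

		/* Source side: the pending triple fault travels in events. */
		if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
			return -1;

		/* Destination side: re-inject it explicitly. */
		events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
		events.triple_fault.pending = 1;
		return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
	}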
Simple merge
Simple merge
Simple merge
@@@ -87,9 -92,9 +92,10 @@@ struct gfn_to_pfn_cache 
  struct kvm_mmu_memory_cache {
        int nobjs;
        gfp_t gfp_zero;
 +      gfp_t gfp_custom;
        struct kmem_cache *kmem_cache;
-       void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+       int capacity;
+       void **objects;
  };
  #endif
  
@@@ -270,7 -270,7 +270,8 @@@ struct kvm_xen_exit 
  #define KVM_EXIT_X86_BUS_LOCK     33
  #define KVM_EXIT_XEN              34
  #define KVM_EXIT_RISCV_SBI        35
 -#define KVM_EXIT_NOTIFY           36
 +#define KVM_EXIT_RISCV_CSR        36
++#define KVM_EXIT_NOTIFY           37
  
  /* For KVM_EXIT_INTERNAL_ERROR */
  /* Emulate instruction failed. */
@@@ -497,13 -497,11 +498,18 @@@ struct kvm_run 
                        unsigned long args[6];
                        unsigned long ret[2];
                } riscv_sbi;
 +              /* KVM_EXIT_RISCV_CSR */
 +              struct {
 +                      unsigned long csr_num;
 +                      unsigned long new_value;
 +                      unsigned long write_mask;
 +                      unsigned long ret_value;
 +              } riscv_csr;
+               /* KVM_EXIT_NOTIFY */
+               struct {
+ #define KVM_NOTIFY_CONTEXT_INVALID    (1 << 0)
+                       __u32 flags;
+               } notify;
                /* Fix the size of the union. */
                char padding[256];
        };
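
A hedged sketch of how a VMM run loop might consume the new exit: on KVM_EXIT_NOTIFY, run->notify.flags tells userspace whether vCPU context was left invalid by the microarchitectural hang. The scaffolding (vcpu_fd, the mmap'ed kvm_run, error handling) is assumed.

	#include <linux/kvm.h>
	#include <stdio.h>
	#include <sys/ioctl.h>

	/* One iteration of a run loop; 'run' is the mmap'ed struct kvm_run. */
	static int run_once(int vcpu_fd, struct kvm_run *run)
	{
		if (ioctl(vcpu_fd, KVM_RUN, NULL) < 0)
			return -1;

		if (run->exit_reason == KVM_EXIT_NOTIFY) {
			/* Hang detected; resumable unless context was corrupted. */
			if (run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID) {
				fprintf(stderr, "vCPU context invalid\n");
				return -1;
			}
			return 0;	/* benign: just re-enter the guest */
		}
		return 0;	/* other exit reasons handled elsewhere */
	}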
@@@ -77,20 -78,19 +76,20 @@@ void ucall(uint64_t cmd, int nargs, ...
        va_list va;
        int i;
  
-       nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
 +      WRITE_ONCE(uc.cmd, cmd);
+       nargs = min(nargs, UCALL_MAX_ARGS);
  
        va_start(va, nargs);
        for (i = 0; i < nargs; ++i)
 -              uc.args[i] = va_arg(va, uint64_t);
 +              WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
        va_end(va);
  
 -      *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
 +      WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
  }
  
- uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
  {
-       struct kvm_run *run = vcpu_state(vm, vcpu_id);
+       struct kvm_run *run = vcpu->run;
        struct ucall ucall = {};
  
        if (uc)
@@@ -229,15 -224,14 +225,15 @@@ int main(int argc, char *argv[]
         * GUEST_SYNC, while concurrently migrating the process by setting its
         * CPU affinity.
         */
-       vm = vm_create_default(VCPU_ID, 0, guest_code);
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
        ucall_init(vm, NULL);
  
 -      pthread_create(&migration_thread, NULL, migration_worker, 0);
 +      pthread_create(&migration_thread, NULL, migration_worker,
 +                     (void *)(unsigned long)gettid());
  
        for (i = 0; !done; i++) {
-               vcpu_run(vm, VCPU_ID);
-               TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+               vcpu_run(vcpu);
+               TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
                            "Guest failed?");
  
                /*
@@@ -379,8 -396,9 +396,9 @@@ static inline void *mmu_memory_cache_al
                return (void *)__get_free_page(gfp_flags);
  }
  
- int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
  {
 -      gfp_t gfp = GFP_KERNEL_ACCOUNT;
++      gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
        void *obj;
  
        if (mc->nobjs >= min)
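
For reference, the pre-existing fixed-capacity entry point plausibly reduces to a thin wrapper after this series, so callers that never ask for a custom capacity keep the old array bound; a sketch assuming the names from include/linux/kvm_host.h:

	int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
	{
		/* Preserve the historical fixed capacity for legacy callers. */
		return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
	}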