Merge tag 'kvm-s390-master-5.15-2' of git://git.kernel.org/pub/scm/linux/kernel/git...
author		Paolo Bonzini <pbonzini@redhat.com>	Mon, 25 Oct 2021 13:08:56 +0000 (09:08 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>	Mon, 25 Oct 2021 13:08:56 +0000 (09:08 -0400)
KVM: s390: Fixes for interrupt delivery

Two bugs that might result in CPUs not being woken up when interrupts are
pending.

13 files changed:
arch/arm64/kvm/hyp/include/nvhe/gfp.h
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/page_alloc.c
arch/arm64/kvm/mmu.c
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvmclock.h
arch/x86/kernel/kvmclock.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/x86.c
drivers/ptp/ptp_kvm_x86.c
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/rseq_test.c

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index fb0f523..0a048dc 100644
@@ -24,6 +24,7 @@ struct hyp_pool {
 
 /* Allocation */
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_split_page(struct hyp_page *page);
 void hyp_get_page(struct hyp_pool *pool, void *addr);
 void hyp_put_page(struct hyp_pool *pool, void *addr);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index bacd493..34eeb52 100644
@@ -35,7 +35,18 @@ const u8 pkvm_hyp_id = 1;
 
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
-       return hyp_alloc_pages(&host_s2_pool, get_order(size));
+       void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
+
+       hyp_split_page(hyp_virt_to_page(addr));
+
+       /*
+        * The size of concatenated PGDs is always a power-of-two multiple of PAGE_SIZE,
+        * so there should be no need to free any of the tail pages to make the
+        * allocation exact.
+        */
+       WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+       return addr;
 }
 
 static void *host_s2_zalloc_page(void *pool)
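
Note: host_s2_zalloc_pages_exact() hands out a higher-order block, but the host stage-2 page-table code takes and drops references on the constituent pages individually. Splitting the block with hyp_split_page() (added in page_alloc.c below) gives every page its own order-0 refcount so those per-page puts stay balanced; the WARN_ON() records the assumption, spelled out in the comment, that concatenated PGD allocations already span an exact power-of-two number of pages, so no tail pages ever need to be returned.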
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 41fc25b..0bd7701 100644
@@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p)
 
 static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
 {
+       BUG_ON(!p->refcount);
        p->refcount--;
        return (p->refcount == 0);
 }
@@ -193,6 +194,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
        hyp_spin_unlock(&pool->lock);
 }
 
+void hyp_split_page(struct hyp_page *p)
+{
+       unsigned short order = p->order;
+       unsigned int i;
+
+       p->order = 0;
+       for (i = 1; i < (1 << order); i++) {
+               struct hyp_page *tail = p + i;
+
+               tail->order = 0;
+               hyp_set_page_refcounted(tail);
+       }
+}
+
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
 {
        unsigned short i = order;
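
A minimal sketch (not part of the patch) of what the split buys, using only the pool and helpers visible above: after hyp_split_page(), every page of an order-2 allocation carries its own order-0 refcount, so per-page puts stay balanced and no longer trip the new BUG_ON() underflow check.

        /* Hypothetical illustration: allocate 4 contiguous pages, then release them singly. */
        void *addr = hyp_alloc_pages(&host_s2_pool, 2);     /* head page: order 2, refcount 1 */

        hyp_split_page(hyp_virt_to_page(addr));             /* head and tails now order 0, refcount 1 each */

        hyp_put_page(&host_s2_pool, addr + PAGE_SIZE);      /* tail pages can be dropped individually */
        hyp_put_page(&host_s2_pool, addr + 2 * PAGE_SIZE);  /* without corrupting the head's count */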
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1a94a7c..69bd173 100644
@@ -1529,8 +1529,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                 * when updating the PG_mte_tagged page flag, see
                 * sanitise_mte_tags for more details.
                 */
-               if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
-                       return -EINVAL;
+               if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+                       ret = -EINVAL;
+                       break;
+               }
 
                if (vma->vm_flags & VM_PFNMAP) {
                        /* IO region dirty page logging not allowed */
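
Note: kvm_arch_prepare_memory_region() takes the mmap read lock before walking the VMAs, and returning -EINVAL from inside the walk skipped the common exit path that drops it. Setting ret and breaking out of the loop (the same pattern the VM_PFNMAP error case below uses) lets the MTE-with-VM_SHARED rejection go through that unlock path as well.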
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7..7077137 100644
@@ -1097,7 +1097,7 @@ struct kvm_arch {
        u64 cur_tsc_generation;
        int nr_vcpus_matched_tsc;
 
-       spinlock_t pvclock_gtod_sync_lock;
+       raw_spinlock_t pvclock_gtod_sync_lock;
        bool use_master_clock;
        u64 master_kernel_ns;
        u64 master_cycle_now;
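
Note: pvclock_gtod_sync_lock becomes a raw_spinlock_t, which stays a genuine spinning lock even on PREEMPT_RT, where an ordinary spinlock_t is turned into a sleeping lock; the x86.c hunk further down converts every lock/unlock site to the raw_spin_* variants to match. The likely motivation is that this lock is taken from contexts that must not sleep.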
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h
index eceea92..6c57651 100644
@@ -2,6 +2,20 @@
 #ifndef _ASM_X86_KVM_CLOCK_H
 #define _ASM_X86_KVM_CLOCK_H
 
+#include <linux/percpu.h>
+
 extern struct clocksource kvm_clock;
 
+DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+
+static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
+{
+       return &this_cpu_read(hv_clock_per_cpu)->pvti;
+}
+
+static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
+{
+       return this_cpu_read(hv_clock_per_cpu);
+}
+
 #endif /* _ASM_X86_KVM_CLOCK_H */
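
hv_clock_per_cpu and its two accessors move from kvmclock.c into this header (the per-CPU symbol is exported in the kvmclock.c hunk below) so that other in-tree code, such as the ptp_kvm driver further down, can read the calling CPU's pvclock area. A rough sketch of a reader, assuming the seqcount helpers from <asm/pvclock.h> and that preemption is already disabled:

        struct pvclock_vcpu_time_info *src = this_cpu_pvti();
        unsigned int version;
        u64 tsc;

        do {
                version = pvclock_read_begin(src);  /* snapshot the pvclock update sequence count */
                tsc = rdtsc_ordered();              /* sample the TSC inside the retry loop */
        } while (pvclock_read_retry(src, version));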
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ad273e5..73c74b9 100644
@@ -49,18 +49,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 static struct pvclock_vsyscall_time_info
                        hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
 static struct pvclock_wall_clock wall_clock __bss_decrypted;
-static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
 static struct pvclock_vsyscall_time_info *hvclock_mem;
-
-static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
-{
-       return &this_cpu_read(hv_clock_per_cpu)->pvti;
-}
-
-static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
-{
-       return this_cpu_read(hv_clock_per_cpu);
-}
+DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu);
 
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index fe03bd9..751aa85 100644
@@ -65,8 +65,8 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
        for (i = 0; i < nent; i++) {
                e = &entries[i];
 
-               if (e->function == function && (e->index == index ||
-                   !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
+               if (e->function == function &&
+                   (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
                        return e;
        }
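
Note: the match condition in cpuid_entry2_find() is reordered so that KVM_CPUID_FLAG_SIGNIFCANT_INDEX is consulted before the index comparison: an entry whose index is not significant matches whatever index the caller passed, and the index value is only compared when the flag says it matters. The two orderings are logically equivalent here; the new form simply reads in the intended precedence.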
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c36b5fe..e672493 100644
@@ -2583,7 +2583,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
                return -EINVAL;
 
        return kvm_sev_es_string_io(&svm->vcpu, size, port,
-                                   svm->ghcb_sa, svm->ghcb_sa_len, in);
+                                   svm->ghcb_sa, svm->ghcb_sa_len / size, in);
 }
 
 void sev_es_init_vmcb(struct vcpu_svm *svm)
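
Note: ghcb_sa_len is a byte count, while the count argument of kvm_sev_es_string_io() is the number of I/O repetitions. Take a hypothetical "rep outsw" of 4 words: the guest places 8 bytes in the GHCB scratch area, so ghcb_sa_len = 8 and size = 2; passing 8 would have emulated eight 2-byte transfers, whereas 8 / 2 = 4 is the intended repetition count.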
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aabd3a2..5c82eff 100644
@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        kvm_vcpu_write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-       spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
        if (!matched) {
                kvm->arch.nr_vcpus_matched_tsc = 0;
        } else if (!already_matched) {
@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        }
 
        kvm_track_tsc_matching(vcpu);
-       spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        kvm_make_mclock_inprogress_request(kvm);
 
        /* no guest entries from this point */
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        pvclock_update_vm_gtod_copy(kvm);
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
        unsigned long flags;
        u64 ret;
 
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        if (!ka->use_master_clock) {
-               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                return get_kvmclock_base_ns() + ka->kvmclock_offset;
        }
 
        hv_clock.tsc_timestamp = ka->master_cycle_now;
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
-       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        use_master_clock = ka->use_master_clock;
        if (use_master_clock) {
                host_tsc = ka->master_cycle_now;
                kernel_ns = ka->master_kernel_ns;
        }
-       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+       raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@ -6100,13 +6100,13 @@ set_pit2_out:
                 * is slightly ahead) here we risk going negative on unsigned
                 * 'system_time' when 'user_ns.clock' is very small.
                 */
-               spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+               raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
                if (kvm->arch.use_master_clock)
                        now_ns = ka->master_kernel_ns;
                else
                        now_ns = get_kvmclock_base_ns();
                ka->kvmclock_offset = user_ns.clock - now_ns;
-               spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+               raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
 
                kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
@@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void)
        list_for_each_entry(kvm, &vm_list, vm_list) {
                struct kvm_arch *ka = &kvm->arch;
 
-               spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                pvclock_update_vm_gtod_copy(kvm);
-               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+               raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        raw_spin_lock_init(&kvm->arch.tsc_write_lock);
        mutex_init(&kvm->arch.apic_map_lock);
-       spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+       raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
        kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
        pvclock_update_vm_gtod_copy(kvm);
@@ -11392,7 +11392,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
                int level = i + 1;
                int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
 
-               WARN_ON(slot->arch.rmap[i]);
+               if (slot->arch.rmap[i])
+                       continue;
 
                slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
                if (!slot->arch.rmap[i]) {
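
Note: memslot_rmap_alloc() previously warned if a level's rmap array was already present and then allocated over it anyway. Skipping such levels instead lets the function be called for a slot whose rmaps are already partially set up, presumably via the lazy rmap-allocation path, without leaking the existing arrays.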
diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c
index 3dd519d..d0096cd 100644
@@ -15,8 +15,6 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/ptp_kvm.h>
 
-struct pvclock_vsyscall_time_info *hv_clock;
-
 static phys_addr_t clock_pair_gpa;
 static struct kvm_clock_pairing clock_pair;
 
@@ -28,8 +26,7 @@ int kvm_arch_ptp_init(void)
                return -ENODEV;
 
        clock_pair_gpa = slow_virt_to_phys(&clock_pair);
-       hv_clock = pvclock_get_pvti_cpu0_va();
-       if (!hv_clock)
+       if (!pvclock_get_pvti_cpu0_va())
                return -ENODEV;
 
        ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
@@ -64,10 +61,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
        struct pvclock_vcpu_time_info *src;
        unsigned int version;
        long ret;
-       int cpu;
 
-       cpu = smp_processor_id();
-       src = &hv_clock[cpu].pvti;
+       src = this_cpu_pvti();
 
        do {
                /*
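
Note: kvm_arch_ptp_get_crosststamp() now reads the calling CPU's pvclock area through this_cpu_pvti() instead of indexing from the CPU 0 pointer returned by pvclock_get_pvti_cpu0_va(). The per-CPU areas are not one contiguous array: only the first HVC_BOOT_ARRAY_SIZE CPUs live in hv_clock_boot (see kvmclock.c above), so indexing by processor id could read the wrong slot, or past the array, on larger systems.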
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index eba8bd0..05e65ca 100644
@@ -315,7 +315,7 @@ static inline void set_xmm(int n, unsigned long val)
 #define GET_XMM(__xmm)                                                 \
 ({                                                                     \
        unsigned long __val;                                            \
-       asm volatile("movq %%"#__xmm", %0" : "=r"(__val) : : #__xmm);   \
+       asm volatile("movq %%"#__xmm", %0" : "=r"(__val));              \
        __val;                                                          \
 })
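
Note: the xmm register is only read by this asm, not modified, so it is dropped from the clobber list; the output operand __val is the only thing written.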
 
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index c5e0dd6..4158da0 100644
@@ -10,6 +10,7 @@
 #include <signal.h>
 #include <syscall.h>
 #include <sys/ioctl.h>
+#include <sys/sysinfo.h>
 #include <asm/barrier.h>
 #include <linux/atomic.h>
 #include <linux/rseq.h>
@@ -39,6 +40,7 @@ static __thread volatile struct rseq __rseq = {
 
 static pthread_t migration_thread;
 static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
 static bool done;
 
 static atomic_t seq_cnt;
@@ -57,20 +59,37 @@ static void sys_rseq(int flags)
        TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
 }
 
+static int next_cpu(int cpu)
+{
+       /*
+        * Advance to the next CPU, skipping those that weren't in the original
+        * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+        * data storage is considered opaque.  Note, if this task is pinned
+        * to a small set of discontiguous CPUs, e.g. 2 and 1023, this loop will
+        * burn a lot of cycles and the test will take longer than normal to
+        * complete.
+        */
+       do {
+               cpu++;
+               if (cpu > max_cpu) {
+                       cpu = min_cpu;
+                       TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+                                   "Min CPU = %d must always be usable", cpu);
+                       break;
+               }
+       } while (!CPU_ISSET(cpu, &possible_mask));
+
+       return cpu;
+}
+
 static void *migration_worker(void *ign)
 {
        cpu_set_t allowed_mask;
-       int r, i, nr_cpus, cpu;
+       int r, i, cpu;
 
        CPU_ZERO(&allowed_mask);
 
-       nr_cpus = CPU_COUNT(&possible_mask);
-
-       for (i = 0; i < NR_TASK_MIGRATIONS; i++) {
-               cpu = i % nr_cpus;
-               if (!CPU_ISSET(cpu, &possible_mask))
-                       continue;
-
+       for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
                CPU_SET(cpu, &allowed_mask);
 
                /*
@@ -154,6 +173,36 @@ static void *migration_worker(void *ign)
        return NULL;
 }
 
+static int calc_min_max_cpu(void)
+{
+       int i, cnt, nproc;
+
+       if (CPU_COUNT(&possible_mask) < 2)
+               return -EINVAL;
+
+       /*
+        * CPU_SET doesn't provide a FOR_EACH helper; get the min/max CPU that
+        * this task is affined to in order to reduce the time spent querying
+        * unusable CPUs, e.g. if this task is pinned to a small percentage of
+        * total CPUs.
+        */
+       nproc = get_nprocs_conf();
+       min_cpu = -1;
+       max_cpu = -1;
+       cnt = 0;
+
+       for (i = 0; i < nproc; i++) {
+               if (!CPU_ISSET(i, &possible_mask))
+                       continue;
+               if (min_cpu == -1)
+                       min_cpu = i;
+               max_cpu = i;
+               cnt++;
+       }
+
+       return (cnt < 2) ? -EINVAL : 0;
+}
+
 int main(int argc, char *argv[])
 {
        int r, i, snapshot;
@@ -167,8 +216,8 @@ int main(int argc, char *argv[])
        TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
                    strerror(errno));
 
-       if (CPU_COUNT(&possible_mask) < 2) {
-               print_skip("Only one CPU, task migration not possible\n");
+       if (calc_min_max_cpu()) {
+               print_skip("Only one usable CPU, task migration not possible");
                exit(KSFT_SKIP);
        }
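
Note: the old scheme, cpu = i % nr_cpus with nr_cpus = CPU_COUNT(&possible_mask), silently dropped migrations whenever the affinity mask was sparse. Pinned to CPUs 0 and 2, for example, nr_cpus is 2, so the loop only ever computed CPU 0 or 1, skipped every odd iteration, and never migrated to CPU 2 at all. Walking from min_cpu through next_cpu(), with calc_min_max_cpu() precomputing the usable range, guarantees that each of the NR_TASK_MIGRATIONS iterations lands on a CPU the task can actually run on, and the skip message now reflects that at least two usable CPUs are required.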