KVM: Note down when cpu relax intercepted or pause loop exited
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9739b53..0892b75 100644
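The kvm_vcpu_set_in_spin_loop()/kvm_vcpu_set_dy_eligible() helpers used in the hunks below are introduced in include/linux/kvm_host.h rather than in this file; a minimal sketch of what they plausibly look like there, assuming a per-vcpu spin_loop tracking struct guarded by CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT:

#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
/* Mark that the vcpu is currently handling a PAUSE/cpu-relax intercept. */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
        vcpu->spin_loop.in_spin_loop = val;
}

/* Mark whether the vcpu is an eligible target for directed yield. */
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
        vcpu->spin_loop.dy_eligible = val;
}
#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
}

static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
#endif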
@@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+struct page *bad_page;
+static pfn_t bad_pfn;
+
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
        }
        vcpu->run = page_address(page);
 
+       kvm_vcpu_set_in_spin_loop(vcpu, false);
+       kvm_vcpu_set_dy_eligible(vcpu, false);
+
        r = kvm_arch_vcpu_init(vcpu);
        if (r < 0)
                goto fail_free_run;
@@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         * count is also read inside the mmu_lock critical section.
         */
        kvm->mmu_notifier_count++;
-       for (; start < end; start += PAGE_SIZE)
-               need_tlb_flush |= kvm_unmap_hva(kvm, start);
+       need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
        need_tlb_flush |= kvm->tlbs_dirty;
        /* we've to flush the tlb before the pages can be freed */
        if (need_tlb_flush)
@@ -516,18 +521,33 @@ out_err_nodisable:
        return ERR_PTR(r);
 }
 
+/*
+ * Avoid using vmalloc for a small buffer.
+ * Should not be used when the size is statically known.
+ */
+void *kvm_kvzalloc(unsigned long size)
+{
+       if (size > PAGE_SIZE)
+               return vzalloc(size);
+       else
+               return kzalloc(size, GFP_KERNEL);
+}
+
+void kvm_kvfree(const void *addr)
+{
+       if (is_vmalloc_addr(addr))
+               vfree(addr);
+       else
+               kfree(addr);
+}
+
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
        if (!memslot->dirty_bitmap)
                return;
 
-       if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
-               vfree(memslot->dirty_bitmap_head);
-       else
-               kfree(memslot->dirty_bitmap_head);
-
+       kvm_kvfree(memslot->dirty_bitmap);
        memslot->dirty_bitmap = NULL;
-       memslot->dirty_bitmap_head = NULL;
 }
 
 /*
@@ -611,24 +631,17 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 
 /*
  * Allocation size is twice as large as the actual dirty bitmap size.
- * This makes it possible to do double buffering: see x86's
- * kvm_vm_ioctl_get_dirty_log().
+ * See x86's kvm_vm_ioctl_get_dirty_log() for why this is needed.
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
 #ifndef CONFIG_S390
        unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
-       if (dirty_bytes > PAGE_SIZE)
-               memslot->dirty_bitmap = vzalloc(dirty_bytes);
-       else
-               memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL);
-
+       memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
        if (!memslot->dirty_bitmap)
                return -ENOMEM;
 
-       memslot->dirty_bitmap_head = memslot->dirty_bitmap;
-       memslot->nr_dirty_pages = 0;
 #endif /* !CONFIG_S390 */
        return 0;
 }
@@ -942,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
-int is_fault_pfn(pfn_t pfn)
-{
-       return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
        return pfn == bad_pfn;
@@ -1031,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
 {
        get_page(fault_page);
        return fault_pfn;
 }
+EXPORT_SYMBOL_GPL(get_fault_pfn);
 
 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int write, struct page **page)
@@ -1057,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
        return rc == -EHWPOISON;
 }
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-                       bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+                       bool write_fault, bool *writable)
 {
        struct page *page[1];
        int npages = 0;
@@ -1138,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
        return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
 {
-       return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+       return hva_to_pfn(addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
@@ -1158,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
                return page_to_pfn(bad_page);
        }
 
-       return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+       return hva_to_pfn(addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1187,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-                        struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
        unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-       return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+       return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1477,8 +1484,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
        if (memslot && memslot->dirty_bitmap) {
                unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-               if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
-                       memslot->nr_dirty_pages++;
+               /* TODO: introduce set_bit_le() and use it */
+               test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
        }
 }
 
@@ -1515,6 +1522,30 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
        finish_wait(&vcpu->wq, &wait);
 }
 
+#ifndef CONFIG_S390
+/*
+ * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+       int me;
+       int cpu = vcpu->cpu;
+       wait_queue_head_t *wqp;
+
+       wqp = kvm_arch_vcpu_wq(vcpu);
+       if (waitqueue_active(wqp)) {
+               wake_up_interruptible(wqp);
+               ++vcpu->stat.halt_wakeup;
+       }
+
+       me = get_cpu();
+       if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+               if (kvm_arch_vcpu_should_kick(vcpu))
+                       smp_send_reschedule(cpu);
+       put_cpu();
+}
+#endif /* !CONFIG_S390 */
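kvm_arch_vcpu_should_kick() is an architecture hook rather than something defined here; on x86 it is roughly the following, assuming a vcpu->mode field that is moved from IN_GUEST_MODE to EXITING_GUEST_MODE so that only one kick actually sends the IPI:

/* Sketch of the x86-style hook (lives in arch code, not in this file). */
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
        /* Atomically leave guest mode; returns the previous mode. */
        return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* Only send the reschedule IPI if the vcpu was still in guest mode. */
        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}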
+
 void kvm_resched(struct kvm_vcpu *vcpu)
 {
        if (!need_resched())
@@ -1523,6 +1554,31 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+{
+       struct pid *pid;
+       struct task_struct *task = NULL;
+
+       rcu_read_lock();
+       pid = rcu_dereference(target->pid);
+       if (pid)
+               task = get_pid_task(target->pid, PIDTYPE_PID);
+       rcu_read_unlock();
+       if (!task)
+               return false;
+       if (task->flags & PF_VCPU) {
+               put_task_struct(task);
+               return false;
+       }
+       if (yield_to(task, 1)) {
+               put_task_struct(task);
+               return true;
+       }
+       put_task_struct(task);
+       return false;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
        struct kvm *kvm = me->kvm;
@@ -1532,6 +1588,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
        int pass;
        int i;
 
+       kvm_vcpu_set_in_spin_loop(me, true);
        /*
         * We boost the priority of a VCPU that is runnable but not
         * currently running, because it got preempted by something
@@ -1541,9 +1598,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
         */
        for (pass = 0; pass < 2 && !yielded; pass++) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
-                       struct task_struct *task = NULL;
-                       struct pid *pid;
-                       if (!pass && i < last_boosted_vcpu) {
+                       if (!pass && i <= last_boosted_vcpu) {
                                i = last_boosted_vcpu;
                                continue;
                        } else if (pass && i > last_boosted_vcpu)
@@ -1552,26 +1607,14 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                                continue;
                        if (waitqueue_active(&vcpu->wq))
                                continue;
-                       rcu_read_lock();
-                       pid = rcu_dereference(vcpu->pid);
-                       if (pid)
-                               task = get_pid_task(vcpu->pid, PIDTYPE_PID);
-                       rcu_read_unlock();
-                       if (!task)
-                               continue;
-                       if (task->flags & PF_VCPU) {
-                               put_task_struct(task);
-                               continue;
-                       }
-                       if (yield_to(task, 1)) {
-                               put_task_struct(task);
+                       if (kvm_vcpu_yield_to(vcpu)) {
                                kvm->last_boosted_vcpu = i;
                                yielded = 1;
                                break;
                        }
-                       put_task_struct(task);
                }
        }
+       kvm_vcpu_set_in_spin_loop(me, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
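kvm_vcpu_on_spin() is invoked from the architecture's pause-loop-exit / cpu-relax intercept handler; for example, the VMX PAUSE exit handler of this era looks roughly like:

/* Illustrative x86/VMX caller (arch/x86/kvm/vmx.c, roughly). */
static int handle_pause(struct kvm_vcpu *vcpu)
{
        skip_emulated_instruction(vcpu);
        kvm_vcpu_on_spin(vcpu);

        return 1;
}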
 
@@ -2040,6 +2083,17 @@ static long kvm_vm_ioctl(struct file *filp,
                mutex_unlock(&kvm->lock);
                break;
 #endif
+#ifdef CONFIG_HAVE_KVM_MSI
+       case KVM_SIGNAL_MSI: {
+               struct kvm_msi msi;
+
+               r = -EFAULT;
+               if (copy_from_user(&msi, argp, sizeof msi))
+                       goto out;
+               r = kvm_send_userspace_msi(kvm, &msi);
+               break;
+       }
+#endif
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
                if (r == -ENOTTY)
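For context, a sketch of how userspace might exercise the new KVM_SIGNAL_MSI ioctl on a VM file descriptor; inject_msi() and its parameters are illustrative, and the struct kvm_msi layout is assumed from the uapi header added alongside this code:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: ask KVM to inject one MSI into the in-kernel irqchip. */
static int inject_msi(int vm_fd, __u64 msi_addr, __u32 msi_data)
{
        struct kvm_msi msi = {
                .address_lo = (__u32)msi_addr,
                .address_hi = (__u32)(msi_addr >> 32),
                .data       = msi_data,
        };

        /* Positive return: delivered; 0: blocked by the guest; negative: error. */
        return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
}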
@@ -2168,8 +2222,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
        case KVM_CAP_SET_BOOT_CPU_ID:
 #endif
        case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+       case KVM_CAP_SIGNAL_MSI:
+#endif
                return 1;
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef KVM_CAP_IRQ_ROUTING
        case KVM_CAP_IRQ_ROUTING:
                return KVM_MAX_IRQ_ROUTES;
 #endif
@@ -2394,9 +2451,6 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
 int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
                          gpa_t addr, int len)
 {
-       if (bus->dev_count == NR_IOBUS_DEVS)
-               return -ENOSPC;
-
        bus->range[bus->dev_count++] = (struct kvm_io_range) {
                .addr = addr,
                .len = len,
@@ -2496,12 +2550,15 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
        struct kvm_io_bus *new_bus, *bus;
 
        bus = kvm->buses[bus_idx];
-       if (bus->dev_count > NR_IOBUS_DEVS-1)
+       if (bus->dev_count > NR_IOBUS_DEVS - 1)
                return -ENOSPC;
 
-       new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL);
+       new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+                         sizeof(struct kvm_io_range)), GFP_KERNEL);
        if (!new_bus)
                return -ENOMEM;
+       memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
+              sizeof(struct kvm_io_range)));
        kvm_io_bus_insert_dev(new_bus, dev, addr, len);
        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
        synchronize_srcu_expedited(&kvm->srcu);
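The size arithmetic above only makes sense if struct kvm_io_bus ends in a flexible array of ranges; in the corresponding header it is presumably declared along these lines:

/* Assumed shape of the bus structures (include/linux/kvm_host.h, roughly). */
struct kvm_io_range {
        gpa_t addr;
        int len;
        struct kvm_io_device *dev;
};

struct kvm_io_bus {
        int dev_count;
        /* Flexible array: the bus is reallocated to fit dev_count ranges. */
        struct kvm_io_range range[];
};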
@@ -2518,27 +2575,25 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
        struct kvm_io_bus *new_bus, *bus;
 
        bus = kvm->buses[bus_idx];
-
-       new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL);
-       if (!new_bus)
-               return -ENOMEM;
-
        r = -ENOENT;
-       for (i = 0; i < new_bus->dev_count; i++)
-               if (new_bus->range[i].dev == dev) {
+       for (i = 0; i < bus->dev_count; i++)
+               if (bus->range[i].dev == dev) {
                        r = 0;
-                       new_bus->dev_count--;
-                       new_bus->range[i] = new_bus->range[new_bus->dev_count];
-                       sort(new_bus->range, new_bus->dev_count,
-                            sizeof(struct kvm_io_range),
-                            kvm_io_bus_sort_cmp, NULL);
                        break;
                }
 
-       if (r) {
-               kfree(new_bus);
+       if (r)
                return r;
-       }
+
+       new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+                         sizeof(struct kvm_io_range)), GFP_KERNEL);
+       if (!new_bus)
+               return -ENOMEM;
+
+       memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+       new_bus->dev_count--;
+       memcpy(new_bus->range + i, bus->range + i + 1,
+              (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
 
        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
        synchronize_srcu_expedited(&kvm->srcu);
@@ -2643,9 +2698,6 @@ static struct syscore_ops kvm_syscore_ops = {
        .resume = kvm_resume,
 };
 
-struct page *bad_page;
-pfn_t bad_pfn;
-
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 {