kvm/eventfd: Use priority waitqueue to catch events before userspace
author: David Woodhouse <dwmw@amazon.co.uk>
Mon, 26 Oct 2020 17:53:25 +0000 (17:53 +0000)
committer: Paolo Bonzini <pbonzini@redhat.com>
Sun, 15 Nov 2020 14:49:10 +0000 (09:49 -0500)
As far as I can tell, when we use posted interrupts we silently cut off
the events from userspace, if it's listening on the same eventfd that
feeds the irqfd.

I like that behaviour. Let's do it all the time, even without posted
interrupts. It makes it much easier to handle IRQ remapping invalidation
without having to constantly add/remove the fd from the userspace poll
set. We can just leave userspace polling on it, and the bypass will...
well... bypass it.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20201026175325.585623-2-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
virt/kvm/eventfd.c

index c2323c27a28b52dd14c6405dcbc8d54b1356efe0..efa8a5ae7a95f2f65c010e51e0d9b845e929fd2b 100644 (file)
@@ -191,6 +191,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
        struct kvm *kvm = irqfd->kvm;
        unsigned seq;
        int idx;
+       int ret = 0;
 
        if (flags & EPOLLIN) {
                idx = srcu_read_lock(&kvm->irq_srcu);
@@ -204,6 +205,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
                                              false) == -EWOULDBLOCK)
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
+               ret = 1;
        }
 
        if (flags & EPOLLHUP) {
@@ -227,7 +229,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
                spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
        }
 
-       return 0;
+       return ret;
 }
 
 static void
@@ -236,7 +238,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 {
        struct kvm_kernel_irqfd *irqfd =
                container_of(pt, struct kvm_kernel_irqfd, pt);
-       add_wait_queue(wqh, &irqfd->wait);
+       add_wait_queue_priority(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */