mm/pagewalk: fix bootstopping regression from extra pte_unmap()
[platform/kernel/linux-rpi.git] / drivers / vfio / virqfd.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO generic eventfd code for IRQFD support.
4  * Derived from drivers/vfio/pci/vfio_pci_intrs.c
5  *
6  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
7  *     Author: Alex Williamson <alex.williamson@redhat.com>
8  */
9
10 #include <linux/vfio.h>
11 #include <linux/eventfd.h>
12 #include <linux/file.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include "vfio.h"
16
/*
 * Taken around every read-modify-write of a caller's virqfd pointer
 * (*pvirqfd) in this file, so that a given virqfd is queued for release
 * at most once.
 */
static DEFINE_SPINLOCK(virqfd_lock);

/* Workqueue on which the per-virqfd shutdown work items are queued. */
static struct workqueue_struct *vfio_irqfd_cleanup_wq;
19
20 int __init vfio_virqfd_init(void)
21 {
22         vfio_irqfd_cleanup_wq =
23                 create_singlethread_workqueue("vfio-irqfd-cleanup");
24         if (!vfio_irqfd_cleanup_wq)
25                 return -ENOMEM;
26
27         return 0;
28 }
29
30 void vfio_virqfd_exit(void)
31 {
32         destroy_workqueue(vfio_irqfd_cleanup_wq);
33 }
34
35 static void virqfd_deactivate(struct virqfd *virqfd)
36 {
37         queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
38 }
39
40 static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
41 {
42         struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
43         __poll_t flags = key_to_poll(key);
44
45         if (flags & EPOLLIN) {
46                 u64 cnt;
47                 eventfd_ctx_do_read(virqfd->eventfd, &cnt);
48
49                 /* An event has been signaled, call function */
50                 if ((!virqfd->handler ||
51                      virqfd->handler(virqfd->opaque, virqfd->data)) &&
52                     virqfd->thread)
53                         schedule_work(&virqfd->inject);
54         }
55
56         if (flags & EPOLLHUP) {
57                 unsigned long flags;
58                 spin_lock_irqsave(&virqfd_lock, flags);
59
60                 /*
61                  * The eventfd is closing, if the virqfd has not yet been
62                  * queued for release, as determined by testing whether the
63                  * virqfd pointer to it is still valid, queue it now.  As
64                  * with kvm irqfds, we know we won't race against the virqfd
65                  * going away because we hold the lock to get here.
66                  */
67                 if (*(virqfd->pvirqfd) == virqfd) {
68                         *(virqfd->pvirqfd) = NULL;
69                         virqfd_deactivate(virqfd);
70                 }
71
72                 spin_unlock_irqrestore(&virqfd_lock, flags);
73         }
74
75         return 0;
76 }
77
78 static void virqfd_ptable_queue_proc(struct file *file,
79                                      wait_queue_head_t *wqh, poll_table *pt)
80 {
81         struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
82         add_wait_queue(wqh, &virqfd->wait);
83 }
84
85 static void virqfd_shutdown(struct work_struct *work)
86 {
87         struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
88         u64 cnt;
89
90         eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
91         flush_work(&virqfd->inject);
92         eventfd_ctx_put(virqfd->eventfd);
93
94         kfree(virqfd);
95 }
96
97 static void virqfd_inject(struct work_struct *work)
98 {
99         struct virqfd *virqfd = container_of(work, struct virqfd, inject);
100         if (virqfd->thread)
101                 virqfd->thread(virqfd->opaque, virqfd->data);
102 }
103
104 int vfio_virqfd_enable(void *opaque,
105                        int (*handler)(void *, void *),
106                        void (*thread)(void *, void *),
107                        void *data, struct virqfd **pvirqfd, int fd)
108 {
109         struct fd irqfd;
110         struct eventfd_ctx *ctx;
111         struct virqfd *virqfd;
112         int ret = 0;
113         __poll_t events;
114
115         virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
116         if (!virqfd)
117                 return -ENOMEM;
118
119         virqfd->pvirqfd = pvirqfd;
120         virqfd->opaque = opaque;
121         virqfd->handler = handler;
122         virqfd->thread = thread;
123         virqfd->data = data;
124
125         INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
126         INIT_WORK(&virqfd->inject, virqfd_inject);
127
128         irqfd = fdget(fd);
129         if (!irqfd.file) {
130                 ret = -EBADF;
131                 goto err_fd;
132         }
133
134         ctx = eventfd_ctx_fileget(irqfd.file);
135         if (IS_ERR(ctx)) {
136                 ret = PTR_ERR(ctx);
137                 goto err_ctx;
138         }
139
140         virqfd->eventfd = ctx;
141
142         /*
143          * virqfds can be released by closing the eventfd or directly
144          * through ioctl.  These are both done through a workqueue, so
145          * we update the pointer to the virqfd under lock to avoid
146          * pushing multiple jobs to release the same virqfd.
147          */
148         spin_lock_irq(&virqfd_lock);
149
150         if (*pvirqfd) {
151                 spin_unlock_irq(&virqfd_lock);
152                 ret = -EBUSY;
153                 goto err_busy;
154         }
155         *pvirqfd = virqfd;
156
157         spin_unlock_irq(&virqfd_lock);
158
159         /*
160          * Install our own custom wake-up handling so we are notified via
161          * a callback whenever someone signals the underlying eventfd.
162          */
163         init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
164         init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
165
166         events = vfs_poll(irqfd.file, &virqfd->pt);
167
168         /*
169          * Check if there was an event already pending on the eventfd
170          * before we registered and trigger it as if we didn't miss it.
171          */
172         if (events & EPOLLIN) {
173                 if ((!handler || handler(opaque, data)) && thread)
174                         schedule_work(&virqfd->inject);
175         }
176
177         /*
178          * Do not drop the file until the irqfd is fully initialized,
179          * otherwise we might race against the EPOLLHUP.
180          */
181         fdput(irqfd);
182
183         return 0;
184 err_busy:
185         eventfd_ctx_put(ctx);
186 err_ctx:
187         fdput(irqfd);
188 err_fd:
189         kfree(virqfd);
190
191         return ret;
192 }
193 EXPORT_SYMBOL_GPL(vfio_virqfd_enable);
194
195 void vfio_virqfd_disable(struct virqfd **pvirqfd)
196 {
197         unsigned long flags;
198
199         spin_lock_irqsave(&virqfd_lock, flags);
200
201         if (*pvirqfd) {
202                 virqfd_deactivate(*pvirqfd);
203                 *pvirqfd = NULL;
204         }
205
206         spin_unlock_irqrestore(&virqfd_lock, flags);
207
208         /*
209          * Block until we know all outstanding shutdown jobs have completed.
210          * Even if we don't queue the job, flush the wq to be sure it's
211          * been released.
212          */
213         flush_workqueue(vfio_irqfd_cleanup_wq);
214 }
215 EXPORT_SYMBOL_GPL(vfio_virqfd_disable);