int ops = atomic_xchg(&ev_fd->ops, 0);
if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
- eventfd_signal(ev_fd->cq_ev_fd, 1);
+ eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
/* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
* ordering in a race but if references are 0 we know we have to free
goto out;
if (likely(eventfd_signal_allowed())) {
- eventfd_signal(ev_fd->cq_ev_fd, 1);
+ eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
} else {
atomic_inc(&ev_fd->refs);
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
#include <linux/errno.h>
#include <linux/lockdep.h>
#include <linux/io_uring_types.h>
+#include <uapi/linux/eventpoll.h>
#include "io-wq.h"
#include "slist.h"
#include "filetable.h"
static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
{
/*
- * wake_up_all() may seem excessive, but io_wake_function() and
- * io_should_wake() handle the termination of the loop and only
- * wake as many waiters as we need to.
+ * Trigger waitqueue handler on all waiters on our waitqueue. This
+ * won't necessarily wake up all the tasks, io_should_wake() will make
+ * that decision.
+ *
+ * Pass in EPOLLIN|EPOLL_URING_WAKE as the poll wakeup key. The latter
+ * set in the mask so that if we recurse back into our own poll
+ * waitqueue handlers, we know we have a dependency between eventfd or
+ * epoll and should terminate multishot poll at that point.
*/
if (waitqueue_active(&ctx->cq_wait))
- wake_up_all(&ctx->cq_wait);
+ __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
+ poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
}
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
return 0;
if (io_poll_get_ownership(req)) {
+ /*
+ * If we trigger a multishot poll off our own wakeup path,
+ * disable multishot as there is a circular dependency between
+ * CQ posting and triggering the event.
+ */
+ if (mask & EPOLL_URING_WAKE)
+ poll->events |= EPOLLONESHOT;
+
/* optional, saves extra locking for removal in tw handler */
if (mask && poll->events & EPOLLONESHOT) {
list_del_init(&poll->wait.entry);