io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_COOP_TASKRUN is used
author Jens Axboe <axboe@kernel.dk>
Tue, 26 Apr 2022 01:49:03 +0000 (19:49 -0600)
committer Jens Axboe <axboe@kernel.dk>
Sat, 30 Apr 2022 14:39:54 +0000 (08:39 -0600)
If IORING_SETUP_COOP_TASKRUN is set, io_uring will never use an IPI to
deliver a task_work notification. This can be used in the common case
where a single task or thread communicates with the ring, and doesn't
rely on io_uring_peek_cqe().

This provides a noticeable win in performance, both from eliminating
the IPI itself and from avoiding interrupting the submitting task
unnecessarily.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20220426014904.60384-6-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index 3c669d8f5e57fb393c50c8ad7e817d25fc9db81e..0b9ae36159118e8fcce54e6d0acb674a849126f3 100644 (file)
@@ -11327,12 +11327,20 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
                ctx->user = get_uid(current_user());
 
        /*
-        * For SQPOLL, we just need a wakeup, always.
+        * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
+        * COOP_TASKRUN is set, then IPIs are never needed by the app.
         */
-       if (ctx->flags & IORING_SETUP_SQPOLL)
+       ret = -EINVAL;
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+               /* IPI related flags don't make sense with SQPOLL */
+               if (ctx->flags & IORING_SETUP_COOP_TASKRUN)
+                       goto err;
                ctx->notify_method = TWA_SIGNAL_NO_IPI;
-       else
+       } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
+               ctx->notify_method = TWA_SIGNAL_NO_IPI;
+       } else {
                ctx->notify_method = TWA_SIGNAL;
+       }
 
        /*
         * This is just grabbed for accounting purposes. When a process exits,
@@ -11431,7 +11439,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
        if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
                        IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
                        IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
-                       IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
+                       IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
+                       IORING_SETUP_COOP_TASKRUN))
                return -EINVAL;
 
        return  io_uring_create(entries, &p, params);
index 980d82eb196e833a9c2375c8cfb1d17896567229..a84f29d657c3360bc4b9a21dbad6dc2f87d640ae 100644 (file)
@@ -102,6 +102,14 @@ enum {
 #define IORING_SETUP_ATTACH_WQ (1U << 5)       /* attach to existing wq */
 #define IORING_SETUP_R_DISABLED        (1U << 6)       /* start with ring disabled */
 #define IORING_SETUP_SUBMIT_ALL        (1U << 7)       /* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN      (1U << 8)
 
 enum {
        IORING_OP_NOP,