io_uring: Support calling io_uring_register with a registered ring fd
author Josh Triplett <josh@joshtriplett.org>
Wed, 15 Feb 2023 00:42:22 +0000 (16:42 -0800)
committer Jens Axboe <axboe@kernel.dk>
Thu, 16 Feb 2023 13:09:30 +0000 (06:09 -0700)
Add a new flag IORING_REGISTER_USE_REGISTERED_RING (set via the high bit
of the opcode) to treat the fd as a registered index rather than a file
descriptor.

This makes it possible for a library to open an io_uring, register the
ring fd, close the ring fd, and subsequently use the ring entirely via
registered index.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: https://lore.kernel.org/r/f2396369e638284586b069dbddffb8c992afba95.1676419314.git.josh@joshtriplett.org
[axboe: remove extra high bit clear]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/uapi/linux/io_uring.h
io_uring/io_uring.c

index 636a4c2..97661a6 100644 (file)
@@ -472,6 +472,7 @@ struct io_uring_params {
 #define IORING_FEAT_RSRC_TAGS          (1U << 10)
 #define IORING_FEAT_CQE_SKIP           (1U << 11)
 #define IORING_FEAT_LINKED_FILE                (1U << 12)
+#define IORING_FEAT_REG_REG_RING       (1U << 13)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -519,7 +520,10 @@ enum {
        IORING_REGISTER_FILE_ALLOC_RANGE        = 25,
 
        /* this goes last */
-       IORING_REGISTER_LAST
+       IORING_REGISTER_LAST,
+
+       /* flag added to the opcode to use a registered ring fd */
+       IORING_REGISTER_USE_REGISTERED_RING     = 1U << 31
 };
 
 /* io-wq worker categories */
index 0e42160..3b915de 100644 (file)
@@ -3785,7 +3785,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
                        IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
                        IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
                        IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
-                       IORING_FEAT_LINKED_FILE;
+                       IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING;
 
        if (copy_to_user(params, p, sizeof(*p))) {
                ret = -EFAULT;
@@ -4306,17 +4306,36 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
        struct io_ring_ctx *ctx;
        long ret = -EBADF;
        struct fd f;
+       bool use_registered_ring;
+
+       use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
+       opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;
 
        if (opcode >= IORING_REGISTER_LAST)
                return -EINVAL;
 
-       f = fdget(fd);
-       if (!f.file)
-               return -EBADF;
+       if (use_registered_ring) {
+               /*
+                * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
+                * need only dereference our task private array to find it.
+                */
+               struct io_uring_task *tctx = current->io_uring;
 
-       ret = -EOPNOTSUPP;
-       if (!io_is_uring_fops(f.file))
-               goto out_fput;
+               if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
+                       return -EINVAL;
+               fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
+               f.file = tctx->registered_rings[fd];
+               f.flags = 0;
+               if (unlikely(!f.file))
+                       return -EBADF;
+       } else {
+               f = fdget(fd);
+               if (unlikely(!f.file))
+                       return -EBADF;
+               ret = -EOPNOTSUPP;
+               if (!io_is_uring_fops(f.file))
+                       goto out_fput;
+       }
 
        ctx = f.file->private_data;