};
};
+/*
+ * Arbitrary limit, can be raised if need be
+ */
+#define IO_RINGFD_REG_MAX 16
+
struct io_uring_task {
/* submission side */
int cached_refs;
struct io_wq_work_list task_list;
struct io_wq_work_list prior_task_list;
struct callback_head task_work;
+ struct file **registered_rings;
bool task_running;
};
if (unlikely(!tctx))
return -ENOMEM;
+ tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX,
+ sizeof(struct file *), GFP_KERNEL);
+ if (unlikely(!tctx->registered_rings)) {
+ kfree(tctx);
+ return -ENOMEM;
+ }
+
ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
if (unlikely(ret)) {
+ kfree(tctx->registered_rings);
kfree(tctx);
return ret;
}
if (IS_ERR(tctx->io_wq)) {
ret = PTR_ERR(tctx->io_wq);
percpu_counter_destroy(&tctx->inflight);
+ kfree(tctx->registered_rings);
kfree(tctx);
return ret;
}
WARN_ON_ONCE(tctx->io_wq);
WARN_ON_ONCE(tctx->cached_refs);
+ kfree(tctx->registered_rings);
percpu_counter_destroy(&tctx->inflight);
kfree(tctx);
tsk->io_uring = NULL;
io_uring_cancel_generic(cancel_all, NULL);
}
+void io_uring_unreg_ringfd(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ int i;
+
+ for (i = 0; i < IO_RINGFD_REG_MAX; i++) {
+ if (tctx->registered_rings[i]) {
+ fput(tctx->registered_rings[i]);
+ tctx->registered_rings[i] = NULL;
+ }
+ }
+}
+
+static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
+ int start, int end)
+{
+ struct file *file;
+ int offset;
+
+ for (offset = start; offset < end; offset++) {
+ offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
+ if (tctx->registered_rings[offset])
+ continue;
+
+ file = fget(fd);
+ if (!file) {
+ return -EBADF;
+ } else if (file->f_op != &io_uring_fops) {
+ fput(file);
+ return -EOPNOTSUPP;
+ }
+ tctx->registered_rings[offset] = file;
+ return offset;
+ }
+
+ return -EBUSY;
+}
+
+/*
+ * Register a ring fd to avoid fdget/fdput for each io_uring_enter()
+ * invocation. User passes in an array of struct io_uring_rsrc_update
+ * with ->data set to the ring_fd, and ->offset given for the desired
+ * index. If no index is desired, application may set ->offset == -1U
+ * and we'll find an available index. Returns number of entries
+ * successfully processed, or < 0 on error if none were processed.
+ */
+static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
+ unsigned nr_args)
+{
+ struct io_uring_rsrc_update __user *arg = __arg;
+ struct io_uring_rsrc_update reg;
+ struct io_uring_task *tctx;
+ int ret, i;
+
+ if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
+ return -EINVAL;
+
+ mutex_unlock(&ctx->uring_lock);
+ ret = io_uring_add_tctx_node(ctx);
+ mutex_lock(&ctx->uring_lock);
+ if (ret)
+ return ret;
+
+ tctx = current->io_uring;
+ for (i = 0; i < nr_args; i++) {
+ int start, end;
+
+ if (copy_from_user(®, &arg[i], sizeof(reg))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ if (reg.offset == -1U) {
+ start = 0;
+ end = IO_RINGFD_REG_MAX;
+ } else {
+ if (reg.offset >= IO_RINGFD_REG_MAX) {
+ ret = -EINVAL;
+ break;
+ }
+ start = reg.offset;
+ end = start + 1;
+ }
+
+ ret = io_ring_add_registered_fd(tctx, reg.data, start, end);
+ if (ret < 0)
+ break;
+
+ reg.offset = ret;
+ if (copy_to_user(&arg[i], ®, sizeof(reg))) {
+ fput(tctx->registered_rings[reg.offset]);
+ tctx->registered_rings[reg.offset] = NULL;
+ ret = -EFAULT;
+ break;
+ }
+ }
+
+ return i ? i : ret;
+}
+
+static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
+ unsigned nr_args)
+{
+ struct io_uring_rsrc_update __user *arg = __arg;
+ struct io_uring_task *tctx = current->io_uring;
+ struct io_uring_rsrc_update reg;
+ int ret = 0, i;
+
+ if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
+ return -EINVAL;
+ if (!tctx)
+ return 0;
+
+ for (i = 0; i < nr_args; i++) {
+ if (copy_from_user(®, &arg[i], sizeof(reg))) {
+ ret = -EFAULT;
+ break;
+ }
+ if (reg.offset >= IO_RINGFD_REG_MAX) {
+ ret = -EINVAL;
+ break;
+ }
+
+ reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX);
+ if (tctx->registered_rings[reg.offset]) {
+ fput(tctx->registered_rings[reg.offset]);
+ tctx->registered_rings[reg.offset] = NULL;
+ }
+ }
+
+ return i ? i : ret;
+}
+
static void *io_uring_validate_mmap_request(struct file *file,
loff_t pgoff, size_t sz)
{
io_run_task_work();
if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
- IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG)))
+ IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
+ IORING_ENTER_REGISTERED_RING)))
return -EINVAL;
- f = fdget(fd);
- if (unlikely(!f.file))
- return -EBADF;
+ /*
+ * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
+ * need only dereference our task private array to find it.
+ */
+ if (flags & IORING_ENTER_REGISTERED_RING) {
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx || fd >= IO_RINGFD_REG_MAX)
+ return -EINVAL;
+ fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
+ f.file = tctx->registered_rings[fd];
+ if (unlikely(!f.file))
+ return -EBADF;
+ } else {
+ f = fdget(fd);
+ if (unlikely(!f.file))
+ return -EBADF;
+ }
ret = -EOPNOTSUPP;
if (unlikely(f.file->f_op != &io_uring_fops))
out:
percpu_ref_put(&ctx->refs);
out_fput:
- fdput(f);
+ if (!(flags & IORING_ENTER_REGISTERED_RING))
+ fdput(f);
return submitted ? submitted : ret;
}
break;
ret = io_register_iowq_max_workers(ctx, arg);
break;
+ case IORING_REGISTER_RING_FDS:
+ ret = io_ringfd_register(ctx, arg, nr_args);
+ break;
+ case IORING_UNREGISTER_RING_FDS:
+ ret = io_ringfd_unregister(ctx, arg, nr_args);
+ break;
default:
ret = -EINVAL;
break;