io_uring: do ctx initiated file note removal
authorPavel Begunkov <asml.silence@gmail.com>
Sat, 6 Mar 2021 11:02:13 +0000 (11:02 +0000)
committerJens Axboe <axboe@kernel.dk>
Sun, 7 Mar 2021 21:12:43 +0000 (14:12 -0700)
Another preparation patch. When full quiesce is done on ctx exit, use
task_work infra to remove corresponding to the ctx io_uring->xa entries.
For that we use the back tctx map. Also use ->in_idle to prevent
removing it while we traversing ->xa on cancellation, just ignore it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c

index 9a2cff0662e01d5efa4624c908f0ad0db178401c..8a4ab86ae64f8954b202e47d5ec26130ecea74e4 100644 (file)
@@ -987,6 +987,7 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_UNLINKAT] = {},
 };
 
+static void io_uring_del_task_file(unsigned long index);
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         struct task_struct *task,
                                         struct files_struct *files);
@@ -8536,10 +8537,33 @@ static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
        return executed;
 }
 
+struct io_tctx_exit {
+       struct callback_head            task_work;
+       struct completion               completion;
+       unsigned long                   index;
+};
+
+static void io_tctx_exit_cb(struct callback_head *cb)
+{
+       struct io_uring_task *tctx = current->io_uring;
+       struct io_tctx_exit *work;
+
+       work = container_of(cb, struct io_tctx_exit, task_work);
+       /*
+        * When @in_idle, we're in cancellation and it's racy to remove the
+        * node. It'll be removed by the end of cancellation, just ignore it.
+        */
+       if (!atomic_read(&tctx->in_idle))
+               io_uring_del_task_file(work->index);
+       complete(&work->completion);
+}
+
 static void io_ring_exit_work(struct work_struct *work)
 {
-       struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
-                                              exit_work);
+       struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
+       struct io_tctx_exit exit;
+       struct io_tctx_node *node;
+       int ret;
 
        /*
         * If we're doing polled IO and end up having requests being
@@ -8550,6 +8574,26 @@ static void io_ring_exit_work(struct work_struct *work)
        do {
                io_uring_try_cancel_requests(ctx, NULL, NULL);
        } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+
+       mutex_lock(&ctx->uring_lock);
+       while (!list_empty(&ctx->tctx_list)) {
+               node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
+                                       ctx_node);
+               exit.index = (unsigned long)node->file;
+               init_completion(&exit.completion);
+               init_task_work(&exit.task_work, io_tctx_exit_cb);
+               ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
+               if (WARN_ON_ONCE(ret))
+                       continue;
+               wake_up_process(node->task);
+
+               mutex_unlock(&ctx->uring_lock);
+               wait_for_completion(&exit.completion);
+               cond_resched();
+               mutex_lock(&ctx->uring_lock);
+       }
+       mutex_unlock(&ctx->uring_lock);
+
        io_ring_ctx_free(ctx);
 }