From d73a572df24661851465c821d33c03e70e4b68e5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 6 Apr 2023 14:20:08 +0100
Subject: [PATCH] io_uring: optimize local tw add ctx pinning

We currently pin the ctx for io_req_local_work_add() with
percpu_ref_get/put, which implies two rcu_read_lock/unlock pairs and
some extra overhead on top in the fast path. Replace it with a pure rcu
read and let io_ring_exit_work() synchronise against it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/cbdfcb6b232627f30e9e50ef91f13c4f05910247.1680782017.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 29a0516..fb7215b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1332,9 +1332,9 @@ void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (allow_local && ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
-		percpu_ref_get(&ctx->refs);
+		rcu_read_lock();
 		io_req_local_work_add(req);
-		percpu_ref_put(&ctx->refs);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -3052,6 +3052,10 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 	spin_lock(&ctx->completion_lock);
 	spin_unlock(&ctx->completion_lock);
 
+	/* pairs with RCU read section in io_req_local_work_add() */
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+		synchronize_rcu();
+
 	io_ring_ctx_free(ctx);
 }
 
-- 
2.7.4
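
The pattern the patch relies on can be illustrated outside the kernel. Below is a
minimal userspace sketch using liburcu (the urcu-memb flavour): the fast path takes
only an RCU read-side section instead of bumping a reference count, and teardown
unpublishes the object and calls synchronize_rcu() before freeing it, guaranteeing
all in-flight readers have finished. The names ctx, fast_path() and teardown() are
hypothetical stand-ins for this sketch, not the kernel functions; build with
`gcc demo.c -lurcu-memb`.

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>	/* liburcu: rcu_read_lock(), synchronize_rcu(), ... */

struct ctx {
	int flags;
};

static struct ctx *global_ctx;

/* Fast path: a plain RCU read section instead of a refcount get/put. */
static void fast_path(void)
{
	rcu_read_lock();
	struct ctx *c = rcu_dereference(global_ctx);
	if (c)
		printf("using ctx, flags=%d\n", c->flags);
	rcu_read_unlock();
}

/* Teardown: unpublish, then wait out all readers before freeing. */
static void teardown(void)
{
	struct ctx *c = global_ctx;

	rcu_assign_pointer(global_ctx, NULL);
	/* pairs with the rcu_read_lock() section in fast_path() */
	synchronize_rcu();
	free(c);
}

int main(void)
{
	rcu_register_thread();	/* liburcu requires reader registration */

	global_ctx = calloc(1, sizeof(*global_ctx));
	global_ctx->flags = 1;

	fast_path();
	teardown();

	rcu_unregister_thread();
	return 0;
}

This is cheaper on the hot path because percpu_ref_get/put each contain an RCU
read section of their own plus a per-cpu counter update; the patch keeps only the
RCU section and shifts the whole synchronisation cost to the rare exit path, where
synchronize_rcu() blocks until every concurrent io_req_local_work_add() reader has
left its critical section.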