From c1e53a6988b9c83dd8bbc759414bc0f13ff1fe0c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Oct 2021 20:02:55 +0100 Subject: [PATCH] io_uring: optimise io_free_batch_list() Delay reading the next node in io_free_batch_list(), which allows the compiler to load the value a bit later, improving register spilling in some cases. With gcc 11.1 it helped to move the @task_refs variable from the stack to a register and optimised out a couple of per-request instructions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cc9fdfb6f72a4e8bc9918a5e9f2d97869a263ae4.1633373302.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e404c98..1b007f7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2280,9 +2280,10 @@ static void io_free_batch_list(struct io_ring_ctx *ctx, struct io_kiocb *req = container_of(node, struct io_kiocb, comp_list); - node = req->comp_list.next; - if (!req_ref_put_and_test(req)) + if (!req_ref_put_and_test(req)) { + node = req->comp_list.next; continue; + } io_queue_next(req); io_dismantle_req(req); @@ -2294,6 +2295,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx, task_refs = 0; } task_refs++; + node = req->comp_list.next; wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list); } while (node); -- 2.7.4