From e342c807f556dbcee1370ab78af1d8faf497d771 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 19 Jan 2021 13:32:47 +0000 Subject: [PATCH] io_uring: save atomic dec for inline executed reqs When a request is completed with comp_state, its completion reference put is deferred to io_submit_flush_completions(), but the submission reference is put not far from there, so do it together to save one atomic dec per request. That targets requests that complete inline, e.g. buffered rw, send/recv. Proper benchmarking hasn't been conducted but for nops(batch=32) it was around 7901 vs 8117 KIOPS (~2.7%), or ~4% per perf profiling. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 935a16a..3f6d055e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -629,6 +629,7 @@ enum { REQ_F_NO_FILE_TABLE_BIT, REQ_F_WORK_INITIALIZED_BIT, REQ_F_LTIMEOUT_ACTIVE_BIT, + REQ_F_COMPLETE_INLINE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -672,6 +673,8 @@ enum { REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), /* linked timeout is active, i.e. prepared by link's head */ REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT), + /* completion is deferred through io_comp_state */ + REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), }; struct async_poll { @@ -1917,14 +1920,15 @@ static void io_submit_flush_completions(struct io_comp_state *cs) * io_free_req() doesn't care about completion_lock unless one * of these flags is set. REQ_F_WORK_INITIALIZED is in the list * because of a potential deadlock with req->work.fs->lock + We defer both, completion and submission refs. 
*/ if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT |REQ_F_WORK_INITIALIZED)) { spin_unlock_irq(&ctx->completion_lock); - io_put_req(req); + io_double_put_req(req); spin_lock_irq(&ctx->completion_lock); } else { - io_put_req(req); + io_double_put_req(req); } } io_commit_cqring(ctx); @@ -1940,8 +1944,7 @@ static void io_req_complete_state(struct io_kiocb *req, long res, io_clean_op(req); req->result = res; req->compl.cflags = cflags; - list_add_tail(&req->compl.list, &cs->list); - cs->nr++; + req->flags |= REQ_F_COMPLETE_INLINE; } static inline void __io_req_complete(struct io_kiocb *req, long res, @@ -6576,9 +6579,9 @@ again: io_queue_linked_timeout(linked_timeout); } else if (likely(!ret)) { /* drop submission reference */ - if (cs) { - io_put_req(req); - if (cs->nr >= 32) + if (req->flags & REQ_F_COMPLETE_INLINE) { + list_add_tail(&req->compl.list, &cs->list); + if (++cs->nr >= 32) io_submit_flush_completions(cs); req = NULL; } else { -- 2.7.4