From 206aefde4f886fdeb3b6339aacab3a85fb74cb7e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Nov 2019 18:27:42 -0700 Subject: [PATCH] io_uring: reduce/pack size of io_ring_ctx With the recent flurry of additions and changes to io_uring, the layout of io_ring_ctx has become a bit stale. We're right now at 704 bytes in size on my x86-64 build, or 11 cachelines. This patch does two things: - We have to completion structs embedded, that we only use for quiesce of the ctx (or shutdown) and for sqthread init cases. That 2x32 bytes right there, let's dynamically allocate them. - Reorder the struct a bit with an eye on cachelines, use cases, and holes. With this patch, we're down to 512 bytes, or 8 cachelines. Reviewed-by: Jackie Liu Signed-off-by: Jens Axboe --- fs/io_uring.c | 69 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 02a4f5e..710eb27 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -204,6 +204,7 @@ struct io_ring_ctx { unsigned sq_mask; unsigned sq_thread_idle; unsigned cached_sq_dropped; + atomic_t cached_cq_overflow; struct io_uring_sqe *sq_sqes; struct list_head defer_list; @@ -213,25 +214,13 @@ struct io_ring_ctx { wait_queue_head_t inflight_wait; } ____cacheline_aligned_in_smp; + struct io_rings *rings; + /* IO offload */ struct io_wq *io_wq; struct task_struct *sqo_thread; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; - struct completion sqo_thread_started; - - struct { - unsigned cached_cq_tail; - atomic_t cached_cq_overflow; - unsigned cq_entries; - unsigned cq_mask; - struct wait_queue_head cq_wait; - struct fasync_struct *cq_fasync; - struct eventfd_ctx *cq_ev_fd; - atomic_t cq_timeouts; - } ____cacheline_aligned_in_smp; - - struct io_rings *rings; /* * If used, fixed file set. Writers must ensure that ->refs is dead, @@ -247,7 +236,22 @@ struct io_ring_ctx { struct user_struct *user; - struct completion ctx_done; + /* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */ + struct completion *completions; + +#if defined(CONFIG_UNIX) + struct socket *ring_sock; +#endif + + struct { + unsigned cached_cq_tail; + unsigned cq_entries; + unsigned cq_mask; + atomic_t cq_timeouts; + struct wait_queue_head cq_wait; + struct fasync_struct *cq_fasync; + struct eventfd_ctx *cq_ev_fd; + } ____cacheline_aligned_in_smp; struct { struct mutex uring_lock; @@ -269,10 +273,6 @@ struct io_ring_ctx { spinlock_t inflight_lock; struct list_head inflight_list; } ____cacheline_aligned_in_smp; - -#if defined(CONFIG_UNIX) - struct socket *ring_sock; -#endif }; struct sqe_submit { @@ -397,7 +397,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref) { struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); - complete(&ctx->ctx_done); + complete(&ctx->completions[0]); } static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) @@ -408,17 +408,19 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) if (!ctx) return NULL; + ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL); + if (!ctx->completions) + goto err; + if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { - kfree(ctx); - return NULL; - } + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) + goto err; ctx->flags = p->flags; init_waitqueue_head(&ctx->cq_wait); INIT_LIST_HEAD(&ctx->cq_overflow_list); - init_completion(&ctx->ctx_done); - init_completion(&ctx->sqo_thread_started); + init_completion(&ctx->completions[0]); + init_completion(&ctx->completions[1]); mutex_init(&ctx->uring_lock); init_waitqueue_head(&ctx->wait); spin_lock_init(&ctx->completion_lock); @@ -430,6 +432,10 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) spin_lock_init(&ctx->inflight_lock); INIT_LIST_HEAD(&ctx->inflight_list); return ctx; +err: + kfree(ctx->completions); + kfree(ctx); + return NULL; } static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, @@ -3046,7 +3052,7 @@ static int io_sq_thread(void *data) unsigned inflight; unsigned long timeout; - complete(&ctx->sqo_thread_started); + complete(&ctx->completions[1]); old_fs = get_fs(); set_fs(USER_DS); @@ -3286,7 +3292,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx) { if (ctx->sqo_thread) { - wait_for_completion(&ctx->sqo_thread_started); + wait_for_completion(&ctx->completions[1]); /* * The park is a bit of a work-around, without it we get * warning spews on shutdown with SQPOLL set and affinity @@ -4108,6 +4114,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_unaccount_mem(ctx->user, ring_pages(ctx->sq_entries, ctx->cq_entries)); free_uid(ctx->user); + kfree(ctx->completions); kfree(ctx); } @@ -4152,7 +4159,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_iopoll_reap_events(ctx); io_cqring_overflow_flush(ctx, true); - wait_for_completion(&ctx->ctx_done); + wait_for_completion(&ctx->completions[0]); io_ring_ctx_free(ctx); } @@ -4555,7 +4562,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, * no new references will come in after we've killed the percpu ref. */ mutex_unlock(&ctx->uring_lock); - wait_for_completion(&ctx->ctx_done); + wait_for_completion(&ctx->completions[0]); mutex_lock(&ctx->uring_lock); switch (opcode) { @@ -4598,7 +4605,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, } /* bring the ctx back to life */ - reinit_completion(&ctx->ctx_done); + reinit_completion(&ctx->completions[0]); percpu_ref_reinit(&ctx->refs); return ret; } -- 2.7.4