io_uring: banish non-hot data to end of io_ring_ctx
author Pavel Begunkov <asml.silence@gmail.com>
Thu, 24 Aug 2023 22:53:34 +0000 (23:53 +0100)
committer Jens Axboe <axboe@kernel.dk>
Thu, 24 Aug 2023 23:16:20 +0000 (17:16 -0600)
Let's move all slow path, setup/init and similar fields to the end of
io_ring_ctx; that makes later ctx reorganisation easier. That includes
the page arrays used only on teardown, the CQ overflow list, the old
provided buffer caches, and the poll hashes used by io-wq.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/fc471b63925a0bf90a34943c4d36163c523cfb43.1692916914.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
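
As a rough illustration of the technique (a sketch, not the author's code): the
cut-down, hypothetical struct example_ctx below shows the layout idea the patch
applies -- hot submission/completion state stays in the leading
____cacheline_aligned_in_smp groups, while fields touched only on slow paths or
at setup/teardown go at the tail so they do not share cache lines with hot data.

    /* Illustrative sketch only -- a made-up example_ctx, not the real io_ring_ctx. */
    #include <linux/cache.h>        /* ____cacheline_aligned_in_smp */
    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/mm_types.h>     /* struct page */

    struct example_ctx {
            struct {
                    /* hot: read on every submission */
                    unsigned int            flags;
                    unsigned int            sq_entries;
            } ____cacheline_aligned_in_smp;

            struct {
                    /* hot: completion side, contended between submitter and workers */
                    spinlock_t              completion_lock;
                    struct list_head        cq_pending;
            } ____cacheline_aligned_in_smp;

            /*
             * cold: slow path and setup/teardown only.  Placing these last
             * keeps them out of the cache lines occupied by the hot groups
             * above.
             */
            struct list_head        overflow_list;
            unsigned short          n_ring_pages;
            struct page             **ring_pages;
    };

Which member lands in which cache line can be checked with pahole on a kernel
built with debug info, e.g. pahole -C io_ring_ctx vmlinux.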
include/linux/io_uring_types.h

index ad87d60..72e6097 100644
@@ -211,20 +211,11 @@ struct io_ring_ctx {
                unsigned int            drain_disabled: 1;
                unsigned int            compat: 1;
 
-               enum task_work_notify_mode      notify_method;
+               struct task_struct      *submitter_task;
+               struct io_rings         *rings;
+               struct percpu_ref       refs;
 
-               /*
-                * If IORING_SETUP_NO_MMAP is used, then the below holds
-                * the gup'ed pages for the two rings, and the sqes.
-                */
-               unsigned short          n_ring_pages;
-               unsigned short          n_sqe_pages;
-               struct page             **ring_pages;
-               struct page             **sqe_pages;
-
-               struct io_rings                 *rings;
-               struct task_struct              *submitter_task;
-               struct percpu_ref               refs;
+               enum task_work_notify_mode      notify_method;
        } ____cacheline_aligned_in_smp;
 
        /* submission data */
@@ -262,10 +253,8 @@ struct io_ring_ctx {
 
                struct io_buffer_list   *io_bl;
                struct xarray           io_bl_xa;
-               struct list_head        io_buffers_cache;
 
                struct io_hash_table    cancel_table_locked;
-               struct list_head        cq_overflow_list;
                struct io_alloc_cache   apoll_cache;
                struct io_alloc_cache   netmsg_cache;
        } ____cacheline_aligned_in_smp;
@@ -298,11 +287,8 @@ struct io_ring_ctx {
                 * manipulate the list, hence no extra locking is needed there.
                 */
                struct io_wq_work_list  iopoll_list;
-               struct io_hash_table    cancel_table;
 
                struct llist_head       work_llist;
-
-               struct list_head        io_buffers_comp;
        } ____cacheline_aligned_in_smp;
 
        /* timeouts */
@@ -318,6 +304,10 @@ struct io_ring_ctx {
        struct io_wq_work_list  locked_free_list;
        unsigned int            locked_free_nr;
 
+       struct list_head        io_buffers_comp;
+       struct list_head        cq_overflow_list;
+       struct io_hash_table    cancel_table;
+
        const struct cred       *sq_creds;      /* cred used for __io_sq_thread() */
        struct io_sq_data       *sq_data;       /* if using sq thread polling */
 
@@ -332,6 +322,8 @@ struct io_ring_ctx {
        struct xarray           personalities;
        u32                     pers_next;
 
+       struct list_head        io_buffers_cache;
+
        /* Keep this last, we don't need it for the fast path */
        struct wait_queue_head          poll_wq;
        struct io_restriction           restrictions;
@@ -375,6 +367,15 @@ struct io_ring_ctx {
        unsigned                        sq_thread_idle;
        /* protected by ->completion_lock */
        unsigned                        evfd_last_cq_tail;
+
+       /*
+        * If IORING_SETUP_NO_MMAP is used, then the below holds
+        * the gup'ed pages for the two rings, and the sqes.
+        */
+       unsigned short                  n_ring_pages;
+       unsigned short                  n_sqe_pages;
+       struct page                     **ring_pages;
+       struct page                     **sqe_pages;
 };
 
 struct io_tw_state {