Merge tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: Sun, 7 May 2023 17:00:09 +0000 (10:00 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Sun, 7 May 2023 17:00:09 +0000 (10:00 -0700)
Pull more io_uring updates from Jens Axboe:
 "Nothing major in here, just two different parts:

   - A small series from Breno that enables passing the full SQE down
     for ->uring_cmd().

     This is a prerequisite for enabling full network socket operations.
     Queued up a bit late because of some stylistic concerns that got
     resolved; it would be nice to have this in 6.4-rc1 so the dependent
     work will be easier to handle for 6.5.

   - Fix for the huge page coalescing, which was a regression introduced
     in the 6.3 kernel release (Tobias)"

* tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux:
  io_uring: Remove unnecessary BUILD_BUG_ON
  io_uring: Pass whole sqe to commands
  io_uring: Create a helper to return the SQE size
  io_uring/rsrc: check for nonconsecutive pages
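
The practical effect of the first three patches is a change in what a
->uring_cmd() handler dereferences: instead of a bare payload pointer,
the handler now holds the whole SQE and extracts its command block with
io_uring_sqe_cmd(). A minimal before/after sketch (struct my_drv_cmd and
do_my_cmd() are made up for illustration; the helper itself comes from
the include/linux/io_uring.h hunk below):

    /* Hypothetical 16-byte command block, fitting a plain 64-byte SQE. */
    struct my_drv_cmd {
            __u32 op;
            __u32 flags;
            __u64 addr;
    };

    /* Before this series: only the payload pointer was available. */
    static int my_uring_cmd_old(struct io_uring_cmd *ioucmd,
                                unsigned int issue_flags)
    {
            const struct my_drv_cmd *dc = ioucmd->cmd;

            return do_my_cmd(dc);
    }

    /* After: the full SQE is available; payload via the new helper. */
    static int my_uring_cmd_new(struct io_uring_cmd *ioucmd,
                                unsigned int issue_flags)
    {
            const struct my_drv_cmd *dc = io_uring_sqe_cmd(ioucmd->sqe);

            return do_my_cmd(dc);
    }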

drivers/block/ublk_drv.c
drivers/nvme/host/ioctl.c
include/linux/io_uring.h
io_uring/io_uring.h
io_uring/opdef.c
io_uring/rsrc.c
io_uring/uring_cmd.c
io_uring/uring_cmd.h

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 72a5cde..c7331f5 100644
@@ -1035,7 +1035,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
 }
 
 static void ublk_commit_completion(struct ublk_device *ub,
-               struct ublksrv_io_cmd *ub_cmd)
+               const struct ublksrv_io_cmd *ub_cmd)
 {
        u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
        struct ublk_queue *ubq = ublk_get_queue(ub, qid);
@@ -1292,7 +1292,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
 
 static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
                               unsigned int issue_flags,
-                              struct ublksrv_io_cmd *ub_cmd)
+                              const struct ublksrv_io_cmd *ub_cmd)
 {
        struct ublk_device *ub = cmd->file->private_data;
        struct ublk_queue *ubq;
@@ -1399,17 +1399,17 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
-       struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd;
-       struct ublksrv_io_cmd ub_cmd;
-
        /*
         * Not necessary for async retry, but let's keep it simple and always
         * copy the values to avoid any potential reuse.
         */
-       ub_cmd.q_id = READ_ONCE(ub_src->q_id);
-       ub_cmd.tag = READ_ONCE(ub_src->tag);
-       ub_cmd.result = READ_ONCE(ub_src->result);
-       ub_cmd.addr = READ_ONCE(ub_src->addr);
+       const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
+       const struct ublksrv_io_cmd ub_cmd = {
+               .q_id = READ_ONCE(ub_src->q_id),
+               .tag = READ_ONCE(ub_src->tag),
+               .result = READ_ONCE(ub_src->result),
+               .addr = READ_ONCE(ub_src->addr)
+       };
 
        return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
 }
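
Note the shape of the converted ublk_ch_uring_cmd() above: the SQE lives
in memory userspace can touch, so the handler snapshots each field with
READ_ONCE() into a private copy before validating or acting on it (the
in-tree comment calls this avoiding "any potential reuse"). The same
pattern, applied to the hypothetical command type from the earlier
sketch:

    static int my_uring_cmd(struct io_uring_cmd *ioucmd,
                            unsigned int issue_flags)
    {
            const struct my_drv_cmd *src = io_uring_sqe_cmd(ioucmd->sqe);
            /* Snapshot once; never re-read the shared SQE after this. */
            const struct my_drv_cmd dc = {
                    .op    = READ_ONCE(src->op),
                    .flags = READ_ONCE(src->flags),
                    .addr  = READ_ONCE(src->addr),
            };

            return do_my_cmd(&dc);
    }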
@@ -1619,7 +1619,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
 
 static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        int ublksrv_pid = (int)header->data[0];
        struct gendisk *disk;
        int ret = -EINVAL;
@@ -1682,7 +1682,7 @@ out_unlock:
 static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        void __user *argp = (void __user *)(unsigned long)header->addr;
        cpumask_var_t cpumask;
        unsigned long queue;
@@ -1733,7 +1733,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
 
 static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        void __user *argp = (void __user *)(unsigned long)header->addr;
        struct ublksrv_ctrl_dev_info info;
        struct ublk_device *ub;
@@ -1910,7 +1910,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
 
 static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
 
        pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
                        __func__, cmd->cmd_op, header->dev_id, header->queue_id,
@@ -1929,7 +1929,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
 static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        void __user *argp = (void __user *)(unsigned long)header->addr;
 
        if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
@@ -1960,7 +1960,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
 static int ublk_ctrl_get_params(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        void __user *argp = (void __user *)(unsigned long)header->addr;
        struct ublk_params_header ph;
        int ret;
@@ -1991,7 +1991,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub,
 static int ublk_ctrl_set_params(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        void __user *argp = (void __user *)(unsigned long)header->addr;
        struct ublk_params_header ph;
        int ret = -EFAULT;
@@ -2052,7 +2052,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 static int ublk_ctrl_start_recovery(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        int ret = -EINVAL;
        int i;
 
@@ -2094,7 +2094,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
 static int ublk_ctrl_end_recovery(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        int ublksrv_pid = (int)header->data[0];
        int ret = -EINVAL;
 
@@ -2161,7 +2161,7 @@ exit:
 static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
                struct io_uring_cmd *cmd)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe);
        bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
        void __user *argp = (void __user *)(unsigned long)header->addr;
        char *dev_path = NULL;
@@ -2240,7 +2240,7 @@ exit:
 static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
                unsigned int issue_flags)
 {
-       struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
        struct ublk_device *ub = NULL;
        u32 cmd_op = cmd->cmd_op;
        int ret = -EINVAL;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index d24ea2e..81c5c9e 100644
@@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
 {
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-       const struct nvme_uring_cmd *cmd = ioucmd->cmd;
+       const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
        struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
        struct nvme_uring_data d;
        struct nvme_command c;
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 35b9328..3399d97 100644
@@ -24,7 +24,7 @@ enum io_uring_cmd_flags {
 
 struct io_uring_cmd {
        struct file     *file;
-       const void      *cmd;
+       const struct io_uring_sqe *sqe;
        union {
                /* callback to defer completions to task context */
                void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
@@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk)
        if (tsk->io_uring)
                __io_uring_free(tsk);
 }
+
+static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
+{
+       return sqe->cmd;
+}
 #else
 static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
                              struct iov_iter *iter, void *ioucmd)
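
For sizing purposes: the payload io_uring_sqe_cmd() returns starts at
the SQE's cmd field and runs to the end of the slot. The BUILD_BUG_ONs
removed later in this merge pin the arithmetic down: with cmd at offset
48, a plain 64-byte SQE leaves 16 payload bytes and an
IORING_SETUP_SQE128 slot leaves 80. As a compile-time sanity check
(assuming the current uapi layout):

    #include <stddef.h>
    #include <linux/io_uring.h>

    /* 64 - 48 = 16 and 128 - 48 = 80: the payload sizes the removed
     * uring_cmd_pdu_size() macro used to compute. */
    _Static_assert(offsetof(struct io_uring_sqe, cmd) == 48,
                   "cmd payload starts 48 bytes into the SQE");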
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 25515d6..259bf79 100644
@@ -394,4 +394,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
        io_req_task_work_add(req);
 }
 
+/*
+ * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
+ * slot.
+ */
+static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
+{
+       if (ctx->flags & IORING_SETUP_SQE128)
+               return 2 * sizeof(struct io_uring_sqe);
+       return sizeof(struct io_uring_sqe);
+}
 #endif
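
uring_sqe_size() feeds the two changes that follow: the async-prep path
copies exactly this many bytes, and the opdef entry below sizes the
IORING_OP_URING_CMD async_data buffer at the fixed worst case, so the
copy always fits whichever flavor of ring submitted the request. In
numbers (assuming the 64-byte uapi SQE):

    _Static_assert(sizeof(struct io_uring_sqe) == 64, "uapi SQE is 64 bytes");
    /* plain ring:   uring_sqe_size() == 64
     * SQE128 ring:  uring_sqe_size() == 128
     * async_size == 2 * 64 == 128, i.e. always >= uring_sqe_size() */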
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index cca7c5b..3b9c648 100644
@@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = {
        },
        [IORING_OP_URING_CMD] = {
                .name                   = "URING_CMD",
-               .async_size             = uring_cmd_pdu_size(1),
+               .async_size             = 2 * sizeof(struct io_uring_sqe),
                .prep_async             = io_uring_cmd_prep_async,
        },
        [IORING_OP_SEND_ZC] = {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d4c9139..d46f72a 100644
@@ -1116,7 +1116,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
        if (nr_pages > 1) {
                folio = page_folio(pages[0]);
                for (i = 1; i < nr_pages; i++) {
-                       if (page_folio(pages[i]) != folio) {
+                       /*
+                        * Pages must be consecutive and on the same folio for
+                        * this to work
+                        */
+                       if (page_folio(pages[i]) != folio ||
+                           pages[i] != pages[i - 1] + 1) {
                                folio = NULL;
                                break;
                        }
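
The fix tightens the coalescing condition for registered buffers:
sharing a folio is no longer enough, the pinned pages must also be
physically consecutive. Condensed into a standalone predicate (the
helper name is hypothetical; the logic mirrors the hunk above):

    /* Coalesce only if every page sits on the same folio AND directly
     * follows its predecessor. Same-folio alone is not sufficient; for
     * example, a mapping that pins parts of one huge page twice or out
     * of order yields same-folio but nonconsecutive struct pages. */
    static bool pages_are_coalescable(struct page **pages, int nr_pages)
    {
            struct folio *folio = page_folio(pages[0]);
            int i;

            for (i = 1; i < nr_pages; i++) {
                    if (page_folio(pages[i]) != folio ||
                        pages[i] != pages[i - 1] + 1)
                            return false;
            }
            return true;
    }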
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 5113c9a..5e32db4 100644
@@ -69,15 +69,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 int io_uring_cmd_prep_async(struct io_kiocb *req)
 {
        struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-       size_t cmd_size;
 
-       BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16);
-       BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80);
-
-       cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
-
-       memcpy(req->async_data, ioucmd->cmd, cmd_size);
-       ioucmd->cmd = req->async_data;
+       memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx));
+       ioucmd->sqe = req->async_data;
        return 0;
 }
 
@@ -103,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                req->imu = ctx->user_bufs[index];
                io_req_set_rsrc_node(req, ctx, 0);
        }
-       ioucmd->cmd = sqe->cmd;
+       ioucmd->sqe = sqe;
        ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
        return 0;
 }
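
The two hunks above close the loop: io_uring_cmd_prep() points
ioucmd->sqe at the live ring entry, and io_uring_cmd_prep_async() later
swaps in a stable full-SQE copy held in req->async_data. Either way, a
driver-side read such as the nvme one earlier in this merge stays
correct across an async retry:

    /* Valid whether ioucmd->sqe targets the SQ ring or the async copy: */
    const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);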
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 7c6697d..8117684 100644
@@ -3,11 +3,3 @@
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
 int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_uring_cmd_prep_async(struct io_kiocb *req);
-
-/*
- * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
- * the following sqe if SQE128 is used.
- */
-#define uring_cmd_pdu_size(is_sqe128)                          \
-       ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -    \
-               offsetof(struct io_uring_sqe, cmd))