Merge tag 'io_uring-5.12-2021-04-02' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Apr 2021 23:08:19 +0000 (16:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Apr 2021 23:08:19 +0000 (16:08 -0700)
Pull io_uring fixes from Jens Axboe:
 "Nothing really major in here, and finally nothing really related to
  signals. A few minor fixups related to the threading changes, and some
  general fixes, that's it.

  There's the pending gdb-gets-confused-about-arch issue, but that's
  more of a cosmetic problem, nothing that hinders use of it. And
  given that other archs will likely be affected by that oddity too,
  better to postpone any changes there until 5.13 imho"

* tag 'io_uring-5.12-2021-04-02' of git://git.kernel.dk/linux-block:
  io_uring: move reissue into regular IO path
  io_uring: fix EIOCBQUEUED iter revert
  io_uring/io-wq: protect against sprintf overflow
  io_uring: don't mark S_ISBLK async work as unbounded
  io_uring: drop sqd lock before handling signals for SQPOLL
  io_uring: handle setup-failed ctx in kill_timeouts
  io_uring: always go for cancellation spin on exec

fs/io-wq.c
fs/io_uring.c

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 7434eb4..433c4d3 100644
@@ -484,7 +484,7 @@ static int io_wqe_worker(void *data)
        worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
        io_wqe_inc_running(worker);
 
-       sprintf(buf, "iou-wrk-%d", wq->task_pid);
+       snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task_pid);
        set_task_comm(current, buf);
 
        while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
@@ -711,7 +711,7 @@ static int io_wq_manager(void *data)
        char buf[TASK_COMM_LEN];
        int node;
 
-       sprintf(buf, "iou-mgr-%d", wq->task_pid);
+       snprintf(buf, sizeof(buf), "iou-mgr-%d", wq->task_pid);
        set_task_comm(current, buf);
 
        do {
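
The two hunks above (and the matching one in io_sq_thread() further down)
replace unbounded sprintf() with snprintf(). A minimal userspace sketch of
why the bounded form is the defensive choice -- the buffer size mirrors the
kernel's TASK_COMM_LEN, but the program itself is illustrative, not kernel
code:

#include <stdio.h>

int main(void)
{
	char buf[16];			/* mirrors TASK_COMM_LEN */
	int pid = 4194304;		/* kernel's maximum pid (2^22) */

	/* "iou-wrk-4194304" is 15 chars + NUL: exactly 16 bytes, zero
	 * slack.  sprintf() would rely on that staying true forever;
	 * snprintf() caps the write at sizeof(buf) no matter what. */
	snprintf(buf, sizeof(buf), "iou-wrk-%d", pid);
	printf("%s\n", buf);
	return 0;
}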
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1949b80..8be5420 100644
@@ -697,6 +697,7 @@ enum {
        REQ_F_NO_FILE_TABLE_BIT,
        REQ_F_LTIMEOUT_ACTIVE_BIT,
        REQ_F_COMPLETE_INLINE_BIT,
+       REQ_F_REISSUE_BIT,
 
        /* not a real bit, just to check we're not overflowing the space */
        __REQ_F_LAST_BIT,
@@ -740,6 +741,8 @@ enum {
        REQ_F_LTIMEOUT_ACTIVE   = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
        /* completion is deferred through io_comp_state */
        REQ_F_COMPLETE_INLINE   = BIT(REQ_F_COMPLETE_INLINE_BIT),
+       /* caller should reissue async */
+       REQ_F_REISSUE           = BIT(REQ_F_REISSUE_BIT),
 };
 
 struct async_poll {
@@ -1213,7 +1216,7 @@ static void io_prep_async_work(struct io_kiocb *req)
        if (req->flags & REQ_F_ISREG) {
                if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
                        io_wq_hash_work(&req->work, file_inode(req->file));
-       } else {
+       } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
                if (def->unbound_nonreg_file)
                        req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
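
A condensed sketch of the classification rule after this hunk -- a
hypothetical standalone helper, not the kernel function: regular files and
block devices both complete in bounded time, so neither should land in
io-wq's unbounded worker pool; only open-ended files (sockets, pipes, and
the like) stay subject to the per-opcode unbound_nonreg_file policy.

#include <stdbool.h>
#include <sys/stat.h>

bool work_is_unbounded(const struct stat *st, bool unbound_nonreg_file)
{
	/* regular file or block device: completes in finite time */
	if (st && (S_ISREG(st->st_mode) || S_ISBLK(st->st_mode)))
		return false;
	/* no file, or an open-ended one: defer to the opcode's policy */
	return unbound_nonreg_file;
}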
@@ -2503,8 +2506,10 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 
        if (req->rw.kiocb.ki_flags & IOCB_WRITE)
                kiocb_end_write(req);
-       if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
+       if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) {
+               req->flags |= REQ_F_REISSUE;
                return;
+       }
        if (res != req->result)
                req_set_fail_links(req);
        if (req->flags & REQ_F_BUFFER_SELECTED)
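
The REQ_F_REISSUE changes in this and the following hunks move the actual
retry out of the completion path: __io_complete_rw() may run from a context
where reissuing is unsafe, so it only records the intent and lets the
regular IO path act on it. A toy sketch of that handshake with illustrative
names (none of these are kernel API):

#include <errno.h>
#include <stdbool.h>

#define TOY_F_REISSUE	0x1

struct toy_req {
	unsigned int flags;
	int result;
};

/* completion side: record intent only, never resubmit from here */
void toy_complete(struct toy_req *req, int res, bool may_retry)
{
	if (res == -EAGAIN && may_retry) {
		req->flags |= TOY_F_REISSUE;
		return;			/* issuer will see the flag */
	}
	req->result = res;
}

/* submission side: the only place a retry actually happens */
int toy_issue(struct toy_req *req, int (*submit)(struct toy_req *))
{
	int ret;

	do {
		/* clear stale state first, like io_wq_submit_work() */
		req->flags &= ~TOY_F_REISSUE;
		ret = submit(req);
		if (req->flags & TOY_F_REISSUE)
			ret = -EAGAIN;	/* completion asked for a redo */
	} while (ret == -EAGAIN);
	return ret;
}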
@@ -3283,11 +3288,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
        ret = io_iter_do_read(req, iter);
 
-       if (ret == -EIOCBQUEUED) {
-               if (req->async_data)
-                       iov_iter_revert(iter, io_size - iov_iter_count(iter));
-               goto out_free;
-       } else if (ret == -EAGAIN) {
+       if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
                /* IOPOLL retry should happen for io-wq threads */
                if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
                        goto done;
@@ -3297,6 +3298,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                /* some cases will consume bytes even on error returns */
                iov_iter_revert(iter, io_size - iov_iter_count(iter));
                ret = 0;
+       } else if (ret == -EIOCBQUEUED) {
+               goto out_free;
        } else if (ret <= 0 || ret == io_size || !force_nonblock ||
                   (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
                /* read all, failed, already did sync or don't want to retry */
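
The two io_read() hunks above (and the matching io_write() one below) also
fix the -EIOCBQUEUED revert: once a request has been queued asynchronously,
the async side owns the iterator, so rewinding it in the submitter corrupts
the eventual completion. A toy model of that bookkeeping -- types and names
are illustrative, only the rule is the point; EIOCBQUEUED itself is a
kernel-internal errno value:

#include <stddef.h>

#define EIOCBQUEUED	529	/* kernel-internal, not in userspace errno.h */

struct toy_iter {
	size_t count;		/* bytes remaining, like iov_iter.count */
};

static void toy_iter_revert(struct toy_iter *it, size_t bytes)
{
	it->count += bytes;	/* un-consume, like iov_iter_revert() */
}

void prepare_retry(struct toy_iter *it, size_t io_size, int ret)
{
	if (ret == -EIOCBQUEUED)
		return;		/* queued: the async path owns the iterator */
	/* failed attempts may consume bytes even on error; rewind them */
	toy_iter_revert(it, io_size - it->count);
}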
@@ -3409,6 +3412,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
        else
                ret2 = -EINVAL;
 
+       if (req->flags & REQ_F_REISSUE)
+               ret2 = -EAGAIN;
+
        /*
         * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
         * retry them without IOCB_NOWAIT.
@@ -3418,8 +3424,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
        /* no retry on NONBLOCK nor RWF_NOWAIT */
        if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
                goto done;
-       if (ret2 == -EIOCBQUEUED && req->async_data)
-               iov_iter_revert(iter, io_size - iov_iter_count(iter));
        if (!force_nonblock || ret2 != -EAGAIN) {
                /* IOPOLL retry should happen for io-wq threads */
                if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
@@ -6164,6 +6168,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
                ret = -ECANCELED;
 
        if (!ret) {
+               req->flags &= ~REQ_F_REISSUE;
                do {
                        ret = io_issue_sqe(req, 0);
                        /*
@@ -6718,7 +6723,7 @@ static int io_sq_thread(void *data)
        char buf[TASK_COMM_LEN];
        DEFINE_WAIT(wait);
 
-       sprintf(buf, "iou-sqp-%d", sqd->task_pid);
+       snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
        set_task_comm(current, buf);
        current->pf_io_worker = NULL;
 
@@ -6733,22 +6738,25 @@ static int io_sq_thread(void *data)
                int ret;
                bool cap_entries, sqt_spin, needs_sched;
 
-               if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
+               if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+                   signal_pending(current)) {
+                       bool did_sig = false;
+
                        mutex_unlock(&sqd->lock);
+                       if (signal_pending(current)) {
+                               struct ksignal ksig;
+
+                               did_sig = get_signal(&ksig);
+                       }
                        cond_resched();
                        mutex_lock(&sqd->lock);
+                       if (did_sig)
+                               break;
                        io_run_task_work();
                        io_run_task_work_head(&sqd->park_task_work);
                        timeout = jiffies + sqd->sq_thread_idle;
                        continue;
                }
-               if (signal_pending(current)) {
-                       struct ksignal ksig;
-
-                       if (!get_signal(&ksig))
-                               continue;
-                       break;
-               }
                sqt_spin = false;
                cap_entries = !list_is_singular(&sqd->ctx_list);
                list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
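
The reshuffle above applies a classic locking rule: don't call a
potentially-blocking function (get_signal() here) while holding sqd->lock,
or tasks that need that lock to make progress can deadlock against the
sleeping holder. A generic pthread sketch of the drop/handle/reacquire
shape -- names are illustrative, not the kernel's:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t sq_lock = PTHREAD_MUTEX_INITIALIZER;

/* stand-in for get_signal(&ksig): reports whether a fatal signal hit */
static bool handle_pending_signal(void)
{
	return false;		/* assume none for this sketch */
}

/* returns true if the caller's main loop should break out */
bool park_or_signal_check(void)
{
	bool did_sig;

	pthread_mutex_unlock(&sq_lock);	/* drop before possibly blocking */
	did_sig = handle_pending_signal();
	pthread_mutex_lock(&sq_lock);	/* reacquire before touching state */
	return did_sig;
}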
@@ -8603,9 +8611,9 @@ static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
                        canceled++;
                }
        }
-       io_commit_cqring(ctx);
+       if (canceled != 0)
+               io_commit_cqring(ctx);
        spin_unlock_irq(&ctx->completion_lock);
-
        if (canceled != 0)
                io_cqring_ev_posted(ctx);
        return canceled != 0;
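
The gating above keeps io_commit_cqring() away from a ctx whose setup
failed and which therefore may not have a usable CQ ring, and as a side
effect skips needless wakeups. A small pthread sketch of the same "mutate
under the lock, publish only if something changed" shape, with hypothetical
names:

#include <pthread.h>

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

int cancel_all(int *pending, int n)
{
	int canceled = 0;

	pthread_mutex_lock(&lk);
	for (int i = 0; i < n; i++) {
		if (pending[i]) {
			pending[i] = 0;
			canceled++;
		}
	}
	if (canceled)
		pthread_cond_broadcast(&cv);	/* publish only real changes */
	pthread_mutex_unlock(&lk);
	return canceled;
}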
@@ -9002,6 +9010,8 @@ void __io_uring_task_cancel(void)
 
        /* make sure overflow events are dropped */
        atomic_inc(&tctx->in_idle);
+       __io_uring_files_cancel(NULL);
+
        do {
                /* read completions before cancelations */
                inflight = tctx_inflight(tctx);