io-wq: fix potential race of acct->nr_workers
authorHao Xu <haoxu@linux.alibaba.com>
Sat, 11 Sep 2021 19:40:52 +0000 (03:40 +0800)
committerJens Axboe <axboe@kernel.dk>
Mon, 13 Sep 2021 01:27:47 +0000 (19:27 -0600)
Given max_worker is 1, and we currently have 1 running and it is
exiting. There may be race like:
 io_wqe_enqueue                   worker1
                               no work there and timeout
                               unlock(wqe->lock)
 ->insert work
                               -->io_worker_exit
 lock(wqe->lock)
 ->if(!nr_workers) //it's still 1
 unlock(wqe->lock)
    goto run_cancel
                                  lock(wqe->lock)
                                  nr_workers--
                                  ->dec_running
                                    ->worker creation fails
                                  unlock(wqe->lock)

We enqueued one work but there is no workers, causes hung.

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io-wq.c

index a1685b4..3d4460d 100644 (file)
@@ -176,7 +176,6 @@ static void io_worker_ref_put(struct io_wq *wq)
 static void io_worker_exit(struct io_worker *worker)
 {
        struct io_wqe *wqe = worker->wqe;
-       struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 
        if (refcount_dec_and_test(&worker->ref))
                complete(&worker->ref_done);
@@ -186,7 +185,6 @@ static void io_worker_exit(struct io_worker *worker)
        if (worker->flags & IO_WORKER_F_FREE)
                hlist_nulls_del_rcu(&worker->nulls_node);
        list_del_rcu(&worker->all_list);
-       acct->nr_workers--;
        preempt_disable();
        io_wqe_dec_running(worker);
        worker->flags = 0;
@@ -569,6 +567,7 @@ loop:
                }
                /* timed out, exit unless we're the last worker */
                if (last_timeout && acct->nr_workers > 1) {
+                       acct->nr_workers--;
                        raw_spin_unlock(&wqe->lock);
                        __set_current_state(TASK_RUNNING);
                        break;