io_uring: improve ctx hang handling
author Pavel Begunkov <asml.silence@gmail.com>
Mon, 9 Aug 2021 12:04:17 +0000 (13:04 +0100)
committer Jens Axboe <axboe@kernel.dk>
Mon, 23 Aug 2021 19:07:59 +0000 (13:07 -0600)
If io_ring_exit_work() can't finish within 5 minutes, something is
going very wrong. Don't keep spinning at a HZ / 20 rate: it doesn't
help, and it may consume a lot of CPU time if many workers are stuck
this way.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9e2d1ca81d569f6bc628af1a42ff6663bff7ce9c.1628471125.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c

index 26a3ab9..af9472e 100644 (file)
@@ -8795,6 +8795,7 @@ static void io_ring_exit_work(struct work_struct *work)
 {
        struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
        unsigned long timeout = jiffies + HZ * 60 * 5;
+       unsigned long interval = HZ / 20;
        struct io_tctx_exit exit;
        struct io_tctx_node *node;
        int ret;
@@ -8819,8 +8820,11 @@ static void io_ring_exit_work(struct work_struct *work)
                        io_sq_thread_unpark(sqd);
                }
 
-               WARN_ON_ONCE(time_after(jiffies, timeout));
-       } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+               if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
+                       /* there is little hope left, don't run it too often */
+                       interval = HZ * 60;
+               }
+       } while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
 
        init_completion(&exit.completion);
        init_task_work(&exit.task_work, io_tctx_exit_cb);