io_uring: split work handling part of SQPOLL into helper
author Jens Axboe <axboe@kernel.dk>
Mon, 14 Sep 2020 17:07:26 +0000 (11:07 -0600)
committer Jens Axboe <axboe@kernel.dk>
Thu, 1 Oct 2020 02:32:33 +0000 (20:32 -0600)
This is done in preparation for handling more than one ctx, but it also
cleans up the code a bit, since io_sq_thread() was a bit too unwieldy to
get a good overview of.

__io_sq_thread() is now the main handler, and it returns an enum sq_ret
that tells io_sq_thread() what it ended up doing. Based on that, the
parent then decides whether to idle, spin, or keep handling work.
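A condensed sketch of the new parent loop, mirroring the diff below (setup,
credentials, and park/teardown handling omitted for brevity):

    static int io_sq_thread(void *data)
    {
            struct io_ring_ctx *ctx = data;
            unsigned long start_jiffies = jiffies;

            while (!kthread_should_park()) {
                    switch (__io_sq_thread(ctx, start_jiffies)) {
                    case SQT_IDLE:
                            /* nothing to do and past the idle window: sleep until woken */
                            schedule();
                            start_jiffies = jiffies;
                            continue;
                    case SQT_SPIN:
                            /* inside the idle window: run task work and busy-spin */
                            io_run_task_work();
                            cond_resched();
                            fallthrough;
                    case SQT_DID_WORK:
                            continue;
                    }
            }
            return 0;
    }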

Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c

index 6d97767c82d50c328be2f0a1503a4fccdf23fd79..4958e78cd39015545a2fc62d9174f6affe931af9 100644
@@ -6642,110 +6642,119 @@ static int io_sq_wake_function(struct wait_queue_entry *wqe, unsigned mode,
        return ret;
 }
 
-static int io_sq_thread(void *data)
+enum sq_ret {
+       SQT_IDLE        = 1,
+       SQT_SPIN        = 2,
+       SQT_DID_WORK    = 4,
+};
+
+static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
+                                 unsigned long start_jiffies)
 {
-       struct io_ring_ctx *ctx = data;
-       const struct cred *old_cred;
-       unsigned long timeout;
+       unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
+       unsigned int to_submit;
        int ret = 0;
 
-       init_wait(&ctx->sqo_wait_entry);
-       ctx->sqo_wait_entry.func = io_sq_wake_function;
+again:
+       if (!list_empty(&ctx->iopoll_list)) {
+               unsigned nr_events = 0;
 
-       complete(&ctx->sq_thread_comp);
+               mutex_lock(&ctx->uring_lock);
+               if (!list_empty(&ctx->iopoll_list) && !need_resched())
+                       io_do_iopoll(ctx, &nr_events, 0);
+               mutex_unlock(&ctx->uring_lock);
+       }
 
-       old_cred = override_creds(ctx->creds);
+       to_submit = io_sqring_entries(ctx);
 
-       timeout = jiffies + ctx->sq_thread_idle;
-       while (!kthread_should_park()) {
-               unsigned int to_submit;
+       /*
+        * If submit got -EBUSY, flag us as needing the application
+        * to enter the kernel to reap and flush events.
+        */
+       if (!to_submit || ret == -EBUSY || need_resched()) {
+               /*
+                * Drop cur_mm before scheduling, we can't hold it for
+                * long periods (or over schedule()). Do this before
+                * adding ourselves to the waitqueue, as the unuse/drop
+                * may sleep.
+                */
+               io_sq_thread_drop_mm();
 
-               if (!list_empty(&ctx->iopoll_list)) {
-                       unsigned nr_events = 0;
+               /*
+                * We're polling. If we're within the defined idle
+                * period, then let us spin without work before going
+                * to sleep. The exception is if we got EBUSY doing
+                * more IO, we should wait for the application to
+                * reap events and wake us up.
+                */
+               if (!list_empty(&ctx->iopoll_list) || need_resched() ||
+                   (!time_after(jiffies, timeout) && ret != -EBUSY &&
+                   !percpu_ref_is_dying(&ctx->refs)))
+                       return SQT_SPIN;
 
-                       mutex_lock(&ctx->uring_lock);
-                       if (!list_empty(&ctx->iopoll_list) && !need_resched())
-                               io_do_iopoll(ctx, &nr_events, 0);
-                       else
-                               timeout = jiffies + ctx->sq_thread_idle;
-                       mutex_unlock(&ctx->uring_lock);
+               prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
+                                       TASK_INTERRUPTIBLE);
+
+               /*
+                * While doing polled IO, before going to sleep, we need
+                * to check if there are new reqs added to iopoll_list,
+                * it is because reqs may have been punted to io worker
+                * and will be added to iopoll_list later, hence check
+                * the iopoll_list again.
+                */
+               if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+                   !list_empty_careful(&ctx->iopoll_list)) {
+                       finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+                       goto again;
                }
 
+               io_ring_set_wakeup_flag(ctx);
+
                to_submit = io_sqring_entries(ctx);
+               if (!to_submit || ret == -EBUSY)
+                       return SQT_IDLE;
+       }
 
-               /*
-                * If submit got -EBUSY, flag us as needing the application
-                * to enter the kernel to reap and flush events.
-                */
-               if (!to_submit || ret == -EBUSY || need_resched()) {
-                       /*
-                        * Drop cur_mm before scheduling, we can't hold it for
-                        * long periods (or over schedule()). Do this before
-                        * adding ourselves to the waitqueue, as the unuse/drop
-                        * may sleep.
-                        */
-                       io_sq_thread_drop_mm();
+       finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+       io_ring_clear_wakeup_flag(ctx);
 
-                       /*
-                        * We're polling. If we're within the defined idle
-                        * period, then let us spin without work before going
-                        * to sleep. The exception is if we got EBUSY doing
-                        * more IO, we should wait for the application to
-                        * reap events and wake us up.
-                        */
-                       if (!list_empty(&ctx->iopoll_list) || need_resched() ||
-                           (!time_after(jiffies, timeout) && ret != -EBUSY &&
-                           !percpu_ref_is_dying(&ctx->refs))) {
-                               io_run_task_work();
-                               cond_resched();
-                               continue;
-                       }
+       mutex_lock(&ctx->uring_lock);
+       if (likely(!percpu_ref_is_dying(&ctx->refs)))
+               ret = io_submit_sqes(ctx, to_submit);
+       mutex_unlock(&ctx->uring_lock);
+       return SQT_DID_WORK;
+}
 
-                       prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
-                                               TASK_INTERRUPTIBLE);
+static int io_sq_thread(void *data)
+{
+       struct io_ring_ctx *ctx = data;
+       const struct cred *old_cred;
+       unsigned long start_jiffies;
 
-                       /*
-                        * While doing polled IO, before going to sleep, we need
-                        * to check if there are new reqs added to iopoll_list,
-                        * it is because reqs may have been punted to io worker
-                        * and will be added to iopoll_list later, hence check
-                        * the iopoll_list again.
-                        */
-                       if ((ctx->flags & IORING_SETUP_IOPOLL) &&
-                           !list_empty_careful(&ctx->iopoll_list)) {
-                               finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-                               continue;
-                       }
+       init_wait(&ctx->sqo_wait_entry);
+       ctx->sqo_wait_entry.func = io_sq_wake_function;
 
-                       io_ring_set_wakeup_flag(ctx);
+       complete(&ctx->sq_thread_comp);
 
-                       to_submit = io_sqring_entries(ctx);
-                       if (!to_submit || ret == -EBUSY) {
-                               if (kthread_should_park()) {
-                                       finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-                                       break;
-                               }
-                               if (io_run_task_work()) {
-                                       finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-                                       io_ring_clear_wakeup_flag(ctx);
-                                       continue;
-                               }
-                               schedule();
-                               finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+       old_cred = override_creds(ctx->creds);
 
-                               ret = 0;
-                               continue;
-                       }
-                       finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+       start_jiffies = jiffies;
+       while (!kthread_should_park()) {
+               enum sq_ret ret;
 
-                       io_ring_clear_wakeup_flag(ctx);
+               ret = __io_sq_thread(ctx, start_jiffies);
+               switch (ret) {
+               case SQT_IDLE:
+                       schedule();
+                       start_jiffies = jiffies;
+                       continue;
+               case SQT_SPIN:
+                       io_run_task_work();
+                       cond_resched();
+                       fallthrough;
+               case SQT_DID_WORK:
+                       continue;
                }
-
-               mutex_lock(&ctx->uring_lock);
-               if (likely(!percpu_ref_is_dying(&ctx->refs)))
-                       ret = io_submit_sqes(ctx, to_submit);
-               mutex_unlock(&ctx->uring_lock);
-               timeout = jiffies + ctx->sq_thread_idle;
        }
 
        io_run_task_work();