blk-mq: improve tag waiting setup for non-shared tags
author Jens Axboe <axboe@kernel.dk>
Thu, 9 Nov 2017 23:10:13 +0000 (16:10 -0700)
committer Jens Axboe <axboe@kernel.dk>
Sat, 11 Nov 2017 02:55:57 +0000 (19:55 -0700)
If we run out of driver tags, we currently treat shared and non-shared
tags the same - both cases hook into the tag waitqueue. This is a bit
more costly than it needs to be for non-shared tags, since we have to
grab both the hctx lock and the waitqueue lock (and disable interrupts).
For the non-shared case, we can simply mark the queue as needing a
restart.
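
To make the cost difference concrete, here's a rough sketch of what the
non-shared path now boils down to (paraphrasing the hunk below; the
shared-tags branch and its locking are omitted):

    /*
     * Non-shared tags: no hctx->lock, no waitqueue lock, no IRQ
     * disabling. Just flag the hctx for a restart, then re-check
     * whether a tag was freed in the meantime.
     */
    if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
            set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

    /*
     * Don't clear RESTART if this fails; someone else may have set
     * it, and at worst it costs an extra queue run.
     */
    return blk_mq_get_driver_tag(rq, &hctx, false);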

Split blk_mq_dispatch_wait_add() to account for both cases, and
rename it to blk_mq_mark_tag_wait() to better reflect what it
does now.

Without this patch, shared and non-shared performance is about the same
with 4 fio threads hammering on a single null_blk device (~410K, at 75%
sys). With the patch, the shared case is the same, but the non-shared
tags case runs at 431K at 71% sys.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq.c

index a2a4271..3295859 100644
@@ -1006,44 +1006,68 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
        return 1;
 }
 
-static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
-                                    struct request *rq)
+/*
+ * Mark us waiting for a tag. For shared tags, this involves hooking us into
+ * the tag wakeups. For non-shared tags, we can simply mark us needing a
+ * restart. For both cases, take care to check the condition again after
+ * marking us as waiting.
+ */
+static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
+                                struct request *rq)
 {
        struct blk_mq_hw_ctx *this_hctx = *hctx;
-       wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
+       bool shared_tags = (this_hctx->flags & BLK_MQ_F_TAG_SHARED) != 0;
        struct sbq_wait_state *ws;
+       wait_queue_entry_t *wait;
+       bool ret;
 
-       if (!list_empty_careful(&wait->entry))
-               return false;
+       if (!shared_tags) {
+               if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state))
+                       set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state);
+       } else {
+               wait = &this_hctx->dispatch_wait;
+               if (!list_empty_careful(&wait->entry))
+                       return false;
 
-       spin_lock(&this_hctx->lock);
-       if (!list_empty(&wait->entry)) {
-               spin_unlock(&this_hctx->lock);
-               return false;
-       }
+               spin_lock(&this_hctx->lock);
+               if (!list_empty(&wait->entry)) {
+                       spin_unlock(&this_hctx->lock);
+                       return false;
+               }
 
-       ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
-       add_wait_queue(&ws->wait, wait);
+               ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
+               add_wait_queue(&ws->wait, wait);
+       }
 
        /*
         * It's possible that a tag was freed in the window between the
         * allocation failure and adding the hardware queue to the wait
         * queue.
         */
-       if (!blk_mq_get_driver_tag(rq, hctx, false)) {
+       ret = blk_mq_get_driver_tag(rq, hctx, false);
+
+       if (!shared_tags) {
+               /*
+                * Don't clear RESTART here, someone else could have set it.
+                * At most this will cost an extra queue run.
+                */
+               return ret;
+       } else {
+               if (!ret) {
+                       spin_unlock(&this_hctx->lock);
+                       return false;
+               }
+
+               /*
+                * We got a tag, remove ourselves from the wait queue to ensure
+                * someone else gets the wakeup.
+                */
+               spin_lock_irq(&ws->wait.lock);
+               list_del_init(&wait->entry);
+               spin_unlock_irq(&ws->wait.lock);
                spin_unlock(&this_hctx->lock);
-               return false;
+               return true;
        }
-
-       /*
-        * We got a tag, remove ourselves from the wait queue to ensure
-        * someone else gets the wakeup.
-        */
-       spin_lock_irq(&ws->wait.lock);
-       list_del_init(&wait->entry);
-       spin_unlock_irq(&ws->wait.lock);
-       spin_unlock(&this_hctx->lock);
-       return true;
 }
 
 bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
@@ -1076,10 +1100,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                         * before we add this entry back on the dispatch list,
                         * we'll re-run it below.
                         */
-                       if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
+                       if (!blk_mq_mark_tag_wait(&hctx, rq)) {
                                if (got_budget)
                                        blk_mq_put_dispatch_budget(hctx);
-                               no_tag = true;
+                               /*
+                                * For non-shared tags, the RESTART check
+                                * will suffice.
+                                */
+                               if (hctx->flags & BLK_MQ_F_TAG_SHARED)
+                                       no_tag = true;
                                break;
                        }
                }