block/blk-mq-sched.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * blk-mq scheduling framework
   4  *
   5  * Copyright (C) 2016 Jens Axboe
   6  */
   7 #include <linux/kernel.h>
   8 #include <linux/module.h>
   9 #include <linux/blk-mq.h>
  10 #include <linux/list_sort.h>
  11
  12 #include <trace/events/block.h>
  13
  14 #include "blk.h"
  15 #include "blk-mq.h"
  16 #include "blk-mq-debugfs.h"
  17 #include "blk-mq-sched.h"
  18 #include "blk-mq-tag.h"
  19 #include "blk-wbt.h"
  20
  21 /*
  22  * Mark a hardware queue as needing a restart.
  23  */
  24 void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
  25 {
  26         if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  27                 return;
  28
  29         set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  30 }
  31 EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
  32
  33 void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
  34 {
  35         clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  36
  37         /*
  38          * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
  39          * in blk_mq_run_hw_queue(). Its pair is the barrier in
  40          * blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
  41          * meantime new request added to hctx->dispatch is missed to check in
  42          * blk_mq_run_hw_queue().
  43          */
  44         smp_mb();
  45
  46         blk_mq_run_hw_queue(hctx, true);
  47 }
  48
  49 static int sched_rq_cmp(void *priv, const struct list_head *a,
  50                         const struct list_head *b)
  51 {
  52         struct request *rqa = container_of(a, struct request, queuelist);
  53         struct request *rqb = container_of(b, struct request, queuelist);
  54
  55         return rqa->mq_hctx > rqb->mq_hctx;
  56 }
  57
  58 static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
  59 {
  60         struct blk_mq_hw_ctx *hctx =
  61                 list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
  62         struct request *rq;
  63         LIST_HEAD(hctx_list);
  64         unsigned int count = 0;
  65
  66         list_for_each_entry(rq, rq_list, queuelist) {
  67                 if (rq->mq_hctx != hctx) {
  68                         list_cut_before(&hctx_list, rq_list, &rq->queuelist);
  69                         goto dispatch;
  70                 }
  71                 count++;
  72         }
  73         list_splice_tail_init(rq_list, &hctx_list);
  74
  75 dispatch:
  76         return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
  77 }
  78
  79 #define BLK_MQ_BUDGET_DELAY     3               /* ms units */
  80
  81 /*
  82  * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
  83  * its queue by itself in its completion handler, so we don't need to
  84  * restart queue if .get_budget() fails to get the budget.
  85  *
  86  * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
  87  * be run again.  This is necessary to avoid starving flushes.
  88  */
  89 static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  90 {
  91         struct request_queue *q = hctx->queue;
  92         struct elevator_queue *e = q->elevator;
  93         bool multi_hctxs = false, run_queue = false;
  94         bool dispatched = false, busy = false;
  95         unsigned int max_dispatch;
  96         LIST_HEAD(rq_list);
  97         int count = 0;
  98
  99         if (hctx->dispatch_busy)
 100                 max_dispatch = 1;
 101         else
 102                 max_dispatch = hctx->queue->nr_requests;
 103
 104         do {
 105                 struct request *rq;
 106                 int budget_token;
 107
 108                 if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
 109                         break;
 110
 111                 if (!list_empty_careful(&hctx->dispatch)) {
 112                         busy = true;
 113                         break;
 114                 }
 115
 116                 budget_token = blk_mq_get_dispatch_budget(q);
 117                 if (budget_token < 0)
 118                         break;
 119
 120                 rq = e->type->ops.dispatch_request(hctx);
 121                 if (!rq) {
 122                         blk_mq_put_dispatch_budget(q, budget_token);
 123                         /*
 124                          * We're releasing without dispatching. Holding the
 125                          * budget could have blocked any "hctx"s with the
 126                          * same queue and if we didn't dispatch then there's
 127                          * no guarantee anyone will kick the queue.  Kick it
 128                          * ourselves.
 129                          */
 130                         run_queue = true;
 131                         break;
 132                 }
 133
 134                 blk_mq_set_rq_budget_token(rq, budget_token);
 135
 136                 /*
 137                  * Now this rq owns the budget which has to be released
 138                  * if this rq won't be queued to driver via .queue_rq()
 139                  * in blk_mq_dispatch_rq_list().
 140                  */
 141                 list_add_tail(&rq->queuelist, &rq_list);
 142                 count++;
 143                 if (rq->mq_hctx != hctx)
 144                         multi_hctxs = true;
 145
 146                 /*
 147                  * If we cannot get tag for the request, stop dequeueing
 148                  * requests from the IO scheduler. We are unlikely to be able
 149                  * to submit them anyway and it creates false impression for
 150                  * scheduling heuristics that the device can take more IO.
 151                  */
 152                 if (!blk_mq_get_driver_tag(rq))
 153                         break;
 154         } while (count < max_dispatch);
 155
 156         if (!count) {
 157                 if (run_queue)
 158                         blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
 159         } else if (multi_hctxs) {
 160                 /*
 161                  * Requests from different hctx may be dequeued from some
 162                  * schedulers, such as bfq and deadline.
 163                  *
 164                  * Sort the requests in the list according to their hctx,
 165                  * dispatch batching requests from same hctx at a time.
 166                  */
 167                 list_sort(NULL, &rq_list, sched_rq_cmp);
 168                 do {
 169                         dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
 170                 } while (!list_empty(&rq_list));
 171         } else {
 172                 dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
 173         }
 174
 175         if (busy)
 176                 return -EAGAIN;
 177         return !!dispatched;
 178 }
 179
 180 static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 181 {
 182         unsigned long end = jiffies + HZ;
 183         int ret;
 184
 185         do {
 186                 ret = __blk_mq_do_dispatch_sched(hctx);
 187                 if (ret != 1)
 188                         break;
 189                 if (need_resched() || time_is_before_jiffies(end)) {
 190                         blk_mq_delay_run_hw_queue(hctx, 0);
 191                         break;
 192                 }
 193         } while (1);
 194
 195         return ret;
 196 }
 197
 198 static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
 199                                           struct blk_mq_ctx *ctx)
 200 {
 201         unsigned short idx = ctx->index_hw[hctx->type];
 202
 203         if (++idx == hctx->nr_ctx)
 204                 idx = 0;
 205
 206         return hctx->ctxs[idx];
 207 }
 208
 209 /*
 210  * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 211  * its queue by itself in its completion handler, so we don't need to
 212  * restart queue if .get_budget() fails to get the budget.
 213  *
 214  * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 215  * be run again.  This is necessary to avoid starving flushes.
 216  */
 217 static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
 218 {
 219         struct request_queue *q = hctx->queue;
 220         LIST_HEAD(rq_list);
 221         struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
 222         int ret = 0;
 223         struct request *rq;
 224
 225         do {
 226                 int budget_token;
 227
 228                 if (!list_empty_careful(&hctx->dispatch)) {
 229                         ret = -EAGAIN;
 230                         break;
 231                 }
 232
 233                 if (!sbitmap_any_bit_set(&hctx->ctx_map))
 234                         break;
 235
 236                 budget_token = blk_mq_get_dispatch_budget(q);
 237                 if (budget_token < 0)
 238                         break;
 239
 240                 rq = blk_mq_dequeue_from_ctx(hctx, ctx);
 241                 if (!rq) {
 242                         blk_mq_put_dispatch_budget(q, budget_token);
 243                         /*
 244                          * We're releasing without dispatching. Holding the
 245                          * budget could have blocked any "hctx"s with the
 246                          * same queue and if we didn't dispatch then there's
 247                          * no guarantee anyone will kick the queue.  Kick it
 248                          * ourselves.
 249                          */
 250                         blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
 251                         break;
 252                 }
 253
 254                 blk_mq_set_rq_budget_token(rq, budget_token);
 255
 256                 /*
 257                  * Now this rq owns the budget which has to be released
 258                  * if this rq won't be queued to driver via .queue_rq()
 259                  * in blk_mq_dispatch_rq_list().
 260                  */
 261                 list_add(&rq->queuelist, &rq_list);
 262
 263                 /* round robin for fair dispatch */
 264                 ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
 265
 266         } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
 267
 268         WRITE_ONCE(hctx->dispatch_from, ctx);
 269         return ret;
 270 }
 271
 272 static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 273 {
 274         struct request_queue *q = hctx->queue;
 275         const bool has_sched = q->elevator;
 276         int ret = 0;
 277         LIST_HEAD(rq_list);
 278
 279         /*
 280          * If we have previous entries on our dispatch list, grab them first for
 281          * more fair dispatch.
 282          */
 283         if (!list_empty_careful(&hctx->dispatch)) {
 284                 spin_lock(&hctx->lock);
 285                 if (!list_empty(&hctx->dispatch))
 286                         list_splice_init(&hctx->dispatch, &rq_list);
 287                 spin_unlock(&hctx->lock);
 288         }
 289
 290         /*
 291          * Only ask the scheduler for requests, if we didn't have residual
 292          * requests from the dispatch list. This is to avoid the case where
 293          * we only ever dispatch a fraction of the requests available because
 294          * of low device queue depth. Once we pull requests out of the IO
 295          * scheduler, we can no longer merge or sort them. So it's best to
 296          * leave them there for as long as we can. Mark the hw queue as
 297          * needing a restart in that case.
 298          *
 299          * We want to dispatch from the scheduler if there was nothing
 300          * on the dispatch list or we were able to dispatch from the
 301          * dispatch list.
 302          */
 303         if (!list_empty(&rq_list)) {
 304                 blk_mq_sched_mark_restart_hctx(hctx);
 305                 if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
 306                         if (has_sched)
 307                                 ret = blk_mq_do_dispatch_sched(hctx);
 308                         else
 309                                 ret = blk_mq_do_dispatch_ctx(hctx);
 310                 }
 311         } else if (has_sched) {
 312                 ret = blk_mq_do_dispatch_sched(hctx);
 313         } else if (hctx->dispatch_busy) {
 314                 /* dequeue request one by one from sw queue if queue is busy */
 315                 ret = blk_mq_do_dispatch_ctx(hctx);
 316         } else {
 317                 blk_mq_flush_busy_ctxs(hctx, &rq_list);
 318                 blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
 319         }
 320
 321         return ret;
 322 }
 323
 324 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 325 {
 326         struct request_queue *q = hctx->queue;
 327
 328         /* RCU or SRCU read lock is needed before checking quiesced flag */
 329         if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
 330                 return;
 331
 332         hctx->run++;
 333
 334         /*
 335          * A return of -EAGAIN is an indication that hctx->dispatch is not
 336          * empty and we must run again in order to avoid starving flushes.
 337          */
 338         if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
 339                 if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
 340                         blk_mq_run_hw_queue(hctx, true);
 341         }
 342 }
 343
 344 bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 345                 unsigned int nr_segs)
 346 {
 347         struct elevator_queue *e = q->elevator;
 348         struct blk_mq_ctx *ctx;
 349         struct blk_mq_hw_ctx *hctx;
 350         bool ret = false;
 351         enum hctx_type type;
 352
 353         if (e && e->type->ops.bio_merge) {
 354                 ret = e->type->ops.bio_merge(q, bio, nr_segs);
 355                 goto out_put;
 356         }
 357
 358         ctx = blk_mq_get_ctx(q);
 359         hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 360         type = hctx->type;
 361         if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
 362             list_empty_careful(&ctx->rq_lists[type]))
 363                 goto out_put;
 364
 365         /* default per sw-queue merge */
 366         spin_lock(&ctx->lock);
 367         /*
 368          * Reverse check our software queue for entries that we could
 369          * potentially merge with. Currently includes a hand-wavy stop
 370          * count of 8, to not spend too much time checking for merges.
 371          */
 372         if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs))
 373                 ret = true;
 374
 375         spin_unlock(&ctx->lock);
 376 out_put:
 377         return ret;
 378 }
 379
 380 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
 381                                    struct list_head *free)
 382 {
 383         return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
 384 }
 385 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 386
 387 static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 388                                        struct request *rq)
 389 {
 390         /*
 391          * dispatch flush and passthrough rq directly
 392          *
 393          * passthrough request has to be added to hctx->dispatch directly.
 394          * For some reason, device may be in one situation which can't
 395          * handle FS request, so STS_RESOURCE is always returned and the
 396          * FS request will be added to hctx->dispatch. However passthrough
 397          * request may be required at that time for fixing the problem. If
 398          * passthrough request is added to scheduler queue, there isn't any
 399          * chance to dispatch it given we prioritize requests in hctx->dispatch.
 400          */
 401         if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
 402                 return true;
 403
 404         return false;
 405 }
 406
 407 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 408                                  bool run_queue, bool async)
 409 {
 410         struct request_queue *q = rq->q;
 411         struct elevator_queue *e = q->elevator;
 412         struct blk_mq_ctx *ctx = rq->mq_ctx;
 413         struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 414
 415         WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
 416
 417         if (blk_mq_sched_bypass_insert(hctx, rq)) {
 418                 /*
 419                  * Firstly normal IO request is inserted to scheduler queue or
 420                  * sw queue, meantime we add flush request to dispatch queue(
 421                  * hctx->dispatch) directly and there is at most one in-flight
 422                  * flush request for each hw queue, so it doesn't matter to add
 423                  * flush request to tail or front of the dispatch queue.
 424                  *
 425                  * Secondly in case of NCQ, flush request belongs to non-NCQ
 426                  * command, and queueing it will fail when there is any
 427                  * in-flight normal IO request(NCQ command). When adding flush
 428                  * rq to the front of hctx->dispatch, it is easier to introduce
 429                  * extra time to flush rq's latency because of S_SCHED_RESTART
 430                  * compared with adding to the tail of dispatch queue, then
 431                  * chance of flush merge is increased, and less flush requests
 432                  * will be issued to controller. It is observed that ~10% time
 433                  * is saved in blktests block/004 on disk attached to AHCI/NCQ
 434                  * drive when adding flush rq to the front of hctx->dispatch.
 435                  *
 436                  * Simply queue flush rq to the front of hctx->dispatch so that
 437                  * intensive flush workloads can benefit in case of NCQ HW.
 438                  */
 439                 at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
 440                 blk_mq_request_bypass_insert(rq, at_head, false);
 441                 goto run;
 442         }
 443
 444         if (e) {
 445                 LIST_HEAD(list);
 446
 447                 list_add(&rq->queuelist, &list);
 448                 e->type->ops.insert_requests(hctx, &list, at_head);
 449         } else {
 450                 spin_lock(&ctx->lock);
 451                 __blk_mq_insert_request(hctx, rq, at_head);
 452                 spin_unlock(&ctx->lock);
 453         }
 454
 455 run:
 456         if (run_queue)
 457                 blk_mq_run_hw_queue(hctx, async);
 458 }
 459
 460 void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
 461                                   struct blk_mq_ctx *ctx,
 462                                   struct list_head *list, bool run_queue_async)
 463 {
 464         struct elevator_queue *e;
 465         struct request_queue *q = hctx->queue;
 466
 467         /*
 468          * blk_mq_sched_insert_requests() is called from flush plug
 469          * context only, and hold one usage counter to prevent queue
 470          * from being released.
 471          */
 472         percpu_ref_get(&q->q_usage_counter);
 473
 474         e = hctx->queue->elevator;
 475         if (e) {
 476                 e->type->ops.insert_requests(hctx, list, false);
 477         } else {
 478                 /*
 479                  * try to issue requests directly if the hw queue isn't
 480                  * busy in case of 'none' scheduler, and this way may save
 481                  * us one extra enqueue & dequeue to sw queue.
 482                  */
 483                 if (!hctx->dispatch_busy && !run_queue_async) {
 484                         blk_mq_run_dispatch_ops(hctx->queue,
 485                                 blk_mq_try_issue_list_directly(hctx, list));
 486                         if (list_empty(list))
 487                                 goto out;
 488                 }
 489                 blk_mq_insert_requests(hctx, ctx, list);
 490         }
 491
 492         blk_mq_run_hw_queue(hctx, run_queue_async);
 493  out:
 494         percpu_ref_put(&q->q_usage_counter);
 495 }
 496
 497 static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 498                                           struct blk_mq_hw_ctx *hctx,
 499                                           unsigned int hctx_idx)
 500 {
 501         if (blk_mq_is_shared_tags(q->tag_set->flags)) {
 502                 hctx->sched_tags = q->sched_shared_tags;
 503                 return 0;
 504         }
 505
 506         hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
 507                                                     q->nr_requests);
 508
 509         if (!hctx->sched_tags)
 510                 return -ENOMEM;
 511         return 0;
 512 }
 513
 514 static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
 515 {
 516         blk_mq_free_rq_map(queue->sched_shared_tags);
 517         queue->sched_shared_tags = NULL;
 518 }
 519
 520 /* called in queue's release handler, tagset has gone away */
 521 static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 522 {
 523         struct blk_mq_hw_ctx *hctx;
 524         unsigned long i;
 525
 526         queue_for_each_hw_ctx(q, hctx, i) {
 527                 if (hctx->sched_tags) {
 528                         if (!blk_mq_is_shared_tags(flags))
 529                                 blk_mq_free_rq_map(hctx->sched_tags);
 530                         hctx->sched_tags = NULL;
 531                 }
 532         }
 533
 534         if (blk_mq_is_shared_tags(flags))
 535                 blk_mq_exit_sched_shared_tags(q);
 536 }
 537
 538 static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
 539 {
 540         struct blk_mq_tag_set *set = queue->tag_set;
 541
 542         /*
 543          * Set initial depth at max so that we don't need to reallocate for
 544          * updating nr_requests.
 545          */
 546         queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
 547                                                 BLK_MQ_NO_HCTX_IDX,
 548                                                 MAX_SCHED_RQ);
 549         if (!queue->sched_shared_tags)
 550                 return -ENOMEM;
 551
 552         blk_mq_tag_update_sched_shared_tags(queue);
 553
 554         return 0;
 555 }
 556
 557 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 558 {
 559         unsigned int flags = q->tag_set->flags;
 560         struct blk_mq_hw_ctx *hctx;
 561         struct elevator_queue *eq;
 562         unsigned long i;
 563         int ret;
 564
 565         if (!e) {
 566                 blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
 567                 q->elevator = NULL;
 568                 q->nr_requests = q->tag_set->queue_depth;
 569                 return 0;
 570         }
 571
 572         /*
 573          * Default to double of smaller one between hw queue_depth and 128,
 574          * since we don't split into sync/async like the old code did.
 575          * Additionally, this is a per-hw queue depth.
 576          */
 577         q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
 578                                    BLKDEV_DEFAULT_RQ);
 579
 580         if (blk_mq_is_shared_tags(flags)) {
 581                 ret = blk_mq_init_sched_shared_tags(q);
 582                 if (ret)
 583                         return ret;
 584         }
 585
 586         queue_for_each_hw_ctx(q, hctx, i) {
 587                 ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
 588                 if (ret)
 589                         goto err_free_map_and_rqs;
 590         }
 591
 592         ret = e->ops.init_sched(q, e);
 593         if (ret)
 594                 goto err_free_map_and_rqs;
 595
 596         mutex_lock(&q->debugfs_mutex);
 597         blk_mq_debugfs_register_sched(q);
 598         mutex_unlock(&q->debugfs_mutex);
 599
 600         queue_for_each_hw_ctx(q, hctx, i) {
 601                 if (e->ops.init_hctx) {
 602                         ret = e->ops.init_hctx(hctx, i);
 603                         if (ret) {
 604                                 eq = q->elevator;
 605                                 blk_mq_sched_free_rqs(q);
 606                                 blk_mq_exit_sched(q, eq);
 607                                 kobject_put(&eq->kobj);
 608                                 return ret;
 609                         }
 610                 }
 611                 mutex_lock(&q->debugfs_mutex);
 612                 blk_mq_debugfs_register_sched_hctx(q, hctx);
 613                 mutex_unlock(&q->debugfs_mutex);
 614         }
 615
 616         return 0;
 617
 618 err_free_map_and_rqs:
 619         blk_mq_sched_free_rqs(q);
 620         blk_mq_sched_tags_teardown(q, flags);
 621
 622         q->elevator = NULL;
 623         return ret;
 624 }
 625
 626 /*
 627  * called in either blk_queue_cleanup or elevator_switch, tagset
 628  * is required for freeing requests
 629  */
 630 void blk_mq_sched_free_rqs(struct request_queue *q)
 631 {
 632         struct blk_mq_hw_ctx *hctx;
 633         unsigned long i;
 634
 635         if (blk_mq_is_shared_tags(q->tag_set->flags)) {
 636                 blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
 637                                 BLK_MQ_NO_HCTX_IDX);
 638         } else {
 639                 queue_for_each_hw_ctx(q, hctx, i) {
 640                         if (hctx->sched_tags)
 641                                 blk_mq_free_rqs(q->tag_set,
 642                                                 hctx->sched_tags, i);
 643                 }
 644         }
 645 }
 646
 647 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 648 {
 649         struct blk_mq_hw_ctx *hctx;
 650         unsigned long i;
 651         unsigned int flags = 0;
 652
 653         queue_for_each_hw_ctx(q, hctx, i) {
 654                 mutex_lock(&q->debugfs_mutex);
 655                 blk_mq_debugfs_unregister_sched_hctx(hctx);
 656                 mutex_unlock(&q->debugfs_mutex);
 657
 658                 if (e->type->ops.exit_hctx && hctx->sched_data) {
 659                         e->type->ops.exit_hctx(hctx, i);
 660                         hctx->sched_data = NULL;
 661                 }
 662                 flags = hctx->flags;
 663         }
 664
 665         mutex_lock(&q->debugfs_mutex);
 666         blk_mq_debugfs_unregister_sched(q);
 667         mutex_unlock(&q->debugfs_mutex);
 668
 669         if (e->type->ops.exit_sched)
 670                 e->type->ops.exit_sched(e);
 671         blk_mq_sched_tags_teardown(q, flags);
 672         q->elevator = NULL;
 673 }