Add binder to deathconfig for arm64.
[platform/kernel/linux-rpi.git] / block / mq-deadline.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
4  *  for the blk-mq scheduling framework
5  *
6  *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
7  */
8 #include <linux/kernel.h>
9 #include <linux/fs.h>
10 #include <linux/blkdev.h>
11 #include <linux/blk-mq.h>
12 #include <linux/elevator.h>
13 #include <linux/bio.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/init.h>
17 #include <linux/compiler.h>
18 #include <linux/rbtree.h>
19 #include <linux/sbitmap.h>
20
21 #include <trace/events/block.h>
22
23 #include "blk.h"
24 #include "blk-mq.h"
25 #include "blk-mq-debugfs.h"
26 #include "blk-mq-tag.h"
27 #include "blk-mq-sched.h"
28
29 /*
30  * See Documentation/block/deadline-iosched.rst
31  */
32 static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
33 static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
34 static const int writes_starved = 2;    /* max times reads can starve a write */
35 static const int fifo_batch = 16;       /* # of sequential requests treated as one
36                                      by the above parameters. For throughput. */
37
38 enum dd_data_dir {
39         DD_READ         = READ,
40         DD_WRITE        = WRITE,
41 };
42
43 enum { DD_DIR_COUNT = 2 };
44
45 enum dd_prio {
46         DD_RT_PRIO      = 0,
47         DD_BE_PRIO      = 1,
48         DD_IDLE_PRIO    = 2,
49         DD_PRIO_MAX     = 2,
50 };
51
52 enum { DD_PRIO_COUNT = 3 };
53
54 /* I/O statistics per I/O priority. */
55 struct io_stats_per_prio {
56         local_t inserted;
57         local_t merged;
58         local_t dispatched;
59         local_t completed;
60 };
61
62 /* I/O statistics for all I/O priorities (enum dd_prio). */
63 struct io_stats {
64         struct io_stats_per_prio stats[DD_PRIO_COUNT];
65 };
66
67 /*
68  * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
69  * present on both sort_list[] and fifo_list[].
70  */
71 struct dd_per_prio {
72         struct list_head dispatch;
73         struct rb_root sort_list[DD_DIR_COUNT];
74         struct list_head fifo_list[DD_DIR_COUNT];
75         /* Next request in FIFO order. Read, write or both are NULL. */
76         struct request *next_rq[DD_DIR_COUNT];
77 };
78
79 struct deadline_data {
80         /*
81          * run time data
82          */
83
84         struct dd_per_prio per_prio[DD_PRIO_COUNT];
85
86         /* Data direction of latest dispatched request. */
87         enum dd_data_dir last_dir;
88         unsigned int batching;          /* number of sequential requests made */
89         unsigned int starved;           /* times reads have starved writes */
90
91         struct io_stats __percpu *stats;
92
93         /*
94          * settings that change how the i/o scheduler behaves
95          */
96         int fifo_expire[DD_DIR_COUNT];
97         int fifo_batch;
98         int writes_starved;
99         int front_merges;
100         u32 async_depth;
101
102         spinlock_t lock;
103         spinlock_t zone_lock;
104 };
105
106 /* Count one event of type 'event_type' and with I/O priority 'prio' */
107 #define dd_count(dd, event_type, prio) do {                             \
108         struct io_stats *io_stats = get_cpu_ptr((dd)->stats);           \
109                                                                         \
110         BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));       \
111         BUILD_BUG_ON(!__same_type((prio), enum dd_prio));               \
112         local_inc(&io_stats->stats[(prio)].event_type);                 \
113         put_cpu_ptr(io_stats);                                          \
114 } while (0)
115
116 /*
117  * Returns the total number of dd_count(dd, event_type, prio) calls across all
118  * CPUs. No locking or barriers since it is fine if the returned sum is slightly
119  * outdated.
120  */
121 #define dd_sum(dd, event_type, prio) ({                                 \
122         unsigned int cpu;                                               \
123         u32 sum = 0;                                                    \
124                                                                         \
125         BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));       \
126         BUILD_BUG_ON(!__same_type((prio), enum dd_prio));               \
127         for_each_present_cpu(cpu)                                       \
128                 sum += local_read(&per_cpu_ptr((dd)->stats, cpu)->      \
129                                   stats[(prio)].event_type);            \
130         sum;                                                            \
131 })
132
133 /* Maps an I/O priority class to a deadline scheduler priority. */
134 static const enum dd_prio ioprio_class_to_prio[] = {
135         [IOPRIO_CLASS_NONE]     = DD_BE_PRIO,
136         [IOPRIO_CLASS_RT]       = DD_RT_PRIO,
137         [IOPRIO_CLASS_BE]       = DD_BE_PRIO,
138         [IOPRIO_CLASS_IDLE]     = DD_IDLE_PRIO,
139 };
140
141 static inline struct rb_root *
142 deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
143 {
144         return &per_prio->sort_list[rq_data_dir(rq)];
145 }
146
147 /*
148  * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
149  * request.
150  */
151 static u8 dd_rq_ioclass(struct request *rq)
152 {
153         return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
154 }
155
156 /*
157  * get the request before `rq' in sector-sorted order
158  */
159 static inline struct request *
160 deadline_earlier_request(struct request *rq)
161 {
162         struct rb_node *node = rb_prev(&rq->rb_node);
163
164         if (node)
165                 return rb_entry_rq(node);
166
167         return NULL;
168 }
169
170 /*
171  * get the request after `rq' in sector-sorted order
172  */
173 static inline struct request *
174 deadline_latter_request(struct request *rq)
175 {
176         struct rb_node *node = rb_next(&rq->rb_node);
177
178         if (node)
179                 return rb_entry_rq(node);
180
181         return NULL;
182 }
183
184 static void
185 deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
186 {
187         struct rb_root *root = deadline_rb_root(per_prio, rq);
188
189         elv_rb_add(root, rq);
190 }
191
192 static inline void
193 deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
194 {
195         const enum dd_data_dir data_dir = rq_data_dir(rq);
196
197         if (per_prio->next_rq[data_dir] == rq)
198                 per_prio->next_rq[data_dir] = deadline_latter_request(rq);
199
200         elv_rb_del(deadline_rb_root(per_prio, rq), rq);
201 }
202
203 /*
204  * remove rq from rbtree and fifo.
205  */
206 static void deadline_remove_request(struct request_queue *q,
207                                     struct dd_per_prio *per_prio,
208                                     struct request *rq)
209 {
210         list_del_init(&rq->queuelist);
211
212         /*
213          * We might not be on the rbtree, if we are doing an insert merge
214          */
215         if (!RB_EMPTY_NODE(&rq->rb_node))
216                 deadline_del_rq_rb(per_prio, rq);
217
218         elv_rqhash_del(q, rq);
219         if (q->last_merge == rq)
220                 q->last_merge = NULL;
221 }
222
223 static void dd_request_merged(struct request_queue *q, struct request *req,
224                               enum elv_merge type)
225 {
226         struct deadline_data *dd = q->elevator->elevator_data;
227         const u8 ioprio_class = dd_rq_ioclass(req);
228         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
229         struct dd_per_prio *per_prio = &dd->per_prio[prio];
230
231         /*
232          * if the merge was a front merge, we need to reposition request
233          */
234         if (type == ELEVATOR_FRONT_MERGE) {
235                 elv_rb_del(deadline_rb_root(per_prio, req), req);
236                 deadline_add_rq_rb(per_prio, req);
237         }
238 }
239
240 /*
241  * Callback function that is invoked after @next has been merged into @req.
242  */
243 static void dd_merged_requests(struct request_queue *q, struct request *req,
244                                struct request *next)
245 {
246         struct deadline_data *dd = q->elevator->elevator_data;
247         const u8 ioprio_class = dd_rq_ioclass(next);
248         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
249
250         dd_count(dd, merged, prio);
251
252         /*
253          * if next expires before rq, assign its expire time to rq
254          * and move into next position (next will be deleted) in fifo
255          */
256         if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
257                 if (time_before((unsigned long)next->fifo_time,
258                                 (unsigned long)req->fifo_time)) {
259                         list_move(&req->queuelist, &next->queuelist);
260                         req->fifo_time = next->fifo_time;
261                 }
262         }
263
264         /*
265          * kill knowledge of next, this one is a goner
266          */
267         deadline_remove_request(q, &dd->per_prio[prio], next);
268 }
269
270 /*
271  * move an entry to dispatch queue
272  */
273 static void
274 deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
275                       struct request *rq)
276 {
277         const enum dd_data_dir data_dir = rq_data_dir(rq);
278
279         per_prio->next_rq[data_dir] = deadline_latter_request(rq);
280
281         /*
282          * take it off the sort and fifo list
283          */
284         deadline_remove_request(rq->q, per_prio, rq);
285 }
286
287 /*
288  * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
289  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
290  */
291 static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
292                                       enum dd_data_dir data_dir)
293 {
294         struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
295
296         /*
297          * rq is expired!
298          */
299         if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
300                 return 1;
301
302         return 0;
303 }
304
305 /*
306  * Check if rq has a sequential request preceding it.
307  */
308 static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
309 {
310         struct request *prev = deadline_earlier_request(rq);
311
312         if (!prev)
313                 return false;
314
315         return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
316 }
317
318 /*
319  * Skip all write requests that are sequential from @rq, even if we cross
320  * a zone boundary.
321  */
322 static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
323                                                 struct request *rq)
324 {
325         sector_t pos = blk_rq_pos(rq);
326         sector_t skipped_sectors = 0;
327
328         while (rq) {
329                 if (blk_rq_pos(rq) != pos + skipped_sectors)
330                         break;
331                 skipped_sectors += blk_rq_sectors(rq);
332                 rq = deadline_latter_request(rq);
333         }
334
335         return rq;
336 }
337
338 /*
339  * For the specified data direction, return the next request to
340  * dispatch using arrival ordered lists.
341  */
342 static struct request *
343 deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
344                       enum dd_data_dir data_dir)
345 {
346         struct request *rq;
347         unsigned long flags;
348
349         if (list_empty(&per_prio->fifo_list[data_dir]))
350                 return NULL;
351
352         rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
353         if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
354                 return rq;
355
356         /*
357          * Look for a write request that can be dispatched, that is one with
358          * an unlocked target zone. For some HDDs, breaking a sequential
359          * write stream can lead to lower throughput, so make sure to preserve
360          * sequential write streams, even if that stream crosses into the next
361          * zones and these zones are unlocked.
362          */
363         spin_lock_irqsave(&dd->zone_lock, flags);
364         list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
365                 if (blk_req_can_dispatch_to_zone(rq) &&
366                     (blk_queue_nonrot(rq->q) ||
367                      !deadline_is_seq_write(dd, rq)))
368                         goto out;
369         }
370         rq = NULL;
371 out:
372         spin_unlock_irqrestore(&dd->zone_lock, flags);
373
374         return rq;
375 }
376
377 /*
378  * For the specified data direction, return the next request to
379  * dispatch using sector position sorted lists.
380  */
381 static struct request *
382 deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
383                       enum dd_data_dir data_dir)
384 {
385         struct request *rq;
386         unsigned long flags;
387
388         rq = per_prio->next_rq[data_dir];
389         if (!rq)
390                 return NULL;
391
392         if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
393                 return rq;
394
395         /*
396          * Look for a write request that can be dispatched, that is one with
397          * an unlocked target zone. For some HDDs, breaking a sequential
398          * write stream can lead to lower throughput, so make sure to preserve
399          * sequential write streams, even if that stream crosses into the next
400          * zones and these zones are unlocked.
401          */
402         spin_lock_irqsave(&dd->zone_lock, flags);
403         while (rq) {
404                 if (blk_req_can_dispatch_to_zone(rq))
405                         break;
406                 if (blk_queue_nonrot(rq->q))
407                         rq = deadline_latter_request(rq);
408                 else
409                         rq = deadline_skip_seq_writes(dd, rq);
410         }
411         spin_unlock_irqrestore(&dd->zone_lock, flags);
412
413         return rq;
414 }
415
416 /*
417  * deadline_dispatch_requests selects the best request according to
418  * read/write expire, fifo_batch, etc
419  */
420 static struct request *__dd_dispatch_request(struct deadline_data *dd,
421                                              struct dd_per_prio *per_prio)
422 {
423         struct request *rq, *next_rq;
424         enum dd_data_dir data_dir;
425         enum dd_prio prio;
426         u8 ioprio_class;
427
428         lockdep_assert_held(&dd->lock);
429
430         if (!list_empty(&per_prio->dispatch)) {
431                 rq = list_first_entry(&per_prio->dispatch, struct request,
432                                       queuelist);
433                 list_del_init(&rq->queuelist);
434                 goto done;
435         }
436
437         /*
438          * batches are currently reads XOR writes
439          */
440         rq = deadline_next_request(dd, per_prio, dd->last_dir);
441         if (rq && dd->batching < dd->fifo_batch)
442                 /* we have a next request are still entitled to batch */
443                 goto dispatch_request;
444
445         /*
446          * at this point we are not running a batch. select the appropriate
447          * data direction (read / write)
448          */
449
450         if (!list_empty(&per_prio->fifo_list[DD_READ])) {
451                 BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
452
453                 if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
454                     (dd->starved++ >= dd->writes_starved))
455                         goto dispatch_writes;
456
457                 data_dir = DD_READ;
458
459                 goto dispatch_find_request;
460         }
461
462         /*
463          * there are either no reads or writes have been starved
464          */
465
466         if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
467 dispatch_writes:
468                 BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
469
470                 dd->starved = 0;
471
472                 data_dir = DD_WRITE;
473
474                 goto dispatch_find_request;
475         }
476
477         return NULL;
478
479 dispatch_find_request:
480         /*
481          * we are not running a batch, find best request for selected data_dir
482          */
483         next_rq = deadline_next_request(dd, per_prio, data_dir);
484         if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
485                 /*
486                  * A deadline has expired, the last request was in the other
487                  * direction, or we have run out of higher-sectored requests.
488                  * Start again from the request with the earliest expiry time.
489                  */
490                 rq = deadline_fifo_request(dd, per_prio, data_dir);
491         } else {
492                 /*
493                  * The last req was the same dir and we have a next request in
494                  * sort order. No expired requests so continue on from here.
495                  */
496                 rq = next_rq;
497         }
498
499         /*
500          * For a zoned block device, if we only have writes queued and none of
501          * them can be dispatched, rq will be NULL.
502          */
503         if (!rq)
504                 return NULL;
505
506         dd->last_dir = data_dir;
507         dd->batching = 0;
508
509 dispatch_request:
510         /*
511          * rq is the selected appropriate request.
512          */
513         dd->batching++;
514         deadline_move_request(dd, per_prio, rq);
515 done:
516         ioprio_class = dd_rq_ioclass(rq);
517         prio = ioprio_class_to_prio[ioprio_class];
518         dd_count(dd, dispatched, prio);
519         /*
520          * If the request needs its target zone locked, do it.
521          */
522         blk_req_zone_write_lock(rq);
523         rq->rq_flags |= RQF_STARTED;
524         return rq;
525 }
526
527 /*
528  * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
529  *
530  * One confusing aspect here is that we get called for a specific
531  * hardware queue, but we may return a request that is for a
532  * different hardware queue. This is because mq-deadline has shared
533  * state for all hardware queues, in terms of sorting, FIFOs, etc.
534  */
535 static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
536 {
537         struct deadline_data *dd = hctx->queue->elevator->elevator_data;
538         struct request *rq;
539         enum dd_prio prio;
540
541         spin_lock(&dd->lock);
542         for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
543                 rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
544                 if (rq)
545                         break;
546         }
547         spin_unlock(&dd->lock);
548
549         return rq;
550 }
551
552 /*
553  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
554  * function is used by __blk_mq_get_tag().
555  */
556 static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
557 {
558         struct deadline_data *dd = data->q->elevator->elevator_data;
559
560         /* Do not throttle synchronous reads. */
561         if (op_is_sync(op) && !op_is_write(op))
562                 return;
563
564         /*
565          * Throttle asynchronous requests and writes such that these requests
566          * do not block the allocation of synchronous requests.
567          */
568         data->shallow_depth = dd->async_depth;
569 }
570
571 /* Called by blk_mq_update_nr_requests(). */
572 static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
573 {
574         struct request_queue *q = hctx->queue;
575         struct deadline_data *dd = q->elevator->elevator_data;
576         struct blk_mq_tags *tags = hctx->sched_tags;
577
578         dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
579
580         sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth);
581 }
582
583 /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
584 static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
585 {
586         dd_depth_updated(hctx);
587         return 0;
588 }
589
590 static void dd_exit_sched(struct elevator_queue *e)
591 {
592         struct deadline_data *dd = e->elevator_data;
593         enum dd_prio prio;
594
595         for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
596                 struct dd_per_prio *per_prio = &dd->per_prio[prio];
597
598                 WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
599                 WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
600         }
601
602         free_percpu(dd->stats);
603
604         kfree(dd);
605 }
606
607 /*
608  * initialize elevator private data (deadline_data).
609  */
610 static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
611 {
612         struct deadline_data *dd;
613         struct elevator_queue *eq;
614         enum dd_prio prio;
615         int ret = -ENOMEM;
616
617         eq = elevator_alloc(q, e);
618         if (!eq)
619                 return ret;
620
621         dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
622         if (!dd)
623                 goto put_eq;
624
625         eq->elevator_data = dd;
626
627         dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
628                                      GFP_KERNEL | __GFP_ZERO);
629         if (!dd->stats)
630                 goto free_dd;
631
632         for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
633                 struct dd_per_prio *per_prio = &dd->per_prio[prio];
634
635                 INIT_LIST_HEAD(&per_prio->dispatch);
636                 INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
637                 INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
638                 per_prio->sort_list[DD_READ] = RB_ROOT;
639                 per_prio->sort_list[DD_WRITE] = RB_ROOT;
640         }
641         dd->fifo_expire[DD_READ] = read_expire;
642         dd->fifo_expire[DD_WRITE] = write_expire;
643         dd->writes_starved = writes_starved;
644         dd->front_merges = 1;
645         dd->last_dir = DD_WRITE;
646         dd->fifo_batch = fifo_batch;
647         spin_lock_init(&dd->lock);
648         spin_lock_init(&dd->zone_lock);
649
650         q->elevator = eq;
651         return 0;
652
653 free_dd:
654         kfree(dd);
655
656 put_eq:
657         kobject_put(&eq->kobj);
658         return ret;
659 }
660
661 /*
662  * Try to merge @bio into an existing request. If @bio has been merged into
663  * an existing request, store the pointer to that request into *@rq.
664  */
665 static int dd_request_merge(struct request_queue *q, struct request **rq,
666                             struct bio *bio)
667 {
668         struct deadline_data *dd = q->elevator->elevator_data;
669         const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
670         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
671         struct dd_per_prio *per_prio = &dd->per_prio[prio];
672         sector_t sector = bio_end_sector(bio);
673         struct request *__rq;
674
675         if (!dd->front_merges)
676                 return ELEVATOR_NO_MERGE;
677
678         __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
679         if (__rq) {
680                 BUG_ON(sector != blk_rq_pos(__rq));
681
682                 if (elv_bio_merge_ok(__rq, bio)) {
683                         *rq = __rq;
684                         if (blk_discard_mergable(__rq))
685                                 return ELEVATOR_DISCARD_MERGE;
686                         return ELEVATOR_FRONT_MERGE;
687                 }
688         }
689
690         return ELEVATOR_NO_MERGE;
691 }
692
693 /*
694  * Attempt to merge a bio into an existing request. This function is called
695  * before @bio is associated with a request.
696  */
697 static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
698                 unsigned int nr_segs)
699 {
700         struct deadline_data *dd = q->elevator->elevator_data;
701         struct request *free = NULL;
702         bool ret;
703
704         spin_lock(&dd->lock);
705         ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
706         spin_unlock(&dd->lock);
707
708         if (free)
709                 blk_mq_free_request(free);
710
711         return ret;
712 }
713
714 /*
715  * add rq to rbtree and fifo
716  */
717 static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
718                               bool at_head)
719 {
720         struct request_queue *q = hctx->queue;
721         struct deadline_data *dd = q->elevator->elevator_data;
722         const enum dd_data_dir data_dir = rq_data_dir(rq);
723         u16 ioprio = req_get_ioprio(rq);
724         u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
725         struct dd_per_prio *per_prio;
726         enum dd_prio prio;
727         LIST_HEAD(free);
728
729         lockdep_assert_held(&dd->lock);
730
731         /*
732          * This may be a requeue of a write request that has locked its
733          * target zone. If it is the case, this releases the zone lock.
734          */
735         blk_req_zone_write_unlock(rq);
736
737         prio = ioprio_class_to_prio[ioprio_class];
738         dd_count(dd, inserted, prio);
739         rq->elv.priv[0] = (void *)(uintptr_t)1;
740
741         if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
742                 blk_mq_free_requests(&free);
743                 return;
744         }
745
746         trace_block_rq_insert(rq);
747
748         per_prio = &dd->per_prio[prio];
749         if (at_head) {
750                 list_add(&rq->queuelist, &per_prio->dispatch);
751         } else {
752                 deadline_add_rq_rb(per_prio, rq);
753
754                 if (rq_mergeable(rq)) {
755                         elv_rqhash_add(q, rq);
756                         if (!q->last_merge)
757                                 q->last_merge = rq;
758                 }
759
760                 /*
761                  * set expire time and add to fifo list
762                  */
763                 rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
764                 list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
765         }
766 }
767
768 /*
769  * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests().
770  */
771 static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
772                                struct list_head *list, bool at_head)
773 {
774         struct request_queue *q = hctx->queue;
775         struct deadline_data *dd = q->elevator->elevator_data;
776
777         spin_lock(&dd->lock);
778         while (!list_empty(list)) {
779                 struct request *rq;
780
781                 rq = list_first_entry(list, struct request, queuelist);
782                 list_del_init(&rq->queuelist);
783                 dd_insert_request(hctx, rq, at_head);
784         }
785         spin_unlock(&dd->lock);
786 }
787
788 /* Callback from inside blk_mq_rq_ctx_init(). */
789 static void dd_prepare_request(struct request *rq)
790 {
791         rq->elv.priv[0] = NULL;
792 }
793
794 static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
795 {
796         struct deadline_data *dd = hctx->queue->elevator->elevator_data;
797         enum dd_prio p;
798
799         for (p = 0; p <= DD_PRIO_MAX; p++)
800                 if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
801                         return true;
802
803         return false;
804 }
805
806 /*
807  * Callback from inside blk_mq_free_request().
808  *
809  * For zoned block devices, write unlock the target zone of
810  * completed write requests. Do this while holding the zone lock
811  * spinlock so that the zone is never unlocked while deadline_fifo_request()
812  * or deadline_next_request() are executing. This function is called for
813  * all requests, whether or not these requests complete successfully.
814  *
815  * For a zoned block device, __dd_dispatch_request() may have stopped
816  * dispatching requests if all the queued requests are write requests directed
817  * at zones that are already locked due to on-going write requests. To ensure
818  * write request dispatch progress in this case, mark the queue as needing a
819  * restart to ensure that the queue is run again after completion of the
820  * request and zones being unlocked.
821  */
822 static void dd_finish_request(struct request *rq)
823 {
824         struct request_queue *q = rq->q;
825         struct deadline_data *dd = q->elevator->elevator_data;
826         const u8 ioprio_class = dd_rq_ioclass(rq);
827         const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
828
829         /*
830          * The block layer core may call dd_finish_request() without having
831          * called dd_insert_requests(). Hence only update statistics for
832          * requests for which dd_insert_requests() has been called. See also
833          * blk_mq_request_bypass_insert().
834          */
835         if (rq->elv.priv[0])
836                 dd_count(dd, completed, prio);
837
838         if (blk_queue_is_zoned(q)) {
839                 unsigned long flags;
840
841                 spin_lock_irqsave(&dd->zone_lock, flags);
842                 blk_req_zone_write_unlock(rq);
843                 spin_unlock_irqrestore(&dd->zone_lock, flags);
844
845                 if (dd_has_write_work(rq->mq_hctx))
846                         blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
847         }
848 }
849
850 static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
851 {
852         return !list_empty_careful(&per_prio->dispatch) ||
853                 !list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
854                 !list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
855 }
856
857 static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
858 {
859         struct deadline_data *dd = hctx->queue->elevator->elevator_data;
860         enum dd_prio prio;
861
862         for (prio = 0; prio <= DD_PRIO_MAX; prio++)
863                 if (dd_has_work_for_prio(&dd->per_prio[prio]))
864                         return true;
865
866         return false;
867 }
868
869 /*
870  * sysfs parts below
871  */
872 #define SHOW_INT(__FUNC, __VAR)                                         \
873 static ssize_t __FUNC(struct elevator_queue *e, char *page)             \
874 {                                                                       \
875         struct deadline_data *dd = e->elevator_data;                    \
876                                                                         \
877         return sysfs_emit(page, "%d\n", __VAR);                         \
878 }
879 #define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
880 SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
881 SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
882 SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
883 SHOW_INT(deadline_front_merges_show, dd->front_merges);
884 SHOW_INT(deadline_async_depth_show, dd->async_depth);
885 SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
886 #undef SHOW_INT
887 #undef SHOW_JIFFIES
888
889 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                 \
890 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
891 {                                                                       \
892         struct deadline_data *dd = e->elevator_data;                    \
893         int __data, __ret;                                              \
894                                                                         \
895         __ret = kstrtoint(page, 0, &__data);                            \
896         if (__ret < 0)                                                  \
897                 return __ret;                                           \
898         if (__data < (MIN))                                             \
899                 __data = (MIN);                                         \
900         else if (__data > (MAX))                                        \
901                 __data = (MAX);                                         \
902         *(__PTR) = __CONV(__data);                                      \
903         return count;                                                   \
904 }
905 #define STORE_INT(__FUNC, __PTR, MIN, MAX)                              \
906         STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
907 #define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)                          \
908         STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
909 STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
910 STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
911 STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
912 STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
913 STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
914 STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
915 #undef STORE_FUNCTION
916 #undef STORE_INT
917 #undef STORE_JIFFIES
918
919 #define DD_ATTR(name) \
920         __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
921
922 static struct elv_fs_entry deadline_attrs[] = {
923         DD_ATTR(read_expire),
924         DD_ATTR(write_expire),
925         DD_ATTR(writes_starved),
926         DD_ATTR(front_merges),
927         DD_ATTR(async_depth),
928         DD_ATTR(fifo_batch),
929         __ATTR_NULL
930 };
931
932 #ifdef CONFIG_BLK_DEBUG_FS
933 #define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)               \
934 static void *deadline_##name##_fifo_start(struct seq_file *m,           \
935                                           loff_t *pos)                  \
936         __acquires(&dd->lock)                                           \
937 {                                                                       \
938         struct request_queue *q = m->private;                           \
939         struct deadline_data *dd = q->elevator->elevator_data;          \
940         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
941                                                                         \
942         spin_lock(&dd->lock);                                           \
943         return seq_list_start(&per_prio->fifo_list[data_dir], *pos);    \
944 }                                                                       \
945                                                                         \
946 static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,   \
947                                          loff_t *pos)                   \
948 {                                                                       \
949         struct request_queue *q = m->private;                           \
950         struct deadline_data *dd = q->elevator->elevator_data;          \
951         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
952                                                                         \
953         return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);   \
954 }                                                                       \
955                                                                         \
956 static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)    \
957         __releases(&dd->lock)                                           \
958 {                                                                       \
959         struct request_queue *q = m->private;                           \
960         struct deadline_data *dd = q->elevator->elevator_data;          \
961                                                                         \
962         spin_unlock(&dd->lock);                                         \
963 }                                                                       \
964                                                                         \
965 static const struct seq_operations deadline_##name##_fifo_seq_ops = {   \
966         .start  = deadline_##name##_fifo_start,                         \
967         .next   = deadline_##name##_fifo_next,                          \
968         .stop   = deadline_##name##_fifo_stop,                          \
969         .show   = blk_mq_debugfs_rq_show,                               \
970 };                                                                      \
971                                                                         \
972 static int deadline_##name##_next_rq_show(void *data,                   \
973                                           struct seq_file *m)           \
974 {                                                                       \
975         struct request_queue *q = data;                                 \
976         struct deadline_data *dd = q->elevator->elevator_data;          \
977         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
978         struct request *rq = per_prio->next_rq[data_dir];               \
979                                                                         \
980         if (rq)                                                         \
981                 __blk_mq_debugfs_rq_show(m, rq);                        \
982         return 0;                                                       \
983 }
984
985 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
986 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
987 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
988 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
989 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
990 DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
991 #undef DEADLINE_DEBUGFS_DDIR_ATTRS
992
993 static int deadline_batching_show(void *data, struct seq_file *m)
994 {
995         struct request_queue *q = data;
996         struct deadline_data *dd = q->elevator->elevator_data;
997
998         seq_printf(m, "%u\n", dd->batching);
999         return 0;
1000 }
1001
1002 static int deadline_starved_show(void *data, struct seq_file *m)
1003 {
1004         struct request_queue *q = data;
1005         struct deadline_data *dd = q->elevator->elevator_data;
1006
1007         seq_printf(m, "%u\n", dd->starved);
1008         return 0;
1009 }
1010
1011 static int dd_async_depth_show(void *data, struct seq_file *m)
1012 {
1013         struct request_queue *q = data;
1014         struct deadline_data *dd = q->elevator->elevator_data;
1015
1016         seq_printf(m, "%u\n", dd->async_depth);
1017         return 0;
1018 }
1019
1020 /* Number of requests queued for a given priority level. */
1021 static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
1022 {
1023         return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
1024 }
1025
1026 static int dd_queued_show(void *data, struct seq_file *m)
1027 {
1028         struct request_queue *q = data;
1029         struct deadline_data *dd = q->elevator->elevator_data;
1030
1031         seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
1032                    dd_queued(dd, DD_BE_PRIO),
1033                    dd_queued(dd, DD_IDLE_PRIO));
1034         return 0;
1035 }
1036
1037 /* Number of requests owned by the block driver for a given priority. */
1038 static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
1039 {
1040         return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
1041                 - dd_sum(dd, completed, prio);
1042 }
1043
1044 static int dd_owned_by_driver_show(void *data, struct seq_file *m)
1045 {
1046         struct request_queue *q = data;
1047         struct deadline_data *dd = q->elevator->elevator_data;
1048
1049         seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
1050                    dd_owned_by_driver(dd, DD_BE_PRIO),
1051                    dd_owned_by_driver(dd, DD_IDLE_PRIO));
1052         return 0;
1053 }
1054
1055 #define DEADLINE_DISPATCH_ATTR(prio)                                    \
1056 static void *deadline_dispatch##prio##_start(struct seq_file *m,        \
1057                                              loff_t *pos)               \
1058         __acquires(&dd->lock)                                           \
1059 {                                                                       \
1060         struct request_queue *q = m->private;                           \
1061         struct deadline_data *dd = q->elevator->elevator_data;          \
1062         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
1063                                                                         \
1064         spin_lock(&dd->lock);                                           \
1065         return seq_list_start(&per_prio->dispatch, *pos);               \
1066 }                                                                       \
1067                                                                         \
1068 static void *deadline_dispatch##prio##_next(struct seq_file *m,         \
1069                                             void *v, loff_t *pos)       \
1070 {                                                                       \
1071         struct request_queue *q = m->private;                           \
1072         struct deadline_data *dd = q->elevator->elevator_data;          \
1073         struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
1074                                                                         \
1075         return seq_list_next(v, &per_prio->dispatch, pos);              \
1076 }                                                                       \
1077                                                                         \
1078 static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v) \
1079         __releases(&dd->lock)                                           \
1080 {                                                                       \
1081         struct request_queue *q = m->private;                           \
1082         struct deadline_data *dd = q->elevator->elevator_data;          \
1083                                                                         \
1084         spin_unlock(&dd->lock);                                         \
1085 }                                                                       \
1086                                                                         \
1087 static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \
1088         .start  = deadline_dispatch##prio##_start,                      \
1089         .next   = deadline_dispatch##prio##_next,                       \
1090         .stop   = deadline_dispatch##prio##_stop,                       \
1091         .show   = blk_mq_debugfs_rq_show,                               \
1092 }
1093
1094 DEADLINE_DISPATCH_ATTR(0);
1095 DEADLINE_DISPATCH_ATTR(1);
1096 DEADLINE_DISPATCH_ATTR(2);
1097 #undef DEADLINE_DISPATCH_ATTR
1098
1099 #define DEADLINE_QUEUE_DDIR_ATTRS(name)                                 \
1100         {#name "_fifo_list", 0400,                                      \
1101                         .seq_ops = &deadline_##name##_fifo_seq_ops}
1102 #define DEADLINE_NEXT_RQ_ATTR(name)                                     \
1103         {#name "_next_rq", 0400, deadline_##name##_next_rq_show}
1104 static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
1105         DEADLINE_QUEUE_DDIR_ATTRS(read0),
1106         DEADLINE_QUEUE_DDIR_ATTRS(write0),
1107         DEADLINE_QUEUE_DDIR_ATTRS(read1),
1108         DEADLINE_QUEUE_DDIR_ATTRS(write1),
1109         DEADLINE_QUEUE_DDIR_ATTRS(read2),
1110         DEADLINE_QUEUE_DDIR_ATTRS(write2),
1111         DEADLINE_NEXT_RQ_ATTR(read0),
1112         DEADLINE_NEXT_RQ_ATTR(write0),
1113         DEADLINE_NEXT_RQ_ATTR(read1),
1114         DEADLINE_NEXT_RQ_ATTR(write1),
1115         DEADLINE_NEXT_RQ_ATTR(read2),
1116         DEADLINE_NEXT_RQ_ATTR(write2),
1117         {"batching", 0400, deadline_batching_show},
1118         {"starved", 0400, deadline_starved_show},
1119         {"async_depth", 0400, dd_async_depth_show},
1120         {"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
1121         {"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
1122         {"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
1123         {"owned_by_driver", 0400, dd_owned_by_driver_show},
1124         {"queued", 0400, dd_queued_show},
1125         {},
1126 };
1127 #undef DEADLINE_QUEUE_DDIR_ATTRS
1128 #endif
1129
1130 static struct elevator_type mq_deadline = {
1131         .ops = {
1132                 .depth_updated          = dd_depth_updated,
1133                 .limit_depth            = dd_limit_depth,
1134                 .insert_requests        = dd_insert_requests,
1135                 .dispatch_request       = dd_dispatch_request,
1136                 .prepare_request        = dd_prepare_request,
1137                 .finish_request         = dd_finish_request,
1138                 .next_request           = elv_rb_latter_request,
1139                 .former_request         = elv_rb_former_request,
1140                 .bio_merge              = dd_bio_merge,
1141                 .request_merge          = dd_request_merge,
1142                 .requests_merged        = dd_merged_requests,
1143                 .request_merged         = dd_request_merged,
1144                 .has_work               = dd_has_work,
1145                 .init_sched             = dd_init_sched,
1146                 .exit_sched             = dd_exit_sched,
1147                 .init_hctx              = dd_init_hctx,
1148         },
1149
1150 #ifdef CONFIG_BLK_DEBUG_FS
1151         .queue_debugfs_attrs = deadline_queue_debugfs_attrs,
1152 #endif
1153         .elevator_attrs = deadline_attrs,
1154         .elevator_name = "mq-deadline",
1155         .elevator_alias = "deadline",
1156         .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
1157         .elevator_owner = THIS_MODULE,
1158 };
1159 MODULE_ALIAS("mq-deadline-iosched");
1160
1161 static int __init deadline_init(void)
1162 {
1163         return elv_register(&mq_deadline);
1164 }
1165
1166 static void __exit deadline_exit(void)
1167 {
1168         elv_unregister(&mq_deadline);
1169 }
1170
1171 module_init(deadline_init);
1172 module_exit(deadline_exit);
1173
1174 MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
1175 MODULE_LICENSE("GPL");
1176 MODULE_DESCRIPTION("MQ deadline IO scheduler");