drivers/block/null_blk.c
#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>

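/*
 * Per-command state. A command is attached to a request (rq) or a bio,
 * carries the tag it claimed from its nullb_queue, and is linked onto a
 * per-cpu completion list via ll_list when completion is deferred.
 */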
struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
};

struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};

struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct hrtimer timer;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;

struct completion_queue {
	struct llist_head list;
	struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

enum {
	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)");

static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static int irqmode = NULL_IRQ_SOFTIRQ;
module_param(irqmode, int, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

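/*
 * Tag management: each nullb_queue has a bitmap with one bit per command.
 * Tags are claimed with test_and_set_bit_lock() and released with
 * clear_bit_unlock(), so no lock is needed around allocation, and waiters
 * blocked in alloc_cmd() are woken when a tag is returned.
 */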
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
	clear_bit_unlock(tag, nq->tag_map);

	if (waitqueue_active(&nq->wait))
		wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
	unsigned int tag;

	do {
		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
		if (tag >= nq->queue_depth)
			return -1U;
	} while (test_and_set_bit_lock(tag, nq->tag_map));

	return tag;
}

static void free_cmd(struct nullb_cmd *cmd)
{
	put_tag(cmd->nq, cmd->tag);
}

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	unsigned int tag;

	tag = get_tag(nq);
	if (tag != -1U) {
		cmd = &nq->cmds[tag];
		cmd->tag = tag;
		cmd->nq = nq;
		return cmd;
	}

	return NULL;
}

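/*
 * Allocate a command for this queue. If can_wait is set and no tag is
 * free, sleep on the queue's waitqueue until put_tag() wakes us up;
 * otherwise fail with NULL and let the caller back off.
 */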
static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	cmd = __alloc_cmd(nq);
	if (cmd || !can_wait)
		return cmd;

	do {
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		cmd = __alloc_cmd(nq);
		if (cmd)
			break;

		io_schedule();
	} while (1);

	finish_wait(&nq->wait, &wait);
	return cmd;
}

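/*
 * Complete a command back to the block layer, using whichever completion
 * primitive matches the configured queue_mode. blk-mq commands are carved
 * out of the request pdu, so the MQ case returns before free_cmd().
 */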
static void end_cmd(struct nullb_cmd *cmd)
{
	switch (queue_mode) {
	case NULL_Q_MQ:
		blk_mq_end_io(cmd->rq, 0);
		return;
	case NULL_Q_RQ:
		INIT_LIST_HEAD(&cmd->rq->queuelist);
		blk_end_request_all(cmd->rq, 0);
		break;
	case NULL_Q_BIO:
		bio_endio(cmd->bio, 0);
		break;
	}

	free_cmd(cmd);
}

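/*
 * hrtimer callback for timer-mode completions: drain this cpu's pending
 * list (reversing it back into submission order) and complete each command.
 */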
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	struct completion_queue *cq;
	struct llist_node *entry;
	struct nullb_cmd *cmd;

	cq = &per_cpu(completion_queues, smp_processor_id());

	while ((entry = llist_del_all(&cq->list)) != NULL) {
		entry = llist_reverse_order(entry);
		do {
			cmd = container_of(entry, struct nullb_cmd, ll_list);
			end_cmd(cmd);
			entry = entry->next;
		} while (entry);
	}

	return HRTIMER_NORESTART;
}

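/*
 * Defer completion of a command: add it to this cpu's completion list and,
 * if the list was previously empty, arm the hrtimer to fire after
 * completion_nsec to emulate hardware latency.
 */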
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

	cmd->ll_list.next = NULL;
	if (llist_add(&cmd->ll_list, &cq->list)) {
		ktime_t kt = ktime_set(0, completion_nsec);

		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
	}

	put_cpu();
}

static void null_softirq_done_fn(struct request *rq)
{
	end_cmd(rq->special);
}

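/*
 * "Handle" a command. There is no data to transfer, so the only work is
 * choosing how to complete it, as selected by the irqmode parameter:
 * inline, via the block layer softirq, or from the per-cpu hrtimer.
 */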
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
	/* Complete IO by inline, softirq or timer */
	switch (irqmode) {
	case NULL_IRQ_SOFTIRQ:
		switch (queue_mode) {
		case NULL_Q_MQ:
			blk_mq_complete_request(cmd->rq);
			break;
		case NULL_Q_RQ:
			blk_complete_request(cmd->rq);
			break;
		case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
			end_cmd(cmd);
			break;
		}
		break;
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}

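/*
 * Map the submitting cpu to one of the device's queues by splitting the
 * cpu id space into nr_queues roughly equal ranges, e.g. with 8 cpus and
 * 2 queues, cpus 0-3 use queue 0 and cpus 4-7 use queue 1.
 */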
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}

static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}

	return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}

static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct nullb_cmd *cmd = rq->special;

	cmd->rq = rq;
	cmd->nq = hctx->driver_data;

	null_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}

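/*
 * With use_per_node_hctx, hardware contexts are allocated on the NUMA node
 * they are expected to serve, spreading nr_hw_queues across the online
 * nodes as evenly as possible.
 */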
static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
{
	int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
	int tip = (reg->nr_hw_queues % nr_online_nodes);
	int node = 0, i, n;

	/*
	 * Split submit queues evenly wrt the number of nodes. If uneven,
	 * fill the first buckets with one extra, until the rest is filled
	 * with no extra.
	 */
	for (i = 0, n = 1; i < hctx_index; i++, n++) {
		if (n % b_size == 0) {
			n = 0;
			node++;

			tip--;
			if (!tip)
				b_size = reg->nr_hw_queues / nr_online_nodes;
		}
	}

	/*
	 * A node might not be online, therefore map the relative node id to the
	 * real node id.
	 */
	for_each_online_node(n) {
		if (!node)
			break;
		node--;
	}

	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
}

static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
{
	kfree(hctx);
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	BUG_ON(!nullb);
	BUG_ON(!nq);

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	hctx->driver_data = nq;
	null_init_queue(nullb, nq);
	nullb->nr_queues++;

	return 0;
}

static struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_hctx	= null_init_hctx,
	.complete	= null_softirq_done_fn,
};

static struct blk_mq_reg null_mq_reg = {
	.ops		= &null_mq_ops,
	.queue_depth	= 64,
	.cmd_size	= sizeof(struct nullb_cmd),
	.flags		= BLK_MQ_F_SHOULD_MERGE,
};

static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	del_gendisk(nullb->disk);
	blk_cleanup_queue(nullb->q);
	put_disk(nullb->disk);
	kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner =	THIS_MODULE,
	.open =		null_open,
	.release =	null_release,
};

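/*
 * Allocate the command array and tag bitmap for one queue. This is only
 * needed for the bio and rq modes; in blk-mq mode the per-request command
 * is carved out of the request pdu via null_mq_reg.cmd_size.
 */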
static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}

static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}

static int setup_queues(struct nullb *nullb)
{
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
								GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			goto err_queue;
		nullb->nr_queues++;
	}

	return 0;
err_queue:
	cleanup_queues(nullb);
	return ret;
}

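/*
 * Create one nullb device: allocate per-device state, set up a
 * request_queue for the configured queue_mode, and register a gendisk
 * whose capacity is derived from the gb and bs parameters.
 */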
static int null_add_dev(void)
{
	struct gendisk *disk;
	struct nullb *nullb;
	sector_t size;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb)
		return -ENOMEM;

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	if (setup_queues(nullb))
		goto err;

	if (queue_mode == NULL_Q_MQ) {
		null_mq_reg.numa_node = home_node;
		null_mq_reg.queue_depth = hw_queue_depth;
		null_mq_reg.nr_hw_queues = submit_queues;

		if (use_per_node_hctx) {
			null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
			null_mq_reg.ops->free_hctx = null_free_hctx;
		} else {
			null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
			null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
		}

		nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		blk_queue_make_request(nullb->q, null_queue_bio);
		init_driver_queues(nullb);
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		if (nullb->q)
			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		init_driver_queues(nullb);
	}

	if (!nullb->q)
		goto queue_fail;

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk) {
queue_fail:
		blk_cleanup_queue(nullb->q);
		cleanup_queues(nullb);
err:
		kfree(nullb);
		return -ENOMEM;
	}

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	size = gb * 1024 * 1024 * 1024ULL;
	sector_div(size, bs);
	set_capacity(disk, size);

	disk->flags |= GENHD_FL_EXT_DEVT;
	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->fops		= &null_fops;
	disk->private_data	= nullb;
	disk->queue		= nullb->q;
	sprintf(disk->disk_name, "nullb%d", nullb->index);
	add_disk(disk);
	return 0;
}

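/*
 * Module init: validate the parameters, initialize the per-cpu completion
 * lists (and their hrtimers when irqmode is timer based), register the
 * block major and create nr_devices devices.
 */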
static int __init null_init(void)
{
	unsigned int i;

	if (bs > PAGE_SIZE) {
		pr_warn("null_blk: invalid block size\n");
		pr_warn("null_blk: defaulting block size to %lu\n", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.\n",
							nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(i) {
		struct completion_queue *cq = &per_cpu(completion_queues, i);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	for (i = 0; i < nr_devices; i++) {
		if (null_add_dev()) {
			unregister_blkdev(null_major, "nullb");
			return -EINVAL;
		}
	}

	pr_info("null: module loaded\n");
	return 0;
}

static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");