NVMe: Add a module parameter to use a threaded interrupt
drivers/block/nvme.c
1 /*
2  * NVM Express device driver
3  * Copyright (c) 2011, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18
19 #include <linux/nvme.h>
20 #include <linux/bio.h>
21 #include <linux/blkdev.h>
22 #include <linux/errno.h>
23 #include <linux/fs.h>
24 #include <linux/genhd.h>
25 #include <linux/init.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/kdev_t.h>
29 #include <linux/kernel.h>
30 #include <linux/mm.h>
31 #include <linux/module.h>
32 #include <linux/moduleparam.h>
33 #include <linux/pci.h>
34 #include <linux/sched.h>
35 #include <linux/slab.h>
36 #include <linux/types.h>
37 #include <linux/version.h>
38
39 #define NVME_Q_DEPTH 1024
40 #define SQ_SIZE(depth)          (depth * sizeof(struct nvme_command))
41 #define CQ_SIZE(depth)          (depth * sizeof(struct nvme_completion))
42 #define NVME_MINORS 64
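/*
 * With the default NVME_Q_DEPTH of 1024, each I/O submission queue needs
 * 64 KiB (1024 * 64-byte commands) and each completion queue 16 KiB
 * (1024 * 16-byte completions) of coherent DMA memory.
 */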
43
44 static int nvme_major;
45 module_param(nvme_major, int, 0);
46
47 static int use_threaded_interrupts;
48 module_param(use_threaded_interrupts, int, 0);
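/*
 * Loading the module with "use_threaded_interrupts=1" (e.g. via
 * "modprobe nvme use_threaded_interrupts=1") makes each queue register a
 * threaded IRQ handler instead of processing all completions in
 * hard-interrupt context; see queue_request_irq() below.
 */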
49
50 /*
51  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
52  */
53 struct nvme_dev {
54         struct nvme_queue **queues;
55         u32 __iomem *dbs;
56         struct pci_dev *pci_dev;
57         int instance;
58         int queue_count;
59         u32 ctrl_config;
60         struct msix_entry *entry;
61         struct nvme_bar __iomem *bar;
62         struct list_head namespaces;
63         char serial[20];
64         char model[40];
65         char firmware_rev[8];
66 };
67
68 /*
69  * An NVM Express namespace is equivalent to a SCSI LUN
70  */
71 struct nvme_ns {
72         struct list_head list;
73
74         struct nvme_dev *dev;
75         struct request_queue *queue;
76         struct gendisk *disk;
77
78         int ns_id;
79         int lba_shift;
80 };
81
82 /*
83  * An NVM Express queue.  Each device has at least two (one for admin
84  * commands and one for I/O commands).
85  */
86 struct nvme_queue {
87         struct device *q_dmadev;
88         spinlock_t q_lock;
89         struct nvme_command *sq_cmds;
90         volatile struct nvme_completion *cqes;
91         dma_addr_t sq_dma_addr;
92         dma_addr_t cq_dma_addr;
93         wait_queue_head_t sq_full;
94         struct bio_list sq_cong;
95         u32 __iomem *q_db;
96         u16 q_depth;
97         u16 cq_vector;
98         u16 sq_head;
99         u16 sq_tail;
100         u16 cq_head;
101         u16 cq_phase;
102         unsigned long cmdid_data[];
103 };
104
105 /*
106  * Check we didn't inadvertently grow the command struct
107  */
108 static inline void _nvme_check_size(void)
109 {
110         BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
111         BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
112         BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
113         BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
114         BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
115         BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
116         BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
117         BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
118         BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
119 }
120
121 /**
122  * alloc_cmdid - Allocate a Command ID
123  * @nvmeq: The queue that will be used for this command
124  * @ctx: A pointer that will be passed to the handler
125  * @handler: The ID of the handler to call
126  *
127  * Allocate a Command ID for a queue.  The data passed in will
128  * be passed to the completion handler.  This is implemented by using
129  * the bottom two bits of the ctx pointer to store the handler ID.
130  * Passing in a pointer that's not 4-byte aligned will cause a BUG.
131  * We can change this if it becomes a problem.
132  */
133 static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler)
134 {
135         int depth = nvmeq->q_depth;
136         unsigned long data = (unsigned long)ctx | handler;
137         int cmdid;
138
139         BUG_ON((unsigned long)ctx & 3);
140
141         do {
142                 cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
143                 if (cmdid >= depth)
144                         return -EBUSY;
145         } while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
146
147         nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(depth)] = data;
148         return cmdid;
149 }
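/*
 * Illustrative sketch of the encoding described above (not used by the
 * driver itself): a 4-byte-aligned ctx pointer and a handler ID in the
 * range 0-3 are packed into one unsigned long, and unpacked the same way
 * nvme_process_cq() does it.
 */
static inline void __maybe_unused nvme_cmdid_encoding_example(void *ctx,
								int handler)
{
	unsigned long data = (unsigned long)ctx | handler;

	BUG_ON((int)(data & 3) != handler);	/* handler ID survives */
	BUG_ON((void *)(data & ~3UL) != ctx);	/* so does the pointer */
}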
150
151 static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
152                                                                 int handler)
153 {
154         int cmdid;
155         wait_event_killable(nvmeq->sq_full,
156                         (cmdid = alloc_cmdid(nvmeq, ctx, handler)) >= 0);
157         return (cmdid < 0) ? -EINTR : cmdid;
158 }
159
160 /* If you need more than four handlers, you'll need to change how
161  * alloc_cmdid and nvme_process_cq work.  Also, aborted commands take
162  * the sync_completion path (if they complete), so don't put anything
163  * else in slot zero.
164  */
165 enum {
166         sync_completion_id = 0,
167         bio_completion_id,
168 };
169
170 static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
171 {
172         unsigned long data;
173
174         data = nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(nvmeq->q_depth)];
175         clear_bit(cmdid, nvmeq->cmdid_data);
176         wake_up(&nvmeq->sq_full);
177         return data;
178 }
179
180 static void clear_cmdid_data(struct nvme_queue *nvmeq, int cmdid)
181 {
182         nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(nvmeq->q_depth)] = 0;
183 }
184
185 static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
186 {
187         int qid, cpu = get_cpu();
188         if (cpu < ns->dev->queue_count)
189                 qid = cpu + 1;
190         else
191                 qid = (cpu % rounddown_pow_of_two(ns->dev->queue_count)) + 1;
192         return ns->dev->queues[qid];
193 }
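/*
 * Example of the mapping above: with 6 I/O queues, CPUs 0-5 use queues 1-6
 * directly, while CPU 6 falls back to (6 % rounddown_pow_of_two(6)) + 1 =
 * queue 3 and CPU 7 to queue 4.  Queue 0 is always the admin queue.
 */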
194
195 static void put_nvmeq(struct nvme_queue *nvmeq)
196 {
197         put_cpu();
198 }
199
200 /**
201  * nvme_submit_cmd: Copy a command into a queue and ring the doorbell
202  * @nvmeq: The queue to use
203  * @cmd: The command to send
204  *
205  * Safe to use from interrupt context
206  */
207 static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
208 {
209         unsigned long flags;
210         u16 tail;
211         /* XXX: Need to check tail isn't going to overrun head */
212         spin_lock_irqsave(&nvmeq->q_lock, flags);
213         tail = nvmeq->sq_tail;
214         memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
215         if (++tail == nvmeq->q_depth)
216                 tail = 0;
217         writel(tail, nvmeq->q_db);
218         nvmeq->sq_tail = tail;
219         spin_unlock_irqrestore(&nvmeq->q_lock, flags);
220
221         return 0;
222 }
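/*
 * Note on the doorbell write above: the submission queue tail doorbell is
 * written with the new tail value, i.e. the index one past the slot the
 * command was just copied into, which tells the controller that everything
 * up to (but not including) that index is ready to be fetched.
 */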
223
224 struct nvme_req_info {
225         struct bio *bio;
226         int nents;
227         struct scatterlist sg[0];
228 };
229
230 /* XXX: use a mempool */
231 static struct nvme_req_info *alloc_info(unsigned nseg, gfp_t gfp)
232 {
233         return kmalloc(sizeof(struct nvme_req_info) +
234                         sizeof(struct scatterlist) * nseg, gfp);
235 }
236
237 static void free_info(struct nvme_req_info *info)
238 {
239         kfree(info);
240 }
241
242 static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
243                                                 struct nvme_completion *cqe)
244 {
245         struct nvme_req_info *info = ctx;
246         struct bio *bio = info->bio;
247         u16 status = le16_to_cpup(&cqe->status) >> 1;
248
249         dma_unmap_sg(nvmeq->q_dmadev, info->sg, info->nents,
250                         bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
251         free_info(info);
252         bio_endio(bio, status ? -EIO : 0);
253 }
254
255 /* length is in bytes */
256 static void nvme_setup_prps(struct nvme_common_command *cmd,
257                                         struct scatterlist *sg, int length)
258 {
259         int dma_len = sg_dma_len(sg);
260         u64 dma_addr = sg_dma_address(sg);
261         int offset = offset_in_page(dma_addr);
262
263         cmd->prp1 = cpu_to_le64(dma_addr);
264         length -= (PAGE_SIZE - offset);
265         if (length <= 0)
266                 return;
267
268         dma_len -= (PAGE_SIZE - offset);
269         if (dma_len) {
270                 dma_addr += (PAGE_SIZE - offset);
271         } else {
272                 sg = sg_next(sg);
273                 dma_addr = sg_dma_address(sg);
274                 dma_len = sg_dma_len(sg);
275         }
276
277         if (length <= PAGE_SIZE) {
278                 cmd->prp2 = cpu_to_le64(dma_addr);
279                 return;
280         }
281
282         /* XXX: support PRP lists */
283 }
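/*
 * Example, assuming 4 KiB pages: for a page-aligned 8 KiB transfer, prp1
 * points at the first page and prp2 at the second.  Transfers that need
 * more than two PRP entries require a PRP list in prp2, which this version
 * does not build yet (see the XXX above).
 */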
284
285 static int nvme_map_bio(struct device *dev, struct nvme_req_info *info,
286                 struct bio *bio, enum dma_data_direction dma_dir, int psegs)
287 {
288         struct bio_vec *bvec;
289         struct scatterlist *sg = info->sg;
290         int i, nsegs = 0;
291
292         sg_init_table(sg, psegs);
293         bio_for_each_segment(bvec, bio, i) {
294                 sg_set_page(sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
295                 /* XXX: handle non-mergable here */
296                 nsegs++;
297         }
298         info->nents = nsegs;
299
300         return dma_map_sg(dev, info->sg, info->nents, dma_dir);
301 }
302
303 static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
304                                                                 struct bio *bio)
305 {
306         struct nvme_command *cmnd;
307         struct nvme_req_info *info;
308         enum dma_data_direction dma_dir;
309         int cmdid;
310         u16 control;
311         u32 dsmgmt;
312         unsigned long flags;
313         int psegs = bio_phys_segments(ns->queue, bio);
314
315         info = alloc_info(psegs, GFP_NOIO);
316         if (!info)
317                 goto congestion;
318         info->bio = bio;
319
320         cmdid = alloc_cmdid(nvmeq, info, bio_completion_id);
321         if (unlikely(cmdid < 0))
322                 goto free_info;
323
324         control = 0;
325         if (bio->bi_rw & REQ_FUA)
326                 control |= NVME_RW_FUA;
327         if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD))
328                 control |= NVME_RW_LR;
329
330         dsmgmt = 0;
331         if (bio->bi_rw & REQ_RAHEAD)
332                 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
333
334         spin_lock_irqsave(&nvmeq->q_lock, flags);
335         cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
336
337         memset(cmnd, 0, sizeof(*cmnd));
338         if (bio_data_dir(bio)) {
339                 cmnd->rw.opcode = nvme_cmd_write;
340                 dma_dir = DMA_TO_DEVICE;
341         } else {
342                 cmnd->rw.opcode = nvme_cmd_read;
343                 dma_dir = DMA_FROM_DEVICE;
344         }
345
346         nvme_map_bio(nvmeq->q_dmadev, info, bio, dma_dir, psegs);
347
348         cmnd->rw.flags = 1;
349         cmnd->rw.command_id = cmdid;
350         cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
351         nvme_setup_prps(&cmnd->common, info->sg, bio->bi_size);
352         cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
353         cmnd->rw.length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1);
354         cmnd->rw.control = cpu_to_le16(control);
355         cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
356
357         if (++nvmeq->sq_tail == nvmeq->q_depth)
358                 nvmeq->sq_tail = 0;
359         writel(nvmeq->sq_tail, nvmeq->q_db);
360
361         spin_unlock_irqrestore(&nvmeq->q_lock, flags);
362
363         return 0;
364
365  free_info:
366         free_info(info);
367  congestion:
368         return -EBUSY;
369 }
370
371 /*
372  * NB: return value of non-zero would mean that we were a stacking driver.
373  * make_request must always succeed.
374  */
375 static int nvme_make_request(struct request_queue *q, struct bio *bio)
376 {
377         struct nvme_ns *ns = q->queuedata;
378         struct nvme_queue *nvmeq = get_nvmeq(ns);
379
380         if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
381                 blk_set_queue_congested(q, rw_is_sync(bio->bi_rw));
382                 bio_list_add(&nvmeq->sq_cong, bio);
383         }
384         put_nvmeq(nvmeq);
385
386         return 0;
387 }
388
389 struct sync_cmd_info {
390         struct task_struct *task;
391         u32 result;
392         int status;
393 };
394
395 static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
396                                                 struct nvme_completion *cqe)
397 {
398         struct sync_cmd_info *cmdinfo = ctx;
399         if (!cmdinfo)
400                 return; /* Command aborted */
401         cmdinfo->result = le32_to_cpup(&cqe->result);
402         cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
403         wake_up_process(cmdinfo->task);
404 }
405
406 typedef void (*completion_fn)(struct nvme_queue *, void *,
407                                                 struct nvme_completion *);
408
409 static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
410 {
411         u16 head, phase;
412
413         static const completion_fn completions[4] = {
414                 [sync_completion_id] = sync_completion,
415                 [bio_completion_id]  = bio_completion,
416         };
417
418         head = nvmeq->cq_head;
419         phase = nvmeq->cq_phase;
420
421         for (;;) {
422                 unsigned long data;
423                 void *ptr;
424                 unsigned char handler;
425                 struct nvme_completion cqe = nvmeq->cqes[head];
426                 if ((le16_to_cpu(cqe.status) & 1) != phase)
427                         break;
428                 nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
429                 if (++head == nvmeq->q_depth) {
430                         head = 0;
431                         phase = !phase;
432                 }
433
434                 data = free_cmdid(nvmeq, cqe.command_id);
435                 handler = data & 3;
436                 ptr = (void *)(data & ~3UL);
437                 completions[handler](nvmeq, ptr, &cqe);
438         }
439
440         /* If the controller ignores the cq head doorbell and continuously
441          * writes to the queue, it is theoretically possible to wrap around
442          * the queue twice and mistakenly return IRQ_NONE.  Linux only
443          * requires that 0.1% of your interrupts are handled, so this isn't
444          * a big problem.
445          */
446         if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
447                 return IRQ_NONE;
448
449         writel(head, nvmeq->q_db + 1);
450         nvmeq->cq_head = head;
451         nvmeq->cq_phase = phase;
452
453         return IRQ_HANDLED;
454 }
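/*
 * Phase-bit example with a queue depth of 4: the CQ memory starts zeroed,
 * so with cq_phase initialised to 1 the stale entries never match.  The
 * controller writes its first pass of completions with the phase tag set
 * to 1; once the head wraps, cq_phase flips to 0 and old entries (still
 * tagged 1) are again treated as stale until the controller overwrites
 * them on its next pass with the tag cleared.
 */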
455
456 static irqreturn_t nvme_irq(int irq, void *data)
457 {
458         return nvme_process_cq(data);
459 }
460
461 static irqreturn_t nvme_irq_thread(int irq, void *data)
462 {
463         irqreturn_t result;
464         struct nvme_queue *nvmeq = data;
465         spin_lock(&nvmeq->q_lock);
466         result = nvme_process_cq(nvmeq);
467         spin_unlock(&nvmeq->q_lock);
468         return result;
469 }
470
471 static irqreturn_t nvme_irq_check(int irq, void *data)
472 {
473         struct nvme_queue *nvmeq = data;
474         struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
475         if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
476                 return IRQ_NONE;
477         return IRQ_WAKE_THREAD;
478 }
479
480 static void nvme_abort_command(struct nvme_queue *nvmeq, int cmdid)
481 {
482         spin_lock_irq(&nvmeq->q_lock);
483         clear_cmdid_data(nvmeq, cmdid);
484         spin_unlock_irq(&nvmeq->q_lock);
485 }
486
487 /*
488  * Returns 0 on success.  If the result is negative, it's a Linux error code;
489  * if the result is positive, it's an NVM Express status code
490  */
491 static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
492                                         struct nvme_command *cmd, u32 *result)
493 {
494         int cmdid;
495         struct sync_cmd_info cmdinfo;
496
497         cmdinfo.task = current;
498         cmdinfo.status = -EINTR;
499
500         cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion_id);
501         if (cmdid < 0)
502                 return cmdid;
503         cmd->common.command_id = cmdid;
504
505         set_current_state(TASK_KILLABLE);
506         nvme_submit_cmd(nvmeq, cmd);
507         schedule();
508
509         if (cmdinfo.status == -EINTR) {
510                 nvme_abort_command(nvmeq, cmdid);
511                 return -EINTR;
512         }
513
514         if (result)
515                 *result = cmdinfo.result;
516
517         return cmdinfo.status;
518 }
519
520 static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
521                                                                 u32 *result)
522 {
523         return nvme_submit_sync_cmd(dev->queues[0], cmd, result);
524 }
525
526 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
527 {
528         int status;
529         struct nvme_command c;
530
531         memset(&c, 0, sizeof(c));
532         c.delete_queue.opcode = opcode;
533         c.delete_queue.qid = cpu_to_le16(id);
534
535         status = nvme_submit_admin_cmd(dev, &c, NULL);
536         if (status)
537                 return -EIO;
538         return 0;
539 }
540
541 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
542                                                 struct nvme_queue *nvmeq)
543 {
544         int status;
545         struct nvme_command c;
546         int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
547
548         memset(&c, 0, sizeof(c));
549         c.create_cq.opcode = nvme_admin_create_cq;
550         c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
551         c.create_cq.cqid = cpu_to_le16(qid);
552         c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
553         c.create_cq.cq_flags = cpu_to_le16(flags);
554         c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
555
556         status = nvme_submit_admin_cmd(dev, &c, NULL);
557         if (status)
558                 return -EIO;
559         return 0;
560 }
561
562 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
563                                                 struct nvme_queue *nvmeq)
564 {
565         int status;
566         struct nvme_command c;
567         int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
568
569         memset(&c, 0, sizeof(c));
570         c.create_sq.opcode = nvme_admin_create_sq;
571         c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
572         c.create_sq.sqid = cpu_to_le16(qid);
573         c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
574         c.create_sq.sq_flags = cpu_to_le16(flags);
575         c.create_sq.cqid = cpu_to_le16(qid);
576
577         status = nvme_submit_admin_cmd(dev, &c, NULL);
578         if (status)
579                 return -EIO;
580         return 0;
581 }
582
583 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
584 {
585         return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
586 }
587
588 static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
589 {
590         return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
591 }
592
593 static void nvme_free_queue(struct nvme_dev *dev, int qid)
594 {
595         struct nvme_queue *nvmeq = dev->queues[qid];
596
597         free_irq(dev->entry[nvmeq->cq_vector].vector, nvmeq);
598
599         /* Don't tell the adapter to delete the admin queue */
600         if (qid) {
601                 adapter_delete_sq(dev, qid);
602                 adapter_delete_cq(dev, qid);
603         }
604
605         dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
606                                 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
607         dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
608                                         nvmeq->sq_cmds, nvmeq->sq_dma_addr);
609         kfree(nvmeq);
610 }
611
612 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
613                                                         int depth, int vector)
614 {
615         struct device *dmadev = &dev->pci_dev->dev;
616         unsigned extra = (depth + BITS_TO_LONGS(depth)) * sizeof(long);
617         struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
618         if (!nvmeq)
619                 return NULL;
620
621         nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth),
622                                         &nvmeq->cq_dma_addr, GFP_KERNEL);
623         if (!nvmeq->cqes)
624                 goto free_nvmeq;
625         memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth));
626
627         nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
628                                         &nvmeq->sq_dma_addr, GFP_KERNEL);
629         if (!nvmeq->sq_cmds)
630                 goto free_cqdma;
631
632         nvmeq->q_dmadev = dmadev;
633         spin_lock_init(&nvmeq->q_lock);
634         nvmeq->cq_head = 0;
635         nvmeq->cq_phase = 1;
636         init_waitqueue_head(&nvmeq->sq_full);
637         bio_list_init(&nvmeq->sq_cong);
638         nvmeq->q_db = &dev->dbs[qid * 2];
639         nvmeq->q_depth = depth;
640         nvmeq->cq_vector = vector;
641
642         return nvmeq;
643
644  free_cqdma:
645         dma_free_coherent(dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes,
646                                                         nvmeq->cq_dma_addr);
647  free_nvmeq:
648         kfree(nvmeq);
649         return NULL;
650 }
651
652 static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
653                                                         const char *name)
654 {
655         if (use_threaded_interrupts)
656                 return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector,
657                                         nvme_irq_check, nvme_irq_thread,
658                                         IRQF_DISABLED | IRQF_SHARED,
659                                         name, nvmeq);
660         return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq,
661                                 IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
662 }
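/*
 * With use_threaded_interrupts set, nvme_irq_check() runs in hard-IRQ
 * context and only peeks at the phase bit of the next completion entry;
 * the actual completion processing happens in nvme_irq_thread(), which
 * runs in a dedicated kernel thread.  Otherwise nvme_irq() does all the
 * work directly in the interrupt handler.
 */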
663
664 static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
665                                         int qid, int cq_size, int vector)
666 {
667         int result;
668         struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
669
670         if (!nvmeq)
671                 return NULL;
672
673         result = adapter_alloc_cq(dev, qid, nvmeq);
674         if (result < 0)
675                 goto free_nvmeq;
676
677         result = adapter_alloc_sq(dev, qid, nvmeq);
678         if (result < 0)
679                 goto release_cq;
680
681         result = queue_request_irq(dev, nvmeq, "nvme");
682         if (result < 0)
683                 goto release_sq;
684
685         return nvmeq;
686
687  release_sq:
688         adapter_delete_sq(dev, qid);
689  release_cq:
690         adapter_delete_cq(dev, qid);
691  free_nvmeq:
692         dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
693                                 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
694         dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
695                                         nvmeq->sq_cmds, nvmeq->sq_dma_addr);
696         kfree(nvmeq);
697         return NULL;
698 }
699
700 static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
701 {
702         int result;
703         u32 aqa;
704         struct nvme_queue *nvmeq;
705
706         dev->dbs = ((void __iomem *)dev->bar) + 4096;
707
708         nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
709         if (!nvmeq)
710                 return -ENOMEM;
711
712         aqa = nvmeq->q_depth - 1;
713         aqa |= aqa << 16;
714
715         dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
716         dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
717         dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
718
719         writel(0, &dev->bar->cc);
720         writel(aqa, &dev->bar->aqa);
721         writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
722         writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
723         writel(dev->ctrl_config, &dev->bar->cc);
724
725         while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
726                 msleep(100);
727                 if (fatal_signal_pending(current))
728                         return -EINTR;
729         }
730
731         result = queue_request_irq(dev, nvmeq, "nvme admin");
732         dev->queues[0] = nvmeq;
733         return result;
734 }
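/*
 * With the 64-entry admin queue allocated above, aqa ends up as 0x003f003f
 * (both the submission and completion queue sizes are zero-based), and the
 * memory page size field in cc encodes PAGE_SHIFT relative to the 4 KiB
 * minimum, so it is 0 on 4 KiB-page systems.
 */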
735
736 static int nvme_map_user_pages(struct nvme_dev *dev, int write,
737                                 unsigned long addr, unsigned length,
738                                 struct scatterlist **sgp)
739 {
740         int i, err, count, nents, offset;
741         struct scatterlist *sg;
742         struct page **pages;
743
744         if (addr & 3)
745                 return -EINVAL;
746         if (!length)
747                 return -EINVAL;
748
749         offset = offset_in_page(addr);
750         count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
751         pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
752
753         err = get_user_pages_fast(addr, count, 1, pages);
754         if (err < count) {
755                 count = err;
756                 err = -EFAULT;
757                 goto put_pages;
758         }
759
760         sg = kcalloc(count, sizeof(*sg), GFP_KERNEL);
761         sg_init_table(sg, count);
762         sg_set_page(&sg[0], pages[0], PAGE_SIZE - offset, offset);
763         length -= (PAGE_SIZE - offset);
764         for (i = 1; i < count; i++) {
765                 sg_set_page(&sg[i], pages[i], min_t(int, length, PAGE_SIZE), 0);
766                 length -= PAGE_SIZE;
767         }
768
769         err = -ENOMEM;
770         nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
771                                 write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
772         if (!nents)
773                 goto put_pages;
774
775         kfree(pages);
776         *sgp = sg;
777         return nents;
778
779  put_pages:
780         for (i = 0; i < count; i++)
781                 put_page(pages[i]);
782         kfree(pages);
783         return err;
784 }
785
786 static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
787                                 unsigned long addr, int length,
788                                 struct scatterlist *sg, int nents)
789 {
790         int i, count;
791
792         count = DIV_ROUND_UP(offset_in_page(addr) + length, PAGE_SIZE);
793         dma_unmap_sg(&dev->pci_dev->dev, sg, nents, DMA_FROM_DEVICE);
794
795         for (i = 0; i < count; i++)
796                 put_page(sg_page(&sg[i]));
797 }
798
799 static int nvme_submit_user_admin_command(struct nvme_dev *dev,
800                                         unsigned long addr, unsigned length,
801                                         struct nvme_command *cmd)
802 {
803         int err, nents;
804         struct scatterlist *sg;
805
806         nents = nvme_map_user_pages(dev, 0, addr, length, &sg);
807         if (nents < 0)
808                 return nents;
809         nvme_setup_prps(&cmd->common, sg, length);
810         err = nvme_submit_admin_cmd(dev, cmd, NULL);
811         nvme_unmap_user_pages(dev, 0, addr, length, sg, nents);
812         return err ? -EIO : 0;
813 }
814
815 static int nvme_identify(struct nvme_ns *ns, unsigned long addr, int cns)
816 {
817         struct nvme_command c;
818
819         memset(&c, 0, sizeof(c));
820         c.identify.opcode = nvme_admin_identify;
821         c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
822         c.identify.cns = cpu_to_le32(cns);
823
824         return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
825 }
826
827 static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
828 {
829         struct nvme_command c;
830
831         memset(&c, 0, sizeof(c));
832         c.features.opcode = nvme_admin_get_features;
833         c.features.nsid = cpu_to_le32(ns->ns_id);
834         c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
835
836         return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
837 }
838
839 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
840 {
841         struct nvme_dev *dev = ns->dev;
842         struct nvme_queue *nvmeq;
843         struct nvme_user_io io;
844         struct nvme_command c;
845         unsigned length;
846         u32 result;
847         int nents, status;
848         struct scatterlist *sg;
849
850         if (copy_from_user(&io, uio, sizeof(io)))
851                 return -EFAULT;
852         length = io.nblocks << io.block_shift;
853         nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
854         if (nents < 0)
855                 return nents;
856
857         memset(&c, 0, sizeof(c));
858         c.rw.opcode = io.opcode;
859         c.rw.flags = io.flags;
860         c.rw.nsid = cpu_to_le32(io.nsid);
861         c.rw.slba = cpu_to_le64(io.slba);
862         c.rw.length = cpu_to_le16(io.nblocks - 1);
863         c.rw.control = cpu_to_le16(io.control);
864         c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
865         c.rw.reftag = cpu_to_le32(io.reftag);   /* XXX: endian? */
866         c.rw.apptag = cpu_to_le16(io.apptag);
867         c.rw.appmask = cpu_to_le16(io.appmask);
868         /* XXX: metadata */
869         nvme_setup_prps(&c.common, sg, length);
870
871         nvmeq = get_nvmeq(ns);
872         /* Since nvme_submit_sync_cmd sleeps, we can't keep preemption
873          * disabled.  We may be preempted at any point, and be rescheduled
874          * to a different CPU.  That will cause cacheline bouncing, but no
875          * additional races since q_lock already protects against other CPUs.
876          */
877         put_nvmeq(nvmeq);
878         status = nvme_submit_sync_cmd(nvmeq, &c, &result);
879
880         nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
881         put_user(result, &uio->result);
882         return status;
883 }
884
885 static int nvme_download_firmware(struct nvme_ns *ns,
886                                                 struct nvme_dlfw __user *udlfw)
887 {
888         struct nvme_dev *dev = ns->dev;
889         struct nvme_dlfw dlfw;
890         struct nvme_command c;
891         int nents, status;
892         struct scatterlist *sg;
893
894         if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
895                 return -EFAULT;
896         if (dlfw.length >= (1 << 30))
897                 return -EINVAL;
898
899         nents = nvme_map_user_pages(dev, 1, dlfw.addr, dlfw.length * 4, &sg);
900         if (nents < 0)
901                 return nents;
902
903         memset(&c, 0, sizeof(c));
904         c.dlfw.opcode = nvme_admin_download_fw;
905         c.dlfw.numd = cpu_to_le32(dlfw.length);
906         c.dlfw.offset = cpu_to_le32(dlfw.offset);
907         nvme_setup_prps(&c.common, sg, dlfw.length * 4);
908
909         status = nvme_submit_admin_cmd(dev, &c, NULL);
910         nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents);
911         return status;
912 }
913
914 static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
915 {
916         struct nvme_dev *dev = ns->dev;
917         struct nvme_command c;
918
919         memset(&c, 0, sizeof(c));
920         c.common.opcode = nvme_admin_activate_fw;
921         c.common.rsvd10[0] = cpu_to_le32(arg);
922
923         return nvme_submit_admin_cmd(dev, &c, NULL);
924 }
925
926 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
927                                                         unsigned long arg)
928 {
929         struct nvme_ns *ns = bdev->bd_disk->private_data;
930
931         switch (cmd) {
932         case NVME_IOCTL_IDENTIFY_NS:
933                 return nvme_identify(ns, arg, 0);
934         case NVME_IOCTL_IDENTIFY_CTRL:
935                 return nvme_identify(ns, arg, 1);
936         case NVME_IOCTL_GET_RANGE_TYPE:
937                 return nvme_get_range_type(ns, arg);
938         case NVME_IOCTL_SUBMIT_IO:
939                 return nvme_submit_io(ns, (void __user *)arg);
940         case NVME_IOCTL_DOWNLOAD_FW:
941                 return nvme_download_firmware(ns, (void __user *)arg);
942         case NVME_IOCTL_ACTIVATE_FW:
943                 return nvme_activate_firmware(ns, arg);
944         default:
945                 return -ENOTTY;
946         }
947 }
948
949 static const struct block_device_operations nvme_fops = {
950         .owner          = THIS_MODULE,
951         .ioctl          = nvme_ioctl,
952 };
953
954 static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
955                         struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
956 {
957         struct nvme_ns *ns;
958         struct gendisk *disk;
959         int lbaf;
960
961         if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
962                 return NULL;
963
964         ns = kzalloc(sizeof(*ns), GFP_KERNEL);
965         if (!ns)
966                 return NULL;
967         ns->queue = blk_alloc_queue(GFP_KERNEL);
968         if (!ns->queue)
969                 goto out_free_ns;
970         ns->queue->queue_flags = QUEUE_FLAG_DEFAULT | QUEUE_FLAG_NOMERGES |
971                                 QUEUE_FLAG_NONROT | QUEUE_FLAG_DISCARD;
972         blk_queue_make_request(ns->queue, nvme_make_request);
973         ns->dev = dev;
974         ns->queue->queuedata = ns;
975
976         disk = alloc_disk(NVME_MINORS);
977         if (!disk)
978                 goto out_free_queue;
979         ns->ns_id = index;
980         ns->disk = disk;
981         lbaf = id->flbas & 0xf;
982         ns->lba_shift = id->lbaf[lbaf].ds;
983
984         disk->major = nvme_major;
985         disk->minors = NVME_MINORS;
986         disk->first_minor = NVME_MINORS * index;
987         disk->fops = &nvme_fops;
988         disk->private_data = ns;
989         disk->queue = ns->queue;
990         disk->driverfs_dev = &dev->pci_dev->dev;
991         sprintf(disk->disk_name, "nvme%dn%d", dev->instance, index);
992         set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
993
994         return ns;
995
996  out_free_queue:
997         blk_cleanup_queue(ns->queue);
998  out_free_ns:
999         kfree(ns);
1000         return NULL;
1001 }
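/*
 * Example: an LBA format with ds = 9 gives 512-byte blocks, so the gendisk
 * capacity is nsze << 0 sectors; ds = 12 (4 KiB blocks) gives nsze << 3
 * 512-byte sectors.
 */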
1002
1003 static void nvme_ns_free(struct nvme_ns *ns)
1004 {
1005         put_disk(ns->disk);
1006         blk_cleanup_queue(ns->queue);
1007         kfree(ns);
1008 }
1009
1010 static int set_queue_count(struct nvme_dev *dev, int count)
1011 {
1012         int status;
1013         u32 result;
1014         struct nvme_command c;
1015         u32 q_count = (count - 1) | ((count - 1) << 16);
1016
1017         memset(&c, 0, sizeof(c));
1018         c.features.opcode = nvme_admin_get_features;
1019         c.features.fid = cpu_to_le32(NVME_FEAT_NUM_QUEUES);
1020         c.features.dword11 = cpu_to_le32(q_count);
1021
1022         status = nvme_submit_admin_cmd(dev, &c, &result);
1023         if (status)
1024                 return -EIO;
1025         return min(result & 0xffff, result >> 16) + 1;
1026 }
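/*
 * Example: asking for 4 queues sends dword11 = 0x00030003 (both counts are
 * zero-based).  If the controller answers with result = 0x0007000f, one
 * queue type was granted 16 and the other 8, so the function returns
 * min(15, 7) + 1 = 8 usable queue pairs.
 */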
1027
1028 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
1029 {
1030         int result, cpu, i, nr_queues;
1031
1032         nr_queues = num_online_cpus();
1033         result = set_queue_count(dev, nr_queues);
1034         if (result < 0)
1035                 return result;
1036         if (result < nr_queues)
1037                 nr_queues = result;
1038
1039         /* Deregister the admin queue's interrupt */
1040         free_irq(dev->entry[0].vector, dev->queues[0]);
1041
1042         for (i = 0; i < nr_queues; i++)
1043                 dev->entry[i].entry = i;
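	/*
	 * pci_enable_msix() returns 0 on success, a positive number when
	 * fewer vectors are available (in which case we retry with that
	 * count), or a negative error, in which case we fall back to a
	 * single I/O queue.
	 */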
1044         for (;;) {
1045                 result = pci_enable_msix(dev->pci_dev, dev->entry, nr_queues);
1046                 if (result == 0) {
1047                         break;
1048                 } else if (result > 0) {
1049                         nr_queues = result;
1050                         continue;
1051                 } else {
1052                         nr_queues = 1;
1053                         break;
1054                 }
1055         }
1056
1057         result = queue_request_irq(dev, dev->queues[0], "nvme admin");
1058         /* XXX: handle failure here */
1059
1060         cpu = cpumask_first(cpu_online_mask);
1061         for (i = 0; i < nr_queues; i++) {
1062                 irq_set_affinity_hint(dev->entry[i].vector, get_cpu_mask(cpu));
1063                 cpu = cpumask_next(cpu, cpu_online_mask);
1064         }
1065
1066         for (i = 0; i < nr_queues; i++) {
1067                 dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
1068                                                         NVME_Q_DEPTH, i);
1069                 if (!dev->queues[i + 1])
1070                         return -ENOMEM;
1071                 dev->queue_count++;
1072         }
1073
1074         return 0;
1075 }
1076
1077 static void nvme_free_queues(struct nvme_dev *dev)
1078 {
1079         int i;
1080
1081         for (i = dev->queue_count - 1; i >= 0; i--)
1082                 nvme_free_queue(dev, i);
1083 }
1084
1085 static int __devinit nvme_dev_add(struct nvme_dev *dev)
1086 {
1087         int res, nn, i;
1088         struct nvme_ns *ns, *next;
1089         struct nvme_id_ctrl *ctrl;
1090         void *id;
1091         dma_addr_t dma_addr;
1092         struct nvme_command cid, crt;
1093
1094         res = nvme_setup_io_queues(dev);
1095         if (res)
1096                 return res;
1097
1098         /* XXX: Switch to a SG list once prp2 works */
1099         id = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
1100                                                                 GFP_KERNEL);
1101
1102         memset(&cid, 0, sizeof(cid));
1103         cid.identify.opcode = nvme_admin_identify;
1104         cid.identify.nsid = 0;
1105         cid.identify.prp1 = cpu_to_le64(dma_addr);
1106         cid.identify.cns = cpu_to_le32(1);
1107
1108         res = nvme_submit_admin_cmd(dev, &cid, NULL);
1109         if (res) {
1110                 res = -EIO;
1111                 goto out_free;
1112         }
1113
1114         ctrl = id;
1115         nn = le32_to_cpup(&ctrl->nn);
1116         memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
1117         memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
1118         memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
1119
1120         cid.identify.cns = 0;
1121         memset(&crt, 0, sizeof(crt));
1122         crt.features.opcode = nvme_admin_get_features;
1123         crt.features.prp1 = cpu_to_le64(dma_addr + 4096);
1124         crt.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
1125
1126         for (i = 0; i < nn; i++) {
1127                 cid.identify.nsid = cpu_to_le32(i);
1128                 res = nvme_submit_admin_cmd(dev, &cid, NULL);
1129                 if (res)
1130                         continue;
1131
1132                 if (((struct nvme_id_ns *)id)->ncap == 0)
1133                         continue;
1134
1135                 crt.features.nsid = cpu_to_le32(i);
1136                 res = nvme_submit_admin_cmd(dev, &crt, NULL);
1137                 if (res)
1138                         continue;
1139
1140                 ns = nvme_alloc_ns(dev, i, id, id + 4096);
1141                 if (ns)
1142                         list_add_tail(&ns->list, &dev->namespaces);
1143         }
1144         list_for_each_entry(ns, &dev->namespaces, list)
1145                 add_disk(ns->disk);
1146
1147         dma_free_coherent(&dev->pci_dev->dev, 8192, id, dma_addr);
1148         return 0;
1149
1150  out_free:
1151         list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
1152                 list_del(&ns->list);
1153                 nvme_ns_free(ns);
1154         }
1155
1156         dma_free_coherent(&dev->pci_dev->dev, 8192, id, dma_addr);
1157         return res;
1158 }
1159
1160 static int nvme_dev_remove(struct nvme_dev *dev)
1161 {
1162         struct nvme_ns *ns, *next;
1163
1164         /* TODO: wait all I/O finished or cancel them */
1165
1166         list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
1167                 list_del(&ns->list);
1168                 del_gendisk(ns->disk);
1169                 nvme_ns_free(ns);
1170         }
1171
1172         nvme_free_queues(dev);
1173
1174         return 0;
1175 }
1176
1177 /* XXX: Use an ida or something to let remove / add work correctly */
1178 static void nvme_set_instance(struct nvme_dev *dev)
1179 {
1180         static int instance;
1181         dev->instance = instance++;
1182 }
1183
1184 static void nvme_release_instance(struct nvme_dev *dev)
1185 {
1186 }
1187
1188 static int __devinit nvme_probe(struct pci_dev *pdev,
1189                                                 const struct pci_device_id *id)
1190 {
1191         int bars, result = -ENOMEM;
1192         struct nvme_dev *dev;
1193
1194         dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1195         if (!dev)
1196                 return -ENOMEM;
1197         dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry),
1198                                                                 GFP_KERNEL);
1199         if (!dev->entry)
1200                 goto free;
1201         dev->queues = kcalloc(num_possible_cpus() + 1, sizeof(void *),
1202                                                                 GFP_KERNEL);
1203         if (!dev->queues)
1204                 goto free;
1205
1206         if (pci_enable_device_mem(pdev))
1207                 goto free;
1208         pci_set_master(pdev);
1209         bars = pci_select_bars(pdev, IORESOURCE_MEM);
1210         if (pci_request_selected_regions(pdev, bars, "nvme"))
1211                 goto disable;
1212
1213         INIT_LIST_HEAD(&dev->namespaces);
1214         dev->pci_dev = pdev;
1215         pci_set_drvdata(pdev, dev);
1216         dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1217         dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1218         nvme_set_instance(dev);
1219         dev->entry[0].vector = pdev->irq;
1220
1221         dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
1222         if (!dev->bar) {
1223                 result = -ENOMEM;
1224                 goto disable_msix;
1225         }
1226
1227         result = nvme_configure_admin_queue(dev);
1228         if (result)
1229                 goto unmap;
1230         dev->queue_count++;
1231
1232         result = nvme_dev_add(dev);
1233         if (result)
1234                 goto delete;
1235         return 0;
1236
1237  delete:
1238         nvme_free_queues(dev);
1239  unmap:
1240         iounmap(dev->bar);
1241  disable_msix:
1242         pci_disable_msix(pdev);
1243         nvme_release_instance(dev);
1244  disable:
1245         pci_disable_device(pdev);
1246         pci_release_regions(pdev);
1247  free:
1248         kfree(dev->queues);
1249         kfree(dev->entry);
1250         kfree(dev);
1251         return result;
1252 }
1253
1254 static void __devexit nvme_remove(struct pci_dev *pdev)
1255 {
1256         struct nvme_dev *dev = pci_get_drvdata(pdev);
1257         nvme_dev_remove(dev);
1258         pci_disable_msix(pdev);
1259         iounmap(dev->bar);
1260         nvme_release_instance(dev);
1261         pci_disable_device(pdev);
1262         pci_release_regions(pdev);
1263         kfree(dev->queues);
1264         kfree(dev->entry);
1265         kfree(dev);
1266 }
1267
1268 /* These functions are yet to be implemented */
1269 #define nvme_error_detected NULL
1270 #define nvme_dump_registers NULL
1271 #define nvme_link_reset NULL
1272 #define nvme_slot_reset NULL
1273 #define nvme_error_resume NULL
1274 #define nvme_suspend NULL
1275 #define nvme_resume NULL
1276
1277 static struct pci_error_handlers nvme_err_handler = {
1278         .error_detected = nvme_error_detected,
1279         .mmio_enabled   = nvme_dump_registers,
1280         .link_reset     = nvme_link_reset,
1281         .slot_reset     = nvme_slot_reset,
1282         .resume         = nvme_error_resume,
1283 };
1284
1285 /* Move to pci_ids.h later */
1286 #define PCI_CLASS_STORAGE_EXPRESS       0x010802
1287
1288 static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = {
1289         { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
1290         { 0, }
1291 };
1292 MODULE_DEVICE_TABLE(pci, nvme_id_table);
1293
1294 static struct pci_driver nvme_driver = {
1295         .name           = "nvme",
1296         .id_table       = nvme_id_table,
1297         .probe          = nvme_probe,
1298         .remove         = __devexit_p(nvme_remove),
1299         .suspend        = nvme_suspend,
1300         .resume         = nvme_resume,
1301         .err_handler    = &nvme_err_handler,
1302 };
1303
1304 static int __init nvme_init(void)
1305 {
1306         int result;
1307
1308         nvme_major = register_blkdev(nvme_major, "nvme");
1309         if (nvme_major <= 0)
1310                 return -EBUSY;
1311
1312         result = pci_register_driver(&nvme_driver);
1313         if (!result)
1314                 return 0;
1315
1316         unregister_blkdev(nvme_major, "nvme");
1317         return result;
1318 }
1319
1320 static void __exit nvme_exit(void)
1321 {
1322         pci_unregister_driver(&nvme_driver);
1323         unregister_blkdev(nvme_major, "nvme");
1324 }
1325
1326 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
1327 MODULE_LICENSE("GPL");
1328 MODULE_VERSION("0.2");
1329 module_init(nvme_init);
1330 module_exit(nvme_exit);