platform/kernel/u-boot.git: drivers/nvme/nvme.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 */

#include <common.h>
#include <blk.h>
#include <cpu_func.h>
#include <dm.h>
#include <errno.h>
#include <log.h>
#include <malloc.h>
#include <memalign.h>
#include <pci.h>
#include <time.h>
#include <dm/device-internal.h>
#include <linux/compat.h>
#include "nvme.h"

#define NVME_Q_DEPTH		2
#define NVME_AQ_DEPTH		2
#define NVME_SQ_SIZE(depth)	(depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)	(depth * sizeof(struct nvme_completion))
#define ADMIN_TIMEOUT		60
#define IO_TIMEOUT		30
#define MAX_PRP_POOL		512
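
/*
 * Worked example for the queue sizing above: per the NVMe spec a submission
 * queue entry (struct nvme_command) is 64 bytes and a completion queue entry
 * (struct nvme_completion) is 16 bytes, so with NVME_Q_DEPTH = 2 each
 * submission queue occupies NVME_SQ_SIZE(2) = 128 bytes and each completion
 * queue NVME_CQ_SIZE(2) = 32 bytes.  nvme_alloc_queue() below still allocates
 * both with 4 KiB alignment, since the controller requires page-aligned queue
 * base addresses.
 */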

enum nvme_queue_id {
	NVME_ADMIN_Q,
	NVME_IO_Q,
	NVME_Q_NUM,
};

/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	struct nvme_command *sq_cmds;
	struct nvme_completion *cqes;
	wait_queue_head_t sq_full;
	u32 __iomem *q_db;
	u16 q_depth;
	s16 cq_vector;
	u16 sq_head;
	u16 sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 cqe_seen;
	unsigned long cmdid_data[];
};
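
/*
 * Illustrative sketch (not used by the driver): how the per-queue doorbells
 * are laid out.  dev->dbs points at offset 0x1000 of BAR0 and dev->db_stride
 * is 1 << CAP.DSTRD (in dwords), so queue qid's submission doorbell lives at
 * dbs[qid * 2 * stride] and its completion doorbell one stride later, which
 * is exactly what nvme_alloc_queue() and nvme_submit_sync_cmd() compute.
 */
static inline u32 __iomem *nvme_example_sq_db(struct nvme_dev *dev, u16 qid)
{
	return &dev->dbs[qid * 2 * dev->db_stride];
}

static inline u32 __iomem *nvme_example_cq_db(struct nvme_dev *dev, u16 qid)
{
	return &dev->dbs[qid * 2 * dev->db_stride] + dev->db_stride;
}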

static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	int timeout;
	ulong start;

	/* Timeout field in the CAP register is in 500 millisecond units */
	timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
			return 0;
	}

	return -ETIME;
}
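
/*
 * Worked example for the wait above: CAP.TO is reported in 500 ms units, so a
 * controller advertising CAP.TO = 20 gives nvme_wait_ready() a budget of
 * 20 * 500 = 10000 ms for CSTS.RDY to reach the requested state before the
 * function gives up with -ETIME.
 */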

static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
			   int total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	u64 *prp_pool;
	int length = total_len;
	int i, nprps;
	u32 prps_per_page = (page_size >> 3) - 1;
	u32 num_pages;

	length -= (page_size - offset);

	if (length <= 0) {
		*prp2 = 0;
		return 0;
	}

	if (length)
		dma_addr += (page_size - offset);

	if (length <= page_size) {
		*prp2 = dma_addr;
		return 0;
	}

	nprps = DIV_ROUND_UP(length, page_size);
	num_pages = DIV_ROUND_UP(nprps, prps_per_page);

	if (nprps > dev->prp_entry_num) {
		free(dev->prp_pool);
		/*
		 * Always increase in increments of pages.  It doesn't waste
		 * much memory and reduces the number of allocations.
		 */
		dev->prp_pool = memalign(page_size, num_pages * page_size);
		if (!dev->prp_pool) {
			printf("Error: malloc prp_pool fail\n");
			return -ENOMEM;
		}
		dev->prp_entry_num = prps_per_page * num_pages;
	}

	prp_pool = dev->prp_pool;
	i = 0;
	while (nprps) {
		if (i == ((page_size >> 3) - 1)) {
			*(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
					page_size);
			i = 0;
			prp_pool += page_size;
		}
		*(prp_pool + i++) = cpu_to_le64(dma_addr);
		dma_addr += page_size;
		nprps--;
	}
	*prp2 = (ulong)dev->prp_pool;

	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
			   dev->prp_entry_num * sizeof(u64));

	return 0;
}
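
/*
 * Illustrative sketch (not used by the driver): how many PRP list entries a
 * transfer consumes.  PRP1 always covers the first, possibly unaligned, page;
 * if no more than one further page is needed, PRP2 points at it directly and
 * no list is built.  Otherwise every remaining page gets one 8-byte entry in
 * the pool above, matching the nprps arithmetic in nvme_setup_prps().
 */
static inline u32 nvme_example_prp_list_entries(struct nvme_dev *dev,
						u32 total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	u32 in_first_page = page_size - (dma_addr & (page_size - 1));

	if (total_len <= in_first_page + page_size)
		return 0;	/* PRP1 and PRP2 alone describe the buffer */

	return DIV_ROUND_UP(total_len - in_first_page, page_size);
}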

static __le16 nvme_get_cmd_id(void)
{
	static unsigned short cmdid;

	return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
	u64 start = (ulong)&nvmeq->cqes[index];
	u64 stop = start + sizeof(struct nvme_completion);

	invalidate_dcache_range(start, stop);

	return le16_to_cpu(readw(&(nvmeq->cqes[index].status)));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	u16 tail = nvmeq->sq_tail;

	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
	flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
			   (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

	if (++tail == nvmeq->q_depth)
		tail = 0;
	writel(tail, nvmeq->q_db);
	nvmeq->sq_tail = tail;
}

static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
				struct nvme_command *cmd,
				u32 *result, unsigned timeout)
{
	u16 head = nvmeq->cq_head;
	u16 phase = nvmeq->cq_phase;
	u16 status;
	ulong start_time;
	ulong timeout_us = timeout * 100000;

	cmd->common.command_id = nvme_get_cmd_id();
	nvme_submit_cmd(nvmeq, cmd);

	start_time = timer_get_us();

	for (;;) {
		status = nvme_read_completion_status(nvmeq, head);
		if ((status & 0x01) == phase)
			break;
		if (timeout_us > 0 && (timer_get_us() - start_time)
		    >= timeout_us)
			return -ETIMEDOUT;
	}

	status >>= 1;
	if (status) {
		printf("ERROR: status = %x, phase = %d, head = %d\n",
		       status, phase, head);
		status = 0;
		if (++head == nvmeq->q_depth) {
			head = 0;
			phase = !phase;
		}
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;

		return -EIO;
	}

	if (result)
		*result = le32_to_cpu(readl(&(nvmeq->cqes[head].result)));

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;
	}
	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
	nvmeq->cq_head = head;
	nvmeq->cq_phase = phase;

	return status;
}
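
/*
 * Illustrative sketch (not used by the driver): issuing a synchronous command
 * through the helper above once the I/O queue exists.  A flush carries no
 * data, so only the opcode and namespace id need to be filled in; the
 * nvme_cmd_flush opcode is assumed to come from the opcode enum in nvme.h,
 * and nsid would be the namespace id that nvme_blk_probe() assigned.
 */
static inline int nvme_example_flush(struct nvme_dev *dev, u32 nsid)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.common.opcode = nvme_cmd_flush;
	c.common.nsid = cpu_to_le32(nsid);

	return nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], &c, NULL,
				    IO_TIMEOUT);
}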

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
				 u32 *result)
{
	return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
				    result, ADMIN_TIMEOUT);
}

static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
					   int qid, int depth)
{
	struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
	if (!nvmeq)
		return NULL;
	memset(nvmeq, 0, sizeof(*nvmeq));

	nvmeq->cqes = (void *)memalign(4096, NVME_CQ_SIZE(depth));
	if (!nvmeq->cqes)
		goto free_nvmeq;
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

	nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
	if (!nvmeq->sq_cmds)
		goto free_queue;
	memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

	nvmeq->dev = dev;

	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->q_depth = depth;
	nvmeq->qid = qid;
	dev->queue_count++;
	dev->queues[qid] = nvmeq;

	return nvmeq;

 free_queue:
	free((void *)nvmeq->cqes);
 free_nvmeq:
	free(nvmeq);

	return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_ENABLE;
	writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);

	return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config &= ~NVME_CC_ENABLE;
	writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);

	return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	free((void *)nvmeq->cqes);
	free(nvmeq->sq_cmds);
	free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->queue_count - 1; i >= lowest; i--) {
		struct nvme_queue *nvmeq = dev->queues[i];
		dev->queue_count--;
		dev->queues[i] = NULL;
		nvme_free_queue(nvmeq);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
	flush_dcache_range((ulong)nvmeq->cqes,
			   (ulong)nvmeq->cqes + NVME_CQ_SIZE(nvmeq->q_depth));
	dev->online_queues++;
}

static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = dev->cap;
	struct nvme_queue *nvmeq;
	/* most architectures use 4KB as the page size */
	unsigned page_shift = 12;
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

	if (page_shift < dev_page_min) {
		debug("Device minimum page size (%u) too large for host (%u)\n",
		      1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	if (page_shift > dev_page_max) {
		debug("Device maximum page size (%u) smaller than host (%u)\n",
		      1 << dev_page_max, 1 << page_shift);
		page_shift = dev_page_max;
	}

	result = nvme_disable_ctrl(dev);
	if (result < 0)
		return result;

	nvmeq = dev->queues[NVME_ADMIN_Q];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	dev->page_size = 1 << page_shift;

	dev->ctrl_config = NVME_CC_CSS_NVM;
	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

	writel(aqa, &dev->bar->aqa);
	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

	result = nvme_enable_ctrl(dev);
	if (result)
		goto free_nvmeq;

	nvmeq->cq_vector = 0;

	nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

	return result;

 free_nvmeq:
	nvme_free_queues(dev, 0);

	return result;
}
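
/*
 * Worked example for the register setup above: with NVME_AQ_DEPTH = 2 the
 * AQA register is written as 0x00010001 (both ACQS and ASQS hold the
 * zero-based queue size of 1), ASQ/ACQ receive the physical addresses of the
 * admin submission and completion rings, and CC selects the NVM command set,
 * MPS = 0 (4 KiB pages for page_shift = 12), round-robin arbitration, no
 * shutdown notification, 64-byte SQ entries (IOSQES = 6) and 16-byte CQ
 * entries (IOCQES = 4).
 */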

static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
			    struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
			    struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

int nvme_identify(struct nvme_dev *dev, unsigned nsid,
		  unsigned cns, dma_addr_t dma_addr)
{
	struct nvme_command c;
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	int length = sizeof(struct nvme_id_ctrl);
	int ret;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	length -= (page_size - offset);
	if (length <= 0) {
		c.identify.prp2 = 0;
	} else {
		dma_addr += (page_size - offset);
		c.identify.prp2 = cpu_to_le64(dma_addr);
	}

	c.identify.cns = cpu_to_le32(cns);

	invalidate_dcache_range(dma_addr,
				dma_addr + sizeof(struct nvme_id_ctrl));

	ret = nvme_submit_admin_cmd(dev, &c, NULL);
	if (!ret)
		invalidate_dcache_range(dma_addr,
					dma_addr + sizeof(struct nvme_id_ctrl));

	return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	/*
	 * TODO: add cache invalidate operation when the size of
	 * the DMA buffer is known
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}
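
/*
 * Illustrative sketch (not used by the driver): reading back a feature that
 * is returned entirely in the completion's result dword, here the queue
 * count negotiated by nvme_set_queue_count() below.  Bits 15:0 hold the
 * number of submission queues and bits 31:16 the number of completion
 * queues, both zero-based.
 */
static inline int nvme_example_read_queue_count(struct nvme_dev *dev,
						u16 *nsqa, u16 *ncqa)
{
	u32 result = 0;
	int ret;

	ret = nvme_get_features(dev, NVME_FEAT_NUM_QUEUES, 0, 0, &result);
	if (ret)
		return ret < 0 ? ret : -EIO;

	*nsqa = (result & 0xffff) + 1;
	*ncqa = (result >> 16) + 1;

	return 0;
}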

int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	/*
	 * TODO: add cache flush operation when the size of
	 * the DMA buffer is known
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;

	nvmeq->cq_vector = qid - 1;
	result = nvme_alloc_cq(dev, qid, nvmeq);
	if (result < 0)
		goto release_cq;

	result = nvme_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		goto release_sq;

	nvme_init_queue(nvmeq, qid);

	return result;

 release_sq:
	nvme_delete_sq(dev, qid);
 release_cq:
	nvme_delete_cq(dev, qid);

	return result;
}

static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
	int status;
	u32 result;
	u32 q_count = (count - 1) | ((count - 1) << 16);

	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
			q_count, 0, &result);

	if (status < 0)
		return status;
	if (status > 1)
		return 0;

	return min(result & 0xffff, result >> 16) + 1;
}
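
/*
 * Worked example for the negotiation above: asking for count = 2 writes
 * dword11 = 0x00010001 (one extra submission and completion queue,
 * zero-based).  If the controller answers with result = 0x00070003 it
 * supports 4 submission and 8 completion queues, and the function returns
 * min(3, 7) + 1 = 4.
 */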

static void nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned int i;

	for (i = dev->queue_count; i <= dev->max_qid; i++)
		if (!nvme_alloc_queue(dev, i, dev->q_depth))
			break;

	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
		if (nvme_create_queue(dev->queues[i], i))
			break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	int nr_io_queues;
	int result;

	nr_io_queues = 1;
	result = nvme_set_queue_count(dev, nr_io_queues);
	if (result <= 0)
		return result;

	dev->max_qid = nr_io_queues;

	/* Free previously allocated queues */
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_create_io_queues(dev);

	return 0;
}

static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
	struct nvme_id_ctrl *ctrl;
	int ret;
	int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

	ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
	if (!ctrl)
		return -ENOMEM;

	ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
	if (ret) {
		free(ctrl);
		return -EIO;
	}

	dev->nn = le32_to_cpu(ctrl->nn);
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	if (ctrl->mdts)
		dev->max_transfer_shift = (ctrl->mdts + shift);
	else {
		/*
		 * Maximum Data Transfer Size (MDTS) field indicates the maximum
		 * data transfer size between the host and the controller. The
		 * host should not submit a command that exceeds this transfer
		 * size. The value is in units of the minimum memory page size
		 * and is reported as a power of two (2^n).
		 *
		 * The spec also says: a value of 0h indicates no restrictions
		 * on transfer size. But in nvme_blk_read/write() below we have
		 * the following algorithm for the maximum number of logical blocks
		 * per transfer:
		 *
		 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
		 *
		 * In order for lbas not to overflow, the maximum number is 15
		 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
		 * Let's use 20 which provides 1MB size.
		 */
		dev->max_transfer_shift = 20;
	}

	free(ctrl);
	return 0;
}
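
/*
 * Worked example for the MDTS handling above: with CAP.MPSMIN = 0 the unit
 * is 4 KiB, so a controller reporting mdts = 5 yields max_transfer_shift =
 * 5 + 12 = 17, i.e. 128 KiB per command; with 512-byte blocks that caps
 * nvme_blk_rw() below at 1 << (17 - 9) = 256 LBAs per submission.
 */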

int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
{
	struct nvme_ns *ns = dev_get_priv(udev);

	if (ns_id)
		*ns_id = ns->ns_id;
	if (eui64)
		memcpy(eui64, ns->eui64, sizeof(ns->eui64));

	return 0;
}

int nvme_scan_namespace(void)
{
	struct uclass *uc;
	struct udevice *dev;
	int ret;

	ret = uclass_get(UCLASS_NVME, &uc);
	if (ret)
		return ret;

	uclass_foreach_dev(dev, uc) {
		ret = device_probe(dev);
		if (ret)
			return ret;
	}

	return 0;
}

static int nvme_blk_probe(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev->parent);
	struct blk_desc *desc = dev_get_uclass_plat(udev);
	struct nvme_ns *ns = dev_get_priv(udev);
	u8 flbas;
	struct pci_child_platdata *pplat;
	struct nvme_id_ns *id;

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id)
		return -ENOMEM;

	memset(ns, 0, sizeof(*ns));
	ns->dev = ndev;
	/* extract the namespace id from the block device name */
	ns->ns_id = trailing_strtol(udev->name) + 1;
	if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
		free(id);
		return -EIO;
	}

	memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
	flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->flbas = flbas;
	ns->lba_shift = id->lbaf[flbas].ds;
	ns->mode_select_num_blocks = le64_to_cpu(id->nsze);
	ns->mode_select_block_len = 1 << ns->lba_shift;
	list_add(&ns->list, &ndev->namespaces);

	desc->lba = ns->mode_select_num_blocks;
	desc->log2blksz = ns->lba_shift;
	desc->blksz = 1 << ns->lba_shift;
	desc->bdev = udev;
	pplat = dev_get_parent_plat(udev->parent);
	sprintf(desc->vendor, "0x%.4x", pplat->vendor);
	memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
	memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));

	free(id);
	return 0;
}

static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
			 lbaint_t blkcnt, void *buffer, bool read)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command c;
	struct blk_desc *desc = dev_get_uclass_plat(udev);
	int status;
	u64 prp2;
	u64 total_len = blkcnt << desc->log2blksz;
	u64 temp_len = total_len;

	u64 slba = blknr;
	u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
	u64 total_lbas = blkcnt;

	flush_dcache_range((unsigned long)buffer,
			   (unsigned long)buffer + total_len);

	c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
	c.rw.flags = 0;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.control = 0;
	c.rw.dsmgmt = 0;
	c.rw.reftag = 0;
	c.rw.apptag = 0;
	c.rw.appmask = 0;
	c.rw.metadata = 0;

	while (total_lbas) {
		if (total_lbas < lbas) {
			lbas = (u16)total_lbas;
			total_lbas = 0;
		} else {
			total_lbas -= lbas;
		}

		if (nvme_setup_prps(dev, &prp2,
				    lbas << ns->lba_shift, (ulong)buffer))
			return -EIO;
		c.rw.slba = cpu_to_le64(slba);
		slba += lbas;
		c.rw.length = cpu_to_le16(lbas - 1);
		c.rw.prp1 = cpu_to_le64((ulong)buffer);
		c.rw.prp2 = cpu_to_le64(prp2);
		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
				&c, NULL, IO_TIMEOUT);
		if (status)
			break;
		temp_len -= (u32)lbas << ns->lba_shift;
		buffer += lbas << ns->lba_shift;
	}

	if (read)
		invalidate_dcache_range((unsigned long)buffer,
					(unsigned long)buffer + total_len);

	return (total_len - temp_len) >> desc->log2blksz;
}
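
/*
 * Worked example for the split loop above: with max_transfer_shift = 20 and
 * 512-byte blocks (lba_shift = 9) each pass moves at most
 * 1 << (20 - 9) = 2048 LBAs, i.e. 1 MiB, so a 5000-block read is issued as
 * three commands of 2048, 2048 and 904 blocks, and the return value is the
 * number of blocks actually transferred.
 */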

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
			   lbaint_t blkcnt, void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt, const void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static const struct blk_ops nvme_blk_ops = {
	.read	= nvme_blk_read,
	.write	= nvme_blk_write,
};

U_BOOT_DRIVER(nvme_blk) = {
	.name	= "nvme-blk",
	.id	= UCLASS_BLK,
	.probe	= nvme_blk_probe,
	.ops	= &nvme_blk_ops,
	.priv_auto	= sizeof(struct nvme_ns),
};

static int nvme_bind(struct udevice *udev)
{
	static int ndev_num;
	char name[20];

	sprintf(name, "nvme#%d", ndev_num++);

	return device_set_name(udev, name);
}

static int nvme_probe(struct udevice *udev)
{
	int ret;
	struct nvme_dev *ndev = dev_get_priv(udev);

	ndev->instance = trailing_strtol(udev->name);

	INIT_LIST_HEAD(&ndev->namespaces);
	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
			PCI_REGION_MEM);
	if (readl(&ndev->bar->csts) == -1) {
		ret = -ENODEV;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}

	ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
	if (!ndev->queues) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

	ndev->cap = nvme_readq(&ndev->bar->cap);
	ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
	ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
	ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

	ret = nvme_configure_admin_queue(ndev);
	if (ret)
		goto free_queue;

	/* Allocate after the page size is known */
	ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
	if (!ndev->prp_pool) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	ndev->prp_entry_num = MAX_PRP_POOL >> 3;

	ret = nvme_setup_io_queues(ndev);
	if (ret)
		goto free_queue;

	nvme_get_info_from_identify(ndev);

	return 0;

free_queue:
	free((void *)ndev->queues);
free_nvme:
	return ret;
}

U_BOOT_DRIVER(nvme) = {
	.name	= "nvme",
	.id	= UCLASS_NVME,
	.bind	= nvme_bind,
	.probe	= nvme_probe,
	.priv_auto	= sizeof(struct nvme_dev),
};

struct pci_device_id nvme_supported[] = {
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
	{}
};

U_BOOT_PCI_DEVICE(nvme, nvme_supported);