// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 */

#include <common.h>
#include <blk.h>
#include <cpu_func.h>
#include <dm.h>
#include <errno.h>
#include <log.h>
#include <malloc.h>
#include <memalign.h>
#include <time.h>
#include <dm/device-internal.h>
#include <linux/compat.h>
#include "nvme.h"

#define NVME_Q_DEPTH            2
#define NVME_AQ_DEPTH           2
#define NVME_SQ_SIZE(depth)     (depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)     (depth * sizeof(struct nvme_completion))
#define NVME_CQ_ALLOCATION      ALIGN(NVME_CQ_SIZE(NVME_Q_DEPTH), \
                                      ARCH_DMA_MINALIGN)
#define ADMIN_TIMEOUT           60
#define IO_TIMEOUT              30
#define MAX_PRP_POOL            512

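/**
 * nvme_wait_ready() - wait for the controller ready bit to reach a state
 *
 * @dev:        NVMe controller
 * @enabled:    true to wait for CSTS.RDY to become set, false for cleared
 * Return:      0 on success, -ETIME if the controller did not reach the
 *              requested state within the CAP.TO timeout
 */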
static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
        u32 bit = enabled ? NVME_CSTS_RDY : 0;
        int timeout;
        ulong start;

        /* Timeout field in the CAP register is in 500 millisecond units */
        timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

        start = get_timer(0);
        while (get_timer(start) < timeout) {
                if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
                        return 0;
        }

        return -ETIME;
}

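/**
 * nvme_setup_prps() - build the PRP entries for a data transfer
 *
 * PRP1 is expected to hold the (possibly unaligned) start of the buffer;
 * this helper computes the value for PRP2: either 0, the address of the
 * second buffer page, or the address of a PRP list built in dev->prp_pool.
 *
 * @dev:        NVMe controller
 * @prp2:       returns the value to place in the command's PRP2 field
 * @total_len:  total transfer length in bytes
 * @dma_addr:   bus address of the data buffer
 * Return:      0 on success, -ENOMEM if the PRP pool cannot be grown
 */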
static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
                           int total_len, u64 dma_addr)
{
        u32 page_size = dev->page_size;
        int offset = dma_addr & (page_size - 1);
        u64 *prp_pool;
        int length = total_len;
        int i, nprps;
        u32 prps_per_page = page_size >> 3;
        u32 num_pages;

        length -= (page_size - offset);

        if (length <= 0) {
                *prp2 = 0;
                return 0;
        }

        if (length)
                dma_addr += (page_size - offset);

        if (length <= page_size) {
                *prp2 = dma_addr;
                return 0;
        }

        nprps = DIV_ROUND_UP(length, page_size);
        num_pages = DIV_ROUND_UP(nprps, prps_per_page);

        if (nprps > dev->prp_entry_num) {
                free(dev->prp_pool);
                /*
                 * Always increase in increments of pages.  It doesn't waste
                 * much memory and reduces the number of allocations.
                 */
                dev->prp_pool = memalign(page_size, num_pages * page_size);
                if (!dev->prp_pool) {
                        printf("Error: malloc prp_pool fail\n");
                        return -ENOMEM;
                }
                dev->prp_entry_num = prps_per_page * num_pages;
        }

        prp_pool = dev->prp_pool;
        i = 0;
        while (nprps) {
                if (i == ((page_size >> 3) - 1)) {
                        *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
                                        page_size);
                        i = 0;
                        /* advance to the next page of the PRP list */
                        prp_pool += page_size >> 3;
                }
                *(prp_pool + i++) = cpu_to_le64(dma_addr);
                dma_addr += page_size;
                nprps--;
        }
        *prp2 = (ulong)dev->prp_pool;

        flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
                           dev->prp_entry_num * sizeof(u64));

        return 0;
}

static __le16 nvme_get_cmd_id(void)
{
        static unsigned short cmdid;

        return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

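/**
 * nvme_read_completion_status() - read the status field of a CQ entry
 *
 * @nvmeq:      completion queue to read from
 * @index:      index of the entry within the queue
 * Return:      the raw 16-bit status field, including the phase bit
 */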
static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
        /*
         * Single CQ entries are always smaller than a cache line, so we
         * can't invalidate them individually. However CQ entries are
         * read only by the CPU, so it's safe to always invalidate all of them,
         * as the cache line should never become dirty.
         */
        ulong start = (ulong)&nvmeq->cqes[0];
        ulong stop = start + NVME_CQ_ALLOCATION;

        invalidate_dcache_range(start, stop);

        return readw(&(nvmeq->cqes[index].status));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:      The queue to use
 * @cmd:        The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
        struct nvme_ops *ops;
        u16 tail = nvmeq->sq_tail;

        memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
        flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
                           (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

        ops = (struct nvme_ops *)nvmeq->dev->udev->driver->ops;
        if (ops && ops->submit_cmd) {
                ops->submit_cmd(nvmeq, cmd);
                return;
        }

        if (++tail == nvmeq->q_depth)
                tail = 0;
        writel(tail, nvmeq->q_db);
        nvmeq->sq_tail = tail;
}

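/**
 * nvme_submit_sync_cmd() - submit a command and poll for its completion
 *
 * @nvmeq:      queue to submit the command on
 * @cmd:        command to send
 * @result:     if non-NULL, returns the completion's result field
 * @timeout:    how long to poll for the completion before giving up
 * Return:      0 on success, -EIO if the controller reported an error
 *              status, -ETIMEDOUT if no completion arrived in time
 */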
static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
                                struct nvme_command *cmd,
                                u32 *result, unsigned timeout)
{
        struct nvme_ops *ops;
        u16 head = nvmeq->cq_head;
        u16 phase = nvmeq->cq_phase;
        u16 status;
        ulong start_time;
        ulong timeout_us = timeout * 100000;

        cmd->common.command_id = nvme_get_cmd_id();
        nvme_submit_cmd(nvmeq, cmd);

        start_time = timer_get_us();

        for (;;) {
                status = nvme_read_completion_status(nvmeq, head);
                if ((status & 0x01) == phase)
                        break;
                if (timeout_us > 0 && (timer_get_us() - start_time)
                    >= timeout_us)
                        return -ETIMEDOUT;
        }

        ops = (struct nvme_ops *)nvmeq->dev->udev->driver->ops;
        if (ops && ops->complete_cmd)
                ops->complete_cmd(nvmeq, cmd);

        status >>= 1;
        if (status) {
                printf("ERROR: status = %x, phase = %d, head = %d\n",
                       status, phase, head);
                status = 0;
                if (++head == nvmeq->q_depth) {
                        head = 0;
                        phase = !phase;
                }
                writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
                nvmeq->cq_head = head;
                nvmeq->cq_phase = phase;

                return -EIO;
        }

        if (result)
                *result = readl(&(nvmeq->cqes[head].result));

        if (++head == nvmeq->q_depth) {
                head = 0;
                phase = !phase;
        }
        writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
        nvmeq->cq_head = head;
        nvmeq->cq_phase = phase;

        return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
                                 u32 *result)
{
        return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
                                    result, ADMIN_TIMEOUT);
}

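/**
 * nvme_alloc_queue() - allocate a submission/completion queue pair
 *
 * @dev:        NVMe controller
 * @qid:        queue identifier (0 is the admin queue)
 * @depth:      number of entries in each queue
 * Return:      pointer to the new queue, or NULL on allocation failure
 */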
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
                                           int qid, int depth)
{
        struct nvme_ops *ops;
        struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
        if (!nvmeq)
                return NULL;
        memset(nvmeq, 0, sizeof(*nvmeq));

        nvmeq->cqes = (void *)memalign(4096, NVME_CQ_ALLOCATION);
        if (!nvmeq->cqes)
                goto free_nvmeq;
        memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

        nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
        if (!nvmeq->sq_cmds)
                goto free_queue;
        memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

        nvmeq->dev = dev;

        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
        nvmeq->q_depth = depth;
        nvmeq->qid = qid;
        dev->queue_count++;
        dev->queues[qid] = nvmeq;

        ops = (struct nvme_ops *)dev->udev->driver->ops;
        if (ops && ops->setup_queue)
                ops->setup_queue(nvmeq);

        return nvmeq;

 free_queue:
        free((void *)nvmeq->cqes);
 free_nvmeq:
        free(nvmeq);

        return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.delete_queue.opcode = opcode;
        c.delete_queue.qid = cpu_to_le16(id);

        return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
        return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
        return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
        dev->ctrl_config &= ~NVME_CC_SHN_MASK;
        dev->ctrl_config |= NVME_CC_ENABLE;
        writel(dev->ctrl_config, &dev->bar->cc);

        return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
        dev->ctrl_config &= ~NVME_CC_SHN_MASK;
        dev->ctrl_config &= ~NVME_CC_ENABLE;
        writel(dev->ctrl_config, &dev->bar->cc);

        return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
        free((void *)nvmeq->cqes);
        free(nvmeq->sq_cmds);
        free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
        int i;

        for (i = dev->queue_count - 1; i >= lowest; i--) {
                struct nvme_queue *nvmeq = dev->queues[i];
                dev->queue_count--;
                dev->queues[i] = NULL;
                nvme_free_queue(nvmeq);
        }
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
        struct nvme_dev *dev = nvmeq->dev;

        nvmeq->sq_tail = 0;
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
        memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
        flush_dcache_range((ulong)nvmeq->cqes,
                           (ulong)nvmeq->cqes + NVME_CQ_ALLOCATION);
        dev->online_queues++;
}

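/**
 * nvme_configure_admin_queue() - set up the admin queue and enable the ctrl
 *
 * @dev:        NVMe controller
 * Return:      0 on success, negative error code otherwise
 */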
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
        int result;
        u32 aqa;
        u64 cap = dev->cap;
        struct nvme_queue *nvmeq;
        /* most architectures use 4KB as the page size */
        unsigned page_shift = 12;
        unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
        unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

        if (page_shift < dev_page_min) {
                debug("Device minimum page size (%u) too large for host (%u)\n",
                      1 << dev_page_min, 1 << page_shift);
                return -ENODEV;
        }

        if (page_shift > dev_page_max) {
                debug("Device maximum page size (%u) smaller than host (%u)\n",
                      1 << dev_page_max, 1 << page_shift);
                page_shift = dev_page_max;
        }

        result = nvme_disable_ctrl(dev);
        if (result < 0)
                return result;

        nvmeq = dev->queues[NVME_ADMIN_Q];
        if (!nvmeq) {
                nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
                if (!nvmeq)
                        return -ENOMEM;
        }

        aqa = nvmeq->q_depth - 1;
        aqa |= aqa << 16;

        dev->page_size = 1 << page_shift;

        dev->ctrl_config = NVME_CC_CSS_NVM;
        dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
        dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
        dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

        writel(aqa, &dev->bar->aqa);
        nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
        nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

        result = nvme_enable_ctrl(dev);
        if (result)
                goto free_nvmeq;

        nvmeq->cq_vector = 0;

        nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

        return result;

 free_nvmeq:
        nvme_free_queues(dev, 0);

        return result;
}

static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
                            struct nvme_queue *nvmeq)
{
        struct nvme_command c;
        int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

        memset(&c, 0, sizeof(c));
        c.create_cq.opcode = nvme_admin_create_cq;
        c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
        c.create_cq.cqid = cpu_to_le16(qid);
        c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
        c.create_cq.cq_flags = cpu_to_le16(flags);
        c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

        return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
                            struct nvme_queue *nvmeq)
{
        struct nvme_command c;
        int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

        memset(&c, 0, sizeof(c));
        c.create_sq.opcode = nvme_admin_create_sq;
        c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
        c.create_sq.sqid = cpu_to_le16(qid);
        c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
        c.create_sq.sq_flags = cpu_to_le16(flags);
        c.create_sq.cqid = cpu_to_le16(qid);

        return nvme_submit_admin_cmd(dev, &c, NULL);
}

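/**
 * nvme_identify() - issue an Identify admin command
 *
 * @dev:        NVMe controller
 * @nsid:       namespace identifier (for CNS values that require one)
 * @cns:        Controller or Namespace Structure value to identify
 * @dma_addr:   bus address of the buffer receiving the identify data
 * Return:      0 on success, negative error code otherwise
 */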
int nvme_identify(struct nvme_dev *dev, unsigned nsid,
                  unsigned cns, dma_addr_t dma_addr)
{
        struct nvme_command c;
        u32 page_size = dev->page_size;
        int offset = dma_addr & (page_size - 1);
        int length = sizeof(struct nvme_id_ctrl);
        int ret;

        memset(&c, 0, sizeof(c));
        c.identify.opcode = nvme_admin_identify;
        c.identify.nsid = cpu_to_le32(nsid);
        c.identify.prp1 = cpu_to_le64(dma_addr);

        length -= (page_size - offset);
        if (length <= 0) {
                c.identify.prp2 = 0;
        } else {
                dma_addr += (page_size - offset);
                c.identify.prp2 = cpu_to_le64(dma_addr);
        }

        c.identify.cns = cpu_to_le32(cns);

        invalidate_dcache_range(dma_addr,
                                dma_addr + sizeof(struct nvme_id_ctrl));

        ret = nvme_submit_admin_cmd(dev, &c, NULL);
        if (!ret)
                invalidate_dcache_range(dma_addr,
                                        dma_addr + sizeof(struct nvme_id_ctrl));

        return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
                      dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;
        int ret;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_get_features;
        c.features.nsid = cpu_to_le32(nsid);
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);

        ret = nvme_submit_admin_cmd(dev, &c, result);

        /*
         * TODO: Add some cache invalidation when a DMA buffer is involved
         * in the request, here and before the command gets submitted. The
         * buffer size varies by feature, also some features use a different
         * field in the command packet to hold the buffer address.
         * Section 5.21.1 (Set Features command) in the NVMe specification
         * details the buffer requirements for each feature.
         *
         * At the moment there is no user of this function.
         */

        return ret;
}

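/**
 * nvme_set_features() - issue a Set Features admin command
 *
 * @dev:        NVMe controller
 * @fid:        feature identifier
 * @dword11:    feature-specific value for command dword 11
 * @dma_addr:   bus address of an optional data buffer (0 if unused)
 * @result:     if non-NULL, returns the completion's result field
 * Return:      0 on success, negative error code otherwise
 */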
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
                      dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_set_features;
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);

        /*
         * TODO: Add a cache clean (aka flush) operation when a DMA buffer is
         * involved in the request. The buffer size varies by feature, also
         * some features use a different field in the command packet to hold
         * the buffer address. Section 5.21.1 (Set Features command) in the
         * NVMe specification details the buffer requirements for each
         * feature.
         * At the moment the only user of this function is not using
         * any DMA buffer at all.
         */

        return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
        struct nvme_dev *dev = nvmeq->dev;
        int result;

        nvmeq->cq_vector = qid - 1;
        result = nvme_alloc_cq(dev, qid, nvmeq);
        if (result < 0)
                goto release_cq;

        result = nvme_alloc_sq(dev, qid, nvmeq);
        if (result < 0)
                goto release_sq;

        nvme_init_queue(nvmeq, qid);

        return result;

 release_sq:
        nvme_delete_sq(dev, qid);
 release_cq:
        nvme_delete_cq(dev, qid);

        return result;
}

static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
        int status;
        u32 result;
        u32 q_count = (count - 1) | ((count - 1) << 16);

        status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
                        q_count, 0, &result);

        if (status < 0)
                return status;
        if (status > 1)
                return 0;

        return min(result & 0xffff, result >> 16) + 1;
}

static void nvme_create_io_queues(struct nvme_dev *dev)
{
        unsigned int i;

        for (i = dev->queue_count; i <= dev->max_qid; i++)
                if (!nvme_alloc_queue(dev, i, dev->q_depth))
                        break;

        for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
                if (nvme_create_queue(dev->queues[i], i))
                        break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
        int nr_io_queues;
        int result;

        nr_io_queues = 1;
        result = nvme_set_queue_count(dev, nr_io_queues);
        if (result <= 0)
                return result;

        dev->max_qid = nr_io_queues;

        /* Free previously allocated queues */
        nvme_free_queues(dev, nr_io_queues + 1);
        nvme_create_io_queues(dev);

        return 0;
}

static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
        struct nvme_id_ctrl *ctrl;
        int ret;
        int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

        ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
        if (!ctrl)
                return -ENOMEM;

        ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
        if (ret) {
                free(ctrl);
                return -EIO;
        }

        dev->nn = le32_to_cpu(ctrl->nn);
        dev->vwc = ctrl->vwc;
        memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
        memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
        memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
        if (ctrl->mdts) {
                dev->max_transfer_shift = (ctrl->mdts + shift);
        } else {
                /*
                 * Maximum Data Transfer Size (MDTS) field indicates the maximum
                 * data transfer size between the host and the controller. The
                 * host should not submit a command that exceeds this transfer
                 * size. The value is in units of the minimum memory page size
                 * and is reported as a power of two (2^n).
                 *
                 * The spec also says: a value of 0h indicates no restrictions
                 * on transfer size. But in nvme_blk_read/write() below we have
                 * the following algorithm for maximum number of logical blocks
                 * per transfer:
                 *
                 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
                 *
                 * In order for lbas not to overflow, the maximum number is 15
                 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
                 * Let's use 20 which provides 1MB size.
                 */
                dev->max_transfer_shift = 20;
        }

        free(ctrl);
        return 0;
}

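/**
 * nvme_get_namespace_id() - return the namespace identity of a block device
 *
 * @udev:       NVMe block device (child of the NVMe controller)
 * @ns_id:      if non-NULL, returns the namespace ID
 * @eui64:      if non-NULL, returns the IEEE Extended Unique Identifier
 * Return:      0 (always succeeds)
 */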
int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
{
        struct nvme_ns *ns = dev_get_priv(udev);

        if (ns_id)
                *ns_id = ns->ns_id;
        if (eui64)
                memcpy(eui64, ns->eui64, sizeof(ns->eui64));

        return 0;
}

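/**
 * nvme_scan_namespace() - probe all devices in the NVMe uclass
 *
 * Probing an NVMe controller creates the block devices for its active
 * namespaces, so this effectively scans all namespaces.
 *
 * Return:      0 on success, negative error code otherwise
 */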
int nvme_scan_namespace(void)
{
        struct uclass *uc;
        struct udevice *dev;
        int ret;

        ret = uclass_get(UCLASS_NVME, &uc);
        if (ret)
                return ret;

        uclass_foreach_dev(dev, uc) {
                ret = device_probe(dev);
                if (ret)
                        return ret;
        }

        return 0;
}

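/**
 * nvme_blk_probe() - probe one NVMe block device (one namespace)
 *
 * Identifies the namespace and fills in the block descriptor (block size,
 * capacity, vendor/product/revision strings).
 *
 * @udev:       NVMe block device to probe
 * Return:      0 on success, negative error code otherwise
 */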
static int nvme_blk_probe(struct udevice *udev)
{
        struct nvme_dev *ndev = dev_get_priv(udev->parent);
        struct blk_desc *desc = dev_get_uclass_plat(udev);
        struct nvme_ns *ns = dev_get_priv(udev);
        u8 flbas;
        struct nvme_id_ns *id;

        id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
        if (!id)
                return -ENOMEM;

        ns->dev = ndev;
        /* extract the namespace id from the block device name */
        ns->ns_id = trailing_strtol(udev->name);
        if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
                free(id);
                return -EIO;
        }

        memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
        flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
        ns->flbas = flbas;
        ns->lba_shift = id->lbaf[flbas].ds;
        list_add(&ns->list, &ndev->namespaces);

        desc->lba = le64_to_cpu(id->nsze);
        desc->log2blksz = ns->lba_shift;
        desc->blksz = 1 << ns->lba_shift;
        desc->bdev = udev;
        memcpy(desc->vendor, ndev->vendor, sizeof(ndev->vendor));
        memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
        memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));

        free(id);
        return 0;
}

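/**
 * nvme_blk_rw() - common block read/write implementation
 *
 * Splits the request into chunks no larger than the controller's maximum
 * transfer size and issues one NVMe read/write command per chunk.
 *
 * @udev:       NVMe block device
 * @blknr:      start LBA
 * @blkcnt:     number of logical blocks to transfer
 * @buffer:     data buffer
 * @read:       true for a read, false for a write
 * Return:      number of blocks actually transferred
 */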
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
                         lbaint_t blkcnt, void *buffer, bool read)
{
        struct nvme_ns *ns = dev_get_priv(udev);
        struct nvme_dev *dev = ns->dev;
        struct nvme_command c;
        struct blk_desc *desc = dev_get_uclass_plat(udev);
        int status;
        u64 prp2;
        u64 total_len = blkcnt << desc->log2blksz;
        u64 temp_len = total_len;
        uintptr_t temp_buffer = (uintptr_t)buffer;

        u64 slba = blknr;
        u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
        u64 total_lbas = blkcnt;

        flush_dcache_range((unsigned long)buffer,
                           (unsigned long)buffer + total_len);

        c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
        c.rw.flags = 0;
        c.rw.nsid = cpu_to_le32(ns->ns_id);
        c.rw.control = 0;
        c.rw.dsmgmt = 0;
        c.rw.reftag = 0;
        c.rw.apptag = 0;
        c.rw.appmask = 0;
        c.rw.metadata = 0;

        while (total_lbas) {
                if (total_lbas < lbas) {
                        lbas = (u16)total_lbas;
                        total_lbas = 0;
                } else {
                        total_lbas -= lbas;
                }

                if (nvme_setup_prps(dev, &prp2,
                                    lbas << ns->lba_shift, temp_buffer))
                        return -EIO;
                c.rw.slba = cpu_to_le64(slba);
                slba += lbas;
                c.rw.length = cpu_to_le16(lbas - 1);
                c.rw.prp1 = cpu_to_le64(temp_buffer);
                c.rw.prp2 = cpu_to_le64(prp2);
                status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
                                &c, NULL, IO_TIMEOUT);
                if (status)
                        break;
                temp_len -= (u32)lbas << ns->lba_shift;
                temp_buffer += lbas << ns->lba_shift;
        }

        if (read)
                invalidate_dcache_range((unsigned long)buffer,
                                        (unsigned long)buffer + total_len);

        return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
                           lbaint_t blkcnt, void *buffer)
{
        return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
                            lbaint_t blkcnt, const void *buffer)
{
        return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static const struct blk_ops nvme_blk_ops = {
        .read   = nvme_blk_read,
        .write  = nvme_blk_write,
};

U_BOOT_DRIVER(nvme_blk) = {
        .name   = "nvme-blk",
        .id     = UCLASS_BLK,
        .probe  = nvme_blk_probe,
        .ops    = &nvme_blk_ops,
        .priv_auto      = sizeof(struct nvme_ns),
};

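/**
 * nvme_init() - initialise an NVMe controller
 *
 * Configures the admin queue, allocates the PRP pool and the I/O queue,
 * reads the controller identify data and creates a block device for each
 * active namespace.
 *
 * @udev:       NVMe controller device
 * Return:      0 on success, negative error code otherwise
 */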
int nvme_init(struct udevice *udev)
{
        struct nvme_dev *ndev = dev_get_priv(udev);
        struct nvme_id_ns *id;
        int ret;

        ndev->udev = udev;
        INIT_LIST_HEAD(&ndev->namespaces);
        if (readl(&ndev->bar->csts) == -1) {
                ret = -ENODEV;
                printf("Error: %s: NVMe controller not accessible\n",
                       udev->name);
                goto free_nvme;
        }

        ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
        if (!ndev->queues) {
                ret = -ENOMEM;
                printf("Error: %s: Out of memory!\n", udev->name);
                goto free_nvme;
        }
        memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

        ndev->cap = nvme_readq(&ndev->bar->cap);
        ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
        ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
        ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

        ret = nvme_configure_admin_queue(ndev);
        if (ret)
                goto free_queue;

        /* Allocate after the page size is known */
        ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
        if (!ndev->prp_pool) {
                ret = -ENOMEM;
                printf("Error: %s: Out of memory!\n", udev->name);
                goto free_nvme;
        }
        ndev->prp_entry_num = MAX_PRP_POOL >> 3;

        ret = nvme_setup_io_queues(ndev);
        if (ret)
                goto free_queue;

        nvme_get_info_from_identify(ndev);

        /* Create a blk device for each namespace */

        id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
        if (!id) {
                ret = -ENOMEM;
                goto free_queue;
        }

        for (int i = 1; i <= ndev->nn; i++) {
                struct udevice *ns_udev;
                char name[20];

                memset(id, 0, sizeof(*id));
                if (nvme_identify(ndev, i, 0, (dma_addr_t)(long)id)) {
                        ret = -EIO;
                        goto free_id;
                }

                /* skip inactive namespace */
                if (!id->nsze)
                        continue;

                /*
                 * Encode the namespace id to the device name so that
                 * we can extract it when doing the probe.
                 */
                sprintf(name, "blk#%d", i);

                /* The real blksz and size will be set by nvme_blk_probe() */
                ret = blk_create_devicef(udev, "nvme-blk", name, IF_TYPE_NVME,
                                         -1, 512, 0, &ns_udev);
                if (ret)
                        goto free_id;

                ret = blk_probe_or_unbind(ns_udev);
                if (ret)
                        goto free_id;
        }

        free(id);
        return 0;

free_id:
        free(id);
free_queue:
        free((void *)ndev->queues);
free_nvme:
        return ret;
}

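/**
 * nvme_shutdown() - disable an NVMe controller
 *
 * @udev:       NVMe controller device
 * Return:      0 on success, negative error code otherwise
 */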
int nvme_shutdown(struct udevice *udev)
{
        struct nvme_dev *ndev = dev_get_priv(udev);

        return nvme_disable_ctrl(ndev);
}