Merge tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 17 May 2019 02:08:15 +0000 (19:08 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 17 May 2019 02:08:15 +0000 (19:08 -0700)
Pull more block updates from Jens Axboe:
 "This is mainly some late lightnvm changes that came in just before the
  merge window, as well as fixes that have been queued up since the
  initial pull request was frozen.

  This contains:

   - lightnvm changes, fixing race conditions, improving memory
     utilization, and improving pblk compatability (Chansol, Igor,
     Marcin)

   - NVMe pull request with minor fixes all over the map (via Christoph)

   - remove redundant error print in sata_rcar (Geert)

   - struct_size() cleanup (Jackie)

   - dasd CONFIG_LBADF warning fix (Ming)

   - brd cond_resched() improvement (Mikulas)"

* tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block: (41 commits)
  block/bio-integrity: use struct_size() in kmalloc()
  nvme: validate cntlid during controller initialisation
  nvme: change locking for the per-subsystem controller list
  nvme: trace all async notice events
  nvme: fix typos in nvme status code values
  nvme-fabrics: remove unused argument
  nvme-multipath: avoid crash on invalid subsystem cntlid enumeration
  nvme-fc: use separate work queue to avoid warning
  nvme-rdma: remove redundant reference between ib_device and tagset
  nvme-pci: mark expected switch fall-through
  nvme-pci: add known admin effects to augument admin effects log page
  nvme-pci: init shadow doorbell after each reset
  brd: add cond_resched to brd_free_pages
  sata_rcar: Remove ata_host_alloc() error printing
  s390/dasd: fix build warning in dasd_eckd_build_cp_raw
  lightnvm: pblk: use nvm_rq_to_ppa_list()
  lightnvm: pblk: simplify partial read path
  lightnvm: do not remove instance under global lock
  lightnvm: track inflight target creations
  lightnvm: pblk: recover only written metadata
  ...

25 files changed:
block/bio-integrity.c
drivers/ata/sata_rcar.c
drivers/block/brd.c
drivers/lightnvm/core.c
drivers/lightnvm/pblk-cache.c
drivers/lightnvm/pblk-core.c
drivers/lightnvm/pblk-gc.c
drivers/lightnvm/pblk-init.c
drivers/lightnvm/pblk-map.c
drivers/lightnvm/pblk-rb.c
drivers/lightnvm/pblk-read.c
drivers/lightnvm/pblk-recovery.c
drivers/lightnvm/pblk-write.c
drivers/lightnvm/pblk.h
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fc.c
drivers/nvme/host/lightnvm.c
drivers/nvme/host/multipath.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/trace.h
drivers/s390/block/dasd_eckd.c
include/linux/lightnvm.h
include/linux/nvme.h

index 4253667..4db6208 100644 (file)
@@ -43,8 +43,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
        unsigned inline_vecs;
 
        if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
-               bip = kmalloc(sizeof(struct bio_integrity_payload) +
-                             sizeof(struct bio_vec) * nr_vecs, gfp_mask);
+               bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
                inline_vecs = nr_vecs;
        } else {
                bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
index 59b2317..3495e17 100644 (file)
@@ -909,7 +909,6 @@ static int sata_rcar_probe(struct platform_device *pdev)
 
        host = ata_host_alloc(dev, 1);
        if (!host) {
-               dev_err(dev, "ata_host_alloc failed\n");
                ret = -ENOMEM;
                goto err_pm_put;
        }
index 17defbf..2da615b 100644 (file)
@@ -153,6 +153,12 @@ static void brd_free_pages(struct brd_device *brd)
                pos++;
 
                /*
+                * It takes 3.4 seconds to remove 80GiB ramdisk.
+                * So, we need cond_resched to avoid stalling the CPU.
+                */
+               cond_resched();
+
+               /*
                 * This assumes radix_tree_gang_lookup always returns as
                 * many pages as possible. If the radix-tree code changes,
                 * so will this have to.
index 5f82036..0df7454 100644 (file)
@@ -45,6 +45,8 @@ struct nvm_dev_map {
        int num_ch;
 };
 
+static void nvm_free(struct kref *ref);
+
 static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
 {
        struct nvm_target *tgt;
@@ -325,6 +327,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
        struct nvm_target *t;
        struct nvm_tgt_dev *tgt_dev;
        void *targetdata;
+       unsigned int mdts;
        int ret;
 
        switch (create->conf.type) {
@@ -412,8 +415,12 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
        tdisk->private_data = targetdata;
        tqueue->queuedata = targetdata;
 
-       blk_queue_max_hw_sectors(tqueue,
-                       (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
+       mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
+       if (dev->geo.mdts) {
+               mdts = min_t(u32, dev->geo.mdts,
+                               (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
+       }
+       blk_queue_max_hw_sectors(tqueue, mdts);
 
        set_capacity(tdisk, tt->capacity(targetdata));
        add_disk(tdisk);
@@ -476,7 +483,6 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
 
 /**
  * nvm_remove_tgt - Removes a target from the media manager
- * @dev:       device
  * @remove:    ioctl structure with target name to remove.
  *
  * Returns:
@@ -484,18 +490,28 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
  * 1: on not found
  * <0: on error
  */
-static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
+static int nvm_remove_tgt(struct nvm_ioctl_remove *remove)
 {
        struct nvm_target *t;
+       struct nvm_dev *dev;
 
-       mutex_lock(&dev->mlock);
-       t = nvm_find_target(dev, remove->tgtname);
-       if (!t) {
+       down_read(&nvm_lock);
+       list_for_each_entry(dev, &nvm_devices, devices) {
+               mutex_lock(&dev->mlock);
+               t = nvm_find_target(dev, remove->tgtname);
+               if (t) {
+                       mutex_unlock(&dev->mlock);
+                       break;
+               }
                mutex_unlock(&dev->mlock);
-               return 1;
        }
+       up_read(&nvm_lock);
+
+       if (!t)
+               return 1;
+
        __nvm_remove_target(t, true);
-       mutex_unlock(&dev->mlock);
+       kref_put(&dev->ref, nvm_free);
 
        return 0;
 }
@@ -1089,15 +1105,16 @@ err_fmtype:
        return ret;
 }
 
-static void nvm_free(struct nvm_dev *dev)
+static void nvm_free(struct kref *ref)
 {
-       if (!dev)
-               return;
+       struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref);
 
        if (dev->dma_pool)
                dev->ops->destroy_dma_pool(dev->dma_pool);
 
-       nvm_unregister_map(dev);
+       if (dev->rmap)
+               nvm_unregister_map(dev);
+
        kfree(dev->lun_map);
        kfree(dev);
 }
@@ -1134,7 +1151,13 @@ err:
 
 struct nvm_dev *nvm_alloc_dev(int node)
 {
-       return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
+       struct nvm_dev *dev;
+
+       dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
+       if (dev)
+               kref_init(&dev->ref);
+
+       return dev;
 }
 EXPORT_SYMBOL(nvm_alloc_dev);
 
@@ -1142,12 +1165,16 @@ int nvm_register(struct nvm_dev *dev)
 {
        int ret, exp_pool_size;
 
-       if (!dev->q || !dev->ops)
+       if (!dev->q || !dev->ops) {
+               kref_put(&dev->ref, nvm_free);
                return -EINVAL;
+       }
 
        ret = nvm_init(dev);
-       if (ret)
+       if (ret) {
+               kref_put(&dev->ref, nvm_free);
                return ret;
+       }
 
        exp_pool_size = max_t(int, PAGE_SIZE,
                              (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
@@ -1157,7 +1184,7 @@ int nvm_register(struct nvm_dev *dev)
                                                  exp_pool_size);
        if (!dev->dma_pool) {
                pr_err("nvm: could not create dma pool\n");
-               nvm_free(dev);
+               kref_put(&dev->ref, nvm_free);
                return -ENOMEM;
        }
 
@@ -1179,6 +1206,7 @@ void nvm_unregister(struct nvm_dev *dev)
                if (t->dev->parent != dev)
                        continue;
                __nvm_remove_target(t, false);
+               kref_put(&dev->ref, nvm_free);
        }
        mutex_unlock(&dev->mlock);
 
@@ -1186,13 +1214,14 @@ void nvm_unregister(struct nvm_dev *dev)
        list_del(&dev->devices);
        up_write(&nvm_lock);
 
-       nvm_free(dev);
+       kref_put(&dev->ref, nvm_free);
 }
 EXPORT_SYMBOL(nvm_unregister);
 
 static int __nvm_configure_create(struct nvm_ioctl_create *create)
 {
        struct nvm_dev *dev;
+       int ret;
 
        down_write(&nvm_lock);
        dev = nvm_find_nvm_dev(create->dev);
@@ -1203,7 +1232,12 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
                return -EINVAL;
        }
 
-       return nvm_create_tgt(dev, create);
+       kref_get(&dev->ref);
+       ret = nvm_create_tgt(dev, create);
+       if (ret)
+               kref_put(&dev->ref, nvm_free);
+
+       return ret;
 }
 
 static long nvm_ioctl_info(struct file *file, void __user *arg)
@@ -1322,8 +1356,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
 static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
 {
        struct nvm_ioctl_remove remove;
-       struct nvm_dev *dev;
-       int ret = 0;
 
        if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
                return -EFAULT;
@@ -1335,13 +1367,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
                return -EINVAL;
        }
 
-       list_for_each_entry(dev, &nvm_devices, devices) {
-               ret = nvm_remove_tgt(dev, &remove);
-               if (!ret)
-                       break;
-       }
-
-       return ret;
+       return nvm_remove_tgt(&remove);
 }
 
 /* kept for compatibility reasons */
index c9fa26f..5c1034c 100644 (file)
@@ -18,7 +18,8 @@
 
 #include "pblk.h"
 
-int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
+void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
+                               unsigned long flags)
 {
        struct request_queue *q = pblk->dev->q;
        struct pblk_w_ctx w_ctx;
@@ -43,6 +44,7 @@ retry:
                goto retry;
        case NVM_IO_ERR:
                pblk_pipeline_stop(pblk);
+               bio_io_error(bio);
                goto out;
        }
 
@@ -79,7 +81,9 @@ retry:
 out:
        generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
        pblk_write_should_kick(pblk);
-       return ret;
+
+       if (ret == NVM_IO_DONE)
+               bio_endio(bio);
 }
 
 /*
index 6ca8688..7735378 100644 (file)
@@ -562,11 +562,9 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
 
 int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
 {
-       struct ppa_addr *ppa_list;
+       struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
        int ret;
 
-       ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-
        pblk_down_chunk(pblk, ppa_list[0]);
        ret = pblk_submit_io_sync(pblk, rqd);
        pblk_up_chunk(pblk, ppa_list[0]);
@@ -725,6 +723,7 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
        struct nvm_tgt_dev *dev = pblk->dev;
        struct pblk_line_meta *lm = &pblk->lm;
        struct bio *bio;
+       struct ppa_addr *ppa_list;
        struct nvm_rq rqd;
        u64 paddr = pblk_line_smeta_start(pblk, line);
        int i, ret;
@@ -748,9 +747,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
        rqd.opcode = NVM_OP_PREAD;
        rqd.nr_ppas = lm->smeta_sec;
        rqd.is_seq = 1;
+       ppa_list = nvm_rq_to_ppa_list(&rqd);
 
        for (i = 0; i < lm->smeta_sec; i++, paddr++)
-               rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+               ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
 
        ret = pblk_submit_io_sync(pblk, &rqd);
        if (ret) {
@@ -761,8 +761,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
 
        atomic_dec(&pblk->inflight_io);
 
-       if (rqd.error)
+       if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
                pblk_log_read_err(pblk, &rqd);
+               ret = -EIO;
+       }
 
 clear_rqd:
        pblk_free_rqd_meta(pblk, &rqd);
@@ -775,6 +777,7 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
        struct nvm_tgt_dev *dev = pblk->dev;
        struct pblk_line_meta *lm = &pblk->lm;
        struct bio *bio;
+       struct ppa_addr *ppa_list;
        struct nvm_rq rqd;
        __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
        __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
@@ -799,12 +802,13 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
        rqd.opcode = NVM_OP_PWRITE;
        rqd.nr_ppas = lm->smeta_sec;
        rqd.is_seq = 1;
+       ppa_list = nvm_rq_to_ppa_list(&rqd);
 
        for (i = 0; i < lm->smeta_sec; i++, paddr++) {
                struct pblk_sec_meta *meta = pblk_get_meta(pblk,
                                                           rqd.meta_list, i);
 
-               rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+               ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
                meta->lba = lba_list[paddr] = addr_empty;
        }
 
@@ -834,8 +838,9 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
-       void *ppa_list, *meta_list;
+       void *ppa_list_buf, *meta_list;
        struct bio *bio;
+       struct ppa_addr *ppa_list;
        struct nvm_rq rqd;
        u64 paddr = line->emeta_ssec;
        dma_addr_t dma_ppa_list, dma_meta_list;
@@ -851,7 +856,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
        if (!meta_list)
                return -ENOMEM;
 
-       ppa_list = meta_list + pblk_dma_meta_size(pblk);
+       ppa_list_buf = meta_list + pblk_dma_meta_size(pblk);
        dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
 
 next_rq:
@@ -872,11 +877,12 @@ next_rq:
 
        rqd.bio = bio;
        rqd.meta_list = meta_list;
-       rqd.ppa_list = ppa_list;
+       rqd.ppa_list = ppa_list_buf;
        rqd.dma_meta_list = dma_meta_list;
        rqd.dma_ppa_list = dma_ppa_list;
        rqd.opcode = NVM_OP_PREAD;
        rqd.nr_ppas = rq_ppas;
+       ppa_list = nvm_rq_to_ppa_list(&rqd);
 
        for (i = 0; i < rqd.nr_ppas; ) {
                struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
@@ -904,7 +910,7 @@ next_rq:
                }
 
                for (j = 0; j < min; j++, i++, paddr++)
-                       rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
+                       ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
        }
 
        ret = pblk_submit_io_sync(pblk, &rqd);
@@ -916,8 +922,11 @@ next_rq:
 
        atomic_dec(&pblk->inflight_io);
 
-       if (rqd.error)
+       if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
                pblk_log_read_err(pblk, &rqd);
+               ret = -EIO;
+               goto free_rqd_dma;
+       }
 
        emeta_buf += rq_len;
        left_ppas -= rq_ppas;
@@ -1162,7 +1171,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
        off = bit * geo->ws_opt;
        bitmap_set(line->map_bitmap, off, lm->smeta_sec);
        line->sec_in_line -= lm->smeta_sec;
-       line->smeta_ssec = off;
        line->cur_sec = off + lm->smeta_sec;
 
        if (init && pblk_line_smeta_write(pblk, line, off)) {
@@ -1521,11 +1529,9 @@ void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
 
 void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
 {
-       struct ppa_addr *ppa_list;
+       struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
        int i;
 
-       ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-
        for (i = 0; i < rqd->nr_ppas; i++)
                pblk_ppa_to_line_put(pblk, ppa_list[i]);
 }
@@ -1699,6 +1705,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
 
        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_GC);
+       if (line->w_err_gc->has_gc_err) {
+               spin_unlock(&line->lock);
+               pblk_err(pblk, "line %d had errors during GC\n", line->id);
+               pblk_put_line_back(pblk, line);
+               line->w_err_gc->has_gc_err = 0;
+               return;
+       }
+
        line->state = PBLK_LINESTATE_FREE;
        trace_pblk_line_state(pblk_disk_name(pblk), line->id,
                                        line->state);
@@ -2023,7 +2037,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
        struct ppa_addr ppa_l2p;
 
        /* logic error: lba out-of-bounds. Ignore update */
-       if (!(lba < pblk->rl.nr_secs)) {
+       if (!(lba < pblk->capacity)) {
                WARN(1, "pblk: corrupted L2P map request\n");
                return;
        }
@@ -2063,7 +2077,7 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
 #endif
 
        /* logic error: lba out-of-bounds. Ignore update */
-       if (!(lba < pblk->rl.nr_secs)) {
+       if (!(lba < pblk->capacity)) {
                WARN(1, "pblk: corrupted L2P map request\n");
                return 0;
        }
@@ -2109,7 +2123,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
        }
 
        /* logic error: lba out-of-bounds. Ignore update */
-       if (!(lba < pblk->rl.nr_secs)) {
+       if (!(lba < pblk->capacity)) {
                WARN(1, "pblk: corrupted L2P map request\n");
                return;
        }
@@ -2135,8 +2149,8 @@ out:
        spin_unlock(&pblk->trans_lock);
 }
 
-void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
-                        sector_t blba, int nr_secs)
+int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+                        sector_t blba, int nr_secs, bool *from_cache)
 {
        int i;
 
@@ -2150,10 +2164,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
                if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
                        struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);
 
+                       if (i > 0 && *from_cache)
+                               break;
+                       *from_cache = false;
+
                        kref_get(&line->ref);
+               } else {
+                       if (i > 0 && !*from_cache)
+                               break;
+                       *from_cache = true;
                }
        }
        spin_unlock(&pblk->trans_lock);
+       return i;
 }
 
 void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
@@ -2167,7 +2190,7 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
                lba = lba_list[i];
                if (lba != ADDR_EMPTY) {
                        /* logic error: lba out-of-bounds. Ignore update */
-                       if (!(lba < pblk->rl.nr_secs)) {
+                       if (!(lba < pblk->capacity)) {
                                WARN(1, "pblk: corrupted L2P map request\n");
                                continue;
                        }
index 26a52ea..63ee205 100644 (file)
@@ -59,24 +59,28 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
        wake_up_process(gc->gc_writer_ts);
 }
 
-static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
+void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;
 
+       spin_lock(&l_mg->gc_lock);
        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_GC);
        line->state = PBLK_LINESTATE_CLOSED;
        trace_pblk_line_state(pblk_disk_name(pblk), line->id,
                                        line->state);
+
+       /* We need to reset gc_group in order to ensure that
+        * pblk_line_gc_list will return proper move_list
+        * since right now current line is not on any of the
+        * gc lists.
+        */
+       line->gc_group = PBLK_LINEGC_NONE;
        move_list = pblk_line_gc_list(pblk, line);
        spin_unlock(&line->lock);
-
-       if (move_list) {
-               spin_lock(&l_mg->gc_lock);
-               list_add_tail(&line->list, move_list);
-               spin_unlock(&l_mg->gc_lock);
-       }
+       list_add_tail(&line->list, move_list);
+       spin_unlock(&l_mg->gc_lock);
 }
 
 static void pblk_gc_line_ws(struct work_struct *work)
@@ -84,8 +88,6 @@ static void pblk_gc_line_ws(struct work_struct *work)
        struct pblk_line_ws *gc_rq_ws = container_of(work,
                                                struct pblk_line_ws, ws);
        struct pblk *pblk = gc_rq_ws->pblk;
-       struct nvm_tgt_dev *dev = pblk->dev;
-       struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line = gc_rq_ws->line;
        struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
@@ -93,18 +95,10 @@ static void pblk_gc_line_ws(struct work_struct *work)
 
        up(&gc->gc_sem);
 
-       gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
-       if (!gc_rq->data) {
-               pblk_err(pblk, "could not GC line:%d (%d/%d)\n",
-                                       line->id, *line->vsc, gc_rq->nr_secs);
-               goto out;
-       }
-
        /* Read from GC victim block */
        ret = pblk_submit_read_gc(pblk, gc_rq);
        if (ret) {
-               pblk_err(pblk, "failed GC read in line:%d (err:%d)\n",
-                                                               line->id, ret);
+               line->w_err_gc->has_gc_err = 1;
                goto out;
        }
 
@@ -189,6 +183,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
        struct pblk_line *line = line_ws->line;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line_ws *gc_rq_ws;
        struct pblk_gc_rq *gc_rq;
@@ -247,9 +243,13 @@ next_rq:
        gc_rq->nr_secs = nr_secs;
        gc_rq->line = line;
 
+       gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
+       if (!gc_rq->data)
+               goto fail_free_gc_rq;
+
        gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
        if (!gc_rq_ws)
-               goto fail_free_gc_rq;
+               goto fail_free_gc_data;
 
        gc_rq_ws->pblk = pblk;
        gc_rq_ws->line = line;
@@ -281,6 +281,8 @@ out:
 
        return;
 
+fail_free_gc_data:
+       vfree(gc_rq->data);
 fail_free_gc_rq:
        kfree(gc_rq);
 fail_free_lba_list:
@@ -290,8 +292,11 @@ fail_free_invalid_bitmap:
 fail_free_ws:
        kfree(line_ws);
 
+       /* Line goes back to closed state, so we cannot release additional
+        * reference for line, since we do that only when we want to do
+        * gc to free line state transition.
+        */
        pblk_put_line_back(pblk, line);
-       kref_put(&line->ref, pblk_line_put);
        atomic_dec(&gc->read_inflight_gc);
 
        pblk_err(pblk, "failed to GC line %d\n", line->id);
@@ -355,8 +360,13 @@ static int pblk_gc_read(struct pblk *pblk)
 
        pblk_gc_kick(pblk);
 
-       if (pblk_gc_line(pblk, line))
+       if (pblk_gc_line(pblk, line)) {
                pblk_err(pblk, "failed to GC line %d\n", line->id);
+               /* rollback */
+               spin_lock(&gc->r_lock);
+               list_add_tail(&line->list, &gc->r_list);
+               spin_unlock(&gc->r_lock);
+       }
 
        return 0;
 }
index 8b643d0..b351c7f 100644 (file)
@@ -47,33 +47,6 @@ static struct pblk_global_caches pblk_caches = {
 
 struct bio_set pblk_bio_set;
 
-static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
-                         struct bio *bio)
-{
-       int ret;
-
-       /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
-        * constraint. Writes can be of arbitrary size.
-        */
-       if (bio_data_dir(bio) == READ) {
-               blk_queue_split(q, &bio);
-               ret = pblk_submit_read(pblk, bio);
-               if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
-                       bio_put(bio);
-
-               return ret;
-       }
-
-       /* Prevent deadlock in the case of a modest LUN configuration and large
-        * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
-        * available for user I/O.
-        */
-       if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
-               blk_queue_split(q, &bio);
-
-       return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
-}
-
 static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
 {
        struct pblk *pblk = q->queuedata;
@@ -86,13 +59,21 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
                }
        }
 
-       switch (pblk_rw_io(q, pblk, bio)) {
-       case NVM_IO_ERR:
-               bio_io_error(bio);
-               break;
-       case NVM_IO_DONE:
-               bio_endio(bio);
-               break;
+       /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
+        * constraint. Writes can be of arbitrary size.
+        */
+       if (bio_data_dir(bio) == READ) {
+               blk_queue_split(q, &bio);
+               pblk_submit_read(pblk, bio);
+       } else {
+               /* Prevent deadlock in the case of a modest LUN configuration
+                * and large user I/Os. Unless stalled, the rate limiter
+                * leaves at least 256KB available for user I/O.
+                */
+               if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
+                       blk_queue_split(q, &bio);
+
+               pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
        }
 
        return BLK_QC_T_NONE;
@@ -105,7 +86,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk)
        if (pblk->addrf_len < 32)
                entry_size = 4;
 
-       return entry_size * pblk->rl.nr_secs;
+       return entry_size * pblk->capacity;
 }
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
@@ -164,13 +145,18 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
        int ret = 0;
 
        map_size = pblk_trans_map_size(pblk);
-       pblk->trans_map = vmalloc(map_size);
-       if (!pblk->trans_map)
+       pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN
+                                       | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM,
+                                       PAGE_KERNEL);
+       if (!pblk->trans_map) {
+               pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
+                               map_size);
                return -ENOMEM;
+       }
 
        pblk_ppa_set_empty(&ppa);
 
-       for (i = 0; i < pblk->rl.nr_secs; i++)
+       for (i = 0; i < pblk->capacity; i++)
                pblk_trans_map_set(pblk, i, ppa);
 
        ret = pblk_l2p_recover(pblk, factory_init);
@@ -701,7 +687,6 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
         * on user capacity consider only provisioned blocks
         */
        pblk->rl.total_blocks = nr_free_chks;
-       pblk->rl.nr_secs = nr_free_chks * geo->clba;
 
        /* Consider sectors used for metadata */
        sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
@@ -1284,7 +1269,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 
        pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
                        geo->all_luns, pblk->l_mg.nr_lines,
-                       (unsigned long long)pblk->rl.nr_secs,
+                       (unsigned long long)pblk->capacity,
                        pblk->rwb.nr_entries);
 
        wake_up_process(pblk->writer_ts);
index 7fbc99b..5408e32 100644 (file)
@@ -162,6 +162,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
                        *erase_ppa = ppa_list[i];
                        erase_ppa->a.blk = e_line->id;
+                       erase_ppa->a.reserved = 0;
 
                        spin_unlock(&e_line->lock);
 
index 03c241b..5abb170 100644 (file)
@@ -642,7 +642,7 @@ try:
  * be directed to disk.
  */
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-                       struct ppa_addr ppa, int bio_iter, bool advanced_bio)
+                       struct ppa_addr ppa)
 {
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
@@ -673,15 +673,6 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
                ret = 0;
                goto out;
        }
-
-       /* Only advance the bio if it hasn't been advanced already. If advanced,
-        * this bio is at least a partial bio (i.e., it has partially been
-        * filled with data from the cache). If part of the data resides on the
-        * media, we will read later on
-        */
-       if (unlikely(!advanced_bio))
-               bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
-
        data = bio_data(bio);
        memcpy(data, entry->data, rb->seg_size);
 
@@ -799,8 +790,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb)
        }
 
 out:
-       spin_unlock(&rb->w_lock);
        spin_unlock_irq(&rb->s_lock);
+       spin_unlock(&rb->w_lock);
 
        return ret;
 }
index 0b7d5fb..d98ea39 100644 (file)
@@ -26,8 +26,7 @@
  * issued.
  */
 static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
-                               sector_t lba, struct ppa_addr ppa,
-                               int bio_iter, bool advanced_bio)
+                               sector_t lba, struct ppa_addr ppa)
 {
 #ifdef CONFIG_NVM_PBLK_DEBUG
        /* Callers must ensure that the ppa points to a cache address */
@@ -35,73 +34,75 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
        BUG_ON(!pblk_addr_in_cache(ppa));
 #endif
 
-       return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa,
-                                               bio_iter, advanced_bio);
+       return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
 }
 
-static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
                                 struct bio *bio, sector_t blba,
-                                unsigned long *read_bitmap)
+                                bool *from_cache)
 {
        void *meta_list = rqd->meta_list;
-       struct ppa_addr ppas[NVM_MAX_VLBA];
-       int nr_secs = rqd->nr_ppas;
-       bool advanced_bio = false;
-       int i, j = 0;
+       int nr_secs, i;
 
-       pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
+retry:
+       nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
+                                       from_cache);
+
+       if (!*from_cache)
+               goto end;
 
        for (i = 0; i < nr_secs; i++) {
-               struct ppa_addr p = ppas[i];
                struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
                sector_t lba = blba + i;
 
-retry:
-               if (pblk_ppa_empty(p)) {
+               if (pblk_ppa_empty(rqd->ppa_list[i])) {
                        __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 
-                       WARN_ON(test_and_set_bit(i, read_bitmap));
                        meta->lba = addr_empty;
-
-                       if (unlikely(!advanced_bio)) {
-                               bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
-                               advanced_bio = true;
+               } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
+                       /*
+                        * Try to read from write buffer. The address is later
+                        * checked on the write buffer to prevent retrieving
+                        * overwritten data.
+                        */
+                       if (!pblk_read_from_cache(pblk, bio, lba,
+                                                       rqd->ppa_list[i])) {
+                               if (i == 0) {
+                                       /*
+                                        * We didn't call with bio_advance()
+                                        * yet, so we can just retry.
+                                        */
+                                       goto retry;
+                               } else {
+                                       /*
+                                        * We already call bio_advance()
+                                        * so we cannot retry and we need
+                                        * to quit that function in order
+                                        * to allow caller to handle the bio
+                                        * splitting in the current sector
+                                        * position.
+                                        */
+                                       nr_secs = i;
+                                       goto end;
+                               }
                        }
-
-                       goto next;
-               }
-
-               /* Try to read from write buffer. The address is later checked
-                * on the write buffer to prevent retrieving overwritten data.
-                */
-               if (pblk_addr_in_cache(p)) {
-                       if (!pblk_read_from_cache(pblk, bio, lba, p, i,
-                                                               advanced_bio)) {
-                               pblk_lookup_l2p_seq(pblk, &p, lba, 1);
-                               goto retry;
-                       }
-                       WARN_ON(test_and_set_bit(i, read_bitmap));
                        meta->lba = cpu_to_le64(lba);
-                       advanced_bio = true;
 #ifdef CONFIG_NVM_PBLK_DEBUG
                        atomic_long_inc(&pblk->cache_reads);
 #endif
-               } else {
-                       /* Read from media non-cached sectors */
-                       rqd->ppa_list[j++] = p;
                }
-
-next:
-               if (advanced_bio)
-                       bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+               bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
        }
 
+end:
        if (pblk_io_aligned(pblk, nr_secs))
                rqd->is_seq = 1;
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_long_add(nr_secs, &pblk->inflight_reads);
 #endif
+
+       return nr_secs;
 }
 
 
@@ -175,12 +176,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
        WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
 }
 
-static void pblk_end_user_read(struct bio *bio)
+static void pblk_end_user_read(struct bio *bio, int error)
 {
-#ifdef CONFIG_NVM_PBLK_DEBUG
-       WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
-#endif
-       bio_endio(bio);
+       if (error && error != NVM_RSP_WARN_HIGHECC)
+               bio_io_error(bio);
+       else
+               bio_endio(bio);
 }
 
 static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -197,9 +198,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
                pblk_log_read_err(pblk, rqd);
 
        pblk_read_check_seq(pblk, rqd, r_ctx->lba);
-
-       if (int_bio)
-               bio_put(int_bio);
+       bio_put(int_bio);
 
        if (put_line)
                pblk_rq_to_line_put(pblk, rqd);
@@ -219,188 +218,17 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
        struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
        struct bio *bio = (struct bio *)r_ctx->private;
 
-       pblk_end_user_read(bio);
+       pblk_end_user_read(bio, rqd->error);
        __pblk_end_io_read(pblk, rqd, true);
 }
 
-static void pblk_end_partial_read(struct nvm_rq *rqd)
-{
-       struct pblk *pblk = rqd->private;
-       struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-       struct pblk_pr_ctx *pr_ctx = r_ctx->private;
-       struct pblk_sec_meta *meta;
-       struct bio *new_bio = rqd->bio;
-       struct bio *bio = pr_ctx->orig_bio;
-       void *meta_list = rqd->meta_list;
-       unsigned long *read_bitmap = pr_ctx->bitmap;
-       struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
-       struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
-       int nr_secs = pr_ctx->orig_nr_secs;
-       int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-       void *src_p, *dst_p;
-       int bit, i;
-
-       if (unlikely(nr_holes == 1)) {
-               struct ppa_addr ppa;
-
-               ppa = rqd->ppa_addr;
-               rqd->ppa_list = pr_ctx->ppa_ptr;
-               rqd->dma_ppa_list = pr_ctx->dma_ppa_list;
-               rqd->ppa_list[0] = ppa;
-       }
-
-       for (i = 0; i < nr_secs; i++) {
-               meta = pblk_get_meta(pblk, meta_list, i);
-               pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba);
-               meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]);
-       }
-
-       /* Fill the holes in the original bio */
-       i = 0;
-       for (bit = 0; bit < nr_secs; bit++) {
-               if (!test_bit(bit, read_bitmap)) {
-                       struct bio_vec dst_bv, src_bv;
-                       struct pblk_line *line;
-
-                       line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
-                       kref_put(&line->ref, pblk_line_put);
-
-                       meta = pblk_get_meta(pblk, meta_list, bit);
-                       meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
-
-                       dst_bv = bio_iter_iovec(bio, orig_iter);
-                       src_bv = bio_iter_iovec(new_bio, new_iter);
-
-                       src_p = kmap_atomic(src_bv.bv_page);
-                       dst_p = kmap_atomic(dst_bv.bv_page);
-
-                       memcpy(dst_p + dst_bv.bv_offset,
-                               src_p + src_bv.bv_offset,
-                               PBLK_EXPOSED_PAGE_SIZE);
-
-                       kunmap_atomic(src_p);
-                       kunmap_atomic(dst_p);
-
-                       flush_dcache_page(dst_bv.bv_page);
-                       mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
-
-                       bio_advance_iter(new_bio, &new_iter,
-                                       PBLK_EXPOSED_PAGE_SIZE);
-                       i++;
-               }
-               bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
-       }
-
-       bio_put(new_bio);
-       kfree(pr_ctx);
-
-       /* restore original request */
-       rqd->bio = NULL;
-       rqd->nr_ppas = nr_secs;
-
-       bio_endio(bio);
-       __pblk_end_io_read(pblk, rqd, false);
-}
-
-static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
-                           unsigned int bio_init_idx,
-                           unsigned long *read_bitmap,
-                           int nr_holes)
-{
-       void *meta_list = rqd->meta_list;
-       struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-       struct pblk_pr_ctx *pr_ctx;
-       struct bio *new_bio, *bio = r_ctx->private;
-       int nr_secs = rqd->nr_ppas;
-       int i;
-
-       new_bio = bio_alloc(GFP_KERNEL, nr_holes);
-
-       if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
-               goto fail_bio_put;
-
-       if (nr_holes != new_bio->bi_vcnt) {
-               WARN_ONCE(1, "pblk: malformed bio\n");
-               goto fail_free_pages;
-       }
-
-       pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
-       if (!pr_ctx)
-               goto fail_free_pages;
-
-       for (i = 0; i < nr_secs; i++) {
-               struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
-
-               pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba);
-       }
-
-       new_bio->bi_iter.bi_sector = 0; /* internal bio */
-       bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
-
-       rqd->bio = new_bio;
-       rqd->nr_ppas = nr_holes;
-
-       pr_ctx->orig_bio = bio;
-       bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
-       pr_ctx->bio_init_idx = bio_init_idx;
-       pr_ctx->orig_nr_secs = nr_secs;
-       r_ctx->private = pr_ctx;
-
-       if (unlikely(nr_holes == 1)) {
-               pr_ctx->ppa_ptr = rqd->ppa_list;
-               pr_ctx->dma_ppa_list = rqd->dma_ppa_list;
-               rqd->ppa_addr = rqd->ppa_list[0];
-       }
-       return 0;
-
-fail_free_pages:
-       pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
-fail_bio_put:
-       bio_put(new_bio);
-
-       return -ENOMEM;
-}
-
-static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
-                                unsigned int bio_init_idx,
-                                unsigned long *read_bitmap, int nr_secs)
-{
-       int nr_holes;
-       int ret;
-
-       nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-
-       if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap,
-                                   nr_holes))
-               return NVM_IO_ERR;
-
-       rqd->end_io = pblk_end_partial_read;
-
-       ret = pblk_submit_io(pblk, rqd);
-       if (ret) {
-               bio_put(rqd->bio);
-               pblk_err(pblk, "partial read IO submission failed\n");
-               goto err;
-       }
-
-       return NVM_IO_OK;
-
-err:
-       pblk_err(pblk, "failed to perform partial read\n");
-
-       /* Free allocated pages in new bio */
-       pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt);
-       __pblk_end_io_read(pblk, rqd, false);
-       return NVM_IO_ERR;
-}
-
 static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
-                        sector_t lba, unsigned long *read_bitmap)
+                        sector_t lba, bool *from_cache)
 {
        struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
        struct ppa_addr ppa;
 
-       pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+       pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_long_inc(&pblk->inflight_reads);
@@ -410,7 +238,6 @@ retry:
        if (pblk_ppa_empty(ppa)) {
                __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 
-               WARN_ON(test_and_set_bit(0, read_bitmap));
                meta->lba = addr_empty;
                return;
        }
@@ -419,12 +246,11 @@ retry:
         * write buffer to prevent retrieving overwritten data.
         */
        if (pblk_addr_in_cache(ppa)) {
-               if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) {
-                       pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+               if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
+                       pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
                        goto retry;
                }
 
-               WARN_ON(test_and_set_bit(0, read_bitmap));
                meta->lba = cpu_to_le64(lba);
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
@@ -435,95 +261,92 @@ retry:
        }
 }
 
-int pblk_submit_read(struct pblk *pblk, struct bio *bio)
+void pblk_submit_read(struct pblk *pblk, struct bio *bio)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct request_queue *q = dev->q;
        sector_t blba = pblk_get_lba(bio);
        unsigned int nr_secs = pblk_get_secs(bio);
+       bool from_cache;
        struct pblk_g_ctx *r_ctx;
        struct nvm_rq *rqd;
-       unsigned int bio_init_idx;
-       DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA);
-       int ret = NVM_IO_ERR;
+       struct bio *int_bio, *split_bio;
 
        generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
                              &pblk->disk->part0);
 
-       bitmap_zero(read_bitmap, nr_secs);
-
        rqd = pblk_alloc_rqd(pblk, PBLK_READ);
 
        rqd->opcode = NVM_OP_PREAD;
        rqd->nr_ppas = nr_secs;
-       rqd->bio = NULL; /* cloned bio if needed */
        rqd->private = pblk;
        rqd->end_io = pblk_end_io_read;
 
        r_ctx = nvm_rq_to_pdu(rqd);
        r_ctx->start_time = jiffies;
        r_ctx->lba = blba;
-       r_ctx->private = bio; /* original bio */
 
-       /* Save the index for this bio's start. This is needed in case
-        * we need to fill a partial read.
-        */
-       bio_init_idx = pblk_get_bi_idx(bio);
+       if (pblk_alloc_rqd_meta(pblk, rqd)) {
+               bio_io_error(bio);
+               pblk_free_rqd(pblk, rqd, PBLK_READ);
+               return;
+       }
 
-       if (pblk_alloc_rqd_meta(pblk, rqd))
-               goto fail_rqd_free;
+       /* Clone read bio to deal internally with:
+        * -read errors when reading from drive
+        * -bio_advance() calls during cache reads
+        */
+       int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
 
        if (nr_secs > 1)
-               pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap);
+               nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
+                                               &from_cache);
        else
-               pblk_read_rq(pblk, rqd, bio, blba, read_bitmap);
+               pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
 
-       if (bitmap_full(read_bitmap, nr_secs)) {
+split_retry:
+       r_ctx->private = bio; /* original bio */
+       rqd->bio = int_bio; /* internal bio */
+
+       if (from_cache && nr_secs == rqd->nr_ppas) {
+               /* All data was read from cache, we can complete the IO. */
+               pblk_end_user_read(bio, 0);
                atomic_inc(&pblk->inflight_io);
                __pblk_end_io_read(pblk, rqd, false);
-               return NVM_IO_DONE;
-       }
-
-       /* All sectors are to be read from the device */
-       if (bitmap_empty(read_bitmap, rqd->nr_ppas)) {
-               struct bio *int_bio = NULL;
+       } else if (nr_secs != rqd->nr_ppas) {
+               /* The read bio request could be partially filled by the write
+                * buffer, but there are some holes that need to be read from
+                * the drive. In order to handle this, we will use block layer
+                * mechanism to split this request in to smaller ones and make
+                * a chain of it.
+                */
+               split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
+                                       &pblk_bio_set);
+               bio_chain(split_bio, bio);
+               generic_make_request(bio);
+
+               /* New bio contains first N sectors of the previous one, so
+                * we can continue to use existing rqd, but we need to shrink
+                * the number of PPAs in it. New bio is also guaranteed that
+                * it contains only either data from cache or from drive, newer
+                * mix of them.
+                */
+               bio = split_bio;
+               rqd->nr_ppas = nr_secs;
+               if (rqd->nr_ppas == 1)
+                       rqd->ppa_addr = rqd->ppa_list[0];
 
-               /* Clone read bio to deal with read errors internally */
+               /* Recreate int_bio - existing might have some needed internal
+                * fields modified already.
+                */
+               bio_put(int_bio);
                int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
-               if (!int_bio) {
-                       pblk_err(pblk, "could not clone read bio\n");
-                       goto fail_end_io;
-               }
-
-               rqd->bio = int_bio;
-
-               if (pblk_submit_io(pblk, rqd)) {
-                       pblk_err(pblk, "read IO submission failed\n");
-                       ret = NVM_IO_ERR;
-                       goto fail_end_io;
-               }
-
-               return NVM_IO_OK;
+               goto split_retry;
+       } else if (pblk_submit_io(pblk, rqd)) {
+               /* Submitting IO to drive failed, let's report an error */
+               rqd->error = -ENODEV;
+               pblk_end_io_read(rqd);
        }
-
-       /* The read bio request could be partially filled by the write buffer,
-        * but there are some holes that need to be read from the drive.
-        */
-       ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, read_bitmap,
-                                   nr_secs);
-       if (ret)
-               goto fail_meta_free;
-
-       return NVM_IO_OK;
-
-fail_meta_free:
-       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
-fail_rqd_free:
-       pblk_free_rqd(pblk, rqd, PBLK_READ);
-       return ret;
-fail_end_io:
-       __pblk_end_io_read(pblk, rqd, false);
-       return ret;
 }
 
 static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
@@ -568,7 +391,7 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
                goto out;
 
        /* logic error: lba out-of-bounds */
-       if (lba >= pblk->rl.nr_secs) {
+       if (lba >= pblk->capacity) {
                WARN(1, "pblk: read lba out of bounds\n");
                goto out;
        }
@@ -642,7 +465,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 
        if (pblk_submit_io_sync(pblk, &rqd)) {
                ret = -EIO;
-               pblk_err(pblk, "GC read request failed\n");
                goto err_free_bio;
        }
 
index d86f580..e6dda04 100644 (file)
@@ -93,10 +93,24 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
                                u64 written_secs)
 {
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        int i;
 
        for (i = 0; i < written_secs; i += pblk->min_write_pgs)
-               pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+               __pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+
+       spin_lock(&l_mg->free_lock);
+       if (written_secs > line->left_msecs) {
+               /*
+                * We have all data sectors written
+                * and some emeta sectors written too.
+                */
+               line->left_msecs = 0;
+       } else {
+               /* We have only some data sectors written. */
+               line->left_msecs -= written_secs;
+       }
+       spin_unlock(&l_mg->free_lock);
 }
 
 static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
@@ -165,6 +179,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
        struct pblk_pad_rq *pad_rq;
        struct nvm_rq *rqd;
        struct bio *bio;
+       struct ppa_addr *ppa_list;
        void *data;
        __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
        u64 w_ptr = line->cur_sec;
@@ -194,7 +209,7 @@ next_pad_rq:
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
        if (rq_ppas < pblk->min_write_pgs) {
                pblk_err(pblk, "corrupted pad line %d\n", line->id);
-               goto fail_free_pad;
+               goto fail_complete;
        }
 
        rq_len = rq_ppas * geo->csecs;
@@ -203,7 +218,7 @@ next_pad_rq:
                                                PBLK_VMALLOC_META, GFP_KERNEL);
        if (IS_ERR(bio)) {
                ret = PTR_ERR(bio);
-               goto fail_free_pad;
+               goto fail_complete;
        }
 
        bio->bi_iter.bi_sector = 0; /* internal bio */
@@ -212,8 +227,11 @@ next_pad_rq:
        rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
 
        ret = pblk_alloc_rqd_meta(pblk, rqd);
-       if (ret)
-               goto fail_free_rqd;
+       if (ret) {
+               pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+               bio_put(bio);
+               goto fail_complete;
+       }
 
        rqd->bio = bio;
        rqd->opcode = NVM_OP_PWRITE;
@@ -222,6 +240,7 @@ next_pad_rq:
        rqd->end_io = pblk_end_io_recov;
        rqd->private = pad_rq;
 
+       ppa_list = nvm_rq_to_ppa_list(rqd);
        meta_list = rqd->meta_list;
 
        for (i = 0; i < rqd->nr_ppas; ) {
@@ -249,18 +268,21 @@ next_pad_rq:
                        lba_list[w_ptr] = addr_empty;
                        meta = pblk_get_meta(pblk, meta_list, i);
                        meta->lba = addr_empty;
-                       rqd->ppa_list[i] = dev_ppa;
+                       ppa_list[i] = dev_ppa;
                }
        }
 
        kref_get(&pad_rq->ref);
-       pblk_down_chunk(pblk, rqd->ppa_list[0]);
+       pblk_down_chunk(pblk, ppa_list[0]);
 
        ret = pblk_submit_io(pblk, rqd);
        if (ret) {
                pblk_err(pblk, "I/O submission failed: %d\n", ret);
-               pblk_up_chunk(pblk, rqd->ppa_list[0]);
-               goto fail_free_rqd;
+               pblk_up_chunk(pblk, ppa_list[0]);
+               kref_put(&pad_rq->ref, pblk_recov_complete);
+               pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+               bio_put(bio);
+               goto fail_complete;
        }
 
        left_line_ppas -= rq_ppas;
@@ -268,13 +290,9 @@ next_pad_rq:
        if (left_ppas && left_line_ppas)
                goto next_pad_rq;
 
+fail_complete:
        kref_put(&pad_rq->ref, pblk_recov_complete);
-
-       if (!wait_for_completion_io_timeout(&pad_rq->wait,
-                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-               pblk_err(pblk, "pad write timed out\n");
-               ret = -ETIME;
-       }
+       wait_for_completion(&pad_rq->wait);
 
        if (!pblk_line_is_full(line))
                pblk_err(pblk, "corrupted padded line: %d\n", line->id);
@@ -283,14 +301,6 @@ next_pad_rq:
 free_rq:
        kfree(pad_rq);
        return ret;
-
-fail_free_rqd:
-       pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-       bio_put(bio);
-fail_free_pad:
-       kfree(pad_rq);
-       vfree(data);
-       return ret;
 }
 
 static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
@@ -412,6 +422,7 @@ retry_rq:
        rqd->ppa_list = ppa_list;
        rqd->dma_ppa_list = dma_ppa_list;
        rqd->dma_meta_list = dma_meta_list;
+       ppa_list = nvm_rq_to_ppa_list(rqd);
 
        if (pblk_io_aligned(pblk, rq_ppas))
                rqd->is_seq = 1;
@@ -430,7 +441,7 @@ retry_rq:
                }
 
                for (j = 0; j < pblk->min_write_pgs; j++, i++)
-                       rqd->ppa_list[i] =
+                       ppa_list[i] =
                                addr_to_gen_ppa(pblk, paddr + j, line->id);
        }
 
@@ -444,7 +455,7 @@ retry_rq:
        atomic_dec(&pblk->inflight_io);
 
        /* If a read fails, do a best effort by padding the line and retrying */
-       if (rqd->error) {
+       if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
                int pad_distance, ret;
 
                if (padded) {
@@ -474,11 +485,11 @@ retry_rq:
 
                lba_list[paddr++] = cpu_to_le64(lba);
 
-               if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+               if (lba == ADDR_EMPTY || lba >= pblk->capacity)
                        continue;
 
                line->nr_valid_lbas++;
-               pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+               pblk_update_map(pblk, lba, ppa_list[i]);
        }
 
        left_ppas -= rq_ppas;
@@ -647,10 +658,12 @@ static int pblk_line_was_written(struct pblk_line *line,
        bppa = pblk->luns[smeta_blk].bppa;
        chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
 
-       if (chunk->state & NVM_CHK_ST_FREE)
-               return 0;
+       if (chunk->state & NVM_CHK_ST_CLOSED ||
+           (chunk->state & NVM_CHK_ST_OPEN
+            && chunk->wp >= lm->smeta_sec))
+               return 1;
 
-       return 1;
+       return 0;
 }
 
 static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
@@ -844,6 +857,7 @@ next:
                spin_unlock(&l_mg->free_lock);
        } else {
                spin_lock(&l_mg->free_lock);
+               l_mg->data_line = data_line;
                /* Allocate next line for preparation */
                l_mg->data_next = pblk_line_get(pblk);
                if (l_mg->data_next) {
index 6593dea..4e63f9b 100644 (file)
@@ -228,6 +228,7 @@ static void pblk_submit_rec(struct work_struct *work)
        mempool_free(recovery, &pblk->rec_pool);
 
        atomic_dec(&pblk->inflight_io);
+       pblk_write_kick(pblk);
 }
 
 
index ac3ab77..a678553 100644 (file)
@@ -43,8 +43,6 @@
 
 #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
 
-#define PBLK_COMMAND_TIMEOUT_MS 30000
-
 /* Max 512 LUNs per device */
 #define PBLK_MAX_LUNS_BITMAP (4)
 
@@ -123,18 +121,6 @@ struct pblk_g_ctx {
        u64 lba;
 };
 
-/* partial read context */
-struct pblk_pr_ctx {
-       struct bio *orig_bio;
-       DECLARE_BITMAP(bitmap, NVM_MAX_VLBA);
-       unsigned int orig_nr_secs;
-       unsigned int bio_init_idx;
-       void *ppa_ptr;
-       dma_addr_t dma_ppa_list;
-       u64 lba_list_mem[NVM_MAX_VLBA];
-       u64 lba_list_media[NVM_MAX_VLBA];
-};
-
 /* Pad context */
 struct pblk_pad_rq {
        struct pblk *pblk;
@@ -305,7 +291,6 @@ struct pblk_rl {
 
        struct timer_list u_timer;
 
-       unsigned long long nr_secs;
        unsigned long total_blocks;
 
        atomic_t free_blocks;           /* Total number of free blocks (+ OP) */
@@ -440,6 +425,7 @@ struct pblk_smeta {
 
 struct pblk_w_err_gc {
        int has_write_err;
+       int has_gc_err;
        __le64 *lba_list;
 };
 
@@ -465,7 +451,6 @@ struct pblk_line {
        int meta_line;                  /* Metadata line id */
        int meta_distance;              /* Distance between data and metadata */
 
-       u64 smeta_ssec;                 /* Sector where smeta starts */
        u64 emeta_ssec;                 /* Sector where emeta starts */
 
        unsigned int sec_in_line;       /* Number of usable secs in line */
@@ -762,7 +747,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
                                 unsigned int pos, unsigned int nr_entries,
                                 unsigned int count);
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-                       struct ppa_addr ppa, int bio_iter, bool advanced_bio);
+                       struct ppa_addr ppa);
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
 
 unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -862,15 +847,15 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
                       struct pblk_line *gc_line, u64 paddr);
 void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
                          u64 *lba_list, int nr_secs);
-void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
-                        sector_t blba, int nr_secs);
+int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+                        sector_t blba, int nr_secs, bool *from_cache);
 void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
 void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
 
 /*
  * pblk user I/O write path
  */
-int pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
+void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
                        unsigned long flags);
 int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
 
@@ -896,7 +881,7 @@ void pblk_write_kick(struct pblk *pblk);
  * pblk read path
  */
 extern struct bio_set pblk_bio_set;
-int pblk_submit_read(struct pblk *pblk, struct bio *bio);
+void pblk_submit_read(struct pblk *pblk, struct bio *bio);
 int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
 /*
  * pblk recovery
@@ -921,6 +906,7 @@ void pblk_gc_free_full_lines(struct pblk *pblk);
 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                              int *gc_active);
 int pblk_gc_sysfs_force(struct pblk *pblk, int force);
+void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line);
 
 /*
  * pblk rate limiter
index a6644a2..7da80f3 100644 (file)
@@ -1257,10 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                return 0;
        }
 
+       effects |= nvme_known_admin_effects(opcode);
        if (ctrl->effects)
                effects = le32_to_cpu(ctrl->effects->acs[opcode]);
-       else
-               effects = nvme_known_admin_effects(opcode);
 
        /*
         * For simplicity, IO to all namespaces is quiesced even if the command
@@ -2342,20 +2341,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
        NULL,
 };
 
-static int nvme_active_ctrls(struct nvme_subsystem *subsys)
+static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
+               struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 {
-       int count = 0;
-       struct nvme_ctrl *ctrl;
+       struct nvme_ctrl *tmp;
+
+       lockdep_assert_held(&nvme_subsystems_lock);
+
+       list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
+               if (ctrl->state == NVME_CTRL_DELETING ||
+                   ctrl->state == NVME_CTRL_DEAD)
+                       continue;
+
+               if (tmp->cntlid == ctrl->cntlid) {
+                       dev_err(ctrl->device,
+                               "Duplicate cntlid %u with %s, rejecting\n",
+                               ctrl->cntlid, dev_name(tmp->device));
+                       return false;
+               }
 
-       mutex_lock(&subsys->lock);
-       list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
-               if (ctrl->state != NVME_CTRL_DELETING &&
-                   ctrl->state != NVME_CTRL_DEAD)
-                       count++;
+               if ((id->cmic & (1 << 1)) ||
+                   (ctrl->opts && ctrl->opts->discovery_nqn))
+                       continue;
+
+               dev_err(ctrl->device,
+                       "Subsystem does not support multiple controllers\n");
+               return false;
        }
-       mutex_unlock(&subsys->lock);
 
-       return count;
+       return true;
 }
 
 static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
@@ -2395,22 +2409,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
        mutex_lock(&nvme_subsystems_lock);
        found = __nvme_find_get_subsystem(subsys->subnqn);
        if (found) {
-               /*
-                * Verify that the subsystem actually supports multiple
-                * controllers, else bail out.
-                */
-               if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
-                   nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
-                       dev_err(ctrl->device,
-                               "ignoring ctrl due to duplicate subnqn (%s).\n",
-                               found->subnqn);
-                       nvme_put_subsystem(found);
-                       ret = -EINVAL;
-                       goto out_unlock;
-               }
-
                __nvme_release_subsystem(subsys);
                subsys = found;
+
+               if (!nvme_validate_cntlid(subsys, ctrl, id)) {
+                       ret = -EINVAL;
+                       goto out_put_subsystem;
+               }
        } else {
                ret = device_add(&subsys->dev);
                if (ret) {
@@ -2422,23 +2427,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
                list_add_tail(&subsys->entry, &nvme_subsystems);
        }
 
-       ctrl->subsys = subsys;
-       mutex_unlock(&nvme_subsystems_lock);
-
        if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
                        dev_name(ctrl->device))) {
                dev_err(ctrl->device,
                        "failed to create sysfs link from subsystem.\n");
-               /* the transport driver will eventually put the subsystem */
-               return -EINVAL;
+               goto out_put_subsystem;
        }
 
-       mutex_lock(&subsys->lock);
+       ctrl->subsys = subsys;
        list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
-       mutex_unlock(&subsys->lock);
-
+       mutex_unlock(&nvme_subsystems_lock);
        return 0;
 
+out_put_subsystem:
+       nvme_put_subsystem(subsys);
 out_unlock:
        mutex_unlock(&nvme_subsystems_lock);
        put_device(&subsys->dev);
@@ -3605,19 +3607,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 {
        u32 aer_notice_type = (result & 0xff00) >> 8;
 
+       trace_nvme_async_event(ctrl, aer_notice_type);
+
        switch (aer_notice_type) {
        case NVME_AER_NOTICE_NS_CHANGED:
-               trace_nvme_async_event(ctrl, aer_notice_type);
                set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
                nvme_queue_scan(ctrl);
                break;
        case NVME_AER_NOTICE_FW_ACT_STARTING:
-               trace_nvme_async_event(ctrl, aer_notice_type);
                queue_work(nvme_wq, &ctrl->fw_act_work);
                break;
 #ifdef CONFIG_NVME_MULTIPATH
        case NVME_AER_NOTICE_ANA:
-               trace_nvme_async_event(ctrl, aer_notice_type);
                if (!ctrl->ana_log_buf)
                        break;
                queue_work(nvme_wq, &ctrl->ana_work);
@@ -3696,10 +3697,10 @@ static void nvme_free_ctrl(struct device *dev)
        __free_page(ctrl->discard_page);
 
        if (subsys) {
-               mutex_lock(&subsys->lock);
+               mutex_lock(&nvme_subsystems_lock);
                list_del(&ctrl->subsys_entry);
-               mutex_unlock(&subsys->lock);
                sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device));
+               mutex_unlock(&nvme_subsystems_lock);
        }
 
        ctrl->ops->free_ctrl(ctrl);
index 592d1e6..5838f7c 100644 (file)
@@ -978,7 +978,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
                                 NVMF_OPT_DISABLE_SQFLOW)
 
 static struct nvme_ctrl *
-nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
+nvmf_create_ctrl(struct device *dev, const char *buf)
 {
        struct nvmf_ctrl_options *opts;
        struct nvmf_transport_ops *ops;
@@ -1073,7 +1073,7 @@ static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf,
                goto out_unlock;
        }
 
-       ctrl = nvmf_create_ctrl(nvmf_device, buf, count);
+       ctrl = nvmf_create_ctrl(nvmf_device, buf);
        if (IS_ERR(ctrl)) {
                ret = PTR_ERR(ctrl);
                goto out_unlock;
index 9544eb6..dd8169b 100644 (file)
@@ -202,7 +202,7 @@ static LIST_HEAD(nvme_fc_lport_list);
 static DEFINE_IDA(nvme_fc_local_port_cnt);
 static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
-
+static struct workqueue_struct *nvme_fc_wq;
 
 /*
  * These items are short-term. They will eventually be moved into
@@ -2054,7 +2054,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
         */
        if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
                active = atomic_xchg(&ctrl->err_work_active, 1);
-               if (!active && !schedule_work(&ctrl->err_work)) {
+               if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
                        atomic_set(&ctrl->err_work_active, 0);
                        WARN_ON(1);
                }
@@ -3399,6 +3399,10 @@ static int __init nvme_fc_init_module(void)
 {
        int ret;
 
+       nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
+       if (!nvme_fc_wq)
+               return -ENOMEM;
+
        /*
         * NOTE:
         * It is expected that in the future the kernel will combine
@@ -3416,7 +3420,7 @@ static int __init nvme_fc_init_module(void)
        ret = class_register(&fc_class);
        if (ret) {
                pr_err("couldn't register class fc\n");
-               return ret;
+               goto out_destroy_wq;
        }
 
        /*
@@ -3440,6 +3444,9 @@ out_destroy_device:
        device_destroy(&fc_class, MKDEV(0, 0));
 out_destroy_class:
        class_unregister(&fc_class);
+out_destroy_wq:
+       destroy_workqueue(nvme_fc_wq);
+
        return ret;
 }
 
@@ -3456,6 +3463,7 @@ static void __exit nvme_fc_exit_module(void)
 
        device_destroy(&fc_class, MKDEV(0, 0));
        class_unregister(&fc_class);
+       destroy_workqueue(nvme_fc_wq);
 }
 
 module_init(nvme_fc_init_module);
index 949e29e..4f20a10 100644 (file)
@@ -977,6 +977,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
        geo->csecs = 1 << ns->lba_shift;
        geo->sos = ns->ms;
        geo->ext = ns->ext;
+       geo->mdts = ns->ctrl->max_hw_sectors;
 
        dev->q = q;
        memcpy(dev->name, disk_name, DISK_NAME_LEN);
index 5c9429d..499acf0 100644 (file)
@@ -31,7 +31,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
                sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
        } else if (ns->head->disk) {
                sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
-                               ctrl->cntlid, ns->head->instance);
+                               ctrl->instance, ns->head->instance);
                *flags = GENHD_FL_HIDDEN;
        } else {
                sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
index 3e4fb89..2a8708c 100644 (file)
@@ -1296,6 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        switch (dev->ctrl.state) {
        case NVME_CTRL_DELETING:
                shutdown = true;
+               /* fall through */
        case NVME_CTRL_CONNECTING:
        case NVME_CTRL_RESETTING:
                dev_warn_ratelimited(dev->ctrl.device,
@@ -2280,8 +2281,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
                        return ret;
                }
                dev->ctrl.tagset = &dev->tagset;
-
-               nvme_dbbuf_set(dev);
        } else {
                blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
 
@@ -2289,6 +2288,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
                nvme_free_queues(dev, dev->online_queues);
        }
 
+       nvme_dbbuf_set(dev);
        return 0;
 }
 
index e1824c2..f383146 100644 (file)
@@ -697,15 +697,6 @@ out_free_queues:
        return ret;
 }
 
-static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
-               struct blk_mq_tag_set *set)
-{
-       struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
-       blk_mq_free_tag_set(set);
-       nvme_rdma_dev_put(ctrl->device);
-}
-
 static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                bool admin)
 {
@@ -744,24 +735,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 
        ret = blk_mq_alloc_tag_set(set);
        if (ret)
-               goto out;
-
-       /*
-        * We need a reference on the device as long as the tag_set is alive,
-        * as the MRs in the request structures need a valid ib_device.
-        */
-       ret = nvme_rdma_dev_get(ctrl->device);
-       if (!ret) {
-               ret = -EINVAL;
-               goto out_free_tagset;
-       }
+               return ERR_PTR(ret);
 
        return set;
-
-out_free_tagset:
-       blk_mq_free_tag_set(set);
-out:
-       return ERR_PTR(ret);
 }
 
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -769,7 +745,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.admin_q);
-               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+               blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
        }
        if (ctrl->async_event_sqe.data) {
                nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
@@ -847,7 +823,7 @@ out_cleanup_queue:
                blk_cleanup_queue(ctrl->ctrl.admin_q);
 out_free_tagset:
        if (new)
-               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+               blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 out_free_async_qe:
        nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
                sizeof(struct nvme_command), DMA_TO_DEVICE);
@@ -862,7 +838,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
 {
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.connect_q);
-               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
+               blk_mq_free_tag_set(ctrl->ctrl.tagset);
        }
        nvme_rdma_free_io_queues(ctrl);
 }
@@ -903,7 +879,7 @@ out_cleanup_connect_q:
                blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
        if (new)
-               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
+               blk_mq_free_tag_set(ctrl->ctrl.tagset);
 out_free_io_queues:
        nvme_rdma_free_io_queues(ctrl);
        return ret;
index 97d3c77..e71502d 100644 (file)
@@ -167,6 +167,7 @@ TRACE_EVENT(nvme_async_event,
                aer_name(NVME_AER_NOTICE_NS_CHANGED),
                aer_name(NVME_AER_NOTICE_ANA),
                aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+               aer_name(NVME_AER_NOTICE_DISC_CHANGED),
                aer_name(NVME_AER_ERROR),
                aer_name(NVME_AER_SMART),
                aer_name(NVME_AER_CSS),
index f89f9d0..c09039e 100644 (file)
@@ -3827,7 +3827,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
        if ((start_padding_sectors || end_padding_sectors) &&
            (rq_data_dir(req) == WRITE)) {
                DBF_DEV_EVENT(DBF_ERR, basedev,
-                             "raw write not track aligned (%lu,%lu) req %p",
+                             "raw write not track aligned (%llu,%llu) req %p",
                              start_padding_sectors, end_padding_sectors, req);
                return ERR_PTR(-EINVAL);
        }
index 5d865a5..4d0d565 100644 (file)
@@ -358,6 +358,7 @@ struct nvm_geo {
        u16     csecs;          /* sector size */
        u16     sos;            /* out-of-band area size */
        bool    ext;            /* metadata in extended data buffer */
+       u32     mdts;           /* Max data transfer size*/
 
        /* device write constrains */
        u32     ws_min;         /* minimum write size */
@@ -427,6 +428,7 @@ struct nvm_dev {
        char name[DISK_NAME_LEN];
        void *private_data;
 
+       struct kref ref;
        void *rmap;
 
        struct mutex mlock;
index c40720c..8028ada 100644 (file)
@@ -1246,9 +1246,9 @@ enum {
        NVME_SC_FW_NEEDS_SUBSYS_RESET   = 0x110,
        NVME_SC_FW_NEEDS_RESET          = 0x111,
        NVME_SC_FW_NEEDS_MAX_TIME       = 0x112,
-       NVME_SC_FW_ACIVATE_PROHIBITED   = 0x113,
+       NVME_SC_FW_ACTIVATE_PROHIBITED  = 0x113,
        NVME_SC_OVERLAPPING_RANGE       = 0x114,
-       NVME_SC_NS_INSUFFICENT_CAP      = 0x115,
+       NVME_SC_NS_INSUFFICIENT_CAP     = 0x115,
        NVME_SC_NS_ID_UNAVAILABLE       = 0x116,
        NVME_SC_NS_ALREADY_ATTACHED     = 0x118,
        NVME_SC_NS_IS_PRIVATE           = 0x119,