Merge branch 'for-4.14/block' of git://git.kernel.dk/linux-block
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Sep 2017 18:59:42 +0000 (11:59 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Sep 2017 18:59:42 +0000 (11:59 -0700)
Pull block layer updates from Jens Axboe:
 "This is the first pull request for 4.14, containing most of the code
  changes. It's a quiet series this round, which I think we needed after
  the churn of the last few series. This contains:

   - Fix for a registration race in loop, from Anton Volkov.

   - Overflow complaint fix from Arnd for DAC960.

   - Series of drbd changes from the usual suspects.

   - Conversion of the stec/skd driver to blk-mq. From Bart.

   - A few BFQ improvements/fixes from Paolo.

   - CFQ improvement from Ritesh, allowing idling for group idle.

   - A few fixes found by Dan's smatch, courtesy of Dan.

   - A warning fixup for a race between changing the IO scheduler and
     device removal. From David Jeffery.

   - A few nbd fixes from Josef.

   - Support for cgroup info in blktrace, from Shaohua.

   - Also from Shaohua, new features in the null_blk driver to allow it
     to actually hold data, among other things.

   - Various corner cases and error handling fixes from Weiping Zhang.

   - Improvements to the IO stats tracking for blk-mq from me. Can
     drastically improve performance for fast devices and/or big
     machines.

   - Series from Christoph removing bi_bdev as being needed for IO
     submission, in preparation for the nvme multipathing code (a
     sketch of the replacement helper follows this list).

   - Series from Bart, including various cleanups and fixes for switch
     fall through case complaints"

* 'for-4.14/block' of git://git.kernel.dk/linux-block: (162 commits)
  kernfs: checking for IS_ERR() instead of NULL
  drbd: remove BIOSET_NEED_RESCUER flag from drbd_{md_,}io_bio_set
  drbd: Fix allyesconfig build, fix recent commit
  drbd: switch from kmalloc() to kmalloc_array()
  drbd: abort drbd_start_resync if there is no connection
  drbd: move global variables to drbd namespace and make some static
  drbd: rename "usermode_helper" to "drbd_usermode_helper"
  drbd: fix race between handshake and admin disconnect/down
  drbd: fix potential deadlock when trying to detach during handshake
  drbd: A single dot should be put into a sequence.
  drbd: fix rmmod cleanup, remove _all_ debugfs entries
  drbd: Use setup_timer() instead of init_timer() to simplify the code.
  drbd: fix potential get_ldev/put_ldev refcount imbalance during attach
  drbd: new disk-option disable-write-same
  drbd: Fix resource role for newly created resources in events2
  drbd: mark symbols static where possible
  drbd: Send P_NEG_ACK upon write error in protocol != C
  drbd: add explicit plugging when submitting batches
  drbd: change list_for_each_safe to while(list_first_entry_or_null)
  drbd: introduce drbd_recv_header_maybe_unplug
  ...

40 files changed:
MAINTAINERS
arch/powerpc/sysdev/axonram.c
block/bfq-iosched.h
block/bio-integrity.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-throttle.c
block/genhd.c
drivers/block/Kconfig
drivers/block/brd.c
drivers/block/loop.c
drivers/block/null_blk.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/block/zram/zram_drv.c
drivers/md/dm-crypt.c
drivers/md/dm-mpath.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/raid5-cache.c
drivers/nvme/host/core.c
drivers/nvme/host/rdma.c
fs/btrfs/disk-io.c
fs/btrfs/raid56.c
fs/btrfs/volumes.c
fs/buffer.c
fs/gfs2/lops.c
fs/gfs2/meta_io.c
fs/gfs2/ops_fstype.c
fs/iomap.c
fs/kernfs/file.c
fs/ocfs2/cluster/heartbeat.c
fs/xfs/xfs_aops.c
include/linux/bio.h
include/linux/blkdev.h
include/linux/cgroup.h
include/linux/fs.h
kernel/cgroup/cgroup.c
mm/page_io.c

diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
@@@ -387,11 -385,12 +385,11 @@@ static void bio_integrity_verify_fn(str
   */
  bool __bio_integrity_endio(struct bio *bio)
  {
-       struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+       struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
 +      struct bio_integrity_payload *bip = bio_integrity(bio);
  
        if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
 -          bi->profile->verify_fn) {
 -              struct bio_integrity_payload *bip = bio_integrity(bio);
 -
 +          (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
                INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
                queue_work(kintegrityd_wq, &bip->bip_work);
                return false;
Simple merge
diff --cc block/blk-mq.c
Simple merge
Simple merge
diff --cc block/genhd.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -270,349 -270,6 +270,349 @@@ static ssize_t mem_used_max_store(struc
        return len;
  }
  
-       bio->bi_bdev = zram->bdev;
 +#ifdef CONFIG_ZRAM_WRITEBACK
 +static bool zram_wb_enabled(struct zram *zram)
 +{
 +      return zram->backing_dev;
 +}
 +
 +static void reset_bdev(struct zram *zram)
 +{
 +      struct block_device *bdev;
 +
 +      if (!zram_wb_enabled(zram))
 +              return;
 +
 +      bdev = zram->bdev;
 +      if (zram->old_block_size)
 +              set_blocksize(bdev, zram->old_block_size);
 +      blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 +      /* hope filp_close flushes all outstanding IO */
 +      filp_close(zram->backing_dev, NULL);
 +      zram->backing_dev = NULL;
 +      zram->old_block_size = 0;
 +      zram->bdev = NULL;
 +
 +      kvfree(zram->bitmap);
 +      zram->bitmap = NULL;
 +}
 +
 +static ssize_t backing_dev_show(struct device *dev,
 +              struct device_attribute *attr, char *buf)
 +{
 +      struct zram *zram = dev_to_zram(dev);
 +      struct file *file = zram->backing_dev;
 +      char *p;
 +      ssize_t ret;
 +
 +      down_read(&zram->init_lock);
 +      if (!zram_wb_enabled(zram)) {
 +              memcpy(buf, "none\n", 5);
 +              up_read(&zram->init_lock);
 +              return 5;
 +      }
 +
 +      p = file_path(file, buf, PAGE_SIZE - 1);
 +      if (IS_ERR(p)) {
 +              ret = PTR_ERR(p);
 +              goto out;
 +      }
 +
 +      ret = strlen(p);
 +      memmove(buf, p, ret);
 +      buf[ret++] = '\n';
 +out:
 +      up_read(&zram->init_lock);
 +      return ret;
 +}
 +
 +static ssize_t backing_dev_store(struct device *dev,
 +              struct device_attribute *attr, const char *buf, size_t len)
 +{
 +      char *file_name;
 +      struct file *backing_dev = NULL;
 +      struct inode *inode;
 +      struct address_space *mapping;
 +      unsigned int bitmap_sz, old_block_size = 0;
 +      unsigned long nr_pages, *bitmap = NULL;
 +      struct block_device *bdev = NULL;
 +      int err;
 +      struct zram *zram = dev_to_zram(dev);
 +
 +      file_name = kmalloc(PATH_MAX, GFP_KERNEL);
 +      if (!file_name)
 +              return -ENOMEM;
 +
 +      down_write(&zram->init_lock);
 +      if (init_done(zram)) {
 +              pr_info("Can't setup backing device for initialized device\n");
 +              err = -EBUSY;
 +              goto out;
 +      }
 +
 +      strlcpy(file_name, buf, len);
 +
 +      backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
 +      if (IS_ERR(backing_dev)) {
 +              err = PTR_ERR(backing_dev);
 +              backing_dev = NULL;
 +              goto out;
 +      }
 +
 +      mapping = backing_dev->f_mapping;
 +      inode = mapping->host;
 +
 +      /* Only block devices are supported at the moment */
 +      if (!S_ISBLK(inode->i_mode)) {
 +              err = -ENOTBLK;
 +              goto out;
 +      }
 +
 +      bdev = bdgrab(I_BDEV(inode));
 +      err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
 +      if (err < 0)
 +              goto out;
 +
 +      nr_pages = i_size_read(inode) >> PAGE_SHIFT;
 +      bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
 +      bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
 +      if (!bitmap) {
 +              err = -ENOMEM;
 +              goto out;
 +      }
 +
 +      old_block_size = block_size(bdev);
 +      err = set_blocksize(bdev, PAGE_SIZE);
 +      if (err)
 +              goto out;
 +
 +      reset_bdev(zram);
 +      spin_lock_init(&zram->bitmap_lock);
 +
 +      zram->old_block_size = old_block_size;
 +      zram->bdev = bdev;
 +      zram->backing_dev = backing_dev;
 +      zram->bitmap = bitmap;
 +      zram->nr_pages = nr_pages;
 +      up_write(&zram->init_lock);
 +
 +      pr_info("setup backing device %s\n", file_name);
 +      kfree(file_name);
 +
 +      return len;
 +out:
 +      if (bitmap)
 +              kvfree(bitmap);
 +
 +      if (bdev)
 +              blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 +
 +      if (backing_dev)
 +              filp_close(backing_dev, NULL);
 +
 +      up_write(&zram->init_lock);
 +
 +      kfree(file_name);
 +
 +      return err;
 +}
 +
 +static unsigned long get_entry_bdev(struct zram *zram)
 +{
 +      unsigned long entry;
 +
 +      spin_lock(&zram->bitmap_lock);
 +      /* skip bit 0 so a valid entry is never confused with zram.handle == 0 */
 +      entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
 +      if (entry == zram->nr_pages) {
 +              spin_unlock(&zram->bitmap_lock);
 +              return 0;
 +      }
 +
 +      set_bit(entry, zram->bitmap);
 +      spin_unlock(&zram->bitmap_lock);
 +
 +      return entry;
 +}
 +
 +static void put_entry_bdev(struct zram *zram, unsigned long entry)
 +{
 +      int was_set;
 +
 +      spin_lock(&zram->bitmap_lock);
 +      was_set = test_and_clear_bit(entry, zram->bitmap);
 +      spin_unlock(&zram->bitmap_lock);
 +      WARN_ON_ONCE(!was_set);
 +}
 +
 +void zram_page_end_io(struct bio *bio)
 +{
 +      struct page *page = bio->bi_io_vec[0].bv_page;
 +
 +      page_endio(page, op_is_write(bio_op(bio)),
 +                      blk_status_to_errno(bio->bi_status));
 +      bio_put(bio);
 +}
 +
 +/*
 + * Returns 1 if the submission is successful.
 + */
 +static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 +                      unsigned long entry, struct bio *parent)
 +{
 +      struct bio *bio;
 +
 +      bio = bio_alloc(GFP_ATOMIC, 1);
 +      if (!bio)
 +              return -ENOMEM;
 +
 +      bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-       bio->bi_bdev = zram->bdev;
++      bio_set_dev(bio, zram->bdev);
 +      if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
 +              bio_put(bio);
 +              return -EIO;
 +      }
 +
 +      if (!parent) {
 +              bio->bi_opf = REQ_OP_READ;
 +              bio->bi_end_io = zram_page_end_io;
 +      } else {
 +              bio->bi_opf = parent->bi_opf;
 +              bio_chain(bio, parent);
 +      }
 +
 +      submit_bio(bio);
 +      return 1;
 +}
 +
 +struct zram_work {
 +      struct work_struct work;
 +      struct zram *zram;
 +      unsigned long entry;
 +      struct bio *bio;
 +};
 +
 +#if PAGE_SIZE != 4096
 +static void zram_sync_read(struct work_struct *work)
 +{
 +      struct bio_vec bvec;
 +      struct zram_work *zw = container_of(work, struct zram_work, work);
 +      struct zram *zram = zw->zram;
 +      unsigned long entry = zw->entry;
 +      struct bio *bio = zw->bio;
 +
 +      read_from_bdev_async(zram, &bvec, entry, bio);
 +}
 +
 +/*
 + * The block layer wants only one ->make_request_fn active at a time,
 + * so chaining IO to the parent IO in the same context would deadlock.
 + * To avoid that, do the read from worker thread context.
 + */
 +static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 +                              unsigned long entry, struct bio *bio)
 +{
 +      struct zram_work work;
 +
 +      work.zram = zram;
 +      work.entry = entry;
 +      work.bio = bio;
 +
 +      INIT_WORK_ONSTACK(&work.work, zram_sync_read);
 +      queue_work(system_unbound_wq, &work.work);
 +      flush_work(&work.work);
 +      destroy_work_on_stack(&work.work);
 +
 +      return 1;
 +}
 +#else
 +static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 +                              unsigned long entry, struct bio *bio)
 +{
 +      WARN_ON(1);
 +      return -EIO;
 +}
 +#endif
 +
 +static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 +                      unsigned long entry, struct bio *parent, bool sync)
 +{
 +      if (sync)
 +              return read_from_bdev_sync(zram, bvec, entry, parent);
 +      else
 +              return read_from_bdev_async(zram, bvec, entry, parent);
 +}
 +
 +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
 +                                      u32 index, struct bio *parent,
 +                                      unsigned long *pentry)
 +{
 +      struct bio *bio;
 +      unsigned long entry;
 +
 +      bio = bio_alloc(GFP_ATOMIC, 1);
 +      if (!bio)
 +              return -ENOMEM;
 +
 +      entry = get_entry_bdev(zram);
 +      if (!entry) {
 +              bio_put(bio);
 +              return -ENOSPC;
 +      }
 +
 +      bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
++      bio_set_dev(bio, zram->bdev);
 +      if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
 +                                      bvec->bv_offset)) {
 +              bio_put(bio);
 +              put_entry_bdev(zram, entry);
 +              return -EIO;
 +      }
 +
 +      if (!parent) {
 +              bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 +              bio->bi_end_io = zram_page_end_io;
 +      } else {
 +              bio->bi_opf = parent->bi_opf;
 +              bio_chain(bio, parent);
 +      }
 +
 +      submit_bio(bio);
 +      *pentry = entry;
 +
 +      return 0;
 +}
 +
 +static void zram_wb_clear(struct zram *zram, u32 index)
 +{
 +      unsigned long entry;
 +
 +      zram_clear_flag(zram, index, ZRAM_WB);
 +      entry = zram_get_element(zram, index);
 +      zram_set_element(zram, index, 0);
 +      put_entry_bdev(zram, entry);
 +}
 +
 +#else
 +static bool zram_wb_enabled(struct zram *zram) { return false; }
 +static inline void reset_bdev(struct zram *zram) {};
 +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
 +                                      u32 index, struct bio *parent,
 +                                      unsigned long *pentry)
 +{
 +      return -EIO;
 +}
 +
 +static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 +                      unsigned long entry, struct bio *parent, bool sync)
 +{
 +      return -EIO;
 +}
 +static void zram_wb_clear(struct zram *zram, u32 index) {}
 +#endif
 +
 +
  /*
   * We switched to per-cpu streams and this attr is not needed anymore.
   * However, we will keep it around for some time, because:
@@@ -1182,12 -825,12 +1183,12 @@@ static int zram_bvec_rw(struct zram *zr
                flush_dcache_page(bvec->bv_page);
        } else {
                atomic64_inc(&zram->stats.num_writes);
 -              ret = zram_bvec_write(zram, bvec, index, offset);
 +              ret = zram_bvec_write(zram, bvec, index, offset, bio);
        }
  
-       generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
+       generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
  
 -      if (unlikely(ret)) {
 +      if (unlikely(ret < 0)) {
                if (!is_write)
                        atomic64_inc(&zram->stats.failed_reads);
                else
Simple merge
Simple merge
diff --cc drivers/md/dm.c
Simple merge
diff --cc drivers/md/md.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc fs/buffer.c
Simple merge
diff --cc fs/gfs2/lops.c
Simple merge
Simple merge
Simple merge
diff --cc fs/iomap.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc mm/page_io.c
  static struct bio *get_swap_bio(gfp_t gfp_flags,
                                struct page *page, bio_end_io_t end_io)
  {
 +      int i, nr = hpage_nr_pages(page);
        struct bio *bio;
  
 -      bio = bio_alloc(gfp_flags, 1);
 +      bio = bio_alloc(gfp_flags, nr);
        if (bio) {
-               bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
+               struct block_device *bdev;
+               bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
+               bio_set_dev(bio, bdev);
                bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
                bio->bi_end_io = end_io;
  
@@@ -390,12 -378,7 +391,12 @@@ int swap_readpage(struct page *page, bo
                ret = -ENOMEM;
                goto out;
        }
-       bdev = bio->bi_bdev;
+       disk = bio->bi_disk;
 +      /*
 +       * Keep this task valid during swap readpage because the oom killer may
 +       * attempt to access it in the page fault retry time check.
 +       */
 +      get_task_struct(current);
        bio->bi_private = current;
        bio_set_op_attrs(bio, REQ_OP_READ, 0);
        count_vm_event(PSWPIN);
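
For context on the accounting change in the zram hunk above
(generic_end_io_acct() gaining a request_queue argument): as part of
the blk-mq IO stats work noted in the pull message, both accounting
helpers now take the queue as their first parameter. A minimal,
hypothetical sketch of the updated call pattern in a bio-based driver
(the mydrv_* names and struct are illustrative, not from this merge):

	static blk_qc_t mydrv_make_request(struct request_queue *q,
					   struct bio *bio)
	{
		struct mydrv *d = q->queuedata;	/* hypothetical driver state */
		int rw = bio_data_dir(bio);
		unsigned long start = jiffies;

		/* v4.14: the queue is passed in so the core can use
		 * per-queue state when accounting in-flight IO.
		 */
		generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
		mydrv_do_io(d, bio);		/* hypothetical: do the transfer */
		generic_end_io_acct(q, rw, &d->disk->part0, start);

		bio_endio(bio);
		return BLK_QC_T_NONE;
	}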