From: Linus Torvalds
Date: Thu, 7 Sep 2017 18:59:42 +0000 (-0700)
Subject: Merge branch 'for-4.14/block' of git://git.kernel.dk/linux-block
X-Git-Tag: v4.14-rc1~117
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a0725ab0c7536076d5477264420ef420ebb64501;p=platform%2Fkernel%2Flinux-exynos.git

Merge branch 'for-4.14/block' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "This is the first pull request for 4.14, containing most of the code
  changes. It's a quiet series this round, which I think we needed after
  the churn of the last few series. This contains:

   - Fix for a registration race in loop, from Anton Volkov.

   - Overflow complaint fix from Arnd for DAC960.

   - Series of drbd changes from the usual suspects.

   - Conversion of the stec/skd driver to blk-mq. From Bart.

   - A few BFQ improvements/fixes from Paolo.

   - CFQ improvement from Ritesh, allowing idling for group idle.

   - A few fixes found by Dan's smatch, courtesy of Dan.

   - A warning fixup for a race between changing the IO scheduler and
     device removal. From David Jeffery.

   - A few nbd fixes from Josef.

   - Support for cgroup info in blktrace, from Shaohua.

   - Also from Shaohua, new features in the null_blk driver to allow it
     to actually hold data, among other things.

   - Various corner cases and error handling fixes from Weiping Zhang.

   - Improvements to the IO stats tracking for blk-mq from me. Can
     drastically improve performance for fast devices and/or big
     machines.

   - Series from Christoph removing bi_bdev as being needed for IO
     submission, in preparation for nvme multipathing code [see the
     sketch after the changelog].

   - Series from Bart, including various cleanups and fixes for switch
     fall through case complaints"

* 'for-4.14/block' of git://git.kernel.dk/linux-block: (162 commits)
  kernfs: checking for IS_ERR() instead of NULL
  drbd: remove BIOSET_NEED_RESCUER flag from drbd_{md_,}io_bio_set
  drbd: Fix allyesconfig build, fix recent commit
  drbd: switch from kmalloc() to kmalloc_array()
  drbd: abort drbd_start_resync if there is no connection
  drbd: move global variables to drbd namespace and make some static
  drbd: rename "usermode_helper" to "drbd_usermode_helper"
  drbd: fix race between handshake and admin disconnect/down
  drbd: fix potential deadlock when trying to detach during handshake
  drbd: A single dot should be put into a sequence.
  drbd: fix rmmod cleanup, remove _all_ debugfs entries
  drbd: Use setup_timer() instead of init_timer() to simplify the code.
  drbd: fix potential get_ldev/put_ldev refcount imbalance during attach
  drbd: new disk-option disable-write-same
  drbd: Fix resource role for newly created resources in events2
  drbd: mark symbols static where possible
  drbd: Send P_NEG_ACK upon write error in protocol != C
  drbd: add explicit plugging when submitting batches
  drbd: change list_for_each_safe to while(list_first_entry_or_null)
  drbd: introduce drbd_recv_header_maybe_unplug
  ...
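The bi_bdev removal referenced above replaces direct stores to bio->bi_bdev
with the bio_set_dev() helper (which fills in bio->bi_disk and bi_partno), as
the hunks below show for bio-integrity, zram and swap IO. A minimal sketch of
the new submission pattern, assuming 4.14-era block APIs; submit_one_page()
and its parameters are invented for illustration, the block-layer calls are
the real ones:

    /*
     * Allocate a single-page bio, point it at a device with bio_set_dev()
     * instead of the old bio->bi_bdev assignment, and submit it.
     */
    static int submit_one_page(struct block_device *bdev, struct page *page,
                               sector_t sector, unsigned int op)
    {
            struct bio *bio = bio_alloc(GFP_KERNEL, 1);

            if (!bio)
                    return -ENOMEM;

            bio_set_dev(bio, bdev);         /* was: bio->bi_bdev = bdev; */
            bio->bi_iter.bi_sector = sector;
            if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
                    bio_put(bio);
                    return -EIO;
            }
            bio_set_op_attrs(bio, op, 0);   /* REQ_OP_READ or REQ_OP_WRITE */
            submit_bio(bio);
            return 0;
    }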
---

a0725ab0c7536076d5477264420ef420ebb64501
diff --cc block/bio-integrity.c
index 9b1ea47,553d75e..5df3290
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@@ -387,11 -385,12 +385,11 @@@ static void bio_integrity_verify_fn(str
  */
 bool __bio_integrity_endio(struct bio *bio)
 {
-        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+        struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
+        struct bio_integrity_payload *bip = bio_integrity(bio);

         if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
-            bi->profile->verify_fn) {
-                struct bio_integrity_payload *bip = bio_integrity(bio);
-
+            (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
                 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
                 queue_work(kintegrityd_wq, &bip->bip_work);
                 return false;
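The hunk above also shows the standard way out of bio completion context:
punt the heavy verification to a workqueue and return false so that
bio_endio() backs off until the worker re-raises the completion. A
self-contained sketch of that deferral shape, with hypothetical names
(defer_wq, my_completion, my_endio); only the INIT_WORK/queue_work/bio_endio
calls are real API:

    /* Completion work is punted to a workqueue; returning false tells the
     * caller to stop and let the worker finish the completion later. */
    static struct workqueue_struct *defer_wq;  /* alloc_workqueue() elsewhere */

    struct my_completion {
            struct work_struct work;
            struct bio *bio;
    };

    static void my_verify_fn(struct work_struct *work)
    {
            struct my_completion *c =
                    container_of(work, struct my_completion, work);

            /* ... expensive verification in process context ... */
            bio_endio(c->bio);              /* resume the deferred completion */
    }

    static bool my_endio(struct bio *bio, struct my_completion *c)
    {
            if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
                    c->bio = bio;
                    INIT_WORK(&c->work, my_verify_fn);
                    queue_work(defer_wq, &c->work);
                    return false;           /* completion deferred */
            }
            return true;                    /* complete inline */
    }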
diff --cc drivers/block/zram/zram_drv.c
index 4a0438c,1c3383b..4063f3f
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@@ -270,349 -270,6 +270,349 @@@ static ssize_t mem_used_max_store(struc
         return len;
 }

+#ifdef CONFIG_ZRAM_WRITEBACK
+static bool zram_wb_enabled(struct zram *zram)
+{
+        return zram->backing_dev;
+}
+
+static void reset_bdev(struct zram *zram)
+{
+        struct block_device *bdev;
+
+        if (!zram_wb_enabled(zram))
+                return;
+
+        bdev = zram->bdev;
+        if (zram->old_block_size)
+                set_blocksize(bdev, zram->old_block_size);
+        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+        /* hope filp_close flushes all of the IO */
+        filp_close(zram->backing_dev, NULL);
+        zram->backing_dev = NULL;
+        zram->old_block_size = 0;
+        zram->bdev = NULL;
+
+        kvfree(zram->bitmap);
+        zram->bitmap = NULL;
+}
+
+static ssize_t backing_dev_show(struct device *dev,
+                struct device_attribute *attr, char *buf)
+{
+        struct zram *zram = dev_to_zram(dev);
+        struct file *file = zram->backing_dev;
+        char *p;
+        ssize_t ret;
+
+        down_read(&zram->init_lock);
+        if (!zram_wb_enabled(zram)) {
+                memcpy(buf, "none\n", 5);
+                up_read(&zram->init_lock);
+                return 5;
+        }
+
+        p = file_path(file, buf, PAGE_SIZE - 1);
+        if (IS_ERR(p)) {
+                ret = PTR_ERR(p);
+                goto out;
+        }
+
+        ret = strlen(p);
+        memmove(buf, p, ret);
+        buf[ret++] = '\n';
+out:
+        up_read(&zram->init_lock);
+        return ret;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+                struct device_attribute *attr, const char *buf, size_t len)
+{
+        char *file_name;
+        struct file *backing_dev = NULL;
+        struct inode *inode;
+        struct address_space *mapping;
+        unsigned int bitmap_sz, old_block_size = 0;
+        unsigned long nr_pages, *bitmap = NULL;
+        struct block_device *bdev = NULL;
+        int err;
+        struct zram *zram = dev_to_zram(dev);
+
+        file_name = kmalloc(PATH_MAX, GFP_KERNEL);
+        if (!file_name)
+                return -ENOMEM;
+
+        down_write(&zram->init_lock);
+        if (init_done(zram)) {
+                pr_info("Can't setup backing device for initialized device\n");
+                err = -EBUSY;
+                goto out;
+        }
+
+        strlcpy(file_name, buf, len);
+
+        backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+        if (IS_ERR(backing_dev)) {
+                err = PTR_ERR(backing_dev);
+                backing_dev = NULL;
+                goto out;
+        }
+
+        mapping = backing_dev->f_mapping;
+        inode = mapping->host;
+
+        /* Only block devices are supported at the moment */
+        if (!S_ISBLK(inode->i_mode)) {
+                err = -ENOTBLK;
+                goto out;
+        }
+
+        bdev = bdgrab(I_BDEV(inode));
+        err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+        if (err < 0)
+                goto out;
+
+        nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+        bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
+        bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
+        if (!bitmap) {
+                err = -ENOMEM;
+                goto out;
+        }
+
+        old_block_size = block_size(bdev);
+        err = set_blocksize(bdev, PAGE_SIZE);
+        if (err)
+                goto out;
+
+        reset_bdev(zram);
+        spin_lock_init(&zram->bitmap_lock);
+
+        zram->old_block_size = old_block_size;
+        zram->bdev = bdev;
+        zram->backing_dev = backing_dev;
+        zram->bitmap = bitmap;
+        zram->nr_pages = nr_pages;
+        up_write(&zram->init_lock);
+
+        pr_info("setup backing device %s\n", file_name);
+        kfree(file_name);
+
+        return len;
+out:
+        if (bitmap)
+                kvfree(bitmap);
+
+        if (bdev)
+                blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+        if (backing_dev)
+                filp_close(backing_dev, NULL);
+
+        up_write(&zram->init_lock);
+
+        kfree(file_name);
+
+        return err;
+}
+
+static unsigned long get_entry_bdev(struct zram *zram)
+{
+        unsigned long entry;
+
+        spin_lock(&zram->bitmap_lock);
+        /* skip bit 0 so a valid entry is never confused with zram.handle == 0 */
+        entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+        if (entry == zram->nr_pages) {
+                spin_unlock(&zram->bitmap_lock);
+                return 0;
+        }
+
+        set_bit(entry, zram->bitmap);
+        spin_unlock(&zram->bitmap_lock);
+
+        return entry;
+}
+
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
+{
+        int was_set;
+
+        spin_lock(&zram->bitmap_lock);
+        was_set = test_and_clear_bit(entry, zram->bitmap);
+        spin_unlock(&zram->bitmap_lock);
+        WARN_ON_ONCE(!was_set);
+}
+
+void zram_page_end_io(struct bio *bio)
+{
+        struct page *page = bio->bi_io_vec[0].bv_page;
+
+        page_endio(page, op_is_write(bio_op(bio)),
+                        blk_status_to_errno(bio->bi_status));
+        bio_put(bio);
+}
+
+/*
+ * Returns 1 if the submission is successful.
+ */
+static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+                        unsigned long entry, struct bio *parent)
+{
+        struct bio *bio;
+
+        bio = bio_alloc(GFP_ATOMIC, 1);
+        if (!bio)
+                return -ENOMEM;
+
+        bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-        bio->bi_bdev = zram->bdev;
++        bio_set_dev(bio, zram->bdev);
+        if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+                bio_put(bio);
+                return -EIO;
+        }
+
+        if (!parent) {
+                bio->bi_opf = REQ_OP_READ;
+                bio->bi_end_io = zram_page_end_io;
+        } else {
+                bio->bi_opf = parent->bi_opf;
+                bio_chain(bio, parent);
+        }
+
+        submit_bio(bio);
+        return 1;
+}
+
+struct zram_work {
+        struct work_struct work;
+        struct zram *zram;
+        unsigned long entry;
+        struct bio *bio;
+};
+
+#if PAGE_SIZE != 4096
+static void zram_sync_read(struct work_struct *work)
+{
+        struct bio_vec bvec;
+        struct zram_work *zw = container_of(work, struct zram_work, work);
+        struct zram *zram = zw->zram;
+        unsigned long entry = zw->entry;
+        struct bio *bio = zw->bio;
+
+        read_from_bdev_async(zram, &bvec, entry, bio);
+}
+
+/*
+ * The block layer wants only one ->make_request_fn active at a time, so
+ * chaining this IO to the parent IO in the same context could deadlock.
+ * To avoid that, punt the read to a worker thread.
+ */
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+                                unsigned long entry, struct bio *bio)
+{
+        struct zram_work work;
+
+        work.zram = zram;
+        work.entry = entry;
+        work.bio = bio;
+
+        INIT_WORK_ONSTACK(&work.work, zram_sync_read);
+        queue_work(system_unbound_wq, &work.work);
+        flush_work(&work.work);
+        destroy_work_on_stack(&work.work);
+
+        return 1;
+}
+#else
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+                                unsigned long entry, struct bio *bio)
+{
+        WARN_ON(1);
+        return -EIO;
+}
+#endif
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+                        unsigned long entry, struct bio *parent, bool sync)
+{
+        if (sync)
+                return read_from_bdev_sync(zram, bvec, entry, parent);
+        else
+                return read_from_bdev_async(zram, bvec, entry, parent);
+}
+
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+                                u32 index, struct bio *parent,
+                                unsigned long *pentry)
+{
+        struct bio *bio;
+        unsigned long entry;
+
+        bio = bio_alloc(GFP_ATOMIC, 1);
+        if (!bio)
+                return -ENOMEM;
+
+        entry = get_entry_bdev(zram);
+        if (!entry) {
+                bio_put(bio);
+                return -ENOSPC;
+        }
+
+        bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-        bio->bi_bdev = zram->bdev;
++        bio_set_dev(bio, zram->bdev);
+        if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+                                bvec->bv_offset)) {
+                bio_put(bio);
+                put_entry_bdev(zram, entry);
+                return -EIO;
+        }
+
+        if (!parent) {
+                bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+                bio->bi_end_io = zram_page_end_io;
+        } else {
+                bio->bi_opf = parent->bi_opf;
+                bio_chain(bio, parent);
+        }
+
+        submit_bio(bio);
+        *pentry = entry;
+
+        return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+        unsigned long entry;
+
+        zram_clear_flag(zram, index, ZRAM_WB);
+        entry = zram_get_element(zram, index);
+        zram_set_element(zram, index, 0);
+        put_entry_bdev(zram, entry);
+}
+
+#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
+static inline void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+                                u32 index, struct bio *parent,
+                                unsigned long *pentry)
+{
+        return -EIO;
+}
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+                        unsigned long entry, struct bio *parent, bool sync)
+{
+        return -EIO;
+}
+static void zram_wb_clear(struct zram *zram, u32 index) {}
+#endif
+
 /*
  * We switched to per-cpu streams and this attr is not needed anymore.
  * However, we will keep it around for some time, because:
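The writeback path above parcels the backing device into page-sized slots
tracked by a spinlock-protected bitmap; bit 0 is deliberately never handed
out, so slot number 0 can double as "no slot", matching zram.handle == 0 for
an empty entry. A standalone toy model of that allocator under hypothetical
names (slot_get()/slot_put() mirror get_entry_bdev()/put_entry_bdev(), minus
the locking):

    #include <limits.h>

    #define BITS_PER_WORD   (sizeof(unsigned long) * CHAR_BIT)

    struct slot_map {
            unsigned long *bits;    /* zeroed bitmap, nr bits long */
            unsigned long nr;
    };

    /* Find, claim and return a free slot; 0 means "map full". */
    static unsigned long slot_get(struct slot_map *m)
    {
            unsigned long i;

            for (i = 1; i < m->nr; i++) {   /* slot 0 is never handed out */
                    unsigned long *w = &m->bits[i / BITS_PER_WORD];
                    unsigned long b = 1UL << (i % BITS_PER_WORD);

                    if (!(*w & b)) {
                            *w |= b;
                            return i;
                    }
            }
            return 0;
    }

    /* Release a previously claimed slot. */
    static void slot_put(struct slot_map *m, unsigned long i)
    {
            m->bits[i / BITS_PER_WORD] &= ~(1UL << (i % BITS_PER_WORD));
    }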
@@@ -1182,12 -825,12 +1183,12 @@@ static int zram_bvec_rw(struct zram *zr
                 flush_dcache_page(bvec->bv_page);
         } else {
                 atomic64_inc(&zram->stats.num_writes);
-                ret = zram_bvec_write(zram, bvec, index, offset);
+                ret = zram_bvec_write(zram, bvec, index, offset, bio);
         }

-        generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
+        generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);

-        if (unlikely(ret)) {
+        if (unlikely(ret < 0)) {
                 if (!is_write)
                         atomic64_inc(&zram->stats.failed_reads);
                 else
diff --cc mm/page_io.c
index 20139b9,9cf1bc7..21502d3
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@@ -28,12 -27,14 +28,15 @@@ static struct bio *get_swap_bio(gfp_t gfp_flags
                                 struct page *page, bio_end_io_t end_io)
 {
+        int i, nr = hpage_nr_pages(page);
         struct bio *bio;

-        bio = bio_alloc(gfp_flags, 1);
+        bio = bio_alloc(gfp_flags, nr);
         if (bio) {
-                bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
+                struct block_device *bdev;
+
+                bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
+                bio_set_dev(bio, bdev);
                 bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
                 bio->bi_end_io = end_io;
@@@ -390,12 -378,7 +391,12 @@@ int swap_readpage(struct page *page, bo
                 ret = -ENOMEM;
                 goto out;
         }
-        bdev = bio->bi_bdev;
+        disk = bio->bi_disk;
+        /*
+         * Keep this task valid during swap readpage because the oom killer may
+         * attempt to access it in the page fault retry time check.
+         */
+        get_task_struct(current);
         bio->bi_private = current;
         bio_set_op_attrs(bio, REQ_OP_READ, 0);
         count_vm_event(PSWPIN);
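For reference, the generic_end_io_acct() change in the zram_bvec_rw() hunk
reflects this cycle's new accounting signatures, which take the request_queue
as the first argument. Roughly how the start/end pair brackets a transfer in
a bio-based driver, as a sketch reusing the zram names from the diff (the
function boundary and the is_write parameter are illustrative):

    static void account_one_bvec(struct zram *zram, struct bio_vec *bvec,
                                 bool is_write)
    {
            struct request_queue *q = zram->disk->queue;
            int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
            unsigned long start_time = jiffies;

            generic_start_io_acct(q, rw_acct, bvec->bv_len >> 9,
                                  &zram->disk->part0);
            /* ... copy the data in or out of the zram page ... */
            generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
    }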