btrfs: unify buffered and direct I/O read repair
authorOmar Sandoval <osandov@fb.com>
Thu, 16 Apr 2020 21:46:25 +0000 (14:46 -0700)
committerDavid Sterba <dsterba@suse.com>
Mon, 25 May 2020 09:25:27 +0000 (11:25 +0200)
Currently, direct I/O has its own versions of bio_readpage_error() and
btrfs_check_repairable() (dio_read_error() and
btrfs_check_dio_repairable(), respectively). The main difference is that
the direct I/O version doesn't do read validation. The rework of direct
I/O repair makes it possible to do validation, so we can get rid of
btrfs_check_dio_repairable() and combine bio_readpage_error() and
dio_read_error() into a new helper, btrfs_submit_read_repair().

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c

index 2b1f0be..22db0b2 100644 (file)
@@ -2601,46 +2601,10 @@ static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
        return true;
 }
 
-
-struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
-                                   struct io_failure_record *failrec,
-                                   struct page *page, int pg_offset, int icsum,
-                                   bio_end_io_t *endio_func, void *data)
-{
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct bio *bio;
-       struct btrfs_io_bio *btrfs_failed_bio;
-       struct btrfs_io_bio *btrfs_bio;
-
-       bio = btrfs_io_bio_alloc(1);
-       bio->bi_end_io = endio_func;
-       bio->bi_iter.bi_sector = failrec->logical >> 9;
-       bio->bi_iter.bi_size = 0;
-       bio->bi_private = data;
-
-       btrfs_failed_bio = btrfs_io_bio(failed_bio);
-       if (btrfs_failed_bio->csum) {
-               u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-
-               btrfs_bio = btrfs_io_bio(bio);
-               btrfs_bio->csum = btrfs_bio->csum_inline;
-               icsum *= csum_size;
-               memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
-                      csum_size);
-       }
-
-       bio_add_page(bio, page, failrec->len, pg_offset);
-       btrfs_io_bio(bio)->logical = failrec->start;
-       btrfs_io_bio(bio)->iter = bio->bi_iter;
-
-       return bio;
-}
-
 static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
 {
-       struct bio_vec *bvec;
        u64 len = 0;
-       int i;
+       const u32 blocksize = inode->i_sb->s_blocksize;
 
        /*
         * If bi_status is BLK_STS_OK, then this was a checksum error, not an
@@ -2653,72 +2617,99 @@ static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
        /*
         * We need to validate each sector individually if the failed I/O was
         * for multiple sectors.
+        *
+        * There are a few possible bios that can end up here:
+        * 1. A buffered read bio, which is not cloned.
+        * 2. A direct I/O read bio, which is cloned.
+        * 3. A (buffered or direct) repair bio, which is not cloned.
+        *
+        * For cloned bios (case 2), we can get the size from
+        * btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
+        * it from the bvecs.
         */
-       bio_for_each_bvec_all(bvec, bio, i) {
-               len += bvec->bv_len;
-               if (len > inode->i_sb->s_blocksize)
+       if (bio_flagged(bio, BIO_CLONED)) {
+               if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
                        return true;
+       } else {
+               struct bio_vec *bvec;
+               int i;
+
+               bio_for_each_bvec_all(bvec, bio, i) {
+                       len += bvec->bv_len;
+                       if (len > blocksize)
+                               return true;
+               }
        }
        return false;
 }
 
-/*
- * This is a generic handler for readpage errors. If other copies exist, read
- * those and write back good data to the failed position. Does not investigate
- * in remapping the failed extent elsewhere, hoping the device will be smart
- * enough to do this as needed
- */
-static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
-                             struct page *page, u64 start, u64 end,
-                             int failed_mirror)
+blk_status_t btrfs_submit_read_repair(struct inode *inode,
+                                     struct bio *failed_bio, u64 phy_offset,
+                                     struct page *page, unsigned int pgoff,
+                                     u64 start, u64 end, int failed_mirror,
+                                     submit_bio_hook_t *submit_bio_hook)
 {
        struct io_failure_record *failrec;
-       struct inode *inode = page->mapping->host;
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+       struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
+       const int icsum = phy_offset >> inode->i_sb->s_blocksize_bits;
        bool need_validation;
-       struct bio *bio;
-       int read_mode = 0;
+       struct bio *repair_bio;
+       struct btrfs_io_bio *repair_io_bio;
        blk_status_t status;
        int ret;
 
+       btrfs_debug(fs_info,
+                  "repair read error: read error at %llu", start);
+
        BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
        ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
        if (ret)
-               return ret;
+               return errno_to_blk_status(ret);
 
        need_validation = btrfs_io_needs_validation(inode, failed_bio);
 
        if (!btrfs_check_repairable(inode, need_validation, failrec,
                                    failed_mirror)) {
                free_io_failure(failure_tree, tree, failrec);
-               return -EIO;
+               return BLK_STS_IOERR;
        }
 
+       repair_bio = btrfs_io_bio_alloc(1);
+       repair_io_bio = btrfs_io_bio(repair_bio);
+       repair_bio->bi_opf = REQ_OP_READ;
        if (need_validation)
-               read_mode |= REQ_FAILFAST_DEV;
+               repair_bio->bi_opf |= REQ_FAILFAST_DEV;
+       repair_bio->bi_end_io = failed_bio->bi_end_io;
+       repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
+       repair_bio->bi_private = failed_bio->bi_private;
 
-       phy_offset >>= inode->i_sb->s_blocksize_bits;
-       bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
-                                     start - page_offset(page),
-                                     (int)phy_offset, failed_bio->bi_end_io,
-                                     NULL);
-       bio->bi_opf = REQ_OP_READ | read_mode;
+       if (failed_io_bio->csum) {
+               const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+               repair_io_bio->csum = repair_io_bio->csum_inline;
+               memcpy(repair_io_bio->csum,
+                      failed_io_bio->csum + csum_size * icsum, csum_size);
+       }
+
+       bio_add_page(repair_bio, page, failrec->len, pgoff);
+       repair_io_bio->logical = failrec->start;
+       repair_io_bio->iter = repair_bio->bi_iter;
 
        btrfs_debug(btrfs_sb(inode->i_sb),
-               "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
-               read_mode, failrec->this_mirror, failrec->in_validation);
+"repair read error: submitting new read to mirror %d, in_validation=%d",
+                   failrec->this_mirror, failrec->in_validation);
 
-       status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
-                                        failrec->bio_flags);
+       status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
+                                failrec->bio_flags);
        if (status) {
                free_io_failure(failure_tree, tree, failrec);
-               bio_put(bio);
-               ret = blk_status_to_errno(status);
+               bio_put(repair_bio);
        }
-
-       return ret;
+       return status;
 }
 
 /* lots and lots of room for performance fixes in the end_bio funcs */
@@ -2890,9 +2881,10 @@ static void end_bio_extent_readpage(struct bio *bio)
                         * If it can't handle the error it will return -EIO and
                         * we remain responsible for that page.
                         */
-                       ret = bio_readpage_error(bio, offset, page, start, end,
-                                                mirror);
-                       if (ret == 0) {
+                       if (!btrfs_submit_read_repair(inode, bio, offset, page,
+                                               start - page_offset(page),
+                                               start, end, mirror,
+                                               tree->ops->submit_bio_hook)) {
                                uptodate = !bio->bi_status;
                                offset += len;
                                continue;
index f4dfac7..a2842b2 100644 (file)
@@ -66,6 +66,10 @@ struct btrfs_io_bio;
 struct io_failure_record;
 struct extent_io_tree;
 
+typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
+                                        int mirror_num,
+                                        unsigned long bio_flags);
+
 typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
                struct bio *bio, u64 bio_offset);
 
@@ -74,8 +78,7 @@ struct extent_io_ops {
         * The following callbacks must be always defined, the function
         * pointer will be called unconditionally.
         */
-       blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
-                                       int mirror_num, unsigned long bio_flags);
+       submit_bio_hook_t *submit_bio_hook;
        int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
                                    struct page *page, u64 start, u64 end,
                                    int mirror);
@@ -312,10 +315,12 @@ struct io_failure_record {
 };
 
 
-struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
-                                   struct io_failure_record *failrec,
-                                   struct page *page, int pg_offset, int icsum,
-                                   bio_end_io_t *endio_func, void *data);
+blk_status_t btrfs_submit_read_repair(struct inode *inode,
+                                     struct bio *failed_bio, u64 phy_offset,
+                                     struct page *page, unsigned int pgoff,
+                                     u64 start, u64 end, int failed_mirror,
+                                     submit_bio_hook_t *submit_bio_hook);
+
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 bool find_lock_delalloc_range(struct inode *inode,
                             struct page *locked_page, u64 *start,
index bd7453f..5d56708 100644 (file)
@@ -7379,10 +7379,11 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
        kfree(dip);
 }
 
-static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
-                                                struct bio *bio,
-                                                int mirror_num)
+static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
+                                         int mirror_num,
+                                         unsigned long bio_flags)
 {
+       struct btrfs_dio_private *dip = bio->bi_private;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        blk_status_t ret;
 
@@ -7392,96 +7393,11 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
        if (ret)
                return ret;
 
+       refcount_inc(&dip->refs);
        ret = btrfs_map_bio(fs_info, bio, mirror_num);
-
-       return ret;
-}
-
-static int btrfs_check_dio_repairable(struct inode *inode,
-                                     struct bio *failed_bio,
-                                     struct io_failure_record *failrec,
-                                     int failed_mirror)
-{
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       int num_copies;
-
-       num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
-       if (num_copies == 1) {
-               /*
-                * we only have a single copy of the data, so don't bother with
-                * all the retry and error correction code that follows. no
-                * matter what the error is, it is very likely to persist.
-                */
-               btrfs_debug(fs_info,
-                       "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
-                       num_copies, failrec->this_mirror, failed_mirror);
-               return 0;
-       }
-
-       failrec->failed_mirror = failed_mirror;
-       failrec->this_mirror++;
-       if (failrec->this_mirror == failed_mirror)
-               failrec->this_mirror++;
-
-       if (failrec->this_mirror > num_copies) {
-               btrfs_debug(fs_info,
-                       "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
-                       num_copies, failrec->this_mirror, failed_mirror);
-               return 0;
-       }
-
-       return 1;
-}
-
-static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
-                                  struct page *page, unsigned int pgoff,
-                                  u64 start, u64 end, int failed_mirror)
-{
-       struct btrfs_dio_private *dip = failed_bio->bi_private;
-       struct io_failure_record *failrec;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-       struct bio *bio;
-       int isector;
-       unsigned int read_mode = 0;
-       int ret;
-       blk_status_t status;
-
-       BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
-
-       ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
        if (ret)
-               return errno_to_blk_status(ret);
-
-       ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
-                                        failed_mirror);
-       if (!ret) {
-               free_io_failure(failure_tree, io_tree, failrec);
-               return BLK_STS_IOERR;
-       }
-
-       if (btrfs_io_bio(failed_bio)->iter.bi_size > inode->i_sb->s_blocksize)
-               read_mode |= REQ_FAILFAST_DEV;
-
-       isector = start - btrfs_io_bio(failed_bio)->logical;
-       isector >>= inode->i_sb->s_blocksize_bits;
-       bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff,
-                                     isector, failed_bio->bi_end_io, dip);
-       bio->bi_opf = REQ_OP_READ | read_mode;
-
-       btrfs_debug(BTRFS_I(inode)->root->fs_info,
-                   "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
-                   read_mode, failrec->this_mirror, failrec->in_validation);
-
-       refcount_inc(&dip->refs);
-       status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
-       if (status) {
-               free_io_failure(failure_tree, io_tree, failrec);
-               bio_put(bio);
                refcount_dec(&dip->refs);
-       }
-
-       return status;
+       return ret;
 }
 
 static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
@@ -7517,11 +7433,14 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
                        } else {
                                blk_status_t status;
 
-                               status = dio_read_error(inode, &io_bio->bio,
+                               status = btrfs_submit_read_repair(inode,
+                                                       &io_bio->bio,
+                                                       start - io_bio->logical,
                                                        bvec.bv_page, pgoff,
                                                        start,
                                                        start + sectorsize - 1,
-                                                       io_bio->mirror_num);
+                                                       io_bio->mirror_num,
+                                                       submit_dio_repair_bio);
                                if (status)
                                        err = status;
                        }