btrfs: split the bio submission path into a separate file
author Christoph Hellwig <hch@lst.de>
Tue, 15 Nov 2022 09:44:05 +0000 (10:44 +0100)
committer David Sterba <dsterba@suse.com>
Mon, 5 Dec 2022 17:00:57 +0000 (18:00 +0100)
The code used by btrfs_submit_bio only interacts with the rest of
volumes.c through __btrfs_map_block (which itself is a more generic
version of two exported helpers) and does not really have anything
to do with volumes.c.  Create a new bio.c file and a bio.h header
going along with it for the btrfs_bio-based storage layer, which
will grow even more going forward.

Also update the file with my copyright notice given that a large
part of the moved code was written or rewritten by me.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
14 files changed:
fs/btrfs/Makefile
fs/btrfs/bio.c [new file with mode: 0644]
fs/btrfs/bio.h [new file with mode: 0644]
fs/btrfs/compression.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file-item.c
fs/btrfs/inode.c
fs/btrfs/relocation.c
fs/btrfs/super.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 84fb3b4..555c962 100644 (file)
@@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
           uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
           block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-          subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o
+          subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
new file mode 100644 (file)
index 0000000..9e881dc
--- /dev/null
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ * Copyright (C) 2022 Christoph Hellwig.
+ */
+
+#include <linux/bio.h>
+#include "bio.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "raid56.h"
+#include "async-thread.h"
+#include "check-integrity.h"
+#include "dev-replace.h"
+#include "rcu-string.h"
+#include "zoned.h"
+
+static struct bio_set btrfs_bioset;
+
+/*
+ * Reset the btrfs-private part of @bbio and record the completion handler
+ * (@end_io) together with its opaque cookie (@private).  Only the bytes in
+ * front of the embedded bio are cleared; the bio itself is left untouched
+ * because the block layer has already initialized it.
+ */
+static inline void btrfs_bio_init(struct btrfs_bio *bbio,
+                                 btrfs_bio_end_io_t end_io, void *private)
+{
+       const size_t private_len = offsetof(struct btrfs_bio, bio);
+
+       memset(bbio, 0, private_len);
+       bbio->private = private;
+       bbio->end_io = end_io;
+}
+
+/*
+ * Allocate a bio from the btrfs bioset and initialize the btrfs_bio that
+ * wraps it.  The btrfs_bio is the main I/O container for btrfs and carries
+ * all I/O submitted through btrfs_submit_bio.
+ *
+ * As with the underlying bio_alloc_bioset, the allocation is backed by a
+ * mempool and will not fail.
+ */
+struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+                           btrfs_bio_end_io_t end_io, void *private)
+{
+       struct bio *new_bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS,
+                                              &btrfs_bioset);
+
+       btrfs_bio_init(btrfs_bio(new_bio), end_io, private);
+       return new_bio;
+}
+
+/*
+ * Clone @orig into a new btrfs_bio and trim the clone to @size bytes starting
+ * @offset bytes into the original.  Both values are shifted right by 9 before
+ * being handed to bio_trim and must each fit in an unsigned int.  The
+ * resulting iterator is saved in the btrfs_bio's ->iter.
+ */
+struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
+                                   btrfs_bio_end_io_t end_io, void *private)
+{
+       struct btrfs_bio *clone_bbio;
+       struct bio *clone;
+
+       ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
+
+       clone = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
+       clone_bbio = btrfs_bio(clone);
+       btrfs_bio_init(clone_bbio, end_io, private);
+
+       bio_trim(clone, offset >> 9, size >> 9);
+       clone_bbio->iter = clone->bi_iter;
+       return clone;
+}
+
+/*
+ * Account a failed bio in the error statistics of @dev.
+ *
+ * Nothing is recorded when @dev is NULL or has no block device, or when the
+ * bio status is anything other than BLK_STS_IOERR or BLK_STS_TARGET.  A failed
+ * write bumps the write error counter; any other failed bio bumps the read
+ * error counter unless it was readahead (REQ_RAHEAD).  A bio carrying
+ * REQ_PREFLUSH additionally bumps the flush error counter.
+ */
+static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
+{
+       if (!dev || !dev->bdev)
+               return;
+       if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
+               return;
+
+       /*
+        * Write and read errors are mutually exclusive: a failed write must
+        * not be counted as a read error as well.
+        */
+       if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
+       else if (!(bio->bi_opf & REQ_RAHEAD))
+               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
+       if (bio->bi_opf & REQ_PREFLUSH)
+               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
+}
+
+/*
+ * Select the end I/O workqueue for @bio: the metadata workers when REQ_META
+ * is set in the bio's flags, the regular end I/O workers otherwise.
+ */
+static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
+                                               struct bio *bio)
+{
+       const bool is_meta = bio->bi_opf & REQ_META;
+
+       return is_meta ? fs_info->endio_meta_workers : fs_info->endio_workers;
+}
+
+/* Work item callback: recover the btrfs_bio from @work and run its ->end_io. */
+static void btrfs_end_bio_work(struct work_struct *work)
+{
+       struct btrfs_bio *bbio;
+
+       bbio = container_of(work, struct btrfs_bio, end_io_work);
+       bbio->end_io(bbio);
+}
+
+/*
+ * bi_end_io handler for bios whose bi_private holds the fs_info.  Drops the
+ * bio counter, logs a device error if the bio failed, then completes the
+ * btrfs_bio: reads are queued to an end I/O workqueue, everything else calls
+ * ->end_io directly.
+ */
+static void btrfs_simple_end_io(struct bio *bio)
+{
+       struct btrfs_bio *bbio = btrfs_bio(bio);
+       struct btrfs_fs_info *fs_info = bio->bi_private;
+
+       btrfs_bio_counter_dec(fs_info);
+
+       if (bio->bi_status)
+               btrfs_log_dev_io_error(bio, bbio->device);
+
+       if (bio_op(bio) != REQ_OP_READ) {
+               bbio->end_io(bbio);
+               return;
+       }
+
+       INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
+       queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
+}
+
+/*
+ * bi_end_io handler for bios whose bi_private holds a btrfs_io_context.
+ * Drops the bio counter, copies the context's mirror number into the
+ * btrfs_bio, runs ->end_io and finally puts the I/O context.
+ */
+static void btrfs_raid56_end_io(struct bio *bio)
+{
+       struct btrfs_io_context *bioc = bio->bi_private;
+       struct btrfs_bio *bbio = btrfs_bio(bio);
+
+       btrfs_bio_counter_dec(bioc->fs_info);
+       /* Make the context's mirror number visible to ->end_io. */
+       bbio->mirror_num = bioc->mirror_num;
+       bbio->end_io(bbio);
+
+       /* The context is put only after its last use above. */
+       btrfs_put_bioc(bioc);
+}
+
+/*
+ * bi_end_io handler for a write bio whose bi_private points at a
+ * btrfs_io_stripe.  Records a failure of this bio in the stripe's I/O
+ * context, derives the final status from the context's error count and then
+ * completes the btrfs_bio.
+ */
+static void btrfs_orig_write_end_io(struct bio *bio)
+{
+       struct btrfs_bio *bbio = btrfs_bio(bio);
+       struct btrfs_io_stripe *stripe = bio->bi_private;
+       struct btrfs_io_context *bioc = stripe->bioc;
+       bool too_many_errors;
+
+       btrfs_bio_counter_dec(bioc->fs_info);
+
+       if (bio->bi_status) {
+               atomic_inc(&bioc->error);
+               btrfs_log_dev_io_error(bio, stripe->dev);
+       }
+
+       /*
+        * The higher layers only get to see a failure once the error count
+        * is beyond the tolerance threshold of the I/O context.
+        */
+       too_many_errors = atomic_read(&bioc->error) > bioc->max_errors;
+       bio->bi_status = too_many_errors ? BLK_STS_IOERR : BLK_STS_OK;
+
+       bbio->end_io(bbio);
+       btrfs_put_bioc(bioc);
+}
+
+/*
+ * bi_end_io handler for a cloned write bio whose bi_private points at the
+ * btrfs_io_stripe it was submitted for.
+ */
+static void btrfs_clone_write_end_io(struct bio *bio)
+{
+       struct btrfs_io_stripe *stripe = bio->bi_private;
+
+       /* Count the failure in the I/O context and in the device stats. */
+       if (bio->bi_status) {
+               atomic_inc(&stripe->bioc->error);
+               btrfs_log_dev_io_error(bio, stripe->dev);
+       }
+
+       /* Pass on control to the original bio this one was cloned from */
+       bio_endio(stripe->bioc->orig_bio);
+       /* Drop the reference on the clone itself. */
+       bio_put(bio);
+}
+
+/*
+ * Point @bio at the block device of @dev and submit it.
+ *
+ * The bio is failed through bio_io_error() without being submitted when @dev
+ * is NULL, has no block device, has BTRFS_DEV_STATE_MISSING set, or when the
+ * bio is a write and the device does not have BTRFS_DEV_STATE_WRITEABLE set.
+ */
+static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
+{
+       if (!dev || !dev->bdev ||
+           test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
+           (btrfs_op(bio) == BTRFS_MAP_WRITE &&
+            !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
+               bio_io_error(bio);
+               return;
+       }
+
+       bio_set_dev(bio, dev->bdev);
+
+       /*
+        * For zone append writing, bi_sector must point the beginning of the
+        * zone
+        */
+       if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+               u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+
+               if (btrfs_dev_is_sequential(dev, physical)) {
+                       u64 zone_start = round_down(physical,
+                                                   dev->fs_info->zone_size);
+
+                       bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
+               } else {
+                       /*
+                        * Not sequential: downgrade the zone append to a
+                        * regular write.
+                        * NOTE(review): this assumes that clearing the
+                        * REQ_OP_ZONE_APPEND bits and OR-ing in REQ_OP_WRITE
+                        * leaves exactly REQ_OP_WRITE in the op field -
+                        * confirm against the op encoding.
+                        */
+                       bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
+                       bio->bi_opf |= REQ_OP_WRITE;
+               }
+       }
+       btrfs_debug_in_rcu(dev->fs_info,
+       "%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
+               __func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
+               (unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
+               dev->devid, bio->bi_iter.bi_size);
+
+       btrfsic_check_bio(bio);
+       submit_bio(bio);
+}
+
+/*
+ * Submit the write for stripe @dev_nr of @bioc.  Every stripe but the last
+ * gets its own clone of bioc->orig_bio; the last stripe reuses the original
+ * bio itself.  Reads must not come through here.
+ */
+static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
+{
+       struct btrfs_io_stripe *stripe = &bioc->stripes[dev_nr];
+       struct bio *orig_bio = bioc->orig_bio;
+       struct bio *bio;
+
+       ASSERT(bio_op(orig_bio) != REQ_OP_READ);
+
+       if (dev_nr != bioc->num_stripes - 1) {
+               /* Not the last mirror: submit a clone of the original */
+               bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+               bio_inc_remaining(orig_bio);
+               bio->bi_end_io = btrfs_clone_write_end_io;
+       } else {
+               /* Reuse the bio embedded into the btrfs_bio for the last mirror */
+               bio = orig_bio;
+               bio->bi_end_io = btrfs_orig_write_end_io;
+       }
+
+       /*
+        * NOTE(review): ->physical is read before ->bioc is stored, as in the
+        * original statement order; keep it that way in case the two fields
+        * share storage in struct btrfs_io_stripe - confirm.
+        */
+       bio->bi_private = stripe;
+       bio->bi_iter.bi_sector = stripe->physical >> SECTOR_SHIFT;
+       stripe->bioc = bioc;
+       btrfs_submit_dev_bio(stripe->dev, bio);
+}
+
+/*
+ * Map @bio from its logical address to the underlying device(s) and submit it.
+ *
+ * @fs_info:    filesystem the bio belongs to
+ * @bio:        bio embedded in a btrfs_bio; bi_sector holds the logical
+ *              address in 512-byte units on entry
+ * @mirror_num: mirror number passed on to __btrfs_map_block, which may
+ *              update it
+ *
+ * On a mapping error the btrfs_bio is completed right away with the error
+ * converted to a block status.  Otherwise one of three paths is taken: a
+ * single device submission when no I/O context is returned, the RAID56 code
+ * for parity block groups, or one submission per stripe for mirrored writes.
+ */
+void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
+{
+       u64 logical = bio->bi_iter.bi_sector << 9;
+       u64 length = bio->bi_iter.bi_size;
+       u64 map_length = length;
+       struct btrfs_io_context *bioc = NULL;
+       struct btrfs_io_stripe smap;
+       int ret;
+
+       /* Paired with btrfs_bio_counter_dec() here or in the end_io handlers. */
+       btrfs_bio_counter_inc_blocked(fs_info);
+       ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
+                               &bioc, &smap, &mirror_num, 1);
+       if (ret) {
+               btrfs_bio_counter_dec(fs_info);
+               btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
+               return;
+       }
+
+       /* The mapping must cover the whole bio; anything shorter is fatal. */
+       if (map_length < length) {
+               btrfs_crit(fs_info,
+                          "mapping failed logical %llu bio len %llu len %llu",
+                          logical, length, map_length);
+               BUG();
+       }
+
+       if (!bioc) {
+               /* Single mirror read/write fast path */
+               btrfs_bio(bio)->mirror_num = mirror_num;
+               btrfs_bio(bio)->device = smap.dev;
+               bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
+               bio->bi_private = fs_info;
+               bio->bi_end_io = btrfs_simple_end_io;
+               btrfs_submit_dev_bio(smap.dev, bio);
+       } else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+               /* Parity RAID write or read recovery */
+               bio->bi_private = bioc;
+               bio->bi_end_io = btrfs_raid56_end_io;
+               if (bio_op(bio) == REQ_OP_READ)
+                       raid56_parity_recover(bio, bioc, mirror_num);
+               else
+                       raid56_parity_write(bio, bioc);
+       } else {
+               /* Write to multiple mirrors */
+               int total_devs = bioc->num_stripes;
+               int dev_nr;
+
+               bioc->orig_bio = bio;
+               for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
+                       btrfs_submit_mirrored_bio(bioc, dev_nr);
+       }
+}
+
+/*
+ * Set up the bioset that backs btrfs_bio allocations.  The front pad passed
+ * to bioset_init is the size of the btrfs_bio fields that precede the
+ * embedded bio.  Returns 0 on success or -ENOMEM if bioset_init fails.
+ */
+int __init btrfs_bioset_init(void)
+{
+       int ret;
+
+       ret = bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
+                         offsetof(struct btrfs_bio, bio), BIOSET_NEED_BVECS);
+       return ret ? -ENOMEM : 0;
+}
+
+/* Tear down the bioset set up by btrfs_bioset_init. */
+void __cold btrfs_bioset_exit(void)
+{
+       bioset_exit(&btrfs_bioset);
+}
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
new file mode 100644 (file)
index 0000000..b12f84b
--- /dev/null
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ * Copyright (C) 2022 Christoph Hellwig.
+ */
+
+#ifndef BTRFS_BIO_H
+#define BTRFS_BIO_H
+
+#include <linux/bio.h>
+#include <linux/workqueue.h>
+#include "tree-checker.h"
+
+struct btrfs_bio;
+struct btrfs_fs_info;
+
+/* Size in bytes of the csum_inline buffer embedded in struct btrfs_bio. */
+#define BTRFS_BIO_INLINE_CSUM_SIZE     64
+
+/*
+ * Maximum number of sectors for a single bio to limit the size of the
+ * checksum array.  This matches the number of bio_vecs per bio and thus the
+ * I/O size for buffered I/O.
+ */
+#define BTRFS_MAX_BIO_SECTORS          (256)
+
+/* Completion callback type stored in the end_io member of struct btrfs_bio. */
+typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
+
+/*
+ * Additional info to pass along bio.
+ *
+ * Mostly for btrfs specific features like csum and mirror_num.
+ *
+ * Everything in front of the embedded bio is zeroed by btrfs_bio_init when a
+ * btrfs_bio is allocated or cloned.
+ */
+struct btrfs_bio {
+       /* Mirror number of the I/O, filled in on submission or completion. */
+       unsigned int mirror_num:7;
+
+       /*
+        * Extra indicator for metadata bios.
+        * For some btrfs bios they use pages without a mapping, thus
+        * we can not rely on page->mapping->host to determine if
+        * it's a metadata bio.
+        */
+       unsigned int is_metadata:1;
+       /* Starting iterator used by btrfs_bio_for_each_sector. */
+       struct bvec_iter iter;
+
+       /* for direct I/O */
+       u64 file_offset;
+
+       /* @device is for stripe IO submission. */
+       struct btrfs_device *device;
+       union {
+               /*
+                * For data checksum verification.  @csum either points at
+                * @csum_inline or at a separately allocated buffer that
+                * btrfs_bio_free_csum releases with kfree.
+                */
+               struct {
+                       u8 *csum;
+                       u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+               };
+
+               /* For metadata parentness verification. */
+               struct btrfs_tree_parent_check parent_check;
+       };
+
+       /* End I/O information supplied to btrfs_bio_alloc */
+       btrfs_bio_end_io_t end_io;
+       void *private;
+
+       /* For read end I/O handling */
+       struct work_struct end_io_work;
+
+       /*
+        * This member must come last, bio_alloc_bioset will allocate enough
+        * bytes for entire btrfs_bio but relies on bio being last.
+        */
+       struct bio bio;
+};
+
+/* Map an embedded struct bio back to the btrfs_bio that contains it. */
+static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
+{
+       struct btrfs_bio *bbio = container_of(bio, struct btrfs_bio, bio);
+
+       return bbio;
+}
+
+int __init btrfs_bioset_init(void);
+void __cold btrfs_bioset_exit(void);
+
+struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+                           btrfs_bio_end_io_t end_io, void *private);
+struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
+                                   btrfs_bio_end_io_t end_io, void *private);
+
+
+/* Store @status in the embedded bio and run the btrfs_bio's ->end_io. */
+static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+{
+       struct bio *bio = &bbio->bio;
+
+       bio->bi_status = status;
+       bbio->end_io(bbio);
+}
+
+/*
+ * Free the checksum buffer of a data bio if it was allocated separately.
+ * Metadata bios are skipped, as is a csum pointer that refers to the inline
+ * buffer.  After freeing, the pointer is reset to NULL.
+ */
+static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
+{
+       if (bbio->is_metadata || bbio->csum == bbio->csum_inline)
+               return;
+
+       kfree(bbio->csum);
+       bbio->csum = NULL;
+}
+
+/*
+ * Iterate through a btrfs_bio (@bbio) on a per-sector basis.
+ *
+ * fs_info    - struct btrfs_fs_info, supplies the sectorsize step
+ * bvl        - struct bio_vec, set to the current segment on every iteration
+ * bbio       - struct btrfs_bio
+ * iter       - struct bvec_iter, starts as a copy of bbio->iter
+ * bio_offset - unsigned int, starts at 0 and grows by sectorsize per step
+ *
+ * Every argument is evaluated more than once, so pass expressions without
+ * side effects.
+ */
+#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset)        \
+       for ((iter) = (bbio)->iter, (bio_offset) = 0;                   \
+            (iter).bi_size &&                                  \
+            (((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1);     \
+            (bio_offset) += (fs_info)->sectorsize,                     \
+            bio_advance_iter_single(&(bbio)->bio, &(iter),             \
+            (fs_info)->sectorsize))
+
+void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+                     int mirror_num);
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+                           u64 length, u64 logical, struct page *page,
+                           unsigned int pg_offset, int mirror_num);
+
+#endif
index 30adf43..5122ca7 100644 (file)
@@ -27,7 +27,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "volumes.h"
+#include "bio.h"
 #include "ordered-data.h"
 #include "compression.h"
 #include "extent_io.h"
index 91a0882..d5be259 100644 (file)
@@ -23,7 +23,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "volumes.h"
+#include "bio.h"
 #include "print-tree.h"
 #include "locking.h"
 #include "tree-log.h"
index 17f5990..892d78c 100644 (file)
@@ -42,6 +42,7 @@
 #include "root-tree.h"
 #include "file-item.h"
 #include "orphan.h"
+#include "tree-checker.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
index 65ba5c3..95d54b5 100644 (file)
@@ -20,7 +20,7 @@
 #include "extent_map.h"
 #include "ctree.h"
 #include "btrfs_inode.h"
-#include "volumes.h"
+#include "bio.h"
 #include "check-integrity.h"
 #include "locking.h"
 #include "rcu-string.h"
index 352bbb3..5de7346 100644 (file)
@@ -14,7 +14,7 @@
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
-#include "volumes.h"
+#include "bio.h"
 #include "print-tree.h"
 #include "compression.h"
 #include "fs.h"
index 4248e6c..905ea19 100644 (file)
@@ -43,7 +43,7 @@
 #include "ordered-data.h"
 #include "xattr.h"
 #include "tree-log.h"
-#include "volumes.h"
+#include "bio.h"
 #include "compression.h"
 #include "locking.h"
 #include "free-space-cache.h"
index aa80e51..31ec4a7 100644 (file)
@@ -35,6 +35,7 @@
 #include "file-item.h"
 #include "relocation.h"
 #include "super.h"
+#include "tree-checker.h"
 
 /*
  * Relocation overview
index ea83dd9..93f52ee 100644 (file)
@@ -35,7 +35,7 @@
 #include "print-tree.h"
 #include "props.h"
 #include "xattr.h"
-#include "volumes.h"
+#include "bio.h"
 #include "export.h"
 #include "compression.h"
 #include "rcu-string.h"
index 8f8d7e7..4387cd2 100644 (file)
@@ -29,6 +29,7 @@
 #include "file-item.h"
 #include "file.h"
 #include "orphan.h"
+#include "tree-checker.h"
 
 #define MAX_CONFLICT_INODES 10
 
index e51fd5f..acab20f 100644 (file)
@@ -5,12 +5,9 @@
 
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
-#include <linux/bio.h>
 #include <linux/slab.h>
-#include <linux/blkdev.h>
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
-#include <linux/raid/pq.h>
 #include <linux/semaphore.h>
 #include <linux/uuid.h>
 #include <linux/list_sort.h>
@@ -23,8 +20,6 @@
 #include "print-tree.h"
 #include "volumes.h"
 #include "raid56.h"
-#include "async-thread.h"
-#include "check-integrity.h"
 #include "rcu-string.h"
 #include "dev-replace.h"
 #include "sysfs.h"
@@ -41,8 +36,6 @@
 #include "scrub.h"
 #include "super.h"
 
-static struct bio_set btrfs_bioset;
-
 #define BTRFS_BLOCK_GROUP_STRIPE_MASK  (BTRFS_BLOCK_GROUP_RAID0 | \
                                         BTRFS_BLOCK_GROUP_RAID10 | \
                                         BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -255,11 +248,6 @@ out_overflow:;
 static int init_first_rw_device(struct btrfs_trans_handle *trans);
 static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
-static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
-                            enum btrfs_map_op op, u64 logical, u64 *length,
-                            struct btrfs_io_context **bioc_ret,
-                            struct btrfs_io_stripe *smap,
-                            int *mirror_num_ret, int need_raid_map);
 
 /*
  * Device locking
@@ -6364,11 +6352,11 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
                        stripe_offset + stripe_nr * map->stripe_len;
 }
 
-static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
-                            enum btrfs_map_op op, u64 logical, u64 *length,
-                            struct btrfs_io_context **bioc_ret,
-                            struct btrfs_io_stripe *smap,
-                            int *mirror_num_ret, int need_raid_map)
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+                     u64 logical, u64 *length,
+                     struct btrfs_io_context **bioc_ret,
+                     struct btrfs_io_stripe *smap, int *mirror_num_ret,
+                     int need_raid_map)
 {
        struct extent_map *em;
        struct map_lookup *map;
@@ -6651,266 +6639,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                                 NULL, NULL, 1);
 }
 
-/*
- * Initialize a btrfs_bio structure.  This skips the embedded bio itself as it
- * is already initialized by the block layer.
- */
-static inline void btrfs_bio_init(struct btrfs_bio *bbio,
-                                 btrfs_bio_end_io_t end_io, void *private)
-{
-       memset(bbio, 0, offsetof(struct btrfs_bio, bio));
-       bbio->end_io = end_io;
-       bbio->private = private;
-}
-
-/*
- * Allocate a btrfs_bio structure.  The btrfs_bio is the main I/O container for
- * btrfs, and is used for all I/O submitted through btrfs_submit_bio.
- *
- * Just like the underlying bio_alloc_bioset it will not fail as it is backed by
- * a mempool.
- */
-struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
-                           btrfs_bio_end_io_t end_io, void *private)
-{
-       struct bio *bio;
-
-       bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
-       btrfs_bio_init(btrfs_bio(bio), end_io, private);
-       return bio;
-}
-
-struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
-                                   btrfs_bio_end_io_t end_io, void *private)
-{
-       struct bio *bio;
-       struct btrfs_bio *bbio;
-
-       ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
-
-       bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
-       bbio = btrfs_bio(bio);
-       btrfs_bio_init(bbio, end_io, private);
-
-       bio_trim(bio, offset >> 9, size >> 9);
-       bbio->iter = bio->bi_iter;
-       return bio;
-}
-
-static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
-{
-       if (!dev || !dev->bdev)
-               return;
-       if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
-               return;
-
-       if (btrfs_op(bio) == BTRFS_MAP_WRITE)
-               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
-       if (!(bio->bi_opf & REQ_RAHEAD))
-               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
-       if (bio->bi_opf & REQ_PREFLUSH)
-               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
-}
-
-static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
-                                               struct bio *bio)
-{
-       if (bio->bi_opf & REQ_META)
-               return fs_info->endio_meta_workers;
-       return fs_info->endio_workers;
-}
-
-static void btrfs_end_bio_work(struct work_struct *work)
-{
-       struct btrfs_bio *bbio =
-               container_of(work, struct btrfs_bio, end_io_work);
-
-       bbio->end_io(bbio);
-}
-
-static void btrfs_simple_end_io(struct bio *bio)
-{
-       struct btrfs_fs_info *fs_info = bio->bi_private;
-       struct btrfs_bio *bbio = btrfs_bio(bio);
-
-       btrfs_bio_counter_dec(fs_info);
-
-       if (bio->bi_status)
-               btrfs_log_dev_io_error(bio, bbio->device);
-
-       if (bio_op(bio) == REQ_OP_READ) {
-               INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
-               queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
-       } else {
-               bbio->end_io(bbio);
-       }
-}
-
-static void btrfs_raid56_end_io(struct bio *bio)
-{
-       struct btrfs_io_context *bioc = bio->bi_private;
-       struct btrfs_bio *bbio = btrfs_bio(bio);
-
-       btrfs_bio_counter_dec(bioc->fs_info);
-       bbio->mirror_num = bioc->mirror_num;
-       bbio->end_io(bbio);
-
-       btrfs_put_bioc(bioc);
-}
-
-static void btrfs_orig_write_end_io(struct bio *bio)
-{
-       struct btrfs_io_stripe *stripe = bio->bi_private;
-       struct btrfs_io_context *bioc = stripe->bioc;
-       struct btrfs_bio *bbio = btrfs_bio(bio);
-
-       btrfs_bio_counter_dec(bioc->fs_info);
-
-       if (bio->bi_status) {
-               atomic_inc(&bioc->error);
-               btrfs_log_dev_io_error(bio, stripe->dev);
-       }
-
-       /*
-        * Only send an error to the higher layers if it is beyond the tolerance
-        * threshold.
-        */
-       if (atomic_read(&bioc->error) > bioc->max_errors)
-               bio->bi_status = BLK_STS_IOERR;
-       else
-               bio->bi_status = BLK_STS_OK;
-
-       bbio->end_io(bbio);
-       btrfs_put_bioc(bioc);
-}
-
-static void btrfs_clone_write_end_io(struct bio *bio)
-{
-       struct btrfs_io_stripe *stripe = bio->bi_private;
-
-       if (bio->bi_status) {
-               atomic_inc(&stripe->bioc->error);
-               btrfs_log_dev_io_error(bio, stripe->dev);
-       }
-
-       /* Pass on control to the original bio this one was cloned from */
-       bio_endio(stripe->bioc->orig_bio);
-       bio_put(bio);
-}
-
-static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
-{
-       if (!dev || !dev->bdev ||
-           test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
-           (btrfs_op(bio) == BTRFS_MAP_WRITE &&
-            !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-               bio_io_error(bio);
-               return;
-       }
-
-       bio_set_dev(bio, dev->bdev);
-
-       /*
-        * For zone append writing, bi_sector must point the beginning of the
-        * zone
-        */
-       if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-               u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
-
-               if (btrfs_dev_is_sequential(dev, physical)) {
-                       u64 zone_start = round_down(physical,
-                                                   dev->fs_info->zone_size);
-
-                       bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
-               } else {
-                       bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
-                       bio->bi_opf |= REQ_OP_WRITE;
-               }
-       }
-       btrfs_debug_in_rcu(dev->fs_info,
-       "%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
-               __func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
-               (unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
-               dev->devid, bio->bi_iter.bi_size);
-
-       btrfsic_check_bio(bio);
-       submit_bio(bio);
-}
-
-static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
-{
-       struct bio *orig_bio = bioc->orig_bio, *bio;
-
-       ASSERT(bio_op(orig_bio) != REQ_OP_READ);
-
-       /* Reuse the bio embedded into the btrfs_bio for the last mirror */
-       if (dev_nr == bioc->num_stripes - 1) {
-               bio = orig_bio;
-               bio->bi_end_io = btrfs_orig_write_end_io;
-       } else {
-               bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
-               bio_inc_remaining(orig_bio);
-               bio->bi_end_io = btrfs_clone_write_end_io;
-       }
-
-       bio->bi_private = &bioc->stripes[dev_nr];
-       bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
-       bioc->stripes[dev_nr].bioc = bioc;
-       btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
-}
-
-void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
-{
-       u64 logical = bio->bi_iter.bi_sector << 9;
-       u64 length = bio->bi_iter.bi_size;
-       u64 map_length = length;
-       struct btrfs_io_context *bioc = NULL;
-       struct btrfs_io_stripe smap;
-       int ret;
-
-       btrfs_bio_counter_inc_blocked(fs_info);
-       ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
-                               &bioc, &smap, &mirror_num, 1);
-       if (ret) {
-               btrfs_bio_counter_dec(fs_info);
-               btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
-               return;
-       }
-
-       if (map_length < length) {
-               btrfs_crit(fs_info,
-                          "mapping failed logical %llu bio len %llu len %llu",
-                          logical, length, map_length);
-               BUG();
-       }
-
-       if (!bioc) {
-               /* Single mirror read/write fast path */
-               btrfs_bio(bio)->mirror_num = mirror_num;
-               btrfs_bio(bio)->device = smap.dev;
-               bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
-               bio->bi_private = fs_info;
-               bio->bi_end_io = btrfs_simple_end_io;
-               btrfs_submit_dev_bio(smap.dev, bio);
-       } else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               /* Parity RAID write or read recovery */
-               bio->bi_private = bioc;
-               bio->bi_end_io = btrfs_raid56_end_io;
-               if (bio_op(bio) == REQ_OP_READ)
-                       raid56_parity_recover(bio, bioc, mirror_num);
-               else
-                       raid56_parity_write(bio, bioc);
-       } else {
-               /* Write to multiple mirrors */
-               int total_devs = bioc->num_stripes;
-               int dev_nr;
-
-               bioc->orig_bio = bio;
-               for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
-                       btrfs_submit_mirrored_bio(bioc, dev_nr);
-       }
-}
-
 static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
                                      const struct btrfs_fs_devices *fs_devices)
 {
@@ -8440,17 +8168,3 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
 
        return true;
 }
-
-int __init btrfs_bioset_init(void)
-{
-       if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
-                       offsetof(struct btrfs_bio, bio),
-                       BIOSET_NEED_BVECS))
-               return -ENOMEM;
-       return 0;
-}
-
-void __cold btrfs_bioset_exit(void)
-{
-       bioset_exit(&btrfs_bioset);
-}
index ab55147..6b7a05f 100644 (file)
@@ -6,7 +6,6 @@
 #ifndef BTRFS_VOLUMES_H
 #define BTRFS_VOLUMES_H
 
-#include <linux/bio.h>
 #include <linux/sort.h>
 #include <linux/btrfs.h>
 #include "async-thread.h"
@@ -373,8 +372,6 @@ struct btrfs_fs_devices {
        enum btrfs_read_policy read_policy;
 };
 
-#define BTRFS_BIO_INLINE_CSUM_SIZE     64
-
 #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)       \
                        - sizeof(struct btrfs_chunk))           \
                        / sizeof(struct btrfs_stripe) + 1)
@@ -384,107 +381,6 @@ struct btrfs_fs_devices {
                                - 2 * sizeof(struct btrfs_chunk))       \
                                / sizeof(struct btrfs_stripe) + 1)
 
-/*
- * Maximum number of sectors for a single bio to limit the size of the
- * checksum array.  This matches the number of bio_vecs per bio and thus the
- * I/O size for buffered I/O.
- */
-#define BTRFS_MAX_BIO_SECTORS                          (256)
-
-typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
-
-/*
- * Additional info to pass along bio.
- *
- * Mostly for btrfs specific features like csum and mirror_num.
- */
-struct btrfs_bio {
-       unsigned int mirror_num:7;
-
-       /*
-        * Extra indicator for metadata bios.
-        * For some btrfs bios they use pages without a mapping, thus
-        * we can not rely on page->mapping->host to determine if
-        * it's a metadata bio.
-        */
-       unsigned int is_metadata:1;
-       struct bvec_iter iter;
-
-       /* for direct I/O */
-       u64 file_offset;
-
-       /* @device is for stripe IO submission. */
-       struct btrfs_device *device;
-       union {
-               /* For data checksum verification. */
-               struct {
-                       u8 *csum;
-                       u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
-               };
-
-               /* For metadata parentness verification. */
-               struct btrfs_tree_parent_check parent_check;
-       };
-
-       /* End I/O information supplied to btrfs_bio_alloc */
-       btrfs_bio_end_io_t end_io;
-       void *private;
-
-       /* For read end I/O handling */
-       struct work_struct end_io_work;
-
-       /*
-        * This member must come last, bio_alloc_bioset will allocate enough
-        * bytes for entire btrfs_bio but relies on bio being last.
-        */
-       struct bio bio;
-};
-
-static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
-{
-       return container_of(bio, struct btrfs_bio, bio);
-}
-
-int __init btrfs_bioset_init(void);
-void __cold btrfs_bioset_exit(void);
-
-struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
-                           btrfs_bio_end_io_t end_io, void *private);
-struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
-                                   btrfs_bio_end_io_t end_io, void *private);
-
-static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
-{
-       bbio->bio.bi_status = status;
-       bbio->end_io(bbio);
-}
-
-static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
-{
-       if (bbio->is_metadata)
-               return;
-       if (bbio->csum != bbio->csum_inline) {
-               kfree(bbio->csum);
-               bbio->csum = NULL;
-       }
-}
-
-/*
- * Iterate through a btrfs_bio (@bbio) on a per-sector basis.
- *
- * bvl        - struct bio_vec
- * bbio       - struct btrfs_bio
- * iters      - struct bvec_iter
- * bio_offset - unsigned int
- */
-#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset)        \
-       for ((iter) = (bbio)->iter, (bio_offset) = 0;                   \
-            (iter).bi_size &&                                  \
-            (((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1);     \
-            (bio_offset) += fs_info->sectorsize,                       \
-            bio_advance_iter_single(&(bbio)->bio, &(iter),             \
-            (fs_info)->sectorsize))
-
 struct btrfs_io_stripe {
        struct btrfs_device *dev;
        union {
@@ -641,6 +537,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                     u64 logical, u64 *length,
                     struct btrfs_io_context **bioc_ret);
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+                     u64 logical, u64 *length,
+                     struct btrfs_io_context **bioc_ret,
+                     struct btrfs_io_stripe *smap, int *mirror_num_ret,
+                     int need_raid_map);
 struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
                                               u64 logical, u64 *length_ret,
                                               u32 *num_stripes);
@@ -652,7 +553,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
 struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
                                            u64 type);
 void btrfs_mapping_tree_free(struct extent_map_tree *tree);
-void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                       fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,