btrfs: handle errors properly inside btrfs_submit_compressed_read()
authorQu Wenruo <wqu@suse.com>
Mon, 27 Sep 2021 07:21:50 +0000 (15:21 +0800)
committerDavid Sterba <dsterba@suse.com>
Tue, 26 Oct 2021 17:08:04 +0000 (19:08 +0200)
There are quite a few BUG_ON()s inside btrfs_submit_compressed_read();
notably, all errors inside the for() loop rely on BUG_ON() to handle
-ENOMEM.

Handle these errors properly by:

- Wait for submitted bios to finish first
  Use the wait_var_event()/wake_up_var() APIs to wait without
  introducing extra memory overhead inside compressed_bio.
  This allows us to wait for any submitted bio to finish, while still
  keeping the compressed_bio from being freed.

- Introduce finish_compressed_bio_read() to finish the compressed_bio

- Properly end the bio and finish the compressed_bio when an error
  happens

Now in btrfs_submit_compressed_read(), even when the bio submission
fails, we can properly handle the error without triggering a BUG_ON().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/compression.c

index 891a624..f7a6c3c 100644 (file)
@@ -222,9 +222,59 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
        last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
                                        &cb->pending_sectors);
        atomic_dec(&cb->pending_bios);
+       /*
+        * Here we must wake up the possible error handler after all other
+        * operations on @cb finished, or we can race with
+        * finish_compressed_bio_*() which may free @cb.
+        */
+       wake_up_var(cb);
+
        return last_io;
 }
 
+static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
+{
+       unsigned int index;
+       struct page *page;
+
+       /* Release the compressed pages */
+       for (index = 0; index < cb->nr_pages; index++) {
+               page = cb->compressed_pages[index];
+               page->mapping = NULL;
+               put_page(page);
+       }
+
+       /* Do io completion on the original bio */
+       if (cb->errors) {
+               bio_io_error(cb->orig_bio);
+       } else {
+               struct bio_vec *bvec;
+               struct bvec_iter_all iter_all;
+
+               ASSERT(bio);
+               ASSERT(!bio->bi_status);
+               /*
+                * We have verified the checksum already, set page checked so
+                * the end_io handlers know about it
+                */
+               ASSERT(!bio_flagged(bio, BIO_CLONED));
+               bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
+                       u64 bvec_start = page_offset(bvec->bv_page) +
+                                        bvec->bv_offset;
+
+                       btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
+                                       bvec->bv_page, bvec_start,
+                                       bvec->bv_len);
+               }
+
+               bio_endio(cb->orig_bio);
+       }
+
+       /* Finally free the cb struct */
+       kfree(cb->compressed_pages);
+       kfree(cb);
+}
+
 /* when we finish reading compressed pages from the disk, we
  * decompress them and then run the bio end_io routines on the
  * decompressed pages (in the inode address space).
@@ -239,8 +289,6 @@ static void end_compressed_bio_read(struct bio *bio)
 {
        struct compressed_bio *cb = bio->bi_private;
        struct inode *inode;
-       struct page *page;
-       unsigned int index;
        unsigned int mirror = btrfs_bio(bio)->mirror_num;
        int ret = 0;
 
@@ -275,42 +323,7 @@ static void end_compressed_bio_read(struct bio *bio)
 csum_failed:
        if (ret)
                cb->errors = 1;
-
-       /* release the compressed pages */
-       index = 0;
-       for (index = 0; index < cb->nr_pages; index++) {
-               page = cb->compressed_pages[index];
-               page->mapping = NULL;
-               put_page(page);
-       }
-
-       /* do io completion on the original bio */
-       if (cb->errors) {
-               bio_io_error(cb->orig_bio);
-       } else {
-               struct bio_vec *bvec;
-               struct bvec_iter_all iter_all;
-
-               /*
-                * we have verified the checksum already, set page
-                * checked so the end_io handlers know about it
-                */
-               ASSERT(!bio_flagged(bio, BIO_CLONED));
-               bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
-                       u64 bvec_start = page_offset(bvec->bv_page) +
-                                        bvec->bv_offset;
-
-                       btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
-                                       bvec->bv_page, bvec_start,
-                                       bvec->bv_len);
-               }
-
-               bio_endio(cb->orig_bio);
-       }
-
-       /* finally free the cb struct */
-       kfree(cb->compressed_pages);
-       kfree(cb);
+       finish_compressed_bio_read(cb, bio);
 out:
        bio_put(bio);
 }
@@ -832,20 +845,20 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        atomic_inc(&cb->pending_bios);
                        ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
                                                  BTRFS_WQ_ENDIO_DATA);
-                       BUG_ON(ret); /* -ENOMEM */
+                       if (ret)
+                               goto finish_cb;
 
                        ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-                       BUG_ON(ret); /* -ENOMEM */
+                       if (ret)
+                               goto finish_cb;
 
                        nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
                                                  fs_info->sectorsize);
                        sums += fs_info->csum_size * nr_sectors;
 
                        ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
-                       if (ret) {
-                               comp_bio->bi_status = ret;
-                               bio_endio(comp_bio);
-                       }
+                       if (ret)
+                               goto finish_cb;
 
                        comp_bio = btrfs_bio_alloc(BIO_MAX_VECS);
                        comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT;
@@ -860,16 +873,16 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
        atomic_inc(&cb->pending_bios);
        ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
-       BUG_ON(ret); /* -ENOMEM */
+       if (ret)
+               goto last_bio;
 
        ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-       BUG_ON(ret); /* -ENOMEM */
+       if (ret)
+               goto last_bio;
 
        ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
-       if (ret) {
-               comp_bio->bi_status = ret;
-               bio_endio(comp_bio);
-       }
+       if (ret)
+               goto last_bio;
 
        return 0;
 
@@ -885,6 +898,26 @@ fail1:
 out:
        free_extent_map(em);
        return ret;
+last_bio:
+       comp_bio->bi_status = ret;
+       /* This is the last bio, endio functions will free @cb */
+       bio_endio(comp_bio);
+       return ret;
+
+finish_cb:
+       if (comp_bio) {
+               comp_bio->bi_status = ret;
+               bio_endio(comp_bio);
+       }
+       wait_var_event(cb, atomic_read(&cb->pending_bios) == 0);
+       /*
+        * Even with previous bio ended, we should still have io not yet
+        * submitted, thus need to finish @cb manually.
+        */
+       ASSERT(refcount_read(&cb->pending_sectors));
+       /* Now we are the only one referring @cb, can finish it safely. */
+       finish_compressed_bio_read(cb, NULL);
+       return ret;
 }
 
 /*