btrfs: raid56: switch scrub path to use a single function

author Qu Wenruo <wqu@suse.com>

Tue, 1 Nov 2022 11:16:11 +0000 (19:16 +0800)

committer David Sterba <dsterba@suse.com>

Mon, 5 Dec 2022 17:00:49 +0000 (18:00 +0100)
author Qu Wenruo <wqu@suse.com>
Tue, 1 Nov 2022 11:16:11 +0000 (19:16 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 5 Dec 2022 17:00:49 +0000 (18:00 +0100)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c

index fcac70f..6292371 100644 (file)
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -64,7 +64,6 @@ struct sector_ptr {
         unsigned int uptodate:8;
  };
  
-static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
  static void rmw_rbio_work(struct work_struct *work);
  static void rmw_rbio_work_locked(struct work_struct *work);
  static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
@@ -72,9 +71,8 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
  static void index_rbio_pages(struct btrfs_raid_bio *rbio);
  static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
  
-static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
-                                        int need_check);
-static void scrub_parity_work(struct work_struct *work);
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
+static void scrub_rbio_work_locked(struct work_struct *work);
  
  static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
  {
@@ -819,7 +817,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
                                 start_async_work(next, rmw_rbio_work_locked);
                         } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
                                 steal_rbio(rbio, next);
-                               start_async_work(next, scrub_parity_work);
+                               start_async_work(next, scrub_rbio_work_locked);
                         }
  
                         goto done_nolock;
@@ -881,35 +879,6 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
  }
  
  /*
- * end io function used by finish_rmw.  When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_end_io(struct bio *bio)
-{
-       struct btrfs_raid_bio *rbio = bio->bi_private;
-       blk_status_t err = bio->bi_status;
-       int max_errors;
-
-       if (err)
-               fail_bio_stripe(rbio, bio);
-
-       bio_put(bio);
-
-       if (!atomic_dec_and_test(&rbio->stripes_pending))
-               return;
-
-       err = BLK_STS_OK;
-
-       /* OK, we have read all the stripes we need to. */
-       max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
-                    0 : rbio->bioc->max_errors;
-       if (atomic_read(&rbio->error) > max_errors)
-               err = BLK_STS_IOERR;
-
-       rbio_orig_end_io(rbio, err);
-}
-
-/*
   * Get a sector pointer specified by its @stripe_nr and @sector_nr.
   *
   * @rbio:               The raid bio
@@ -1320,87 +1289,6 @@ error:
  }
  
  /*
- * this is called from one of two situations.  We either
- * have a full stripe from the higher layers, or we've read all
- * the missing bits off disk.
- *
- * This will calculate the parity and then send down any
- * changed blocks.
- */
-static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
-{
-       /* The total sector number inside the full stripe. */
-       /* Sector number inside a stripe. */
-       int sectornr;
-       struct bio_list bio_list;
-       struct bio *bio;
-       int ret;
-
-       bio_list_init(&bio_list);
-
-       /* We should have at least one data sector. */
-       ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));
-
-       /* at this point we either have a full stripe,
-        * or we've read the full stripe from the drive.
-        * recalculate the parity and write the new results.
-        *
-        * We're not allowed to add any new bios to the
-        * bio list here, anyone else that wants to
-        * change this stripe needs to do their own rmw.
-        */
-       spin_lock_irq(&rbio->bio_list_lock);
-       set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
-       spin_unlock_irq(&rbio->bio_list_lock);
-
-       atomic_set(&rbio->error, 0);
-
-       /*
-        * now that we've set rmw_locked, run through the
-        * bio list one last time and map the page pointers
-        *
-        * We don't cache full rbios because we're assuming
-        * the higher layers are unlikely to use this area of
-        * the disk again soon.  If they do use it again,
-        * hopefully they will send another full bio.
-        */
-       index_rbio_pages(rbio);
-       if (!rbio_is_full(rbio))
-               cache_rbio_pages(rbio);
-       else
-               clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
-
-       for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++)
-               generate_pq_vertical(rbio, sectornr);
-
-       ret = rmw_assemble_write_bios(rbio, &bio_list);
-       if (ret < 0)
-               goto cleanup;
-
-       atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
-       BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
-
-       while ((bio = bio_list_pop(&bio_list))) {
-               bio->bi_end_io = raid_write_end_io;
-
-               if (trace_raid56_write_stripe_enabled()) {
-                       struct raid56_bio_trace_info trace_info = { 0 };
-
-                       bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_write_stripe(rbio, bio, &trace_info);
-               }
-               submit_bio(bio);
-       }
-       return;
-
-cleanup:
-       rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
-       while ((bio = bio_list_pop(&bio_list)))
-               bio_put(bio);
-}
-
-/*
   * helper to find the stripe number for a given bio.  Used to figure out which
   * stripe has failed.  This expects the bio to correspond to a physical disk,
   * so it looks up based on physical sector numbers.
@@ -1568,22 +1456,6 @@ static void submit_read_bios(struct btrfs_raid_bio *rbio,
         }
  }
  
-static void raid56_bio_end_io(struct bio *bio)
-{
-       struct btrfs_raid_bio *rbio = bio->bi_private;
-
-       if (bio->bi_status)
-               fail_bio_stripe(rbio, bio);
-       else
-               set_bio_pages_uptodate(rbio, bio);
-
-       bio_put(bio);
-
-       if (atomic_dec_and_test(&rbio->stripes_pending))
-               queue_work(rbio->bioc->fs_info->endio_raid56_workers,
-                          &rbio->end_io_work);
-}
-
  static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
                                   struct bio_list *bio_list)
  {
@@ -1968,60 +1840,6 @@ out:
         return ret;
  }
  
-/*
- * all parity reconstruction happens here.  We've read in everything
- * we can find from the drives and this does the heavy lifting of
- * sorting the good from the bad.
- */
-static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
-{
-       int ret;
-
-       ret = recover_sectors(rbio);
-
-       /*
-        * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
-        * valid rbio which is consistent with ondisk content, thus such a
-        * valid rbio can be cached to avoid further disk reads.
-        */
-       if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-           rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
-               /*
-                * - In case of two failures, where rbio->failb != -1:
-                *
-                *   Do not cache this rbio since the above read reconstruction
-                *   (raid6_datap_recov() or raid6_2data_recov()) may have
-                *   changed some content of stripes which are not identical to
-                *   on-disk content any more, otherwise, a later write/recover
-                *   may steal stripe_pages from this rbio and end up with
-                *   corruptions or rebuild failures.
-                *
-                * - In case of single failure, where rbio->failb == -1:
-                *
-                *   Cache this rbio iff the above read reconstruction is
-                *   executed without problems.
-                */
-               if (!ret && rbio->failb < 0)
-                       cache_rbio_pages(rbio);
-               else
-                       clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
-
-               rbio_orig_end_io(rbio, errno_to_blk_status(ret));
-       } else if (!ret) {
-               rbio->faila = -1;
-               rbio->failb = -1;
-
-               if (rbio->operation == BTRFS_RBIO_WRITE)
-                       finish_rmw(rbio);
-               else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
-                       finish_parity_scrub(rbio, 0);
-               else
-                       BUG();
-       } else {
-               rbio_orig_end_io(rbio, errno_to_blk_status(ret));
-       }
-}
-
  static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
                                       struct bio_list *bio_list)
  {
@@ -2449,8 +2267,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
         return 0;
  }
  
-static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
-                                        int need_check)
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
  {
         struct btrfs_io_context *bioc = rbio->bioc;
         const u32 sectorsize = bioc->fs_info->sectorsize;
@@ -2493,7 +2310,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
  
         p_sector.page = alloc_page(GFP_NOFS);
         if (!p_sector.page)
-               goto cleanup;
+               return -ENOMEM;
         p_sector.pgoff = 0;
         p_sector.uptodate = 1;
  
@@ -2503,7 +2320,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                 if (!q_sector.page) {
                         __free_page(p_sector.page);
                         p_sector.page = NULL;
-                       goto cleanup;
+                       return -ENOMEM;
                 }
                 q_sector.pgoff = 0;
                 q_sector.uptodate = 1;
@@ -2590,33 +2407,13 @@ writeback:
         }
  
  submit_write:
-       nr_data = bio_list_size(&bio_list);
-       if (!nr_data) {
-               /* Every parity is right */
-               rbio_orig_end_io(rbio, BLK_STS_OK);
-               return;
-       }
-
-       atomic_set(&rbio->stripes_pending, nr_data);
-
-       while ((bio = bio_list_pop(&bio_list))) {
-               bio->bi_end_io = raid_write_end_io;
-
-               if (trace_raid56_scrub_write_stripe_enabled()) {
-                       struct raid56_bio_trace_info trace_info = { 0 };
-
-                       bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_scrub_write_stripe(rbio, bio, &trace_info);
-               }
-               submit_bio(bio);
-       }
-       return;
+       submit_write_bios(rbio, &bio_list);
+       return 0;
  
  cleanup:
-       rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
         while ((bio = bio_list_pop(&bio_list)))
                 bio_put(bio);
+       return ret;
  }
  
  static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
@@ -2626,85 +2423,51 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
         return 0;
  }
  
-/*
- * While we're doing the parity check and repair, we could have errors
- * in reading pages off the disk.  This checks for errors and if we're
- * not able to read the page it'll trigger parity reconstruction.  The
- * parity scrub will be finished after we've reconstructed the failed
- * stripes
- */
-static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
+static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
  {
-       if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
-               goto cleanup;
-
-       if (rbio->faila >= 0 || rbio->failb >= 0) {
-               int dfail = 0, failp = -1;
-
-               if (is_data_stripe(rbio, rbio->faila))
-                       dfail++;
-               else if (is_parity_stripe(rbio->faila))
-                       failp = rbio->faila;
-
-               if (is_data_stripe(rbio, rbio->failb))
-                       dfail++;
-               else if (is_parity_stripe(rbio->failb))
-                       failp = rbio->failb;
-
-               /*
-                * Because we can not use a scrubbing parity to repair
-                * the data, so the capability of the repair is declined.
-                * (In the case of RAID5, we can not repair anything)
-                */
-               if (dfail > rbio->bioc->max_errors - 1)
-                       goto cleanup;
+       int dfail = 0, failp = -1;
+       int ret;
  
-               /*
-                * If all data is good, only parity is correctly, just
-                * repair the parity.
-                */
-               if (dfail == 0) {
-                       finish_parity_scrub(rbio, 0);
-                       return;
-               }
+       /* No error case should be already handled by the caller. */
+       ASSERT(rbio->faila >= 0 || rbio->failb >= 0);
  
-               /*
-                * Here means we got one corrupted data stripe and one
-                * corrupted parity on RAID6, if the corrupted parity
-                * is scrubbing parity, luckily, use the other one to repair
-                * the data, or we can not repair the data stripe.
-                */
-               if (failp != rbio->scrubp)
-                       goto cleanup;
+       if (is_data_stripe(rbio, rbio->faila))
+               dfail++;
+       else if (is_parity_stripe(rbio->faila))
+               failp = rbio->faila;
  
-               __raid_recover_end_io(rbio);
-       } else {
-               finish_parity_scrub(rbio, 1);
-       }
-       return;
+       if (is_data_stripe(rbio, rbio->failb))
+               dfail++;
+       else if (is_parity_stripe(rbio->failb))
+               failp = rbio->failb;
  
-cleanup:
-       rbio_orig_end_io(rbio, BLK_STS_IOERR);
-}
+       /*
+        * We can not use the parity being scrubbed to repair the data, so
+        * the number of data failures we can tolerate is reduced by one.
+        * (In the case of RAID5, we can not repair anything)
+        */
+       if (dfail > rbio->bioc->max_errors - 1)
+               return -EIO;
  
-/*
- * end io for the read phase of the rmw cycle.  All the bios here are physical
- * stripe bios we've read from the disk so we can recalculate the parity of the
- * stripe.
- *
- * This will usually kick off finish_rmw once all the bios are read in, but it
- * may trigger parity reconstruction if we had any errors along the way
- */
-static void raid56_parity_scrub_end_io_work(struct work_struct *work)
-{
-       struct btrfs_raid_bio *rbio =
-               container_of(work, struct btrfs_raid_bio, end_io_work);
+       /*
+        * If all data is good and only the parity is bad, just
+        * repair the parity.
+        */
+       if (dfail == 0)
+               return 0;
  
         /*
-        * This will normally call finish_rmw to start our write, but if there
-        * are any failed stripes we'll reconstruct from parity first
+        * Reaching here means we have one corrupted data stripe and one
+        * corrupted parity on RAID6. If the corrupted parity is the one
+        * being scrubbed, we can use the other parity to repair the data;
+        * otherwise we can not repair the data stripe.
          */
-       validate_rbio_for_parity_scrub(rbio);
+       if (failp != rbio->scrubp)
+               return -EIO;
+
+       /* We have some corrupted sectors, need to repair them. */
+       ret = recover_sectors(rbio);
+       return ret;
  }
  
  static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
@@ -2756,9 +2519,9 @@ error:
         return ret;
  }
  
-static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
+static int scrub_rbio(struct btrfs_raid_bio *rbio)
  {
-       int bios_to_read = 0;
+       bool need_check = false;
         struct bio_list bio_list;
         int ret;
         struct bio *bio;
@@ -2774,61 +2537,59 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
         if (ret < 0)
                 goto cleanup;
  
-       bios_to_read = bio_list_size(&bio_list);
-       if (!bios_to_read) {
-               /*
-                * this can happen if others have merged with
-                * us, it means there is nothing left to read.
-                * But if there are missing devices it may not be
-                * safe to do the full stripe write yet.
-                */
-               goto finish;
-       }
+       submit_read_bios(rbio, &bio_list);
+       wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
  
+       if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
+               ret = -EIO;
+               goto cleanup;
+       }
         /*
-        * The bioc may be freed once we submit the last bio. Make sure not to
-        * touch it after that.
+        * No error during read, so we can finish the scrub, but we still
+        * need to verify the P/Q sectors.
          */
-       atomic_set(&rbio->stripes_pending, bios_to_read);
-       INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work);
-       while ((bio = bio_list_pop(&bio_list))) {
-               bio->bi_end_io = raid56_bio_end_io;
+       if (atomic_read(&rbio->error) == 0) {
+               need_check = true;
+               goto finish;
+       }
  
-               if (trace_raid56_scrub_read_enabled()) {
-                       struct raid56_bio_trace_info trace_info = { 0 };
+       /* We have some failures, need to recover the failed sectors first. */
+       ret = recover_scrub_rbio(rbio);
+       if (ret < 0)
+               goto cleanup;
  
-                       bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_scrub_read(rbio, bio, &trace_info);
-               }
-               submit_bio(bio);
-       }
-       /* the actual write will happen once the reads are done */
-       return;
+finish:
+       /*
+        * We have every sector properly prepared, so we can finish the
+        * scrub and write back the good content.
+        */
+       ret = finish_parity_scrub(rbio, need_check);
+       wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+       if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
+               ret = -EIO;
+       return ret;
  
  cleanup:
-       rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
         while ((bio = bio_list_pop(&bio_list)))
                 bio_put(bio);
  
-       return;
-
-finish:
-       validate_rbio_for_parity_scrub(rbio);
+       return ret;
  }
  
-static void scrub_parity_work(struct work_struct *work)
+static void scrub_rbio_work_locked(struct work_struct *work)
  {
         struct btrfs_raid_bio *rbio;
+       int ret;
  
         rbio = container_of(work, struct btrfs_raid_bio, work);
-       raid56_parity_scrub_stripe(rbio);
+       ret = scrub_rbio(rbio);
+       rbio_orig_end_io(rbio, errno_to_blk_status(ret));
  }
  
  void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
  {
         if (!lock_stripe_add(rbio))
-               start_async_work(rbio, scrub_parity_work);
+               start_async_work(rbio, scrub_rbio_work_locked);
  }
  
  /* The following code is used for dev replace of a missing RAID 5/6 device. */
author	Qu Wenruo <wqu@suse.com>
	Tue, 1 Nov 2022 11:16:11 +0000 (19:16 +0800)
committer	David Sterba <dsterba@suse.com>
	Mon, 5 Dec 2022 17:00:49 +0000 (18:00 +0100)