btrfs: scrub: Require mandatory block group RO for dev-replace

author Qu Wenruo <wqu@suse.com>

Thu, 23 Jan 2020 23:58:20 +0000 (07:58 +0800)

committer David Sterba <dsterba@suse.com>

Fri, 24 Jan 2020 13:35:56 +0000 (14:35 +0100)
author Qu Wenruo <wqu@suse.com>
Thu, 23 Jan 2020 23:58:20 +0000 (07:58 +0800)
committer David Sterba <dsterba@suse.com>
Fri, 24 Jan 2020 13:35:56 +0000 (14:35 +0100)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index 21de630..fd266a2 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3577,17 +3577,27 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                  * This can easily boost the amount of SYSTEM chunks if cleaner
                  * thread can't be triggered fast enough, and use up all space
                  * of btrfs_super_block::sys_chunk_array
+                *
+                * For dev-replace, however, we must mark the block group RO
+                * to prevent a race between:
+                * - Write duplication
+                *   Contains latest data
+                * - Scrub copy
+                *   Contains data from commit tree
+                *
+                * If the target block group is not marked RO, nocow writes can
+                * be overwritten by the scrub copy, causing data corruption.
+                * So for dev-replace, we must not continue if the block
+                * group is not RO.
                  */
-               ret = btrfs_inc_block_group_ro(cache, false);
-               scrub_pause_off(fs_info);
-
+               ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
                 if (ret == 0) {
                         ro_set = 1;
-               } else if (ret == -ENOSPC) {
+               } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
                         /*
                          * btrfs_inc_block_group_ro return -ENOSPC when it
                          * failed in creating new chunk for metadata.
-                        * It is not a problem for scrub/replace, because
+                        * It is not a problem for scrub, because
                          * metadata are always cowed, and our scrub paused
                          * commit_transactions.
                          */
@@ -3596,9 +3606,22 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                         btrfs_warn(fs_info,
                                    "failed setting block group ro: %d", ret);
                         btrfs_put_block_group(cache);
+                       scrub_pause_off(fs_info);
                         break;
                 }
  
+               /*
+                * Now the target block group is marked RO, wait for nocow
+                * writes to finish before dev-replace.
+                * COW is fine, as COW never overwrites extents in commit tree.
+                */
+               if (sctx->is_dev_replace) {
+                       btrfs_wait_nocow_writers(cache);
+                       btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
+                                       cache->length);
+               }
+
+               scrub_pause_off(fs_info);
                 down_write(&dev_replace->rwsem);
                 dev_replace->cursor_right = found_key.offset + length;
                 dev_replace->cursor_left = found_key.offset;
author	Qu Wenruo <wqu@suse.com>
	Thu, 23 Jan 2020 23:58:20 +0000 (07:58 +0800)
committer	David Sterba <dsterba@suse.com>
	Fri, 24 Jan 2020 13:35:56 +0000 (14:35 +0100)