btrfs: consolidate device_list_mutex in prepare_sprout to its parent
author Anand Jain <anand.jain@oracle.com>
Tue, 9 Nov 2021 09:51:58 +0000 (17:51 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 3 Jan 2022 14:09:44 +0000 (15:09 +0100)
btrfs_prepare_sprout() splices seed devices into its own struct fs_devices,
so that its parent function btrfs_init_new_device() can add the new sprout
device to fs_info->fs_devices.

Both btrfs_prepare_sprout() and btrfs_init_new_device() need
device_list_mutex, but they take it separately, which leaves a small
race window between the two critical sections. Close it by holding
device_list_mutex across both btrfs_init_new_device() and
btrfs_prepare_sprout().

Split btrfs_prepare_sprout() into btrfs_init_sprout() and
btrfs_setup_sprout(). This split is essential because device_list_mutex
must not be held for allocations in btrfs_init_sprout() but must be held
for btrfs_setup_sprout(). So now a common device_list_mutex can be used
between btrfs_init_new_device() and btrfs_setup_sprout().

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/volumes.c

index 836e1b661fc92249850e48b2580f0dafc4db1cb2..53753e04af144c3b483a681d2e03f5fb5dbbbc31 100644 (file)
@@ -2431,21 +2431,15 @@ struct btrfs_device *btrfs_find_device_by_devspec(
        return device;
 }
 
-/*
- * does all the dirty work required for changing file system's UUID.
- */
-static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
+static struct btrfs_fs_devices *btrfs_init_sprout(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        struct btrfs_fs_devices *old_devices;
        struct btrfs_fs_devices *seed_devices;
-       struct btrfs_super_block *disk_super = fs_info->super_copy;
-       struct btrfs_device *device;
-       u64 super_flags;
 
        lockdep_assert_held(&uuid_mutex);
        if (!fs_devices->seeding)
-               return -EINVAL;
+               return ERR_PTR(-EINVAL);
 
        /*
         * Private copy of the seed devices, anchored at
@@ -2453,7 +2447,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
         */
        seed_devices = alloc_fs_devices(NULL, NULL);
        if (IS_ERR(seed_devices))
-               return PTR_ERR(seed_devices);
+               return seed_devices;
 
        /*
         * It's necessary to retain a copy of the original seed fs_devices in
@@ -2464,7 +2458,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
        old_devices = clone_fs_devices(fs_devices);
        if (IS_ERR(old_devices)) {
                kfree(seed_devices);
-               return PTR_ERR(old_devices);
+               return old_devices;
        }
 
        list_add(&old_devices->fs_list, &fs_uuids);
@@ -2475,7 +2469,41 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
        INIT_LIST_HEAD(&seed_devices->alloc_list);
        mutex_init(&seed_devices->device_list_mutex);
 
-       mutex_lock(&fs_devices->device_list_mutex);
+       return seed_devices;
+}
+
+/*
+ * Splice seed devices into the sprout fs_devices.
+ * Generate a new fsid for the sprouted read-write filesystem.
+ */
+static void btrfs_setup_sprout(struct btrfs_fs_info *fs_info,
+                              struct btrfs_fs_devices *seed_devices)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_super_block *disk_super = fs_info->super_copy;
+       struct btrfs_device *device;
+       u64 super_flags;
+
+       /*
+        * We are updating the fsid, the thread leading to device_list_add()
+        * could race, so uuid_mutex is needed.
+        */
+       lockdep_assert_held(&uuid_mutex);
+
+       /*
+        * The threads listed below may traverse dev_list but can do that without
+        * device_list_mutex:
+        * - All device ops and balance - as we are in btrfs_exclop_start.
+        * - Various dev_list readers - are using RCU.
+        * - btrfs_ioctl_fitrim() - is using RCU.
+        *
+        * The read-only threads listed below take device_list_mutex:
+        * - Readonly scrub btrfs_scrub_dev()
+        * - Readonly scrub btrfs_scrub_progress()
+        * - btrfs_get_dev_stats()
+        */
+       lockdep_assert_held(&fs_devices->device_list_mutex);
+
        list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
                              synchronize_rcu);
        list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2491,13 +2519,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
        generate_random_uuid(fs_devices->fsid);
        memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
        memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
-       mutex_unlock(&fs_devices->device_list_mutex);
 
        super_flags = btrfs_super_flags(disk_super) &
                      ~BTRFS_SUPER_FLAG_SEEDING;
        btrfs_set_super_flags(disk_super, super_flags);
-
-       return 0;
 }
 
 /*
@@ -2588,6 +2613,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        struct super_block *sb = fs_info->sb;
        struct rcu_string *name;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_fs_devices *seed_devices;
        u64 orig_super_total_bytes;
        u64 orig_super_num_devices;
        int ret = 0;
@@ -2671,18 +2697,25 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
        if (seeding_dev) {
                btrfs_clear_sb_rdonly(sb);
-               ret = btrfs_prepare_sprout(fs_info);
-               if (ret) {
+
+               /* GFP_KERNEL allocation must not be under device_list_mutex */
+               seed_devices = btrfs_init_sprout(fs_info);
+               if (IS_ERR(seed_devices)) {
+                       ret = PTR_ERR(seed_devices);
                        btrfs_abort_transaction(trans, ret);
                        goto error_trans;
                }
+       }
+
+       mutex_lock(&fs_devices->device_list_mutex);
+       if (seeding_dev) {
+               btrfs_setup_sprout(fs_info, seed_devices);
                btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
                                                device);
        }
 
        device->fs_devices = fs_devices;
 
-       mutex_lock(&fs_devices->device_list_mutex);
        mutex_lock(&fs_info->chunk_mutex);
        list_add_rcu(&device->dev_list, &fs_devices->devices);
        list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
@@ -2744,7 +2777,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
                /*
                 * fs_devices now represents the newly sprouted filesystem and
-                * its fsid has been changed by btrfs_prepare_sprout
+                * its fsid has been changed by btrfs_setup_sprout().
                 */
                btrfs_sysfs_update_sprout_fsid(fs_devices);
        }