btrfs: zoned: implement active zone tracking
author Naohiro Aota <naohiro.aota@wdc.com>
Thu, 19 Aug 2021 12:19:17 +0000 (21:19 +0900)
committer David Sterba <dsterba@suse.com>
Tue, 26 Oct 2021 17:07:59 +0000 (19:07 +0200)
Add a zone_is_active flag to struct btrfs_block_group. This flag indicates
that all the underlying device zones of the block group are active. Such
zone-active block groups are tracked on the fs_info->zone_active_bgs list.
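
A hypothetical consumer (illustrative only, not part of this patch) would
walk that list under fs_info->zone_active_bgs_lock, using the members
added below:

  /* Hypothetical example: count the currently active block groups. */
  static int count_active_bgs(struct btrfs_fs_info *fs_info)
  {
          struct btrfs_block_group *bg;
          int nr = 0;

          spin_lock(&fs_info->zone_active_bgs_lock);
          list_for_each_entry(bg, &fs_info->zone_active_bgs, active_bg_list)
                  nr++;
          spin_unlock(&fs_info->zone_active_bgs_lock);

          return nr;
  }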

btrfs_dev_{set,clear}_active_zone() take care of the underlying device
part. They set/clear a bit in the device's active_zones bitmap to indicate
whether a zone is active, and keep count of how many more zones can be
activated in active_zones_left.
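
The set side must stay correct under concurrent activation attempts: a
slot is reserved from the counter first, and a racing loser that finds the
bit already set returns its slot. A minimal user-space sketch of the
pattern (struct and function names and the C11 atomics are illustrative;
the kernel code below uses atomic_dec_if_positive(), which never lets the
counter go negative, where this sketch compensates after the fact):

  #include <stdatomic.h>
  #include <stdbool.h>

  struct zone_acct {
          atomic_int  active_left;   /* activation slots still available */
          atomic_bool active[128];   /* one "is active" flag per zone */
  };

  static bool zone_acct_set_active(struct zone_acct *a, unsigned int zno)
  {
          if (atomic_load(&a->active[zno]))
                  return true;                    /* already active */

          /* Reserve a slot; compensate if none was available. */
          if (atomic_fetch_sub(&a->active_left, 1) <= 0) {
                  atomic_fetch_add(&a->active_left, 1);
                  return false;
          }

          /* Claim the zone; if someone beat us, give the slot back. */
          if (atomic_exchange(&a->active[zno], true))
                  atomic_fetch_add(&a->active_left, 1);

          return true;
  }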

btrfs_zone_{activate,finish}() take care of the logical part and the list
management. In addition, btrfs_zone_finish() waits for any outstanding
writes to the block group and sends REQ_OP_ZONE_FINISH to the underlying
zone.
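
A hypothetical caller (illustrative only, not introduced by this patch)
would pair the two like this; the -ENOSPC mapping is an assumption:

  /*
   * Activate bg before writing to it; once it is full, finish it so
   * the device's active-zone slot can be reused.  -EAGAIN from
   * btrfs_zone_finish() means writes are still in flight and the
   * caller should retry later.
   */
  static int example_fill_and_retire(struct btrfs_block_group *bg)
  {
          if (!btrfs_zone_activate(bg))
                  return -ENOSPC; /* no activation slot left (assumed) */

          /* ... allocate from and write to bg until it is full ... */

          return btrfs_zone_finish(bg);
  }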

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/free-space-cache.c
fs/btrfs/zoned.c
fs/btrfs/zoned.h

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 4f8f04e..8e7b74f 100644
@@ -1896,6 +1896,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
        INIT_LIST_HEAD(&cache->discard_list);
        INIT_LIST_HEAD(&cache->dirty_list);
        INIT_LIST_HEAD(&cache->io_list);
+       INIT_LIST_HEAD(&cache->active_bg_list);
        btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
        atomic_set(&cache->frozen, 0);
        mutex_init(&cache->free_space_lock);
@@ -3842,6 +3843,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        }
        spin_unlock(&info->unused_bgs_lock);
 
+       spin_lock(&info->zone_active_bgs_lock);
+       while (!list_empty(&info->zone_active_bgs)) {
+               block_group = list_first_entry(&info->zone_active_bgs,
+                                              struct btrfs_block_group,
+                                              active_bg_list);
+               list_del_init(&block_group->active_bg_list);
+               btrfs_put_block_group(block_group);
+       }
+       spin_unlock(&info->zone_active_bgs_lock);
+
        spin_lock(&info->block_group_cache_lock);
        while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
                block_group = rb_entry(n, struct btrfs_block_group,
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 265db2c..f751b80 100644
@@ -98,6 +98,7 @@ struct btrfs_block_group {
        unsigned int to_copy:1;
        unsigned int relocating_repair:1;
        unsigned int chunk_item_inserted:1;
+       unsigned int zone_is_active:1;
 
        int disk_cache_state;
 
@@ -205,6 +206,7 @@ struct btrfs_block_group {
        u64 zone_capacity;
        u64 meta_write_pointer;
        struct map_lookup *physical_map;
+       struct list_head active_bg_list;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8f0174c..bfba85d 100644
@@ -1018,6 +1018,9 @@ struct btrfs_fs_info {
        spinlock_t treelog_bg_lock;
        u64 treelog_bg;
 
+       spinlock_t zone_active_bgs_lock;
+       struct list_head zone_active_bgs;
+
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
        spinlock_t ref_verify_lock;
        struct rb_root block_tree;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f755d02..41ea50f 100644
@@ -2884,6 +2884,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->unused_bgs_lock);
        spin_lock_init(&fs_info->treelog_bg_lock);
+       spin_lock_init(&fs_info->zone_active_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->unused_bg_unpin_mutex);
        mutex_init(&fs_info->reclaim_bgs_lock);
@@ -2897,6 +2898,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
        INIT_LIST_HEAD(&fs_info->unused_bgs);
        INIT_LIST_HEAD(&fs_info->reclaim_bgs);
+       INIT_LIST_HEAD(&fs_info->zone_active_bgs);
 #ifdef CONFIG_BTRFS_DEBUG
        INIT_LIST_HEAD(&fs_info->allocated_roots);
        INIT_LIST_HEAD(&fs_info->allocated_ebs);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 9ce0f9b..0d26819 100644
@@ -2763,8 +2763,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
         * out the free space after the allocation offset.
         */
        if (btrfs_is_zoned(fs_info)) {
-               btrfs_info(fs_info, "free space %llu",
-                          block_group->zone_capacity - block_group->alloc_offset);
+               btrfs_info(fs_info, "free space %llu active %d",
+                          block_group->zone_capacity - block_group->alloc_offset,
+                          block_group->zone_is_active);
                return;
        }
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 4c89ac0..614499a 100644
@@ -989,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
        return pos;
 }
 
+static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
+{
+       struct btrfs_zoned_device_info *zone_info = device->zone_info;
+       unsigned int zno = (pos >> zone_info->zone_size_shift);
+
+       /* We can use any number of zones */
+       if (zone_info->max_active_zones == 0)
+               return true;
+
+       if (!test_bit(zno, zone_info->active_zones)) {
+               /* Active zone left? */
+               if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0)
+                       return false;
+               if (test_and_set_bit(zno, zone_info->active_zones)) {
+                       /* Someone already set the bit */
+                       atomic_inc(&zone_info->active_zones_left);
+               }
+       }
+
+       return true;
+}
+
+static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
+{
+       struct btrfs_zoned_device_info *zone_info = device->zone_info;
+       unsigned int zno = (pos >> zone_info->zone_size_shift);
+
+       /* We can use any number of zones */
+       if (zone_info->max_active_zones == 0)
+               return;
+
+       if (test_and_clear_bit(zno, zone_info->active_zones))
+               atomic_inc(&zone_info->active_zones_left);
+}
+
 int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
                            u64 length, u64 *bytes)
 {
@@ -1004,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
        *bytes = length;
        while (length) {
                btrfs_dev_set_zone_empty(device, physical);
+               btrfs_dev_clear_active_zone(device, physical);
                physical += device->zone_info->zone_size;
                length -= device->zone_info->zone_size;
        }
@@ -1656,3 +1692,160 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 
        return device;
 }
+
+/**
+ * Activate block group and underlying device zones
+ *
+ * @block_group: the block group to activate
+ *
+ * Return: true on success, false otherwise
+ */
+bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct map_lookup *map;
+       struct btrfs_device *device;
+       u64 physical;
+       bool ret;
+
+       if (!btrfs_is_zoned(block_group->fs_info))
+               return true;
+
+       map = block_group->physical_map;
+       /* Currently support SINGLE profile only */
+       ASSERT(map->num_stripes == 1);
+       device = map->stripes[0].dev;
+       physical = map->stripes[0].physical;
+
+       if (device->zone_info->max_active_zones == 0)
+               return true;
+
+       spin_lock(&block_group->lock);
+
+       if (block_group->zone_is_active) {
+               ret = true;
+               goto out_unlock;
+       }
+
+       /* No space left */
+       if (block_group->alloc_offset == block_group->zone_capacity) {
+               ret = false;
+               goto out_unlock;
+       }
+
+       if (!btrfs_dev_set_active_zone(device, physical)) {
+               /* Cannot activate the zone */
+               ret = false;
+               goto out_unlock;
+       }
+
+       /* Successfully activated all the zones */
+       block_group->zone_is_active = 1;
+
+       spin_unlock(&block_group->lock);
+
+       /* For the active block group list */
+       btrfs_get_block_group(block_group);
+
+       spin_lock(&fs_info->zone_active_bgs_lock);
+       ASSERT(list_empty(&block_group->active_bg_list));
+       list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+       spin_unlock(&fs_info->zone_active_bgs_lock);
+
+       return true;
+
+out_unlock:
+       spin_unlock(&block_group->lock);
+       return ret;
+}
+
+int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct map_lookup *map;
+       struct btrfs_device *device;
+       u64 physical;
+       int ret = 0;
+
+       if (!btrfs_is_zoned(fs_info))
+               return 0;
+
+       map = block_group->physical_map;
+       /* Currently support SINGLE profile only */
+       ASSERT(map->num_stripes == 1);
+
+       device = map->stripes[0].dev;
+       physical = map->stripes[0].physical;
+
+       if (device->zone_info->max_active_zones == 0)
+               return 0;
+
+       spin_lock(&block_group->lock);
+       if (!block_group->zone_is_active) {
+               spin_unlock(&block_group->lock);
+               return 0;
+       }
+
+       /* Check if we have unwritten allocated space */
+       if ((block_group->flags &
+            (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+           block_group->alloc_offset > block_group->meta_write_pointer) {
+               spin_unlock(&block_group->lock);
+               return -EAGAIN;
+       }
+       spin_unlock(&block_group->lock);
+
+       ret = btrfs_inc_block_group_ro(block_group, false);
+       if (ret)
+               return ret;
+
+       /* Ensure all writes in this block group finish */
+       btrfs_wait_block_group_reservations(block_group);
+       /* No need to wait for NOCOW writers. Zoned mode does not allow that. */
+       btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
+                                block_group->length);
+
+       spin_lock(&block_group->lock);
+
+       /*
+        * Bail out if someone already deactivated the block group, or
+        * allocated space is left in the block group.
+        */
+       if (!block_group->zone_is_active) {
+               spin_unlock(&block_group->lock);
+               btrfs_dec_block_group_ro(block_group);
+               return 0;
+       }
+
+       if (block_group->reserved) {
+               spin_unlock(&block_group->lock);
+               btrfs_dec_block_group_ro(block_group);
+               return -EAGAIN;
+       }
+
+       block_group->zone_is_active = 0;
+       block_group->alloc_offset = block_group->zone_capacity;
+       block_group->free_space_ctl->free_space = 0;
+       btrfs_clear_treelog_bg(block_group);
+       spin_unlock(&block_group->lock);
+
+       ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+                              physical >> SECTOR_SHIFT,
+                              device->zone_info->zone_size >> SECTOR_SHIFT,
+                              GFP_NOFS);
+       btrfs_dec_block_group_ro(block_group);
+
+       if (!ret) {
+               btrfs_dev_clear_active_zone(device, physical);
+
+               spin_lock(&fs_info->zone_active_bgs_lock);
+               ASSERT(!list_empty(&block_group->active_bg_list));
+               list_del_init(&block_group->active_bg_list);
+               spin_unlock(&fs_info->zone_active_bgs_lock);
+
+               /* For active_bg_list */
+               btrfs_put_block_group(block_group);
+       }
+
+       return ret;
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 4862878..2345ecf 100644
@@ -69,6 +69,8 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
                                  u64 physical_start, u64 physical_pos);
 struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
                                            u64 logical, u64 length);
+bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+int btrfs_zone_finish(struct btrfs_block_group *block_group);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                     struct blk_zone *zone)
@@ -204,6 +206,16 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
        return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+       return true;
+}
+
+static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+       return 0;
+}
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)