btrfs: load free space cache into a temporary ctl
author Josef Bacik <josef@toxicpanda.com>
Fri, 23 Oct 2020 13:58:08 +0000 (09:58 -0400)
committer David Sterba <dsterba@suse.com>
Tue, 8 Dec 2020 14:54:03 +0000 (15:54 +0100)
The free space cache has been special in that we would load it right
away instead of farming the work off to a worker thread.  This resulted
in some weirdness that had to be taken into account, namely that if we
ever found a block group being cached the fast way we had to wait for
it to finish, because we could get the cache before it had been
validated and might end up throwing the cache away.

To handle this particular case, instead create a temporary
btrfs_free_space_ctl to load the free space cache into.  Then, once
we've validated that it makes sense, copy its contents into the actual
block_group->free_space_ctl.  This allows us to avoid the problem of
needing to wait for the caching to complete, lets us clean up the
discard extent handling in __load_free_space_cache(), and removes the
need for merge_space_tree() because the space is now added entry by
entry into the real free_space_ctl.  This will allow further reworks of
how we handle loading the free space cache.
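
In condensed form, the new load path looks roughly like the following
(a sketch only; locking, error handling and the inode/path setup are
elided, see the actual hunks below):

    struct btrfs_free_space_ctl tmp_ctl = {};

    /* Load the on-disk cache into a throwaway ctl first. */
    btrfs_init_free_space_ctl(block_group, &tmp_ctl);
    ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
                                  path, block_group->start);

    /* Only keep the cache if its free space matches reality. */
    matched = (tmp_ctl.free_space == (block_group->length - used -
                                      block_group->bytes_super));
    if (matched)
            ret = copy_free_space_cache(block_group, &tmp_ctl);
    else
            __btrfs_remove_free_space_cache(&tmp_ctl);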

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/tests/btrfs-tests.c

fs/btrfs/block-group.c
index bb66857..adbd18d 100644
@@ -695,33 +695,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
        btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
        spin_lock(&cache->lock);
-       /*
-        * This should be a rare occasion, but this could happen I think in the
-        * case where one thread starts to load the space cache info, and then
-        * some other thread starts a transaction commit which tries to do an
-        * allocation while the other thread is still loading the space cache
-        * info.  The previous loop should have kept us from choosing this block
-        * group, but if we've moved to the state where we will wait on caching
-        * block groups we need to first check if we're doing a fast load here,
-        * so we can wait for it to finish, otherwise we could end up allocating
-        * from a block group who's cache gets evicted for one reason or
-        * another.
-        */
-       while (cache->cached == BTRFS_CACHE_FAST) {
-               struct btrfs_caching_control *ctl;
-
-               ctl = cache->caching_ctl;
-               refcount_inc(&ctl->count);
-               prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&cache->lock);
-
-               schedule();
-
-               finish_wait(&ctl->wait, &wait);
-               btrfs_put_caching_control(ctl);
-               spin_lock(&cache->lock);
-       }
-
        if (cache->cached != BTRFS_CACHE_NO) {
                spin_unlock(&cache->lock);
                kfree(caching_ctl);
@@ -1805,7 +1778,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
        INIT_LIST_HEAD(&cache->discard_list);
        INIT_LIST_HEAD(&cache->dirty_list);
        INIT_LIST_HEAD(&cache->io_list);
-       btrfs_init_free_space_ctl(cache);
+       btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
        atomic_set(&cache->frozen, 0);
        mutex_init(&cache->free_space_lock);
        btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
fs/btrfs/free-space-cache.c
index 0787339..58bd2d3 100644
@@ -33,8 +33,6 @@ struct btrfs_trim_range {
        struct list_head list;
 };
 
-static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
-                               struct btrfs_free_space *bitmap_info);
 static int link_free_space(struct btrfs_free_space_ctl *ctl,
                           struct btrfs_free_space *info);
 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -43,6 +41,14 @@ static int btrfs_wait_cache_io_root(struct btrfs_root *root,
                             struct btrfs_trans_handle *trans,
                             struct btrfs_io_ctl *io_ctl,
                             struct btrfs_path *path);
+static int search_bitmap(struct btrfs_free_space_ctl *ctl,
+                        struct btrfs_free_space *bitmap_info, u64 *offset,
+                        u64 *bytes, bool for_alloc);
+static void free_bitmap(struct btrfs_free_space_ctl *ctl,
+                       struct btrfs_free_space *bitmap_info);
+static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+                             struct btrfs_free_space *info, u64 offset,
+                             u64 bytes);
 
 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
                                               struct btrfs_path *path,
@@ -625,44 +631,6 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
        return 0;
 }
 
-/*
- * Since we attach pinned extents after the fact we can have contiguous sections
- * of free space that are split up in entries.  This poses a problem with the
- * tree logging stuff since it could have allocated across what appears to be 2
- * entries since we would have merged the entries when adding the pinned extents
- * back to the free space cache.  So run through the space cache that we just
- * loaded and merge contiguous entries.  This will make the log replay stuff not
- * blow up and it will make for nicer allocator behavior.
- */
-static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
-{
-       struct btrfs_free_space *e, *prev = NULL;
-       struct rb_node *n;
-
-again:
-       spin_lock(&ctl->tree_lock);
-       for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
-               e = rb_entry(n, struct btrfs_free_space, offset_index);
-               if (!prev)
-                       goto next;
-               if (e->bitmap || prev->bitmap)
-                       goto next;
-               if (prev->offset + prev->bytes == e->offset) {
-                       unlink_free_space(ctl, prev);
-                       unlink_free_space(ctl, e);
-                       prev->bytes += e->bytes;
-                       kmem_cache_free(btrfs_free_space_cachep, e);
-                       link_free_space(ctl, prev);
-                       prev = NULL;
-                       spin_unlock(&ctl->tree_lock);
-                       goto again;
-               }
-next:
-               prev = e;
-       }
-       spin_unlock(&ctl->tree_lock);
-}
-
 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                   struct btrfs_free_space_ctl *ctl,
                                   struct btrfs_path *path, u64 offset)
@@ -753,16 +721,6 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                        goto free_cache;
                }
 
-               /*
-                * Sync discard ensures that the free space cache is always
-                * trimmed.  So when reading this in, the state should reflect
-                * that.  We also do this for async as a stop gap for lack of
-                * persistence.
-                */
-               if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
-                   btrfs_test_opt(fs_info, DISCARD_ASYNC))
-                       e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
-
                if (!e->bytes) {
                        kmem_cache_free(btrfs_free_space_cachep, e);
                        goto free_cache;
@@ -816,16 +774,9 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                ret = io_ctl_read_bitmap(&io_ctl, e);
                if (ret)
                        goto free_cache;
-               e->bitmap_extents = count_bitmap_extents(ctl, e);
-               if (!btrfs_free_space_trimmed(e)) {
-                       ctl->discardable_extents[BTRFS_STAT_CURR] +=
-                               e->bitmap_extents;
-                       ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
-               }
        }
 
        io_ctl_drop_pages(&io_ctl);
-       merge_space_tree(ctl);
        ret = 1;
 out:
        io_ctl_free(&io_ctl);
@@ -836,10 +787,46 @@ free_cache:
        goto out;
 }
 
+static int copy_free_space_cache(struct btrfs_block_group *block_group,
+                                struct btrfs_free_space_ctl *ctl)
+{
+       struct btrfs_free_space *info;
+       struct rb_node *n;
+       int ret = 0;
+
+       while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
+               info = rb_entry(n, struct btrfs_free_space, offset_index);
+               if (!info->bitmap) {
+                       unlink_free_space(ctl, info);
+                       ret = btrfs_add_free_space(block_group, info->offset,
+                                                  info->bytes);
+                       kmem_cache_free(btrfs_free_space_cachep, info);
+               } else {
+                       u64 offset = info->offset;
+                       u64 bytes = ctl->unit;
+
+                       while (search_bitmap(ctl, info, &offset, &bytes,
+                                            false) == 0) {
+                               ret = btrfs_add_free_space(block_group, offset,
+                                                          bytes);
+                               if (ret)
+                                       break;
+                               bitmap_clear_bits(ctl, info, offset, bytes);
+                               offset = info->offset;
+                               bytes = ctl->unit;
+                       }
+                       free_bitmap(ctl, info);
+               }
+               cond_resched();
+       }
+       return ret;
+}
+
 int load_free_space_cache(struct btrfs_block_group *block_group)
 {
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+       struct btrfs_free_space_ctl tmp_ctl = {};
        struct inode *inode;
        struct btrfs_path *path;
        int ret = 0;
@@ -847,6 +834,13 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
        u64 used = block_group->used;
 
        /*
+        * Because we could potentially discard our loaded free space, we want
+        * to load everything into a temporary structure first, and then if it's
+        * valid copy it all into the actual free space ctl.
+        */
+       btrfs_init_free_space_ctl(block_group, &tmp_ctl);
+
+       /*
         * If this block group has been marked to be cleared for one reason or
         * another then we can't trust the on disk cache, so just return.
         */
@@ -897,19 +891,25 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
        }
        spin_unlock(&block_group->lock);
 
-       ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
+       ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
                                      path, block_group->start);
        btrfs_free_path(path);
        if (ret <= 0)
                goto out;
 
-       spin_lock(&ctl->tree_lock);
-       matched = (ctl->free_space == (block_group->length - used -
-                                      block_group->bytes_super));
-       spin_unlock(&ctl->tree_lock);
+       matched = (tmp_ctl.free_space == (block_group->length - used -
+                                         block_group->bytes_super));
 
-       if (!matched) {
-               __btrfs_remove_free_space_cache(ctl);
+       if (matched) {
+               ret = copy_free_space_cache(block_group, &tmp_ctl);
+               /*
+                * ret == 1 means we successfully loaded the free space cache,
+                * so we need to re-set it here.
+                */
+               if (ret == 0)
+                       ret = 1;
+       } else {
+               __btrfs_remove_free_space_cache(&tmp_ctl);
                btrfs_warn(fs_info,
                           "block group %llu has wrong amount of free space",
                           block_group->start);
@@ -1914,29 +1914,6 @@ out:
        return NULL;
 }
 
-static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
-                               struct btrfs_free_space *bitmap_info)
-{
-       struct btrfs_block_group *block_group = ctl->private;
-       u64 bytes = bitmap_info->bytes;
-       unsigned int rs, re;
-       int count = 0;
-
-       if (!block_group || !bytes)
-               return count;
-
-       bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
-                                  BITS_PER_BITMAP) {
-               bytes -= (rs - re) * ctl->unit;
-               count++;
-
-               if (!bytes)
-                       break;
-       }
-
-       return count;
-}
-
 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
                           struct btrfs_free_space *info, u64 offset)
 {
@@ -2676,10 +2653,10 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
                   "%d blocks of free space at or bigger than bytes is", count);
 }
 
-void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
+                              struct btrfs_free_space_ctl *ctl)
 {
        struct btrfs_fs_info *fs_info = block_group->fs_info;
-       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 
        spin_lock_init(&ctl->tree_lock);
        ctl->unit = fs_info->sectorsize;
fs/btrfs/free-space-cache.h
index e3d5e0a..bf8d127 100644
@@ -109,7 +109,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
                              struct btrfs_path *path,
                              struct inode *inode);
 
-void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
+                              struct btrfs_free_space_ctl *ctl);
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
                           struct btrfs_free_space_ctl *ctl,
                           u64 bytenr, u64 size,
fs/btrfs/tests/btrfs-tests.c
index b36e887..8ca334d 100644
@@ -225,7 +225,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
        INIT_LIST_HEAD(&cache->bg_list);
-       btrfs_init_free_space_ctl(cache);
+       btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
        mutex_init(&cache->free_space_lock);
 
        return cache;