btrfs: handle error in btrfs_cache_block_group
author Josef Bacik <josef@toxicpanda.com>
Tue, 19 Nov 2019 18:59:00 +0000 (13:59 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 12 Jan 2020 11:21:30 +0000 (12:21 +0100)
[ Upstream commit db8fe64f9ce61d1d89d3c3c34d111a43afb9f053 ]

find_free_extent() has a BUG_ON(ret < 0) on the return value of
btrfs_cache_block_group().  If we fail to allocate our ctl we'll just
panic, which is not good.  Instead, just go on to another block group.
If we fail to find a block group we don't want to return ENOSPC, because
really we got an ENOMEM and that's the root of the problem.  Save our
return from btrfs_cache_block_group(), and then if we still fail to make
our allocation, return that ret so we get the right error back.

Tested with inject-error.py from bcc.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/btrfs/extent-tree.c

index eb95ed78a18eb24a7353816ac542d2ce4f9f1801..dc50605ecbdafaab0d20ba1b1993b01e16537193 100644 (file)
@@ -3781,6 +3781,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
                                u64 flags, int delalloc)
 {
        int ret = 0;
+       int cache_block_group_error = 0;
        struct btrfs_free_cluster *last_ptr = NULL;
        struct btrfs_block_group_cache *block_group = NULL;
        struct find_free_extent_ctl ffe_ctl = {0};
@@ -3940,7 +3941,20 @@ have_block_group:
                if (unlikely(!ffe_ctl.cached)) {
                        ffe_ctl.have_caching_bg = true;
                        ret = btrfs_cache_block_group(block_group, 0);
-                       BUG_ON(ret < 0);
+
+                       /*
+                        * If we get ENOMEM here or something else we want to
+                        * try other block groups, because it may not be fatal.
+                        * However if we can't find anything else we need to
+                        * save our return here so that we return the actual
+                        * error that caused problems, not ENOSPC.
+                        */
+                       if (ret < 0) {
+                               if (!cache_block_group_error)
+                                       cache_block_group_error = ret;
+                               ret = 0;
+                               goto loop;
+                       }
                        ret = 0;
                }
 
@@ -4027,7 +4041,7 @@ loop:
        if (ret > 0)
                goto search;
 
-       if (ret == -ENOSPC) {
+       if (ret == -ENOSPC && !cache_block_group_error) {
                /*
                 * Use ffe_ctl->total_free_space as fallback if we can't find
                 * any contiguous hole.
@@ -4038,6 +4052,8 @@ loop:
                space_info->max_extent_size = ffe_ctl.max_extent_size;
                spin_unlock(&space_info->lock);
                ins->offset = ffe_ctl.max_extent_size;
+       } else if (ret == -ENOSPC) {
+               ret = cache_block_group_error;
        }
        return ret;
 }