ext4: use pre-allocated es in __es_remove_extent()
authorBaokun Li <libaokun1@huawei.com>
Mon, 24 Apr 2023 03:38:39 +0000 (11:38 +0800)
committerTheodore Ts'o <tytso@mit.edu>
Mon, 26 Jun 2023 23:35:12 +0000 (19:35 -0400)
When splitting extent, if the second extent can not be dropped, we return
-ENOMEM and use GFP_NOFAIL to preallocate an extent_status outside of
i_es_lock and pass it to __es_remove_extent() to be used as the second
extent. This ensures that __es_remove_extent() is executed successfully,
thus ensuring consistency in the extent status tree. If the second extent
is not undroppable, we simply drop it and return 0. Then retry is no longer
necessary, remove it.

Now, __es_remove_extent() will always remove what it should, maybe more.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20230424033846.4732-6-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/extents_status.c

index 2ac1f6e..d28a746 100644 (file)
@@ -147,7 +147,8 @@ static struct kmem_cache *ext4_pending_cachep;
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
                              struct extent_status *prealloc);
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
-                             ext4_lblk_t end, int *reserved);
+                             ext4_lblk_t end, int *reserved,
+                             struct extent_status *prealloc);
 static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
                       struct ext4_inode_info *locked_ei);
@@ -867,7 +868,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
        ext4_es_insert_extent_check(inode, &newes);
 
        write_lock(&EXT4_I(inode)->i_es_lock);
-       err = __es_remove_extent(inode, lblk, end, NULL);
+       err = __es_remove_extent(inode, lblk, end, NULL, NULL);
        if (err != 0)
                goto error;
 retry:
@@ -1311,6 +1312,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
  * @lblk - first block in range
  * @end - last block in range
  * @reserved - number of cluster reservations released
+ * @prealloc - pre-allocated es to avoid memory allocation failures
  *
  * If @reserved is not NULL and delayed allocation is enabled, counts
  * block/cluster reservations freed by removing range and if bigalloc
@@ -1318,7 +1320,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
  * error code on failure.
  */
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
-                             ext4_lblk_t end, int *reserved)
+                             ext4_lblk_t end, int *reserved,
+                             struct extent_status *prealloc)
 {
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct rb_node *node;
@@ -1326,14 +1329,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
        struct extent_status orig_es;
        ext4_lblk_t len1, len2;
        ext4_fsblk_t block;
-       int err;
+       int err = 0;
        bool count_reserved = true;
        struct rsvd_count rc;
 
        if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
                count_reserved = false;
-retry:
-       err = 0;
 
        es = __es_tree_search(&tree->root, lblk);
        if (!es)
@@ -1367,14 +1368,13 @@ retry:
                                        orig_es.es_len - len2;
                        ext4_es_store_pblock_status(&newes, block,
                                                    ext4_es_status(&orig_es));
-                       err = __es_insert_extent(inode, &newes, NULL);
+                       err = __es_insert_extent(inode, &newes, prealloc);
                        if (err) {
+                               if (!ext4_es_must_keep(&newes))
+                                       return 0;
+
                                es->es_lblk = orig_es.es_lblk;
                                es->es_len = orig_es.es_len;
-                               if ((err == -ENOMEM) &&
-                                   __es_shrink(EXT4_SB(inode->i_sb),
-                                                       128, EXT4_I(inode)))
-                                       goto retry;
                                goto out;
                        }
                } else {
@@ -1474,7 +1474,7 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
         * is reclaimed.
         */
        write_lock(&EXT4_I(inode)->i_es_lock);
-       err = __es_remove_extent(inode, lblk, end, &reserved);
+       err = __es_remove_extent(inode, lblk, end, &reserved, NULL);
        write_unlock(&EXT4_I(inode)->i_es_lock);
        ext4_es_print_tree(inode);
        ext4_da_release_space(inode, reserved);
@@ -2017,7 +2017,7 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
 
        write_lock(&EXT4_I(inode)->i_es_lock);
 
-       err = __es_remove_extent(inode, lblk, lblk, NULL);
+       err = __es_remove_extent(inode, lblk, lblk, NULL, NULL);
        if (err != 0)
                goto error;
 retry: