btrfs: atomically insert the new extent in btrfs_split_ordered_extent
authorChristoph Hellwig <hch@lst.de>
Wed, 24 May 2023 15:03:14 +0000 (17:03 +0200)
committerDavid Sterba <dsterba@suse.com>
Mon, 19 Jun 2023 11:59:33 +0000 (13:59 +0200)
Currently there is a small race window in btrfs_split_ordered_extent,
where the reduced old extent can be looked up on the per-inode rbtree
or the per-root list while the newly split out one isn't visible yet.

Fix this by open coding btrfs_alloc_ordered_extent in
btrfs_split_ordered_extent, and holding the tree lock and
root->ordered_extent_lock over the entire tree and extent manipulation.

Note that this introduces new lock ordering because previously
ordered_extent_lock was never held over the tree lock.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ordered-data.c

index ea1c2ec..0dc2bd0 100644 (file)
@@ -1135,15 +1135,17 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
 struct btrfs_ordered_extent *btrfs_split_ordered_extent(
                        struct btrfs_ordered_extent *ordered, u64 len)
 {
-       struct inode *inode = ordered->inode;
-       struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+       struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        u64 file_offset = ordered->file_offset;
        u64 disk_bytenr = ordered->disk_bytenr;
        unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
+       struct btrfs_ordered_extent *new;
        struct rb_node *node;
 
-       trace_btrfs_ordered_extent_split(BTRFS_I(inode), ordered);
+       trace_btrfs_ordered_extent_split(inode, ordered);
 
        ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
 
@@ -1163,7 +1165,16 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
        if (WARN_ON_ONCE(!list_empty(&ordered->list)))
                return ERR_PTR(-EINVAL);
 
-       spin_lock_irq(&tree->lock);
+       new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
+                                  len, 0, flags, ordered->compress_type);
+       if (IS_ERR(new))
+               return new;
+
+       /* One ref for the tree. */
+       refcount_inc(&new->refs);
+
+       spin_lock_irq(&root->ordered_extent_lock);
+       spin_lock(&tree->lock);
        /* Remove from tree once */
        node = &ordered->rb_node;
        rb_erase(node, &tree->tree);
@@ -1182,19 +1193,19 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
        if (node)
                btrfs_panic(fs_info, -EEXIST,
                        "zoned: inconsistency in ordered tree at offset %llu",
-                           ordered->file_offset);
+                       ordered->file_offset);
 
-       spin_unlock_irq(&tree->lock);
-
-       /*
-        * The splitting extent is already counted and will be added again in
-        * btrfs_alloc_ordered_extent(). Subtract len to avoid double counting.
-        */
-       percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch);
+       node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
+       if (node)
+               btrfs_panic(fs_info, -EEXIST,
+                       "zoned: inconsistency in ordered tree at offset %llu",
+                       new->file_offset);
+       spin_unlock(&tree->lock);
 
-       return btrfs_alloc_ordered_extent(BTRFS_I(inode), file_offset, len, len,
-                                         disk_bytenr, len, 0, flags,
-                                         ordered->compress_type);
+       list_add_tail(&new->root_extent_list, &root->ordered_extents);
+       root->nr_ordered_extents++;
+       spin_unlock_irq(&root->ordered_extent_lock);
+       return new;
 }
 
 int __init ordered_data_init(void)