btrfs: zoned: introduce dedicated data write path for zoned filesystems
authorNaohiro Aota <naohiro.aota@wdc.com>
Thu, 4 Feb 2021 10:22:07 +0000 (19:22 +0900)
committerDavid Sterba <dsterba@suse.com>
Tue, 9 Feb 2021 01:46:06 +0000 (02:46 +0100)
If more than one IO is issued for one file extent, these IO can be
written to separate regions on a device. Since we cannot map one file
extent to such a separate area on a zoned filesystem, we need to follow
the "one IO == one ordered extent" rule.

The normal buffered, uncompressed and not pre-allocated write path (used
by cow_file_range()) sometimes does not follow this rule. It can write a
part of an ordered extent when specified a region to write e.g., when
its called from fdatasync().

Introduce a dedicated (uncompressed buffered) data write path for zoned
filesystems, that will COW the region and write it at once.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/inode.c

index dd6fe8afd0e0eae160283ea0b4500583ae4e121a..c4779cde83c629f432380d752cf336b95c51b8c8 100644 (file)
@@ -1394,6 +1394,29 @@ static int cow_file_range_async(struct btrfs_inode *inode,
        return 0;
 }
 
+static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+                                      struct page *locked_page, u64 start,
+                                      u64 end, int *page_started,
+                                      unsigned long *nr_written)
+{
+       int ret;
+
+       ret = cow_file_range(inode, locked_page, start, end, page_started,
+                            nr_written, 0);
+       if (ret)
+               return ret;
+
+       if (*page_started)
+               return 0;
+
+       __set_page_dirty_nobuffers(locked_page);
+       account_page_redirty(locked_page);
+       extent_write_locked_range(&inode->vfs_inode, start, end, WB_SYNC_ALL);
+       *page_started = 1;
+
+       return 0;
+}
+
 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
                                        u64 bytenr, u64 num_bytes)
 {
@@ -1871,17 +1894,24 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 {
        int ret;
        int force_cow = need_force_cow(inode, start, end);
+       const bool zoned = btrfs_is_zoned(inode->root->fs_info);
 
        if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+               ASSERT(!zoned);
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 1, nr_written);
        } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+               ASSERT(!zoned);
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
        } else if (!inode_can_compress(inode) ||
                   !inode_need_compress(inode, start, end)) {
-               ret = cow_file_range(inode, locked_page, start, end,
-                                    page_started, nr_written, 1);
+               if (zoned)
+                       ret = run_delalloc_zoned(inode, locked_page, start, end,
+                                                page_started, nr_written);
+               else
+                       ret = cow_file_range(inode, locked_page, start, end,
+                                            page_started, nr_written, 1);
        } else {
                set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
                ret = cow_file_range_async(inode, wbc, locked_page, start, end,