btrfs: rework the order of btrfs_ordered_extent::flags
author: Qu Wenruo <wqu@suse.com>
Thu, 21 Jan 2021 06:13:54 +0000 (14:13 +0800)
committer: David Sterba <dsterba@suse.com>
Mon, 8 Feb 2021 21:58:52 +0000 (22:58 +0100)
[BUG]
There is a long-standing bug in how the last parameter of
btrfs_add_ordered_extent() is used, dating back to commit 771ed689d2cd
("Btrfs: Optimize compressed writeback and reads") from 2008.

In that ancient commit btrfs_add_ordered_extent() expects the @type
parameter to be one of the following:

- BTRFS_ORDERED_REGULAR
- BTRFS_ORDERED_NOCOW
- BTRFS_ORDERED_PREALLOC
- BTRFS_ORDERED_COMPRESSED

But we pass 0 in cow_file_range(), which means BTRFS_ORDERED_IO_DONE.

Ironically, an extra check in __btrfs_add_ordered_extent() skips setting
the bit when (type == IO_DONE || type == COMPLETE), which avoids any
obvious bug.

But this still leaves regular COW ordered extents with no bit to
indicate their type in various trace events, rendering the REGULAR bit
useless.

[FIX]
Change the following aspects to avoid such problems:

- Reorder btrfs_ordered_extent::flags
  Now the type bits go first (REGULAR/NOCOW/PREALLOC/COMPRESSED), then
  the DIRECT bit, and finally extra status bits like IO_DONE/COMPLETE/IOERR.

- Add extra ASSERT() for btrfs_add_ordered_extent_*()

- Remove @type parameter for btrfs_add_ordered_extent_compress()
  As the only valid @type here is BTRFS_ORDERED_COMPRESSED.

- Remove the unnecessary special check for IO_DONE/COMPLETE in
  __btrfs_add_ordered_extent()
  The check existed only to make the code work; with the extra ASSERT(),
  only a limited set of values can be passed in.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
include/trace/events/btrfs.h

index ef6cb7b620d005d5ef0a0befa5b56742d9819a05..ea9056cc5559c5f8a1307afff9856daca4a06f30 100644 (file)
@@ -917,7 +917,6 @@ retry:
                                                ins.objectid,
                                                async_extent->ram_size,
                                                ins.offset,
                                                ins.objectid,
                                                async_extent->ram_size,
                                                ins.offset,
-                                               BTRFS_ORDERED_COMPRESSED,
                                                async_extent->compress_type);
                if (ret) {
                        btrfs_drop_extent_cache(inode, async_extent->start,
                                                async_extent->compress_type);
                if (ret) {
                        btrfs_drop_extent_cache(inode, async_extent->start,
@@ -1127,7 +1126,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                free_extent_map(em);
 
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
                free_extent_map(em);
 
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-                                              ram_size, cur_alloc_size, 0);
+                                              ram_size, cur_alloc_size,
+                                              BTRFS_ORDERED_REGULAR);
                if (ret)
                        goto out_drop_extent_cache;
 
                if (ret)
                        goto out_drop_extent_cache;
 
index d5d326c674b1a77e3894eb944677307b04879ad7..b4e6500548a2ef84d39eac4d78f803397b7d6a44 100644 (file)
@@ -199,8 +199,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
        entry->compress_type = compress_type;
        entry->truncated_len = (u64)-1;
        entry->qgroup_rsv = ret;
        entry->compress_type = compress_type;
        entry->truncated_len = (u64)-1;
        entry->qgroup_rsv = ret;
-       if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
-               set_bit(type, &entry->flags);
+
+       ASSERT(type == BTRFS_ORDERED_REGULAR ||
+              type == BTRFS_ORDERED_NOCOW ||
+              type == BTRFS_ORDERED_PREALLOC ||
+              type == BTRFS_ORDERED_COMPRESSED);
+       set_bit(type, &entry->flags);
 
        if (dio) {
                percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes,
 
        if (dio) {
                percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes,
@@ -256,6 +260,9 @@ int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
                             u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
                             int type)
 {
                             u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
                             int type)
 {
+       ASSERT(type == BTRFS_ORDERED_REGULAR ||
+              type == BTRFS_ORDERED_NOCOW ||
+              type == BTRFS_ORDERED_PREALLOC);
        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
                                          num_bytes, disk_num_bytes, type, 0,
                                          BTRFS_COMPRESS_NONE);
        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
                                          num_bytes, disk_num_bytes, type, 0,
                                          BTRFS_COMPRESS_NONE);
@@ -265,6 +272,9 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
                                 u64 disk_bytenr, u64 num_bytes,
                                 u64 disk_num_bytes, int type)
 {
                                 u64 disk_bytenr, u64 num_bytes,
                                 u64 disk_num_bytes, int type)
 {
+       ASSERT(type == BTRFS_ORDERED_REGULAR ||
+              type == BTRFS_ORDERED_NOCOW ||
+              type == BTRFS_ORDERED_PREALLOC);
        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
                                          num_bytes, disk_num_bytes, type, 1,
                                          BTRFS_COMPRESS_NONE);
        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
                                          num_bytes, disk_num_bytes, type, 1,
                                          BTRFS_COMPRESS_NONE);
@@ -272,11 +282,12 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
 
 int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
 
 int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
-                                     u64 disk_num_bytes, int type,
-                                     int compress_type)
+                                     u64 disk_num_bytes, int compress_type)
 {
 {
+       ASSERT(compress_type != BTRFS_COMPRESS_NONE);
        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
        return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-                                         num_bytes, disk_num_bytes, type, 0,
+                                         num_bytes, disk_num_bytes,
+                                         BTRFS_ORDERED_COMPRESSED, 0,
                                          compress_type);
 }
 
                                          compress_type);
 }
 
index 46194c2c05d40e58d805b9bac11e3ba0b4b2cbb1..cca3307807e847dc059313f46f346a100c835518 100644 (file)
@@ -27,7 +27,7 @@ struct btrfs_ordered_sum {
 };
 
 /*
 };
 
 /*
- * bits for the flags field:
+ * Bits for btrfs_ordered_extent::flags.
  *
  * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
  * It is used to make sure metadata is inserted into the tree only once
  *
  * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
  * It is used to make sure metadata is inserted into the tree only once
@@ -38,24 +38,36 @@ struct btrfs_ordered_sum {
  * IO is done and any metadata is inserted into the tree.
  */
 enum {
  * IO is done and any metadata is inserted into the tree.
  */
 enum {
+       /*
+        * Different types for ordered extents, one and only one of the 4 types
+        * can be set when creating an ordered extent.
+        *
+        * REGULAR:     For regular non-compressed COW write
+        * NOCOW:       For NOCOW write into existing non-hole extent
+        * PREALLOC:    For NOCOW write into preallocated extent
+        * COMPRESSED:  For compressed COW write
+        */
+       BTRFS_ORDERED_REGULAR,
+       BTRFS_ORDERED_NOCOW,
+       BTRFS_ORDERED_PREALLOC,
+       BTRFS_ORDERED_COMPRESSED,
+
+       /*
+        * Extra bit for direct io, can only be set for
+        * REGULAR/NOCOW/PREALLOC. No direct io for compressed extent.
+        */
+       BTRFS_ORDERED_DIRECT,
+
+       /* Extra status bits for ordered extents */
+
        /* set when all the pages are written */
        BTRFS_ORDERED_IO_DONE,
        /* set when removed from the tree */
        BTRFS_ORDERED_COMPLETE,
        /* set when all the pages are written */
        BTRFS_ORDERED_IO_DONE,
        /* set when removed from the tree */
        BTRFS_ORDERED_COMPLETE,
-       /* set when we want to write in place */
-       BTRFS_ORDERED_NOCOW,
-       /* writing a zlib compressed extent */
-       BTRFS_ORDERED_COMPRESSED,
-       /* set when writing to preallocated extent */
-       BTRFS_ORDERED_PREALLOC,
-       /* set when we're doing DIO with this extent */
-       BTRFS_ORDERED_DIRECT,
        /* We had an io error when writing this out */
        BTRFS_ORDERED_IOERR,
        /* Set when we have to truncate an extent */
        BTRFS_ORDERED_TRUNCATED,
        /* We had an io error when writing this out */
        BTRFS_ORDERED_IOERR,
        /* Set when we have to truncate an extent */
        BTRFS_ORDERED_TRUNCATED,
-       /* Regular IO for COW */
-       BTRFS_ORDERED_REGULAR,
        /* Used during fsync to track already logged extents */
        BTRFS_ORDERED_LOGGED,
        /* We have already logged all the csums of the ordered extent */
        /* Used during fsync to track already logged extents */
        BTRFS_ORDERED_LOGGED,
        /* We have already logged all the csums of the ordered extent */
@@ -167,8 +179,7 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
                                 u64 disk_num_bytes, int type);
 int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
                                 u64 disk_num_bytes, int type);
 int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
-                                     u64 disk_num_bytes, int type,
-                                     int compress_type);
+                                     u64 disk_num_bytes, int compress_type);
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                           struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                           struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
index ecd24c719de4d3e2c24230941038d2ed20b86977..b9896fc061600360b6446c2f02c2c307e94a5953 100644 (file)
@@ -499,12 +499,13 @@ DEFINE_EVENT(
 
 #define show_ordered_flags(flags)                                         \
        __print_flags(flags, "|",                                          \
 
 #define show_ordered_flags(flags)                                         \
        __print_flags(flags, "|",                                          \
-               { (1 << BTRFS_ORDERED_IO_DONE),         "IO_DONE"       }, \
-               { (1 << BTRFS_ORDERED_COMPLETE),        "COMPLETE"      }, \
+               { (1 << BTRFS_ORDERED_REGULAR),         "REGULAR"       }, \
                { (1 << BTRFS_ORDERED_NOCOW),           "NOCOW"         }, \
                { (1 << BTRFS_ORDERED_NOCOW),           "NOCOW"         }, \
-               { (1 << BTRFS_ORDERED_COMPRESSED),      "COMPRESSED"    }, \
                { (1 << BTRFS_ORDERED_PREALLOC),        "PREALLOC"      }, \
                { (1 << BTRFS_ORDERED_PREALLOC),        "PREALLOC"      }, \
+               { (1 << BTRFS_ORDERED_COMPRESSED),      "COMPRESSED"    }, \
                { (1 << BTRFS_ORDERED_DIRECT),          "DIRECT"        }, \
                { (1 << BTRFS_ORDERED_DIRECT),          "DIRECT"        }, \
+               { (1 << BTRFS_ORDERED_IO_DONE),         "IO_DONE"       }, \
+               { (1 << BTRFS_ORDERED_COMPLETE),        "COMPLETE"      }, \
                { (1 << BTRFS_ORDERED_IOERR),           "IOERR"         }, \
                { (1 << BTRFS_ORDERED_TRUNCATED),       "TRUNCATED"     })
 
                { (1 << BTRFS_ORDERED_IOERR),           "IOERR"         }, \
                { (1 << BTRFS_ORDERED_TRUNCATED),       "TRUNCATED"     })