btrfs: remove unnecessary EXTENT_UPTODATE state in buffered I/O path
author Ethan Lien <ethanlien@synology.com>
Fri, 19 Aug 2022 02:44:08 +0000 (10:44 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 26 Sep 2022 10:27:57 +0000 (12:27 +0200)
After we copy data to the page cache in buffered I/O, we:
1. Insert an EXTENT_UPTODATE state into the inode's io_tree, via
   endio_readpage_release_extent(), set_extent_delalloc() or
   set_extent_defrag().
2. Set the page uptodate before we unlock the page.

But the only place we check io_tree's EXTENT_UPTODATE state is in
btrfs_do_readpage(). We know we enter btrfs_do_readpage() only when we
have a non-uptodate page, so it is unnecessary to set EXTENT_UPTODATE.

For example, when performing a buffered random read:

fio --rw=randread --ioengine=libaio --direct=0 --numjobs=4 \
--filesize=32G --size=4G --bs=4k --name=job \
--filename=/mnt/file --name=job

Then check how many extent_state objects are in the io_tree:

cat /proc/slabinfo | grep btrfs_extent_state | awk '{print $2}'

Without this patch, we got 640567 btrfs_extent_state objects.
With this patch,    we got    204 btrfs_extent_state objects.

Maintaining such a big tree adds overhead, since every I/O needs to insert
EXTENT_LOCKED, insert EXTENT_UPTODATE, then remove EXTENT_LOCKED. On every
insert or remove, we need to lock the io_tree, do a tree search, and allocate
or free extent states. By removing the unnecessary EXTENT_UPTODATE, we keep
the io_tree at a minimal size and reduce overhead when performing buffered I/O.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Robbie Ko <robbieko@synology.com>
Signed-off-by: Ethan Lien <ethanlien@synology.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/extent-io-tree.h
fs/btrfs/extent_io.c
fs/btrfs/inode.c

index c3eb52d..53ae849 100644 (file)
@@ -211,7 +211,7 @@ static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
                                      struct extent_state **cached_state)
 {
        return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
+                             EXTENT_DELALLOC | extra_bits,
                              0, NULL, cached_state, GFP_NOFS, NULL);
 }
 
@@ -219,7 +219,7 @@ static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
                u64 end, struct extent_state **cached_state)
 {
        return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+                             EXTENT_DELALLOC | EXTENT_DEFRAG,
                              0, NULL, cached_state, GFP_NOFS, NULL);
 }
 
index 63decf3..f57a3e9 100644 (file)
@@ -2924,9 +2924,6 @@ static void endio_readpage_release_extent(struct processed_extent *processed,
         * Now we don't have range contiguous to the processed range, release
         * the processed range now.
         */
-       if (processed->uptodate && tree->track_uptodate)
-               set_extent_uptodate(tree, processed->start, processed->end,
-                                   &cached, GFP_ATOMIC);
        unlock_extent_cached_atomic(tree, processed->start, processed->end,
                                    &cached);
 
@@ -3613,7 +3610,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
        u64 extent_offset;
        u64 last_byte = i_size_read(inode);
        u64 block_start;
-       u64 cur_end;
        struct extent_map *em;
        int ret = 0;
        size_t pg_offset = 0;
@@ -3672,7 +3668,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                        this_bio_flag = em->compress_type;
 
                iosize = min(extent_map_end(em) - cur, end - cur + 1);
-               cur_end = min(extent_map_end(em) - 1, end);
                iosize = ALIGN(iosize, blocksize);
                if (this_bio_flag != BTRFS_COMPRESS_NONE)
                        disk_bytenr = em->block_start;
@@ -3743,20 +3738,9 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                        continue;
                }
                /* the get_extent function already copied into the page */
-               if (test_range_bit(tree, cur, cur_end,
-                                  EXTENT_UPTODATE, 1, NULL)) {
-                       unlock_extent(tree, cur, cur + iosize - 1);
-                       end_page_read(page, true, cur, iosize);
-                       cur = cur + iosize;
-                       pg_offset += iosize;
-                       continue;
-               }
-               /* we have an inline extent but it didn't get marked up
-                * to date.  Error out
-                */
                if (block_start == EXTENT_MAP_INLINE) {
                        unlock_extent(tree, cur, cur + iosize - 1);
-                       end_page_read(page, false, cur, iosize);
+                       end_page_read(page, true, cur, iosize);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
index b04ce7a..0a77705 100644 (file)
@@ -6868,7 +6868,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_map_tree *em_tree = &inode->extent_tree;
-       struct extent_io_tree *io_tree = &inode->io_tree;
 
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
@@ -7031,8 +7030,6 @@ next:
                        }
                        flush_dcache_page(page);
                }
-               set_extent_uptodate(io_tree, em->start,
-                                   extent_map_end(em) - 1, NULL, GFP_NOFS);
                goto insert;
        }
 not_found: