btrfs: refactor btrfs_invalidatepage() for subpage support
author Qu Wenruo <wqu@suse.com>
Tue, 6 Apr 2021 11:54:53 +0000 (19:54 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 21 Jun 2021 13:19:08 +0000 (15:19 +0200)
This patch will refactor btrfs_invalidatepage() for the incoming subpage
support.

The involved modifications are:

- Use while() loop instead of "goto again;"
- Use single variable to determine whether to delete extent states
  Each branch will also have comments why we can or cannot delete the
  extent states
- Do qgroup free and extent states deletion per-loop
  The current code only works for the PAGE_SIZE == sectorsize case.

This refactor also makes it clear what we do for different sectors:

- Sectors without ordered extent
  We're completely safe to remove all extent states for the sector(s)

- Sectors with ordered extent, but no Private2 bit
  This means the endio has already been executed, so we can't remove all
  extent states for the sector(s).

- Sectors with ordered extent, and Private2 bit still set
  This means we need to decrease the ordered extent accounting.
  And then it comes to two different variants:

  * We have finished and removed the ordered extent
    Then it's the same as "sectors without ordered extent"
  * We didn't finish the ordered extent
    We can remove some extent states, but not all.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/inode.c

index e86a611..f036b6e 100644 (file)
@@ -8318,15 +8318,11 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 {
        struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
        struct extent_io_tree *tree = &inode->io_tree;
-       struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_SIZE - 1;
-       u64 start;
-       u64 end;
+       u64 cur;
        int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
-       bool found_ordered = false;
-       bool completed_ordered = false;
 
        /*
         * We have page locked so no new ordered extent can be created on this
@@ -8350,93 +8346,120 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
        if (!inode_evicting)
                lock_extent_bits(tree, page_start, page_end, &cached_state);
 
-       start = page_start;
-again:
-       ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
-       if (ordered) {
-               found_ordered = true;
-               end = min(page_end,
-                         ordered->file_offset + ordered->num_bytes - 1);
+       cur = page_start;
+       while (cur < page_end) {
+               struct btrfs_ordered_extent *ordered;
+               bool delete_states;
+               u64 range_end;
+
+               ordered = btrfs_lookup_first_ordered_range(inode, cur,
+                                                          page_end + 1 - cur);
+               if (!ordered) {
+                       range_end = page_end;
+                       /*
+                        * No ordered extent covering this range, we are safe
+                        * to delete all extent states in the range.
+                        */
+                       delete_states = true;
+                       goto next;
+               }
+               if (ordered->file_offset > cur) {
+                       /*
+                        * There is a range between [cur, oe->file_offset) not
+                        * covered by any ordered extent.
+                        * We are safe to delete all extent states, and handle
+                        * the ordered extent in the next iteration.
+                        */
+                       range_end = ordered->file_offset - 1;
+                       delete_states = true;
+                       goto next;
+               }
+
+               range_end = min(ordered->file_offset + ordered->num_bytes - 1,
+                               page_end);
+               if (!PagePrivate2(page)) {
+                       /*
+                        * If Private2 is cleared, it means endio has already
+                        * been executed for the range.
+                        * We can't delete the extent states as
+                        * btrfs_finish_ordered_io() may still use some of them.
+                        */
+                       delete_states = false;
+                       goto next;
+               }
+               ClearPagePrivate2(page);
+
                /*
                 * IO on this page will never be started, so we need to account
                 * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
                 * here, must leave that up for the ordered extent completion.
+                *
+                * This will also unlock the range for incoming
+                * btrfs_finish_ordered_io().
                 */
                if (!inode_evicting)
-                       clear_extent_bit(tree, start, end,
+                       clear_extent_bit(tree, cur, range_end,
                                         EXTENT_DELALLOC |
                                         EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
                                         EXTENT_DEFRAG, 1, 0, &cached_state);
+
+               spin_lock_irq(&inode->ordered_tree.lock);
+               set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+               ordered->truncated_len = min(ordered->truncated_len,
+                                            cur - ordered->file_offset);
+               spin_unlock_irq(&inode->ordered_tree.lock);
+
+               if (btrfs_dec_test_ordered_pending(inode, &ordered,
+                                       cur, range_end + 1 - cur, 1)) {
+                       btrfs_finish_ordered_io(ordered);
+                       /*
+                        * The ordered extent has finished, now we're again
+                        * safe to delete all extent states of the range.
+                        */
+                       delete_states = true;
+               } else {
+                       /*
+                        * btrfs_finish_ordered_io() will get executed by endio
+                        * of other pages, thus we can't delete extent states
+                        * anymore
+                        */
+                       delete_states = false;
+               }
+next:
+               if (ordered)
+                       btrfs_put_ordered_extent(ordered);
                /*
-                * A page with Private2 bit means no bio has been submitted
-                * covering the page, thus we have to manually do the ordered
-                * extent accounting.
+                * Qgroup reserved space handler
+                * Sector(s) here will be either:
                 *
-                * For page without Private2, the ordered extent accounting is
-                * done in its endio function of the submitted bio.
+                * 1) Already written to disk or bio already finished
+                *    Then its QGROUP_RESERVED bit in io_tree is already cleared.
+                *    Qgroup will be handled by its qgroup_record then.
+                *    btrfs_qgroup_free_data() call will do nothing here.
+                *
+                * 2) Not written to disk yet
+                *    Then btrfs_qgroup_free_data() call will clear the
+                *    QGROUP_RESERVED bit of its io_tree, and free the qgroup
+                *    reserved data space.
+                *    Since the IO will never happen for this page.
                 */
-               if (TestClearPagePrivate2(page)) {
-                       spin_lock_irq(&inode->ordered_tree.lock);
-                       set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
-                       ordered->truncated_len = min(ordered->truncated_len,
-                                                    start - ordered->file_offset);
-                       spin_unlock_irq(&inode->ordered_tree.lock);
-
-                       if (btrfs_dec_test_ordered_pending(inode, &ordered,
-                                                          start,
-                                                          end - start + 1, 1)) {
-                               btrfs_finish_ordered_io(ordered);
-                               completed_ordered = true;
-                       }
-               }
-               btrfs_put_ordered_extent(ordered);
+               btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
                if (!inode_evicting) {
-                       cached_state = NULL;
-                       lock_extent_bits(tree, start, end,
-                                        &cached_state);
+                       clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
+                                EXTENT_DELALLOC | EXTENT_UPTODATE |
+                                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
+                                delete_states, &cached_state);
                }
-
-               start = end + 1;
-               if (start < page_end)
-                       goto again;
+               cur = range_end + 1;
        }
-
        /*
-        * Qgroup reserved space handler
-        * Page here will be either
-        * 1) Already written to disk or ordered extent already submitted
-        *    Then its QGROUP_RESERVED bit in io_tree is already cleaned.
-        *    Qgroup will be handled by its qgroup_record then.
-        *    btrfs_qgroup_free_data() call will do nothing here.
-        *
-        * 2) Not written to disk yet
-        *    Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
-        *    bit of its io_tree, and free the qgroup reserved data space.
-        *    Since the IO will never happen for this page.
+        * We have iterated through all ordered extents of the page, the page
+        * should not have Private2 anymore, or the above iteration does
+        * something wrong.
         */
-       btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
-       if (!inode_evicting) {
-               bool delete = true;
-
-               /*
-                * If there's an ordered extent for this range and we have not
-                * finished it ourselves, we must leave EXTENT_DELALLOC_NEW set
-                * in the range for the ordered extent completion. We must also
-                * not delete the range, otherwise we would lose that bit (and
-                * any other bits set in the range). Make sure EXTENT_UPTODATE
-                * is cleared if we don't delete, otherwise it can lead to
-                * corruptions if the i_size is extented later.
-                */
-               if (found_ordered && !completed_ordered)
-                       delete = false;
-               clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
-                                EXTENT_DELALLOC | EXTENT_UPTODATE |
-                                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
-                                delete, &cached_state);
-
+       ASSERT(!PagePrivate2(page));
+       if (!inode_evicting)
                __btrfs_releasepage(page, GFP_NOFS);
-       }
-
        ClearPageChecked(page);
        clear_page_extent_mapped(page);
 }