btrfs: make nodesize >= PAGE_SIZE case to reuse the non-subpage routine
author Qu Wenruo <wqu@suse.com>
Thu, 13 Jan 2022 05:22:09 +0000 (13:22 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 16 May 2022 15:03:11 +0000 (17:03 +0200)
The reason why we only support 64K page size for subpage is that, for
64K page size, we can ensure that no matter what the nodesize is, it
fits into one page.

When other page sizes come into play, especially 16K, this restriction
is a bit limiting.

To remove this limitation, we allow the nodesize >= PAGE_SIZE case to go
through the non-subpage routine.  This way, we can allow 4K sectorsize
on 16K page size.

This does introduce another, smaller limitation: the metadata can not
cross a page boundary, which is already met by most recent mkfs.

Another small improvement is that we can avoid the subpage overhead for
metadata if nodesize >= PAGE_SIZE.
For 4K sector size and 64K page size/node size, or 4K sector size and
16K page size/node size, we don't need to allocate extra memory for the
metadata pages.

Please note that this patch does not yet enable support for other page
sizes.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/subpage.c
fs/btrfs/subpage.h

index 67f2d3bb17eb6796bc3bb82e96434f0df4fffe92..b4ed0b3b47847d34c4acd05ea825530d49f99d1f 100644 (file)
@@ -519,7 +519,7 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec
        u64 found_start;
        struct extent_buffer *eb;
 
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (fs_info->nodesize < PAGE_SIZE)
                return csum_dirty_subpage_buffers(fs_info, bvec);
 
        eb = (struct extent_buffer *)page->private;
@@ -704,7 +704,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
 
        ASSERT(page->private);
 
-       if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+       if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
                return validate_subpage_buffer(page, start, end, mirror);
 
        eb = (struct extent_buffer *)page->private;
index 0dbd59a362c6aaa4d6d10f7fc7690cc99ff906ac..f28f059d16b7eec3af8650b7568000d2125d6bee 100644 (file)
@@ -2711,7 +2711,7 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
                btrfs_page_set_error(fs_info, page, start, len);
        }
 
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                unlock_page(page);
        else
                btrfs_subpage_end_reader(fs_info, page, start, len);
@@ -2944,7 +2944,7 @@ update:
 static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
 {
        ASSERT(PageLocked(page));
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                return;
 
        ASSERT(PagePrivate(page));
@@ -2966,7 +2966,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
         * For regular sectorsize, we can use page->private to grab extent
         * buffer
         */
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (fs_info->nodesize >= PAGE_SIZE) {
                ASSERT(PagePrivate(page) && page->private);
                return (struct extent_buffer *)page->private;
        }
@@ -3479,7 +3479,7 @@ static int attach_extent_buffer_page(struct extent_buffer *eb,
        if (page->mapping)
                lockdep_assert_held(&page->mapping->private_lock);
 
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (fs_info->nodesize >= PAGE_SIZE) {
                if (!PagePrivate(page))
                        attach_page_private(page, eb);
                else
@@ -3514,7 +3514,7 @@ int set_page_extent_mapped(struct page *page)
 
        fs_info = btrfs_sb(page->mapping->host->i_sb);
 
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (btrfs_is_subpage(fs_info, page))
                return btrfs_attach_subpage(fs_info, page, BTRFS_SUBPAGE_DATA);
 
        attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
@@ -3531,7 +3531,7 @@ void clear_page_extent_mapped(struct page *page)
                return;
 
        fs_info = btrfs_sb(page->mapping->host->i_sb);
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (btrfs_is_subpage(fs_info, page))
                return btrfs_detach_subpage(fs_info, page);
 
        detach_page_private(page);
@@ -3878,7 +3878,7 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
         * For regular sector size == page size case, since one page only
         * contains one sector, we return the page offset directly.
         */
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (!btrfs_is_subpage(fs_info, page)) {
                *start = page_offset(page);
                *end = page_offset(page) + PAGE_SIZE;
                return;
@@ -4261,7 +4261,7 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
         * Subpage metadata doesn't use page locking at all, so we can skip
         * the page locking.
         */
-       if (!ret || fs_info->sectorsize < PAGE_SIZE)
+       if (!ret || fs_info->nodesize < PAGE_SIZE)
                return ret;
 
        num_pages = num_extent_pages(eb);
@@ -4421,7 +4421,7 @@ static void end_bio_subpage_eb_writepage(struct bio *bio)
        struct bvec_iter_all iter_all;
 
        fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
-       ASSERT(fs_info->sectorsize < PAGE_SIZE);
+       ASSERT(fs_info->nodesize < PAGE_SIZE);
 
        ASSERT(!bio_flagged(bio, BIO_CLONED));
        bio_for_each_segment_all(bvec, bio, iter_all) {
@@ -4748,7 +4748,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
        if (!PagePrivate(page))
                return 0;
 
-       if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+       if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
                return submit_eb_subpage(page, wbc, epd);
 
        spin_lock(&mapping->private_lock);
@@ -5805,7 +5805,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
                return;
        }
 
-       if (fs_info->sectorsize == PAGE_SIZE) {
+       if (fs_info->nodesize >= PAGE_SIZE) {
                /*
                 * We do this since we'll remove the pages after we've
                 * removed the eb from the radix tree, so we could race
@@ -6125,7 +6125,7 @@ static struct extent_buffer *grab_extent_buffer(
         * don't try to insert two ebs for the same bytenr.  So here we always
         * return NULL and just continue.
         */
-       if (fs_info->sectorsize < PAGE_SIZE)
+       if (fs_info->nodesize < PAGE_SIZE)
                return NULL;
 
        /* Page not yet attached to an extent buffer */
@@ -6147,6 +6147,30 @@ static struct extent_buffer *grab_extent_buffer(
        return NULL;
 }
 
+static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
+{
+       if (!IS_ALIGNED(start, fs_info->sectorsize)) {
+               btrfs_err(fs_info, "bad tree block start %llu", start);
+               return -EINVAL;
+       }
+
+       if (fs_info->nodesize < PAGE_SIZE &&
+           offset_in_page(start) + fs_info->nodesize > PAGE_SIZE) {
+               btrfs_err(fs_info,
+               "tree block crosses page boundary, start %llu nodesize %u",
+                         start, fs_info->nodesize);
+               return -EINVAL;
+       }
+       if (fs_info->nodesize >= PAGE_SIZE &&
+           !IS_ALIGNED(start, PAGE_SIZE)) {
+               btrfs_err(fs_info,
+               "tree block is not page aligned, start %llu nodesize %u",
+                         start, fs_info->nodesize);
+               return -EINVAL;
+       }
+       return 0;
+}
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                          u64 start, u64 owner_root, int level)
 {
@@ -6161,10 +6185,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
        int uptodate = 1;
        int ret;
 
-       if (!IS_ALIGNED(start, fs_info->sectorsize)) {
-               btrfs_err(fs_info, "bad tree block start %llu", start);
+       if (check_eb_alignment(fs_info, start))
                return ERR_PTR(-EINVAL);
-       }
 
 #if BITS_PER_LONG == 32
        if (start >= MAX_LFS_FILESIZE) {
@@ -6177,14 +6199,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                btrfs_warn_32bit_limit(fs_info);
 #endif
 
-       if (fs_info->sectorsize < PAGE_SIZE &&
-           offset_in_page(start) + len > PAGE_SIZE) {
-               btrfs_err(fs_info,
-               "tree block crosses page boundary, start %llu nodesize %lu",
-                         start, len);
-               return ERR_PTR(-EINVAL);
-       }
-
        eb = find_extent_buffer(fs_info, start);
        if (eb)
                return eb;
@@ -6214,7 +6228,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 * page, but it may change in the future for 16K page size
                 * support, so we still preallocate the memory in the loop.
                 */
-               if (fs_info->sectorsize < PAGE_SIZE) {
+               if (fs_info->nodesize < PAGE_SIZE) {
                        prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
                        if (IS_ERR(prealloc)) {
                                ret = PTR_ERR(prealloc);
@@ -6433,7 +6447,7 @@ void clear_extent_buffer_dirty(const struct extent_buffer *eb)
        int num_pages;
        struct page *page;
 
-       if (eb->fs_info->sectorsize < PAGE_SIZE)
+       if (eb->fs_info->nodesize < PAGE_SIZE)
                return clear_subpage_extent_buffer_dirty(eb);
 
        num_pages = num_extent_pages(eb);
@@ -6465,7 +6479,7 @@ bool set_extent_buffer_dirty(struct extent_buffer *eb)
        WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
 
        if (!was_dirty) {
-               bool subpage = eb->fs_info->sectorsize < PAGE_SIZE;
+               bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
 
                /*
                 * For subpage case, we can have other extent buffers in the
@@ -6505,9 +6519,18 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
        num_pages = num_extent_pages(eb);
        for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
-               if (page)
-                       btrfs_page_clear_uptodate(fs_info, page,
-                                                 eb->start, eb->len);
+               if (!page)
+                       continue;
+
+               /*
+                * This is special handling for metadata subpage, as regular
+                * btrfs_is_subpage() can not handle cloned/dummy metadata.
+                */
+               if (fs_info->nodesize >= PAGE_SIZE)
+                       ClearPageUptodate(page);
+               else
+                       btrfs_subpage_clear_uptodate(fs_info, page, eb->start,
+                                                    eb->len);
        }
 }
 
@@ -6522,7 +6545,16 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
        num_pages = num_extent_pages(eb);
        for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
-               btrfs_page_set_uptodate(fs_info, page, eb->start, eb->len);
+
+               /*
+                * This is special handling for metadata subpage, as regular
+                * btrfs_is_subpage() can not handle cloned/dummy metadata.
+                */
+               if (fs_info->nodesize >= PAGE_SIZE)
+                       SetPageUptodate(page);
+               else
+                       btrfs_subpage_set_uptodate(fs_info, page, eb->start,
+                                                  eb->len);
        }
 }
 
@@ -6617,7 +6649,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
        if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
                return -EIO;
 
-       if (eb->fs_info->sectorsize < PAGE_SIZE)
+       if (eb->fs_info->nodesize < PAGE_SIZE)
                return read_extent_buffer_subpage(eb, wait, mirror_num);
 
        num_pages = num_extent_pages(eb);
@@ -6872,7 +6904,7 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
         * would have !PageUptodate && !PageError, as we clear PageError before
         * reading.
         */
-       if (fs_info->sectorsize < PAGE_SIZE) {
+       if (fs_info->nodesize < PAGE_SIZE) {
                bool uptodate, error;
 
                uptodate = btrfs_subpage_test_uptodate(fs_info, page,
@@ -6974,7 +7006,7 @@ void copy_extent_buffer_full(const struct extent_buffer *dst,
 
        ASSERT(dst->len == src->len);
 
-       if (dst->fs_info->sectorsize == PAGE_SIZE) {
+       if (dst->fs_info->nodesize >= PAGE_SIZE) {
                num_pages = num_extent_pages(dst);
                for (i = 0; i < num_pages; i++)
                        copy_page(page_address(dst->pages[i]),
@@ -6983,7 +7015,7 @@ void copy_extent_buffer_full(const struct extent_buffer *dst,
                size_t src_offset = get_eb_offset_in_page(src, 0);
                size_t dst_offset = get_eb_offset_in_page(dst, 0);
 
-               ASSERT(src->fs_info->sectorsize < PAGE_SIZE);
+               ASSERT(src->fs_info->nodesize < PAGE_SIZE);
                memcpy(page_address(dst->pages[0]) + dst_offset,
                       page_address(src->pages[0]) + src_offset,
                       src->len);
@@ -7376,7 +7408,7 @@ int try_release_extent_buffer(struct page *page)
 {
        struct extent_buffer *eb;
 
-       if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+       if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
                return try_release_subpage_extent_buffer(page);
 
        /*
index d16d1dc8f54ca687e48b2c463f9bac3670853928..7ddcc6709eb50120d186ec11de522a9ccb2c5701 100644 (file)
@@ -8200,7 +8200,7 @@ static void wait_subpage_spinlock(struct page *page)
        struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
        struct btrfs_subpage *subpage;
 
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                return;
 
        ASSERT(PagePrivate(page) && page->private);
index ef7ae20d2b77bb0e8d7a98584a0df367e1a734db..a4980659e3c3f66366f40ebd5278e2b23a24e8df 100644 (file)
  *   This means a slightly higher tree locking latency.
  */
 
+bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page)
+{
+       if (fs_info->sectorsize >= PAGE_SIZE)
+               return false;
+
+       /*
+        * Only data pages (either through DIO or compression) can have no
+        * mapping. And if page->mapping->host is a data inode, it's subpage,
+        * as we have already ruled out the sectorsize >= PAGE_SIZE case.
+        */
+       if (!page->mapping || !page->mapping->host ||
+           is_data_inode(page->mapping->host))
+               return true;
+
+       /*
+        * Now the only remaining case is metadata, which only goes through
+        * the subpage routine if nodesize < PAGE_SIZE.
+        */
+       if (fs_info->nodesize < PAGE_SIZE)
+               return true;
+       return false;
+}
+
 void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
 {
        unsigned int cur = 0;
@@ -107,7 +130,7 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
                ASSERT(PageLocked(page));
 
        /* Either not subpage, or the page already has private attached */
-       if (fs_info->sectorsize == PAGE_SIZE || PagePrivate(page))
+       if (!btrfs_is_subpage(fs_info, page) || PagePrivate(page))
                return 0;
 
        subpage = btrfs_alloc_subpage(fs_info, type);
@@ -124,7 +147,7 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
        struct btrfs_subpage *subpage;
 
        /* Either not subpage, or already detached */
-       if (fs_info->sectorsize == PAGE_SIZE || !PagePrivate(page))
+       if (!btrfs_is_subpage(fs_info, page) || !PagePrivate(page))
                return;
 
        subpage = (struct btrfs_subpage *)detach_page_private(page);
@@ -175,7 +198,7 @@ void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
 {
        struct btrfs_subpage *subpage;
 
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                return;
 
        ASSERT(PagePrivate(page) && page->mapping);
@@ -190,7 +213,7 @@ void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
 {
        struct btrfs_subpage *subpage;
 
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                return;
 
        ASSERT(PagePrivate(page) && page->mapping);
@@ -319,7 +342,7 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
 int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
                struct page *page, u64 start, u32 len)
 {
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {
                lock_page(page);
                return 0;
        }
@@ -336,7 +359,7 @@ int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
 void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
                struct page *page, u64 start, u32 len)
 {
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))
                return unlock_page(page);
        btrfs_subpage_clamp_range(page, &start, &len);
        if (btrfs_subpage_end_and_test_writer(fs_info, page, start, len))
@@ -620,7 +643,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
 void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info,                \
                struct page *page, u64 start, u32 len)                  \
 {                                                                      \
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {   \
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {   \
                set_page_func(page);                                    \
                return;                                                 \
        }                                                               \
@@ -629,7 +652,7 @@ void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info,             \
 void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info,      \
                struct page *page, u64 start, u32 len)                  \
 {                                                                      \
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {   \
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {   \
                clear_page_func(page);                                  \
                return;                                                 \
        }                                                               \
@@ -638,14 +661,14 @@ void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info, \
 bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info,       \
                struct page *page, u64 start, u32 len)                  \
 {                                                                      \
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)     \
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))     \
                return test_page_func(page);                            \
        return btrfs_subpage_test_##name(fs_info, page, start, len);    \
 }                                                                      \
 void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info,  \
                struct page *page, u64 start, u32 len)                  \
 {                                                                      \
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {   \
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {   \
                set_page_func(page);                                    \
                return;                                                 \
        }                                                               \
@@ -655,7 +678,7 @@ void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info,       \
 void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
                struct page *page, u64 start, u32 len)                  \
 {                                                                      \
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {   \
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {   \
                clear_page_func(page);                                  \
                return;                                                 \
        }                                                               \
@@ -665,7 +688,7 @@ void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
 bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
                struct page *page, u64 start, u32 len)                  \
 {                                                                      \
-       if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)     \
+       if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))     \
                return test_page_func(page);                            \
        btrfs_subpage_clamp_range(page, &start, &len);                  \
        return btrfs_subpage_test_##name(fs_info, page, start, len);    \
@@ -694,7 +717,7 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
                return;
 
        ASSERT(!PageDirty(page));
-       if (fs_info->sectorsize == PAGE_SIZE)
+       if (!btrfs_is_subpage(fs_info, page))
                return;
 
        ASSERT(PagePrivate(page) && page->private);
@@ -722,8 +745,8 @@ void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
        struct btrfs_subpage *subpage;
 
        ASSERT(PageLocked(page));
-       /* For regular page size case, we just unlock the page */
-       if (fs_info->sectorsize == PAGE_SIZE)
+       /* For non-subpage case, we just unlock the page */
+       if (!btrfs_is_subpage(fs_info, page))
                return unlock_page(page);
 
        ASSERT(PagePrivate(page) && page->private);
index 7accb5c40d33bfffe9b6ffca34d82ff5cb4ba455..0e80ad33690466567e0e6d34fda79e4691b2e87b 100644 (file)
@@ -74,6 +74,8 @@ enum btrfs_subpage_type {
        BTRFS_SUBPAGE_DATA,
 };
 
+bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page);
+
 void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize);
 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
                         struct page *page, enum btrfs_subpage_type type);