Merge tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: Wed, 3 Aug 2022 17:35:43 +0000 (10:35 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Wed, 3 Aug 2022 17:35:43 +0000 (10:35 -0700)
Pull folio updates from Matthew Wilcox:

 - Fix an accounting bug that made NR_FILE_DIRTY grow without limit
   when running xfstests

 - Convert more of mpage to use folios

 - Remove add_to_page_cache() and add_to_page_cache_locked()

 - Convert find_get_pages_range() to filemap_get_folios()

 - Improvements to the read_cache_page() family of functions

 - Remove a few unnecessary checks of PageError

 - Some straightforward filesystem conversions to use folios

 - Split PageMovable users out from address_space_operations into
   their own movable_operations

 - Convert aops->migratepage to aops->migrate_folio

 - Remove nobh support (Christoph Hellwig)
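
The migratepage -> migrate_folio conversion above leaves most filesystems
with a single folio-based hook.  A minimal sketch of what a converted
address_space_operations entry looks like, assuming only the
filemap_migrate_folio() helper added by this series (the example_* names
are placeholders, not code from the tree):

    #include <linux/fs.h>
    #include <linux/migrate.h>

    #ifdef CONFIG_MIGRATION
    static int example_migrate_folio(struct address_space *mapping,
                                     struct folio *dst, struct folio *src,
                                     enum migrate_mode mode)
    {
            /* Moves the page cache entry, transfers folio->private and
             * copies data and flags; enough for most filesystems. */
            return filemap_migrate_folio(mapping, dst, src, mode);
    }
    #else
    #define example_migrate_folio NULL  /* keep the initializer unconditional */
    #endif

    static const struct address_space_operations example_aops = {
            .migrate_folio  = example_migrate_folio,
            /* ... remaining operations ... */
    };

Filesystems whose data lives in buffer heads instead point .migrate_folio
at buffer_migrate_folio() or buffer_migrate_folio_norefs(), as the
block/fops.c and fs/ext2 hunks below do.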

* tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache: (78 commits)
  fs: remove the NULL get_block case in mpage_writepages
  fs: don't call ->writepage from __mpage_writepage
  fs: remove the nobh helpers
  jfs: stop using the nobh helper
  ext2: remove nobh support
  ntfs3: refactor ntfs_writepages
  mm/folio-compat: Remove migration compatibility functions
  fs: Remove aops->migratepage()
  secretmem: Convert to migrate_folio
  hugetlb: Convert to migrate_folio
  aio: Convert to migrate_folio
  f2fs: Convert to filemap_migrate_folio()
  ubifs: Convert to filemap_migrate_folio()
  btrfs: Convert btrfs_migratepage to migrate_folio
  mm/migrate: Add filemap_migrate_folio()
  mm/migrate: Convert migrate_page() to migrate_folio()
  nfs: Convert to migrate_folio
  btrfs: Convert btree_migratepage to migrate_folio
  mm/migrate: Convert expected_page_refs() to folio_expected_refs()
  mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio()
  ...
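
The find_get_pages_range() to filemap_get_folios() conversion called out
above moves callers to a folio_batch.  A minimal sketch of the new caller
pattern (illustrative names only; compare the clean_bdev_aliases() hunk in
fs/buffer.c below):

    #include <linux/pagemap.h>
    #include <linux/pagevec.h>

    static void example_walk_range(struct address_space *mapping,
                                   pgoff_t start, pgoff_t end)
    {
            struct folio_batch fbatch;
            unsigned int i;

            folio_batch_init(&fbatch);
            while (filemap_get_folios(mapping, &start, end, &fbatch)) {
                    for (i = 0; i < folio_batch_count(&fbatch); i++) {
                            struct folio *folio = fbatch.folios[i];

                            /* each folio is returned with a reference held */
                            folio_lock(folio);
                            /* ... per-folio work ... */
                            folio_unlock(folio);
                    }
                    folio_batch_release(&fbatch);   /* drops the references */
            }
    }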

32 files changed:
block/fops.c
block/partitions/core.c
fs/btrfs/disk-io.c
fs/btrfs/inode.c
fs/buffer.c
fs/ext2/inode.c
fs/ext2/super.c
fs/ext4/inode.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/node.c
fs/gfs2/lops.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/iomap/buffered-io.c
fs/mpage.c
fs/ntfs/aops.c
fs/ntfs/file.c
fs/ntfs3/inode.c
fs/ntfs3/ntfs_fs.h
fs/ocfs2/aops.c
fs/remap_range.c
fs/zonefs/super.c
include/linux/buffer_head.h
include/linux/fs.h
include/linux/netfs.h
mm/filemap.c
mm/hugetlb.c
mm/memory-failure.c
mm/secretmem.c
mm/shmem.c
mm/swap.c

diff --combined block/fops.c
@@@ -32,21 -32,14 +32,21 @@@ static int blkdev_get_block(struct inod
        return 0;
  }
  
 -static unsigned int dio_bio_write_op(struct kiocb *iocb)
 +static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
  {
 -      unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
 +      blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
  
        /* avoid the need for a I/O completion work item */
        if (iocb->ki_flags & IOCB_DSYNC)
 -              op |= REQ_FUA;
 -      return op;
 +              opf |= REQ_FUA;
 +      return opf;
 +}
 +
 +static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
 +                            struct iov_iter *iter)
 +{
 +      return pos & (bdev_logical_block_size(bdev) - 1) ||
 +              !bdev_iter_is_aligned(bdev, iter);
  }
  
  #define DIO_INLINE_BIO_VECS 4
@@@ -61,7 -54,8 +61,7 @@@ static ssize_t __blkdev_direct_IO_simpl
        struct bio bio;
        ssize_t ret;
  
 -      if ((pos | iov_iter_alignment(iter)) &
 -          (bdev_logical_block_size(bdev) - 1))
 +      if (blkdev_dio_unaligned(bdev, pos, iter))
                return -EINVAL;
  
        if (nr_pages <= DIO_INLINE_BIO_VECS)
@@@ -175,11 -169,12 +175,11 @@@ static ssize_t __blkdev_direct_IO(struc
        struct blkdev_dio *dio;
        struct bio *bio;
        bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 -      unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 +      blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
        loff_t pos = iocb->ki_pos;
        int ret = 0;
  
 -      if ((pos | iov_iter_alignment(iter)) &
 -          (bdev_logical_block_size(bdev) - 1))
 +      if (blkdev_dio_unaligned(bdev, pos, iter))
                return -EINVAL;
  
        if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@@ -297,13 -292,14 +297,13 @@@ static ssize_t __blkdev_direct_IO_async
  {
        struct block_device *bdev = iocb->ki_filp->private_data;
        bool is_read = iov_iter_rw(iter) == READ;
 -      unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 +      blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
        struct blkdev_dio *dio;
        struct bio *bio;
        loff_t pos = iocb->ki_pos;
        int ret = 0;
  
 -      if ((pos | iov_iter_alignment(iter)) &
 -          (bdev_logical_block_size(bdev) - 1))
 +      if (blkdev_dio_unaligned(bdev, pos, iter))
                return -EINVAL;
  
        if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@@ -421,7 -417,7 +421,7 @@@ const struct address_space_operations d
        .write_end      = blkdev_write_end,
        .writepages     = blkdev_writepages,
        .direct_IO      = blkdev_direct_IO,
-       .migratepage    = buffer_migrate_page_norefs,
+       .migrate_folio  = buffer_migrate_folio_norefs,
        .is_dirty_writeback = buffer_check_dirty_writeback,
  };
  
diff --combined block/partitions/core.c
@@@ -9,6 -9,7 +9,6 @@@
  #include <linux/slab.h>
  #include <linux/ctype.h>
  #include <linux/vmalloc.h>
 -#include <linux/blktrace_api.h>
  #include <linux/raid/detect.h>
  #include "check.h"
  
@@@ -330,7 -331,7 +330,7 @@@ static struct block_device *add_partiti
        case BLK_ZONED_HA:
                pr_info("%s: disabling host aware zoned block device support due to partitions\n",
                        disk->disk_name);
 -              blk_queue_set_zoned(disk, BLK_ZONED_NONE);
 +              disk_set_zoned(disk, BLK_ZONED_NONE);
                break;
        case BLK_ZONED_NONE:
                break;
@@@ -704,25 -705,19 +704,19 @@@ EXPORT_SYMBOL_GPL(bdev_disk_changed)
  void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
  {
        struct address_space *mapping = state->disk->part0->bd_inode->i_mapping;
-       struct page *page;
+       struct folio *folio;
  
        if (n >= get_capacity(state->disk)) {
                state->access_beyond_eod = true;
-               return NULL;
+               goto out;
        }
  
-       page = read_mapping_page(mapping,
-                       (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
-       if (IS_ERR(page))
+       folio = read_mapping_folio(mapping, n >> PAGE_SECTORS_SHIFT, NULL);
+       if (IS_ERR(folio))
                goto out;
-       if (PageError(page))
-               goto out_put_page;
-       p->v = page;
-       return (unsigned char *)page_address(page) +
-                       ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
- out_put_page:
-       put_page(page);
+       p->v = folio;
+       return folio_address(folio) + offset_in_folio(folio, n * SECTOR_SIZE);
  out:
        p->v = NULL;
        return NULL;
diff --combined fs/btrfs/disk-io.c
@@@ -5,7 -5,6 +5,7 @@@
  
  #include <linux/fs.h>
  #include <linux/blkdev.h>
 +#include <linux/radix-tree.h>
  #include <linux/writeback.h>
  #include <linux/workqueue.h>
  #include <linux/kthread.h>
@@@ -486,7 -485,7 +486,7 @@@ static int csum_dirty_subpage_buffers(s
                uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
                                                       fs_info->nodesize);
  
 -              /* A dirty eb shouldn't disappear from extent_buffers */
 +              /* A dirty eb shouldn't disappear from buffer_radix */
                if (WARN_ON(!eb))
                        return -EUCLEAN;
  
@@@ -953,28 -952,28 +953,28 @@@ void btrfs_submit_metadata_bio(struct i
  }
  
  #ifdef CONFIG_MIGRATION
- static int btree_migratepage(struct address_space *mapping,
-                       struct page *newpage, struct page *page,
-                       enum migrate_mode mode)
+ static int btree_migrate_folio(struct address_space *mapping,
+               struct folio *dst, struct folio *src, enum migrate_mode mode)
  {
        /*
         * we can't safely write a btree page from here,
         * we haven't done the locking hook
         */
-       if (PageDirty(page))
+       if (folio_test_dirty(src))
                return -EAGAIN;
        /*
         * Buffers may be managed in a filesystem specific way.
         * We must have no buffers or drop them.
         */
-       if (page_has_private(page) &&
-           !try_to_release_page(page, GFP_KERNEL))
+       if (folio_get_private(src) &&
+           !filemap_release_folio(src, GFP_KERNEL))
                return -EAGAIN;
-       return migrate_page(mapping, newpage, page, mode);
+       return migrate_folio(mapping, dst, src, mode);
  }
+ #else
+ #define btree_migrate_folio NULL
  #endif
  
  static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
  {
@@@ -1074,10 -1073,8 +1074,8 @@@ static const struct address_space_opera
        .writepages     = btree_writepages,
        .release_folio  = btree_release_folio,
        .invalidate_folio = btree_invalidate_folio,
- #ifdef CONFIG_MIGRATION
-       .migratepage    = btree_migratepage,
- #endif
-       .dirty_folio = btree_dirty_folio,
+       .migrate_folio  = btree_migrate_folio,
+       .dirty_folio    = btree_dirty_folio,
  };
  
  struct extent_buffer *btrfs_find_create_tree_block(
@@@ -1159,7 -1156,7 +1157,7 @@@ static void __setup_root(struct btrfs_r
        root->nr_delalloc_inodes = 0;
        root->nr_ordered_extents = 0;
        root->inode_tree = RB_ROOT;
 -      xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
 +      INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
  
        btrfs_init_root_block_rsv(root);
  
        btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
  #ifdef CONFIG_BTRFS_DEBUG
        INIT_LIST_HEAD(&root->leak_list);
 -      spin_lock(&fs_info->fs_roots_lock);
 +      spin_lock(&fs_info->fs_roots_radix_lock);
        list_add_tail(&root->leak_list, &fs_info->allocated_roots);
 -      spin_unlock(&fs_info->fs_roots_lock);
 +      spin_unlock(&fs_info->fs_roots_radix_lock);
  #endif
  }
  
@@@ -1660,12 -1657,11 +1658,12 @@@ static struct btrfs_root *btrfs_lookup_
  {
        struct btrfs_root *root;
  
 -      spin_lock(&fs_info->fs_roots_lock);
 -      root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
 +      spin_lock(&fs_info->fs_roots_radix_lock);
 +      root = radix_tree_lookup(&fs_info->fs_roots_radix,
 +                               (unsigned long)root_id);
        if (root)
                root = btrfs_grab_root(root);
 -      spin_unlock(&fs_info->fs_roots_lock);
 +      spin_unlock(&fs_info->fs_roots_radix_lock);
        return root;
  }
  
@@@ -1707,20 -1703,14 +1705,20 @@@ int btrfs_insert_fs_root(struct btrfs_f
  {
        int ret;
  
 -      spin_lock(&fs_info->fs_roots_lock);
 -      ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
 -                      root, GFP_NOFS);
 +      ret = radix_tree_preload(GFP_NOFS);
 +      if (ret)
 +              return ret;
 +
 +      spin_lock(&fs_info->fs_roots_radix_lock);
 +      ret = radix_tree_insert(&fs_info->fs_roots_radix,
 +                              (unsigned long)root->root_key.objectid,
 +                              root);
        if (ret == 0) {
                btrfs_grab_root(root);
 -              set_bit(BTRFS_ROOT_REGISTERED, &root->state);
 +              set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
        }
 -      spin_unlock(&fs_info->fs_roots_lock);
 +      spin_unlock(&fs_info->fs_roots_radix_lock);
 +      radix_tree_preload_end();
  
        return ret;
  }
@@@ -2350,9 -2340,9 +2348,9 @@@ void btrfs_put_root(struct btrfs_root *
                btrfs_drew_lock_destroy(&root->snapshot_lock);
                free_root_extent_buffers(root);
  #ifdef CONFIG_BTRFS_DEBUG
 -              spin_lock(&root->fs_info->fs_roots_lock);
 +              spin_lock(&root->fs_info->fs_roots_radix_lock);
                list_del_init(&root->leak_list);
 -              spin_unlock(&root->fs_info->fs_roots_lock);
 +              spin_unlock(&root->fs_info->fs_roots_radix_lock);
  #endif
                kfree(root);
        }
  
  void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
  {
 -      struct btrfs_root *root;
 -      unsigned long index = 0;
 +      int ret;
 +      struct btrfs_root *gang[8];
 +      int i;
  
        while (!list_empty(&fs_info->dead_roots)) {
 -              root = list_entry(fs_info->dead_roots.next,
 -                                struct btrfs_root, root_list);
 -              list_del(&root->root_list);
 +              gang[0] = list_entry(fs_info->dead_roots.next,
 +                                   struct btrfs_root, root_list);
 +              list_del(&gang[0]->root_list);
  
 -              if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
 -                      btrfs_drop_and_free_fs_root(fs_info, root);
 -              btrfs_put_root(root);
 +              if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
 +                      btrfs_drop_and_free_fs_root(fs_info, gang[0]);
 +              btrfs_put_root(gang[0]);
        }
  
 -      xa_for_each(&fs_info->fs_roots, index, root) {
 -              btrfs_drop_and_free_fs_root(fs_info, root);
 +      while (1) {
 +              ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
 +                                           (void **)gang, 0,
 +                                           ARRAY_SIZE(gang));
 +              if (!ret)
 +                      break;
 +              for (i = 0; i < ret; i++)
 +                      btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
  }
  
@@@ -3149,8 -3132,8 +3147,8 @@@ static int __cold init_tree_roots(struc
  
  void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
  {
 -      xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
 -      xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
 +      INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 +      INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
        INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_root_lock);
        spin_lock_init(&fs_info->trans_lock);
 -      spin_lock_init(&fs_info->fs_roots_lock);
 +      spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
        spin_lock_init(&fs_info->defrag_inodes_lock);
        spin_lock_init(&fs_info->super_lock);
@@@ -3389,7 -3372,7 +3387,7 @@@ int btrfs_start_pre_rw_mount(struct btr
        /*
         * btrfs_find_orphan_roots() is responsible for finding all the dead
         * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
 -       * them into the fs_info->fs_roots. This must be done before
 +       * them into the fs_info->fs_roots_radix tree. This must be done before
         * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
         * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
         * item before the root's tree is deleted - this means that if we unmount
@@@ -4514,12 -4497,11 +4512,12 @@@ void btrfs_drop_and_free_fs_root(struc
  {
        bool drop_ref = false;
  
 -      spin_lock(&fs_info->fs_roots_lock);
 -      xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
 -      if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
 +      spin_lock(&fs_info->fs_roots_radix_lock);
 +      radix_tree_delete(&fs_info->fs_roots_radix,
 +                        (unsigned long)root->root_key.objectid);
 +      if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
                drop_ref = true;
 -      spin_unlock(&fs_info->fs_roots_lock);
 +      spin_unlock(&fs_info->fs_roots_radix_lock);
  
        if (BTRFS_FS_ERROR(fs_info)) {
                ASSERT(root->log_root == NULL);
  
  int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
  {
 -      struct btrfs_root *roots[8];
 -      unsigned long index = 0;
 -      int i;
 +      u64 root_objectid = 0;
 +      struct btrfs_root *gang[8];
 +      int i = 0;
        int err = 0;
 -      int grabbed;
 +      unsigned int ret = 0;
  
        while (1) {
 -              struct btrfs_root *root;
 -
 -              spin_lock(&fs_info->fs_roots_lock);
 -              if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
 -                      spin_unlock(&fs_info->fs_roots_lock);
 -                      return err;
 +              spin_lock(&fs_info->fs_roots_radix_lock);
 +              ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
 +                                           (void **)gang, root_objectid,
 +                                           ARRAY_SIZE(gang));
 +              if (!ret) {
 +                      spin_unlock(&fs_info->fs_roots_radix_lock);
 +                      break;
                }
 +              root_objectid = gang[ret - 1]->root_key.objectid + 1;
  
 -              grabbed = 0;
 -              xa_for_each_start(&fs_info->fs_roots, index, root, index) {
 -                      /* Avoid grabbing roots in dead_roots */
 -                      if (btrfs_root_refs(&root->root_item) > 0)
 -                              roots[grabbed++] = btrfs_grab_root(root);
 -                      if (grabbed >= ARRAY_SIZE(roots))
 -                              break;
 +              for (i = 0; i < ret; i++) {
 +                      /* Avoid to grab roots in dead_roots */
 +                      if (btrfs_root_refs(&gang[i]->root_item) == 0) {
 +                              gang[i] = NULL;
 +                              continue;
 +                      }
 +                      /* grab all the search result for later use */
 +                      gang[i] = btrfs_grab_root(gang[i]);
                }
 -              spin_unlock(&fs_info->fs_roots_lock);
 +              spin_unlock(&fs_info->fs_roots_radix_lock);
  
 -              for (i = 0; i < grabbed; i++) {
 -                      if (!roots[i])
 +              for (i = 0; i < ret; i++) {
 +                      if (!gang[i])
                                continue;
 -                      index = roots[i]->root_key.objectid;
 -                      err = btrfs_orphan_cleanup(roots[i]);
 +                      root_objectid = gang[i]->root_key.objectid;
 +                      err = btrfs_orphan_cleanup(gang[i]);
                        if (err)
 -                              goto out;
 -                      btrfs_put_root(roots[i]);
 +                              break;
 +                      btrfs_put_root(gang[i]);
                }
 -              index++;
 +              root_objectid++;
        }
  
 -out:
 -      /* Release the roots that remain uncleaned due to error */
 -      for (; i < grabbed; i++) {
 -              if (roots[i])
 -                      btrfs_put_root(roots[i]);
 +      /* release the uncleaned roots due to error */
 +      for (; i < ret; i++) {
 +              if (gang[i])
 +                      btrfs_put_root(gang[i]);
        }
        return err;
  }
@@@ -4897,31 -4877,28 +4895,31 @@@ static void btrfs_error_commit_super(st
  
  static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
  {
 -      unsigned long index = 0;
 -      int grabbed = 0;
 -      struct btrfs_root *roots[8];
 -
 -      spin_lock(&fs_info->fs_roots_lock);
 -      while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
 -                                   ULONG_MAX, 8, XA_PRESENT))) {
 -              for (int i = 0; i < grabbed; i++)
 -                      roots[i] = btrfs_grab_root(roots[i]);
 -              spin_unlock(&fs_info->fs_roots_lock);
 -
 -              for (int i = 0; i < grabbed; i++) {
 -                      if (!roots[i])
 +      struct btrfs_root *gang[8];
 +      u64 root_objectid = 0;
 +      int ret;
 +
 +      spin_lock(&fs_info->fs_roots_radix_lock);
 +      while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
 +                                           (void **)gang, root_objectid,
 +                                           ARRAY_SIZE(gang))) != 0) {
 +              int i;
 +
 +              for (i = 0; i < ret; i++)
 +                      gang[i] = btrfs_grab_root(gang[i]);
 +              spin_unlock(&fs_info->fs_roots_radix_lock);
 +
 +              for (i = 0; i < ret; i++) {
 +                      if (!gang[i])
                                continue;
 -                      index = roots[i]->root_key.objectid;
 -                      btrfs_free_log(NULL, roots[i]);
 -                      btrfs_put_root(roots[i]);
 +                      root_objectid = gang[i]->root_key.objectid;
 +                      btrfs_free_log(NULL, gang[i]);
 +                      btrfs_put_root(gang[i]);
                }
 -              index++;
 -              spin_lock(&fs_info->fs_roots_lock);
 +              root_objectid++;
 +              spin_lock(&fs_info->fs_roots_radix_lock);
        }
 -      spin_unlock(&fs_info->fs_roots_lock);
 +      spin_unlock(&fs_info->fs_roots_radix_lock);
        btrfs_free_log_root_tree(NULL, fs_info);
  }
  
diff --combined fs/btrfs/inode.c
@@@ -485,7 -485,7 +485,7 @@@ struct async_chunk 
        struct page *locked_page;
        u64 start;
        u64 end;
 -      unsigned int write_flags;
 +      blk_opf_t write_flags;
        struct list_head extents;
        struct cgroup_subsys_state *blkcg_css;
        struct btrfs_work work;
@@@ -1435,7 -1435,7 +1435,7 @@@ static int cow_file_range_async(struct 
        int i;
        bool should_compress;
        unsigned nofs_flag;
 -      const unsigned int write_flags = wbc_to_write_flags(wbc);
 +      const blk_opf_t write_flags = wbc_to_write_flags(wbc);
  
        unlock_extent(&inode->io_tree, start, end);
  
@@@ -3195,8 -3195,6 +3195,8 @@@ static int btrfs_finish_ordered_io(stru
                                                ordered_extent->file_offset,
                                                ordered_extent->file_offset +
                                                logical_len);
 +              btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
 +                                                ordered_extent->disk_num_bytes);
        } else {
                BUG_ON(root == fs_info->tree_root);
                ret = insert_ordered_extent_file_extent(trans, ordered_extent);
@@@ -3578,6 -3576,7 +3578,6 @@@ int btrfs_orphan_cleanup(struct btrfs_r
        u64 last_objectid = 0;
        int ret = 0, nr_unlink = 0;
  
 -      /* Bail out if the cleanup is already running. */
        if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
                return 0;
  
                         *
                         * btrfs_find_orphan_roots() ran before us, which has
                         * found all deleted roots and loaded them into
 -                       * fs_info->fs_roots. So here we can find if an
 +                       * fs_info->fs_roots_radix. So here we can find if an
                         * orphan item corresponds to a deleted root by looking
 -                       * up the root from that xarray.
 +                       * up the root from that radix tree.
                         */
  
 -                      spin_lock(&fs_info->fs_roots_lock);
 -                      dead_root = xa_load(&fs_info->fs_roots,
 -                                          (unsigned long)found_key.objectid);
 +                      spin_lock(&fs_info->fs_roots_radix_lock);
 +                      dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
 +                                                       (unsigned long)found_key.objectid);
                        if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
                                is_dead_root = 1;
 -                      spin_unlock(&fs_info->fs_roots_lock);
 +                      spin_unlock(&fs_info->fs_roots_radix_lock);
  
                        if (is_dead_root) {
                                /* prevent this orphan from being found again */
@@@ -3910,7 -3909,7 +3910,7 @@@ cache_index
         * cache.
         *
         * This is required for both inode re-read from disk and delayed inode
 -       * in the delayed_nodes xarray.
 +       * in delayed_nodes_tree.
         */
        if (BTRFS_I(inode)->last_trans == fs_info->generation)
                set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
@@@ -7680,19 -7679,7 +7680,19 @@@ static int btrfs_dio_iomap_begin(struc
        if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
            em->block_start == EXTENT_MAP_INLINE) {
                free_extent_map(em);
 -              ret = -ENOTBLK;
 +              /*
 +               * If we are in a NOWAIT context, return -EAGAIN in order to
 +               * fallback to buffered IO. This is not only because we can
 +               * block with buffered IO (no support for NOWAIT semantics at
 +               * the moment) but also to avoid returning short reads to user
 +               * space - this happens if we were able to read some data from
 +               * previous non-compressed extents and then when we fallback to
 +               * buffered IO, at btrfs_file_read_iter() by calling
 +               * filemap_read(), we fail to fault in pages for the read buffer,
 +               * in which case filemap_read() returns a short read (the number
 +               * of bytes previously read is > 0, so it does not return -EFAULT).
 +               */
 +              ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
                goto unlock_err;
        }
  
@@@ -8268,30 -8255,24 +8268,24 @@@ static bool btrfs_release_folio(struct 
  }
  
  #ifdef CONFIG_MIGRATION
- static int btrfs_migratepage(struct address_space *mapping,
-                            struct page *newpage, struct page *page,
+ static int btrfs_migrate_folio(struct address_space *mapping,
+                            struct folio *dst, struct folio *src,
                             enum migrate_mode mode)
  {
-       int ret;
+       int ret = filemap_migrate_folio(mapping, dst, src, mode);
  
-       ret = migrate_page_move_mapping(mapping, newpage, page, 0);
        if (ret != MIGRATEPAGE_SUCCESS)
                return ret;
  
-       if (page_has_private(page))
-               attach_page_private(newpage, detach_page_private(page));
-       if (PageOrdered(page)) {
-               ClearPageOrdered(page);
-               SetPageOrdered(newpage);
+       if (folio_test_ordered(src)) {
+               folio_clear_ordered(src);
+               folio_set_ordered(dst);
        }
  
-       if (mode != MIGRATE_SYNC_NO_COPY)
-               migrate_page_copy(newpage, page);
-       else
-               migrate_page_states(newpage, page);
        return MIGRATEPAGE_SUCCESS;
  }
+ #else
+ #define btrfs_migrate_folio NULL
  #endif
  
  static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
@@@ -9910,7 -9891,6 +9904,7 @@@ static struct btrfs_trans_handle *inser
        extent_info.file_offset = file_offset;
        extent_info.extent_buf = (char *)&stack_fi;
        extent_info.is_new_extent = true;
 +      extent_info.update_times = true;
        extent_info.qgroup_reserved = qgroup_released;
        extent_info.insertions = 0;
  
@@@ -11436,9 -11416,7 +11430,7 @@@ static const struct address_space_opera
        .direct_IO      = noop_direct_IO,
        .invalidate_folio = btrfs_invalidate_folio,
        .release_folio  = btrfs_release_folio,
- #ifdef CONFIG_MIGRATION
-       .migratepage    = btrfs_migratepage,
- #endif
+       .migrate_folio  = btrfs_migrate_folio,
        .dirty_folio    = filemap_dirty_folio,
        .error_remove_page = generic_error_remove_page,
        .swap_activate  = btrfs_swap_activate,
diff --combined fs/buffer.c
@@@ -52,7 -52,7 +52,7 @@@
  #include "internal.h"
  
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 -static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 +static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
                         struct writeback_control *wbc);
  
  #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@@ -282,10 -282,10 +282,10 @@@ static void end_buffer_async_read(struc
        spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
  
        /*
-        * If none of the buffers had errors and they are all
-        * uptodate then we can set the page uptodate.
+        * If all of the buffers are uptodate then we can set the page
+        * uptodate.
         */
-       if (page_uptodate && !PageError(page))
+       if (page_uptodate)
                SetPageUptodate(page);
        unlock_page(page);
        return;
@@@ -562,7 -562,7 +562,7 @@@ void write_boundary_block(struct block_
        struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
        if (bh) {
                if (buffer_dirty(bh))
 -                      ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
 +                      ll_rw_block(REQ_OP_WRITE, 1, &bh);
                put_bh(bh);
        }
  }
@@@ -1174,7 -1174,7 +1174,7 @@@ static struct buffer_head *__bread_slow
        } else {
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
 -              submit_bh(REQ_OP_READ, 0, bh);
 +              submit_bh(REQ_OP_READ, bh);
                wait_on_buffer(bh);
                if (buffer_uptodate(bh))
                        return bh;
@@@ -1342,7 -1342,7 +1342,7 @@@ void __breadahead(struct block_device *
  {
        struct buffer_head *bh = __getblk(bdev, block, size);
        if (likely(bh)) {
 -              ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
 +              ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
                brelse(bh);
        }
  }
@@@ -1353,7 -1353,7 +1353,7 @@@ void __breadahead_gfp(struct block_devi
  {
        struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
        if (likely(bh)) {
 -              ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
 +              ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
                brelse(bh);
        }
  }
@@@ -1604,7 -1604,7 +1604,7 @@@ void clean_bdev_aliases(struct block_de
  {
        struct inode *bd_inode = bdev->bd_inode;
        struct address_space *bd_mapping = bd_inode->i_mapping;
-       struct pagevec pvec;
+       struct folio_batch fbatch;
        pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
        pgoff_t end;
        int i, count;
        struct buffer_head *head;
  
        end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
-       pagevec_init(&pvec);
-       while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
-               count = pagevec_count(&pvec);
+       folio_batch_init(&fbatch);
+       while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
+               count = folio_batch_count(&fbatch);
                for (i = 0; i < count; i++) {
-                       struct page *page = pvec.pages[i];
+                       struct folio *folio = fbatch.folios[i];
  
-                       if (!page_has_buffers(page))
+                       if (!folio_buffers(folio))
                                continue;
                        /*
-                        * We use page lock instead of bd_mapping->private_lock
+                        * We use folio lock instead of bd_mapping->private_lock
                         * to pin buffers here since we can afford to sleep and
                         * it scales better than a global spinlock lock.
                         */
-                       lock_page(page);
-                       /* Recheck when the page is locked which pins bhs */
-                       if (!page_has_buffers(page))
+                       folio_lock(folio);
+                       /* Recheck when the folio is locked which pins bhs */
+                       head = folio_buffers(folio);
+                       if (!head)
                                goto unlock_page;
-                       head = page_buffers(page);
                        bh = head;
                        do {
                                if (!buffer_mapped(bh) || (bh->b_blocknr < block))
@@@ -1643,9 -1643,9 +1643,9 @@@ next
                                bh = bh->b_this_page;
                        } while (bh != head);
  unlock_page:
-                       unlock_page(page);
+                       folio_unlock(folio);
                }
-               pagevec_release(&pvec);
+               folio_batch_release(&fbatch);
                cond_resched();
                /* End of range already reached? */
                if (index > end || !index)
@@@ -1716,7 -1716,7 +1716,7 @@@ int __block_write_full_page(struct inod
        struct buffer_head *bh, *head;
        unsigned int blocksize, bbits;
        int nr_underway = 0;
 -      int write_flags = wbc_to_write_flags(wbc);
 +      blk_opf_t write_flags = wbc_to_write_flags(wbc);
  
        head = create_page_buffers(page, inode,
                                        (1 << BH_Dirty)|(1 << BH_Uptodate));
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
 -                      submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
 +                      submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
                        nr_underway++;
                }
                bh = next;
@@@ -1858,7 -1858,7 +1858,7 @@@ recover
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
                        clear_buffer_dirty(bh);
 -                      submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
 +                      submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
                        nr_underway++;
                }
                bh = next;
@@@ -2033,7 -2033,7 +2033,7 @@@ int __block_write_begin_int(struct foli
                if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
                    !buffer_unwritten(bh) &&
                     (block_start < from || block_end > to)) {
 -                      ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 +                      ll_rw_block(REQ_OP_READ, 1, &bh);
                        *wait_bh++=bh;
                }
        }
@@@ -2259,6 -2259,7 +2259,7 @@@ int block_read_full_folio(struct folio 
        unsigned int blocksize, bbits;
        int nr, i;
        int fully_mapped = 1;
+       bool page_error = false;
  
        VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
  
                        if (iblock < lblock) {
                                WARN_ON(bh->b_size != blocksize);
                                err = get_block(inode, iblock, bh, 0);
-                               if (err)
+                               if (err) {
                                        folio_set_error(folio);
+                                       page_error = true;
+                               }
                        }
                        if (!buffer_mapped(bh)) {
                                folio_zero_range(folio, i * blocksize,
                 * All buffers are uptodate - we can set the folio uptodate
                 * as well. But not if get_block() returned an error.
                 */
-               if (!folio_test_error(folio))
+               if (!page_error)
                        folio_mark_uptodate(folio);
                folio_unlock(folio);
                return 0;
                if (buffer_uptodate(bh))
                        end_buffer_async_read(bh, 1);
                else
 -                      submit_bh(REQ_OP_READ, 0, bh);
 +                      submit_bh(REQ_OP_READ, bh);
        }
        return 0;
  }
@@@ -2534,330 -2537,6 +2537,6 @@@ out_unlock
  }
  EXPORT_SYMBOL(block_page_mkwrite);
  
- /*
-  * nobh_write_begin()'s prereads are special: the buffer_heads are freed
-  * immediately, while under the page lock.  So it needs a special end_io
-  * handler which does not touch the bh after unlocking it.
-  */
- static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
- {
-       __end_buffer_read_notouch(bh, uptodate);
- }
- /*
-  * Attach the singly-linked list of buffers created by nobh_write_begin, to
-  * the page (converting it to circular linked list and taking care of page
-  * dirty races).
-  */
- static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
- {
-       struct buffer_head *bh;
-       BUG_ON(!PageLocked(page));
-       spin_lock(&page->mapping->private_lock);
-       bh = head;
-       do {
-               if (PageDirty(page))
-                       set_buffer_dirty(bh);
-               if (!bh->b_this_page)
-                       bh->b_this_page = head;
-               bh = bh->b_this_page;
-       } while (bh != head);
-       attach_page_private(page, head);
-       spin_unlock(&page->mapping->private_lock);
- }
- /*
-  * On entry, the page is fully not uptodate.
-  * On exit the page is fully uptodate in the areas outside (from,to)
-  * The filesystem needs to handle block truncation upon failure.
-  */
- int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
-                       struct page **pagep, void **fsdata,
-                       get_block_t *get_block)
- {
-       struct inode *inode = mapping->host;
-       const unsigned blkbits = inode->i_blkbits;
-       const unsigned blocksize = 1 << blkbits;
-       struct buffer_head *head, *bh;
-       struct page *page;
-       pgoff_t index;
-       unsigned from, to;
-       unsigned block_in_page;
-       unsigned block_start, block_end;
-       sector_t block_in_file;
-       int nr_reads = 0;
-       int ret = 0;
-       int is_mapped_to_disk = 1;
-       index = pos >> PAGE_SHIFT;
-       from = pos & (PAGE_SIZE - 1);
-       to = from + len;
-       page = grab_cache_page_write_begin(mapping, index);
-       if (!page)
-               return -ENOMEM;
-       *pagep = page;
-       *fsdata = NULL;
-       if (page_has_buffers(page)) {
-               ret = __block_write_begin(page, pos, len, get_block);
-               if (unlikely(ret))
-                       goto out_release;
-               return ret;
-       }
-       if (PageMappedToDisk(page))
-               return 0;
-       /*
-        * Allocate buffers so that we can keep track of state, and potentially
-        * attach them to the page if an error occurs. In the common case of
-        * no error, they will just be freed again without ever being attached
-        * to the page (which is all OK, because we're under the page lock).
-        *
-        * Be careful: the buffer linked list is a NULL terminated one, rather
-        * than the circular one we're used to.
-        */
-       head = alloc_page_buffers(page, blocksize, false);
-       if (!head) {
-               ret = -ENOMEM;
-               goto out_release;
-       }
-       block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
-       /*
-        * We loop across all blocks in the page, whether or not they are
-        * part of the affected region.  This is so we can discover if the
-        * page is fully mapped-to-disk.
-        */
-       for (block_start = 0, block_in_page = 0, bh = head;
-                 block_start < PAGE_SIZE;
-                 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
-               int create;
-               block_end = block_start + blocksize;
-               bh->b_state = 0;
-               create = 1;
-               if (block_start >= to)
-                       create = 0;
-               ret = get_block(inode, block_in_file + block_in_page,
-                                       bh, create);
-               if (ret)
-                       goto failed;
-               if (!buffer_mapped(bh))
-                       is_mapped_to_disk = 0;
-               if (buffer_new(bh))
-                       clean_bdev_bh_alias(bh);
-               if (PageUptodate(page)) {
-                       set_buffer_uptodate(bh);
-                       continue;
-               }
-               if (buffer_new(bh) || !buffer_mapped(bh)) {
-                       zero_user_segments(page, block_start, from,
-                                                       to, block_end);
-                       continue;
-               }
-               if (buffer_uptodate(bh))
-                       continue;       /* reiserfs does this */
-               if (block_start < from || block_end > to) {
-                       lock_buffer(bh);
-                       bh->b_end_io = end_buffer_read_nobh;
-                       submit_bh(REQ_OP_READ, bh);
-                       nr_reads++;
-               }
-       }
-       if (nr_reads) {
-               /*
-                * The page is locked, so these buffers are protected from
-                * any VM or truncate activity.  Hence we don't need to care
-                * for the buffer_head refcounts.
-                */
-               for (bh = head; bh; bh = bh->b_this_page) {
-                       wait_on_buffer(bh);
-                       if (!buffer_uptodate(bh))
-                               ret = -EIO;
-               }
-               if (ret)
-                       goto failed;
-       }
-       if (is_mapped_to_disk)
-               SetPageMappedToDisk(page);
-       *fsdata = head; /* to be released by nobh_write_end */
-       return 0;
- failed:
-       BUG_ON(!ret);
-       /*
-        * Error recovery is a bit difficult. We need to zero out blocks that
-        * were newly allocated, and dirty them to ensure they get written out.
-        * Buffers need to be attached to the page at this point, otherwise
-        * the handling of potential IO errors during writeout would be hard
-        * (could try doing synchronous writeout, but what if that fails too?)
-        */
-       attach_nobh_buffers(page, head);
-       page_zero_new_buffers(page, from, to);
- out_release:
-       unlock_page(page);
-       put_page(page);
-       *pagep = NULL;
-       return ret;
- }
- EXPORT_SYMBOL(nobh_write_begin);
- int nobh_write_end(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned copied,
-                       struct page *page, void *fsdata)
- {
-       struct inode *inode = page->mapping->host;
-       struct buffer_head *head = fsdata;
-       struct buffer_head *bh;
-       BUG_ON(fsdata != NULL && page_has_buffers(page));
-       if (unlikely(copied < len) && head)
-               attach_nobh_buffers(page, head);
-       if (page_has_buffers(page))
-               return generic_write_end(file, mapping, pos, len,
-                                       copied, page, fsdata);
-       SetPageUptodate(page);
-       set_page_dirty(page);
-       if (pos+copied > inode->i_size) {
-               i_size_write(inode, pos+copied);
-               mark_inode_dirty(inode);
-       }
-       unlock_page(page);
-       put_page(page);
-       while (head) {
-               bh = head;
-               head = head->b_this_page;
-               free_buffer_head(bh);
-       }
-       return copied;
- }
- EXPORT_SYMBOL(nobh_write_end);
- /*
-  * nobh_writepage() - based on block_full_write_page() except
-  * that it tries to operate without attaching bufferheads to
-  * the page.
-  */
- int nobh_writepage(struct page *page, get_block_t *get_block,
-                       struct writeback_control *wbc)
- {
-       struct inode * const inode = page->mapping->host;
-       loff_t i_size = i_size_read(inode);
-       const pgoff_t end_index = i_size >> PAGE_SHIFT;
-       unsigned offset;
-       int ret;
-       /* Is the page fully inside i_size? */
-       if (page->index < end_index)
-               goto out;
-       /* Is the page fully outside i_size? (truncate in progress) */
-       offset = i_size & (PAGE_SIZE-1);
-       if (page->index >= end_index+1 || !offset) {
-               unlock_page(page);
-               return 0; /* don't care */
-       }
-       /*
-        * The page straddles i_size.  It must be zeroed out on each and every
-        * writepage invocation because it may be mmapped.  "A file is mapped
-        * in multiples of the page size.  For a file that is not a multiple of
-        * the  page size, the remaining memory is zeroed when mapped, and
-        * writes to that region are not written out to the file."
-        */
-       zero_user_segment(page, offset, PAGE_SIZE);
- out:
-       ret = mpage_writepage(page, get_block, wbc);
-       if (ret == -EAGAIN)
-               ret = __block_write_full_page(inode, page, get_block, wbc,
-                                             end_buffer_async_write);
-       return ret;
- }
- EXPORT_SYMBOL(nobh_writepage);
- int nobh_truncate_page(struct address_space *mapping,
-                       loff_t from, get_block_t *get_block)
- {
-       pgoff_t index = from >> PAGE_SHIFT;
-       struct inode *inode = mapping->host;
-       unsigned blocksize = i_blocksize(inode);
-       struct folio *folio;
-       struct buffer_head map_bh;
-       size_t offset;
-       sector_t iblock;
-       int err;
-       /* Block boundary? Nothing to do */
-       if (!(from & (blocksize - 1)))
-               return 0;
-       folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_CREAT,
-                       mapping_gfp_mask(mapping));
-       err = -ENOMEM;
-       if (!folio)
-               goto out;
-       if (folio_buffers(folio))
-               goto has_buffers;
-       iblock = from >> inode->i_blkbits;
-       map_bh.b_size = blocksize;
-       map_bh.b_state = 0;
-       err = get_block(inode, iblock, &map_bh, 0);
-       if (err)
-               goto unlock;
-       /* unmapped? It's a hole - nothing to do */
-       if (!buffer_mapped(&map_bh))
-               goto unlock;
-       /* Ok, it's mapped. Make sure it's up-to-date */
-       if (!folio_test_uptodate(folio)) {
-               err = mapping->a_ops->read_folio(NULL, folio);
-               if (err) {
-                       folio_put(folio);
-                       goto out;
-               }
-               folio_lock(folio);
-               if (!folio_test_uptodate(folio)) {
-                       err = -EIO;
-                       goto unlock;
-               }
-               if (folio_buffers(folio))
-                       goto has_buffers;
-       }
-       offset = offset_in_folio(folio, from);
-       folio_zero_segment(folio, offset, round_up(offset, blocksize));
-       folio_mark_dirty(folio);
-       err = 0;
- unlock:
-       folio_unlock(folio);
-       folio_put(folio);
- out:
-       return err;
- has_buffers:
-       folio_unlock(folio);
-       folio_put(folio);
-       return block_truncate_page(mapping, from, get_block);
- }
- EXPORT_SYMBOL(nobh_truncate_page);
  int block_truncate_page(struct address_space *mapping,
                        loff_t from, get_block_t *get_block)
  {
  
        if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
                err = -EIO;
 -              ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 +              ll_rw_block(REQ_OP_READ, 1, &bh);
                wait_on_buffer(bh);
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
@@@ -2994,10 -2673,9 +2673,10 @@@ static void end_bio_bh_io_sync(struct b
        bio_put(bio);
  }
  
 -static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 +static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
                         struct writeback_control *wbc)
  {
 +      const enum req_op op = opf & REQ_OP_MASK;
        struct bio *bio;
  
        BUG_ON(!buffer_locked(bh));
                clear_buffer_write_io_error(bh);
  
        if (buffer_meta(bh))
 -              op_flags |= REQ_META;
 +              opf |= REQ_META;
        if (buffer_prio(bh))
 -              op_flags |= REQ_PRIO;
 +              opf |= REQ_PRIO;
  
 -      bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO);
 +      bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO);
  
        fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
  
        return 0;
  }
  
 -int submit_bh(int op, int op_flags, struct buffer_head *bh)
 +int submit_bh(blk_opf_t opf, struct buffer_head *bh)
  {
 -      return submit_bh_wbc(op, op_flags, bh, NULL);
 +      return submit_bh_wbc(opf, bh, NULL);
  }
  EXPORT_SYMBOL(submit_bh);
  
  /**
   * ll_rw_block: low-level access to block devices (DEPRECATED)
 - * @op: whether to %READ or %WRITE
 - * @op_flags: req_flag_bits
 + * @opf: block layer request operation and flags.
   * @nr: number of &struct buffer_heads in the array
   * @bhs: array of pointers to &struct buffer_head
   *
   * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
   * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
 - * @op_flags contains flags modifying the detailed I/O behavior, most notably
 + * @opf contains flags modifying the detailed I/O behavior, most notably
   * %REQ_RAHEAD.
   *
   * This function drops any buffer that it cannot get a lock on (with the
   * All of the buffers must be for the same device, and must also be a
   * multiple of the current approved size for the device.
   */
 -void ll_rw_block(int op, int op_flags,  int nr, struct buffer_head *bhs[])
 +void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[])
  {
 +      const enum req_op op = opf & REQ_OP_MASK;
        int i;
  
        for (i = 0; i < nr; i++) {
  
                if (!trylock_buffer(bh))
                        continue;
 -              if (op == WRITE) {
 +              if (op == REQ_OP_WRITE) {
                        if (test_clear_buffer_dirty(bh)) {
                                bh->b_end_io = end_buffer_write_sync;
                                get_bh(bh);
 -                              submit_bh(op, op_flags, bh);
 +                              submit_bh(opf, bh);
                                continue;
                        }
                } else {
                        if (!buffer_uptodate(bh)) {
                                bh->b_end_io = end_buffer_read_sync;
                                get_bh(bh);
 -                              submit_bh(op, op_flags, bh);
 +                              submit_bh(opf, bh);
                                continue;
                        }
                }
  }
  EXPORT_SYMBOL(ll_rw_block);
  
 -void write_dirty_buffer(struct buffer_head *bh, int op_flags)
 +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
  {
        lock_buffer(bh);
        if (!test_clear_buffer_dirty(bh)) {
        }
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
 -      submit_bh(REQ_OP_WRITE, op_flags, bh);
 +      submit_bh(REQ_OP_WRITE | op_flags, bh);
  }
  EXPORT_SYMBOL(write_dirty_buffer);
  
   * and then start new I/O and then wait upon it.  The caller must have a ref on
   * the buffer_head.
   */
 -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
 +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
  {
        int ret = 0;
  
  
                get_bh(bh);
                bh->b_end_io = end_buffer_write_sync;
 -              ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
 +              ret = submit_bh(REQ_OP_WRITE | op_flags, bh);
                wait_on_buffer(bh);
                if (!ret && !buffer_uptodate(bh))
                        ret = -EIO;
@@@ -3366,7 -3044,7 +3045,7 @@@ int bh_submit_read(struct buffer_head *
  
        get_bh(bh);
        bh->b_end_io = end_buffer_read_sync;
 -      submit_bh(REQ_OP_READ, 0, bh);
 +      submit_bh(REQ_OP_READ, bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return 0;
diff --combined fs/ext2/inode.c
@@@ -908,25 -908,6 +908,6 @@@ static int ext2_write_end(struct file *
        return ret;
  }
  
- static int
- ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
-               loff_t pos, unsigned len, struct page **pagep, void **fsdata)
- {
-       int ret;
-       ret = nobh_write_begin(mapping, pos, len, pagep, fsdata,
-                              ext2_get_block);
-       if (ret < 0)
-               ext2_write_failed(mapping, pos + len);
-       return ret;
- }
- static int ext2_nobh_writepage(struct page *page,
-                       struct writeback_control *wbc)
- {
-       return nobh_writepage(page, ext2_get_block, wbc);
- }
  static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
  {
        return generic_block_bmap(mapping,block,ext2_get_block);
@@@ -973,26 -954,11 +954,11 @@@ const struct address_space_operations e
        .bmap                   = ext2_bmap,
        .direct_IO              = ext2_direct_IO,
        .writepages             = ext2_writepages,
-       .migratepage            = buffer_migrate_page,
+       .migrate_folio          = buffer_migrate_folio,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
  };
  
- const struct address_space_operations ext2_nobh_aops = {
-       .dirty_folio            = block_dirty_folio,
-       .invalidate_folio       = block_invalidate_folio,
-       .read_folio             = ext2_read_folio,
-       .readahead              = ext2_readahead,
-       .writepage              = ext2_nobh_writepage,
-       .write_begin            = ext2_nobh_write_begin,
-       .write_end              = nobh_write_end,
-       .bmap                   = ext2_bmap,
-       .direct_IO              = ext2_direct_IO,
-       .writepages             = ext2_writepages,
-       .migratepage            = buffer_migrate_page,
-       .error_remove_page      = generic_error_remove_page,
- };
  static const struct address_space_operations ext2_dax_aops = {
        .writepages             = ext2_dax_writepages,
        .direct_IO              = noop_direct_IO,
@@@ -1298,13 -1264,10 +1264,10 @@@ static int ext2_setsize(struct inode *i
  
        inode_dio_wait(inode);
  
-       if (IS_DAX(inode)) {
+       if (IS_DAX(inode))
                error = dax_zero_range(inode, newsize,
                                       PAGE_ALIGN(newsize) - newsize, NULL,
                                       &ext2_iomap_ops);
-       } else if (test_opt(inode->i_sb, NOBH))
-               error = nobh_truncate_page(inode->i_mapping,
-                               newsize, ext2_get_block);
        else
                error = block_truncate_page(inode->i_mapping,
                                newsize, ext2_get_block);
@@@ -1396,8 -1359,6 +1359,6 @@@ void ext2_set_file_ops(struct inode *in
        inode->i_fop = &ext2_file_operations;
        if (IS_DAX(inode))
                inode->i_mapping->a_ops = &ext2_dax_aops;
-       else if (test_opt(inode->i_sb, NOBH))
-               inode->i_mapping->a_ops = &ext2_nobh_aops;
        else
                inode->i_mapping->a_ops = &ext2_aops;
  }
@@@ -1497,10 -1458,7 +1458,7 @@@ struct inode *ext2_iget (struct super_b
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext2_dir_inode_operations;
                inode->i_fop = &ext2_dir_operations;
-               if (test_opt(inode->i_sb, NOBH))
-                       inode->i_mapping->a_ops = &ext2_nobh_aops;
-               else
-                       inode->i_mapping->a_ops = &ext2_aops;
+               inode->i_mapping->a_ops = &ext2_aops;
        } else if (S_ISLNK(inode->i_mode)) {
                if (ext2_inode_is_fast_symlink(inode)) {
                        inode->i_link = (char *)ei->i_data;
                } else {
                        inode->i_op = &ext2_symlink_inode_operations;
                        inode_nohighmem(inode);
-                       if (test_opt(inode->i_sb, NOBH))
-                               inode->i_mapping->a_ops = &ext2_nobh_aops;
-                       else
-                               inode->i_mapping->a_ops = &ext2_aops;
+                       inode->i_mapping->a_ops = &ext2_aops;
                }
        } else {
                inode->i_op = &ext2_special_inode_operations;
@@@ -1679,14 -1634,14 +1634,14 @@@ int ext2_setattr(struct user_namespace 
        if (error)
                return error;
  
 -      if (is_quota_modification(inode, iattr)) {
 +      if (is_quota_modification(mnt_userns, inode, iattr)) {
                error = dquot_initialize(inode);
                if (error)
                        return error;
        }
 -      if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
 -          (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
 -              error = dquot_transfer(inode, iattr);
 +      if (i_uid_needs_update(mnt_userns, iattr, inode) ||
 +          i_gid_needs_update(mnt_userns, iattr, inode)) {
 +              error = dquot_transfer(mnt_userns, inode, iattr);
                if (error)
                        return error;
        }
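
The ext2 hunks above collapse the nobh/normal choice into a single operations table and swap the page-based migration hook for the folio-based one. A minimal sketch of the resulting shape for a buffer-head filesystem; the example_aops name is invented, the callbacks are the ones wired up above:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/buffer_head.h>

static const struct address_space_operations example_aops = {
	.dirty_folio		= block_dirty_folio,
	.invalidate_folio	= block_invalidate_folio,
	/* was: .migratepage = buffer_migrate_page */
	.migrate_folio		= buffer_migrate_folio,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
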
diff --combined fs/ext2/super.c
@@@ -296,9 -296,6 +296,6 @@@ static int ext2_show_options(struct seq
                seq_puts(seq, ",noacl");
  #endif
  
-       if (test_opt(sb, NOBH))
-               seq_puts(seq, ",nobh");
        if (test_opt(sb, USRQUOTA))
                seq_puts(seq, ",usrquota");
  
@@@ -551,7 -548,8 +548,8 @@@ static int parse_options(char *options
                        clear_opt (opts->s_mount_opt, OLDALLOC);
                        break;
                case Opt_nobh:
-                       set_opt (opts->s_mount_opt, NOBH);
+                       ext2_msg(sb, KERN_INFO,
+                               "nobh option not supported");
                        break;
  #ifdef CONFIG_EXT2_FS_XATTR
                case Opt_user_xattr:
@@@ -1059,10 -1057,9 +1057,10 @@@ static int ext2_fill_super(struct super
                        sbi->s_frags_per_group);
                goto failed_mount;
        }
 -      if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
 +      if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
 +          sbi->s_inodes_per_group > sb->s_blocksize * 8) {
                ext2_msg(sb, KERN_ERR,
 -                      "error: #inodes per group too big: %lu",
 +                      "error: invalid #inodes per group: %lu",
                        sbi->s_inodes_per_group);
                goto failed_mount;
        }
        sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
                                le32_to_cpu(es->s_first_data_block) - 1)
                                        / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
 +      if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
 +          le32_to_cpu(es->s_inodes_count)) {
 +              ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
 +                       le32_to_cpu(es->s_inodes_count),
 +                       (u64)sbi->s_groups_count * sbi->s_inodes_per_group);
 +              goto failed_mount;
 +      }
        db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
                   EXT2_DESC_PER_BLOCK(sb);
        sbi->s_group_desc = kmalloc_array(db_count,
@@@ -1498,7 -1488,8 +1496,7 @@@ static ssize_t ext2_quota_read(struct s
                len = i_size-off;
        toread = len;
        while (toread > 0) {
 -              tocopy = sb->s_blocksize - offset < toread ?
 -                              sb->s_blocksize - offset : toread;
 +              tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
  
                tmp_bh.b_state = 0;
                tmp_bh.b_size = sb->s_blocksize;
@@@ -1536,7 -1527,8 +1534,7 @@@ static ssize_t ext2_quota_write(struct 
        struct buffer_head *bh;
  
        while (towrite > 0) {
 -              tocopy = sb->s_blocksize - offset < towrite ?
 -                              sb->s_blocksize - offset : towrite;
 +              tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
  
                tmp_bh.b_state = 0;
                tmp_bh.b_size = sb->s_blocksize;
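
The quota read/write hunks above are a pure cleanup: the open-coded minimum becomes min_t(), which casts both operands to the named type before comparing, with no change in behaviour. A tiny sketch, with invented names:

#include <linux/minmax.h>
#include <linux/types.h>

static size_t example_chunk(size_t blocksize, size_t offset, size_t remaining)
{
	/* equivalent to: blocksize - offset < remaining ?
	 *		  blocksize - offset : remaining */
	return min_t(size_t, blocksize - offset, remaining);
}
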
diff --combined fs/ext4/inode.c
@@@ -1554,9 -1554,9 +1554,9 @@@ struct mpage_da_data 
  static void mpage_release_unused_pages(struct mpage_da_data *mpd,
                                       bool invalidate)
  {
-       int nr_pages, i;
+       unsigned nr, i;
        pgoff_t index, end;
-       struct pagevec pvec;
+       struct folio_batch fbatch;
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
  
                ext4_es_remove_extent(inode, start, last - start + 1);
        }
  
-       pagevec_init(&pvec);
+       folio_batch_init(&fbatch);
        while (index <= end) {
-               nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end);
-               if (nr_pages == 0)
+               nr = filemap_get_folios(mapping, &index, end, &fbatch);
+               if (nr == 0)
                        break;
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
-                       struct folio *folio = page_folio(page);
+               for (i = 0; i < nr; i++) {
+                       struct folio *folio = fbatch.folios[i];
  
+                       if (folio->index < mpd->first_page)
+                               continue;
+                       if (folio->index + folio_nr_pages(folio) - 1 > end)
+                               continue;
                        BUG_ON(!folio_test_locked(folio));
                        BUG_ON(folio_test_writeback(folio));
                        if (invalidate) {
                        }
                        folio_unlock(folio);
                }
-               pagevec_release(&pvec);
+               folio_batch_release(&fbatch);
        }
  }
  
@@@ -2311,8 -2314,8 +2314,8 @@@ out
   */
  static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
  {
-       struct pagevec pvec;
-       int nr_pages, i;
+       struct folio_batch fbatch;
+       unsigned nr, i;
        struct inode *inode = mpd->inode;
        int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
        pgoff_t start, end;
        lblk = start << bpp_bits;
        pblock = mpd->map.m_pblk;
  
-       pagevec_init(&pvec);
+       folio_batch_init(&fbatch);
        while (start <= end) {
-               nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
-                                               &start, end);
-               if (nr_pages == 0)
+               nr = filemap_get_folios(inode->i_mapping, &start, end, &fbatch);
+               if (nr == 0)
                        break;
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
+               for (i = 0; i < nr; i++) {
+                       struct page *page = &fbatch.folios[i]->page;
  
                        err = mpage_process_page(mpd, page, &lblk, &pblock,
                                                 &map_bh);
                        if (err < 0)
                                goto out;
                }
-               pagevec_release(&pvec);
+               folio_batch_release(&fbatch);
        }
        /* Extent fully mapped and matches with page boundary. We are done. */
        mpd->map.m_len = 0;
        mpd->map.m_flags = 0;
        return 0;
  out:
-       pagevec_release(&pvec);
+       folio_batch_release(&fbatch);
        return err;
  }
  
@@@ -3631,7 -3633,7 +3633,7 @@@ static const struct address_space_opera
        .invalidate_folio       = ext4_invalidate_folio,
        .release_folio          = ext4_release_folio,
        .direct_IO              = noop_direct_IO,
-       .migratepage            = buffer_migrate_page,
+       .migrate_folio          = buffer_migrate_folio,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
        .swap_activate          = ext4_iomap_swap_activate,
@@@ -3666,7 -3668,7 +3668,7 @@@ static const struct address_space_opera
        .invalidate_folio       = ext4_invalidate_folio,
        .release_folio          = ext4_release_folio,
        .direct_IO              = noop_direct_IO,
-       .migratepage            = buffer_migrate_page,
+       .migrate_folio          = buffer_migrate_folio,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
        .swap_activate          = ext4_iomap_swap_activate,
@@@ -5350,14 -5352,14 +5352,14 @@@ int ext4_setattr(struct user_namespace 
        if (error)
                return error;
  
 -      if (is_quota_modification(inode, attr)) {
 +      if (is_quota_modification(mnt_userns, inode, attr)) {
                error = dquot_initialize(inode);
                if (error)
                        return error;
        }
  
 -      if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
 -          (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
 +      if (i_uid_needs_update(mnt_userns, attr, inode) ||
 +          i_gid_needs_update(mnt_userns, attr, inode)) {
                handle_t *handle;
  
                /* (user+group)*(old+new) structure, inode write (sb,
                 * counts xattr inode references.
                 */
                down_read(&EXT4_I(inode)->xattr_sem);
 -              error = dquot_transfer(inode, attr);
 +              error = dquot_transfer(mnt_userns, inode, attr);
                up_read(&EXT4_I(inode)->xattr_sem);
  
                if (error) {
                }
                /* Update corresponding info in inode so that everything is in
                 * one transaction */
 -              if (attr->ia_valid & ATTR_UID)
 -                      inode->i_uid = attr->ia_uid;
 -              if (attr->ia_valid & ATTR_GID)
 -                      inode->i_gid = attr->ia_gid;
 +              i_uid_update(mnt_userns, attr, inode);
 +              i_gid_update(mnt_userns, attr, inode);
                error = ext4_mark_inode_dirty(handle, inode);
                ext4_journal_stop(handle);
                if (unlikely(error)) {
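
Both ext4 loops above move to the lookup pattern this series introduces: filemap_get_folios() fills a folio_batch and advances the start index, and the caller releases the batch each round. A sketch of that pattern in isolation; example_walk is invented, the mm helpers are the ones used above:

#include <linux/pagemap.h>
#include <linux/pagevec.h>

static void example_walk(struct address_space *mapping, pgoff_t start,
			 pgoff_t end)
{
	struct folio_batch fbatch;
	unsigned int nr, i;

	folio_batch_init(&fbatch);
	while ((nr = filemap_get_folios(mapping, &start, end, &fbatch)) != 0) {
		for (i = 0; i < nr; i++) {
			struct folio *folio = fbatch.folios[i];

			/* a large folio can extend past 'end'; callers that
			 * care (as ext4 does above) re-check folio->index
			 * and folio_nr_pages() before using it */
			(void)folio;
		}
		folio_batch_release(&fbatch);
	}
}
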
diff --combined fs/f2fs/data.c
@@@ -387,11 -387,11 +387,11 @@@ int f2fs_target_device_index(struct f2f
        return 0;
  }
  
 -static unsigned int f2fs_io_flags(struct f2fs_io_info *fio)
 +static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
  {
        unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
        unsigned int fua_flag, meta_flag, io_flag;
 -      unsigned int op_flags = 0;
 +      blk_opf_t op_flags = 0;
  
        if (fio->op != REQ_OP_WRITE)
                return 0;
@@@ -999,7 -999,7 +999,7 @@@ out
  }
  
  static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 -                                    unsigned nr_pages, unsigned op_flag,
 +                                    unsigned nr_pages, blk_opf_t op_flag,
                                      pgoff_t first_idx, bool for_write)
  {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  
  /* This can handle encryption stuffs */
  static int f2fs_submit_page_read(struct inode *inode, struct page *page,
 -                               block_t blkaddr, int op_flags, bool for_write)
 +                               block_t blkaddr, blk_opf_t op_flags,
 +                               bool for_write)
  {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct bio *bio;
@@@ -1182,7 -1181,7 +1182,7 @@@ int f2fs_get_block(struct dnode_of_dat
  }
  
  struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
 -                                              int op_flags, bool for_write)
 +                                   blk_opf_t op_flags, bool for_write)
  {
        struct address_space *mapping = inode->i_mapping;
        struct dnode_of_data dn;
@@@ -3752,42 -3751,6 +3752,6 @@@ out
        return blknr;
  }
  
- #ifdef CONFIG_MIGRATION
- #include <linux/migrate.h>
- int f2fs_migrate_page(struct address_space *mapping,
-               struct page *newpage, struct page *page, enum migrate_mode mode)
- {
-       int rc, extra_count = 0;
-       BUG_ON(PageWriteback(page));
-       rc = migrate_page_move_mapping(mapping, newpage,
-                               page, extra_count);
-       if (rc != MIGRATEPAGE_SUCCESS)
-               return rc;
-       /* guarantee to start from no stale private field */
-       set_page_private(newpage, 0);
-       if (PagePrivate(page)) {
-               set_page_private(newpage, page_private(page));
-               SetPagePrivate(newpage);
-               get_page(newpage);
-               set_page_private(page, 0);
-               ClearPagePrivate(page);
-               put_page(page);
-       }
-       if (mode != MIGRATE_SYNC_NO_COPY)
-               migrate_page_copy(newpage, page);
-       else
-               migrate_page_states(newpage, page);
-       return MIGRATEPAGE_SUCCESS;
- }
- #endif
  #ifdef CONFIG_SWAP
  static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
                                                        unsigned int blkcnt)
@@@ -4019,15 -3982,13 +3983,13 @@@ const struct address_space_operations f
        .write_begin    = f2fs_write_begin,
        .write_end      = f2fs_write_end,
        .dirty_folio    = f2fs_dirty_data_folio,
+       .migrate_folio  = filemap_migrate_folio,
        .invalidate_folio = f2fs_invalidate_folio,
        .release_folio  = f2fs_release_folio,
        .direct_IO      = noop_direct_IO,
        .bmap           = f2fs_bmap,
        .swap_activate  = f2fs_swap_activate,
        .swap_deactivate = f2fs_swap_deactivate,
- #ifdef CONFIG_MIGRATION
-       .migratepage    = f2fs_migrate_page,
- #endif
  };
  
  void f2fs_clear_page_cache_dirty_tag(struct page *page)
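
The hand-rolled f2fs_migrate_page() removed above is replaced by pointing the data and node mappings at the generic helper. For filesystems whose per-page state rides in folio->private the conversion is just the hook assignment; a sketch with an invented table name (filemap_migrate_folio() is the helper this series adds, its declaration assumed available from the migration/pagemap headers):

#include <linux/fs.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>

static const struct address_space_operations example_aops = {
	.migrate_folio	= filemap_migrate_folio,
	/* remaining callbacks elided */
};
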
diff --combined fs/f2fs/f2fs.h
@@@ -1183,8 -1183,8 +1183,8 @@@ struct f2fs_io_info 
        nid_t ino;              /* inode number */
        enum page_type type;    /* contains DATA/NODE/META/META_FLUSH */
        enum temp_type temp;    /* contains HOT/WARM/COLD */
 -      int op;                 /* contains REQ_OP_ */
 -      int op_flags;           /* req_flag_bits */
 +      enum req_op op;         /* contains REQ_OP_ */
 +      blk_opf_t op_flags;     /* req_flag_bits */
        block_t new_blkaddr;    /* new block address to be written */
        block_t old_blkaddr;    /* old block address before Cow */
        struct page *page;      /* page to be written */
@@@ -3741,7 -3741,7 +3741,7 @@@ int f2fs_reserve_new_block(struct dnode
  int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
  int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
  struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
 -                      int op_flags, bool for_write);
 +                      blk_opf_t op_flags, bool for_write);
  struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index);
  struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
                        bool for_write);
@@@ -3764,10 -3764,6 +3764,6 @@@ int f2fs_write_single_data_page(struct 
  void f2fs_write_failed(struct inode *inode, loff_t to);
  void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
  bool f2fs_release_folio(struct folio *folio, gfp_t wait);
- #ifdef CONFIG_MIGRATION
- int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
-                       struct page *page, enum migrate_mode mode);
- #endif
  bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
  void f2fs_clear_page_cache_dirty_tag(struct page *page);
  int f2fs_init_post_read_processing(void);
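
The f2fs_io_info and prototype changes above come from the block-layer typing cleanup folded into this merge: request operations are carried as enum req_op and flag bits as blk_opf_t instead of plain int, so mismatches are visible to the compiler and sparse. Illustrative only, this is not the real f2fs_io_info:

#include <linux/blk_types.h>

struct example_io {
	enum req_op op;		/* one of REQ_OP_READ, REQ_OP_WRITE, ... */
	blk_opf_t op_flags;	/* REQ_SYNC, REQ_META, REQ_FUA, ... */
};
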
diff --combined fs/f2fs/node.c
@@@ -1327,7 -1327,7 +1327,7 @@@ fail
   * 0: f2fs_put_page(page, 0)
   * LOCKED_PAGE or error: f2fs_put_page(page, 1)
   */
 -static int read_node_page(struct page *page, int op_flags)
 +static int read_node_page(struct page *page, blk_opf_t op_flags)
  {
        struct f2fs_sb_info *sbi = F2FS_P_SB(page);
        struct node_info ni;
@@@ -2165,9 -2165,7 +2165,7 @@@ const struct address_space_operations f
        .dirty_folio    = f2fs_dirty_node_folio,
        .invalidate_folio = f2fs_invalidate_folio,
        .release_folio  = f2fs_release_folio,
- #ifdef CONFIG_MIGRATION
-       .migratepage    = f2fs_migrate_page,
- #endif
+       .migrate_folio  = filemap_migrate_folio,
  };
  
  static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
diff --combined fs/gfs2/lops.c
@@@ -238,7 -238,7 +238,7 @@@ static void gfs2_end_log_write(struct b
   * there is no pending bio, then this is a no-op.
   */
  
 -void gfs2_log_submit_bio(struct bio **biop, int opf)
 +void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf)
  {
        struct bio *bio = *biop;
        if (bio) {
@@@ -292,7 -292,7 +292,7 @@@ static struct bio *gfs2_log_alloc_bio(s
   */
  
  static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
 -                                  struct bio **biop, int op,
 +                                  struct bio **biop, enum req_op op,
                                    bio_end_io_t *end_io, bool flush)
  {
        struct bio *bio = *biop;
@@@ -452,36 -452,36 +452,36 @@@ static bool gfs2_jhead_pg_srch(struct g
   * @head: The journal head to start from
   * @done: If set, perform only cleanup, else search and set if found.
   *
-  * Find the page with 'index' in the journal's mapping. Search the page for
+  * Find the folio with 'index' in the journal's mapping. Search the folio for
   * the journal head if requested (cleanup == false). Release refs on the
-  * page so the page cache can reclaim it (put_page() twice). We grabbed a
-  * reference on this page two times, first when we did a find_or_create_page()
-  * to obtain the page to add it to the bio and second when we do a
-  * find_get_page() here to get the page to wait on while I/O on it is being
+  * folio so the page cache can reclaim it. We grabbed a
+  * reference on this folio twice, first when we did a find_or_create_page()
+  * to obtain the folio to add it to the bio and second when we do a
+  * filemap_get_folio() here to get the folio to wait on while I/O on it is being
   * completed.
-  * This function is also used to free up a page we might've grabbed but not
+  * This function is also used to free up a folio we might've grabbed but not
   * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
   * submitted the I/O, but we already found the jhead so we only need to drop
-  * our references to the page.
+  * our references to the folio.
   */
  
  static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
                                    struct gfs2_log_header_host *head,
                                    bool *done)
  {
-       struct page *page;
+       struct folio *folio;
  
-       page = find_get_page(jd->jd_inode->i_mapping, index);
-       wait_on_page_locked(page);
+       folio = filemap_get_folio(jd->jd_inode->i_mapping, index);
  
-       if (PageError(page))
+       folio_wait_locked(folio);
+       if (folio_test_error(folio))
                *done = true;
  
        if (!*done)
-               *done = gfs2_jhead_pg_srch(jd, head, page);
+               *done = gfs2_jhead_pg_srch(jd, head, &folio->page);
  
-       put_page(page); /* Once for find_get_page */
-       put_page(page); /* Once more for find_or_create_page */
+       /* filemap_get_folio() and the earlier find_or_create_page() */
+       folio_put_refs(folio, 2);
  }
  
  static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
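
The gfs2 hunk above swaps a find_get_page() plus double put_page() sequence for filemap_get_folio() plus folio_put_refs(). The same pattern in isolation; names are invented and the second reference is assumed to come from an earlier find_or_create_page(), as described in the comment above:

#include <linux/pagemap.h>

static void example_wait_and_drop(struct address_space *mapping, pgoff_t index)
{
	struct folio *folio = filemap_get_folio(mapping, index);

	if (!folio)
		return;
	folio_wait_locked(folio);
	/* one reference from filemap_get_folio(), one from the earlier
	 * find_or_create_page() that populated the mapping */
	folio_put_refs(folio, 2);
}
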
diff --combined fs/hugetlbfs/inode.c
@@@ -108,16 -108,6 +108,6 @@@ static inline void hugetlb_drop_vma_pol
  }
  #endif
  
- static void huge_pagevec_release(struct pagevec *pvec)
- {
-       int i;
-       for (i = 0; i < pagevec_count(pvec); ++i)
-               put_page(pvec->pages[i]);
-       pagevec_reinit(pvec);
- }
  /*
   * Mask used when checking the page offset value passed in via system
   * calls.  This value will be converted to a loff_t which is signed.
@@@ -480,25 -470,19 +470,19 @@@ static void remove_inode_hugepages(stru
        struct address_space *mapping = &inode->i_data;
        const pgoff_t start = lstart >> huge_page_shift(h);
        const pgoff_t end = lend >> huge_page_shift(h);
-       struct pagevec pvec;
+       struct folio_batch fbatch;
        pgoff_t next, index;
        int i, freed = 0;
        bool truncate_op = (lend == LLONG_MAX);
  
-       pagevec_init(&pvec);
+       folio_batch_init(&fbatch);
        next = start;
-       while (next < end) {
-               /*
-                * When no more pages are found, we are done.
-                */
-               if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1))
-                       break;
-               for (i = 0; i < pagevec_count(&pvec); ++i) {
-                       struct page *page = pvec.pages[i];
+       while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
+               for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+                       struct folio *folio = fbatch.folios[i];
                        u32 hash = 0;
  
-                       index = page->index;
+                       index = folio->index;
                        if (!truncate_op) {
                                /*
                                 * Only need to hold the fault mutex in the
                        }
  
                        /*
-                        * If page is mapped, it was faulted in after being
+                        * If folio is mapped, it was faulted in after being
                         * unmapped in caller.  Unmap (again) now after taking
                         * the fault mutex.  The mutex will prevent faults
-                        * until we finish removing the page.
+                        * until we finish removing the folio.
                         *
                         * This race can only happen in the hole punch case.
                         * Getting here in a truncate operation is a bug.
                         */
-                       if (unlikely(page_mapped(page))) {
+                       if (unlikely(folio_mapped(folio))) {
                                BUG_ON(truncate_op);
  
                                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                                i_mmap_unlock_write(mapping);
                        }
  
-                       lock_page(page);
+                       folio_lock(folio);
                        /*
                         * We must free the huge page and remove from page
                         * cache (remove_huge_page) BEFORE removing the
                         * the subpool and global reserve usage count can need
                         * to be adjusted.
                         */
-                       VM_BUG_ON(HPageRestoreReserve(page));
-                       remove_huge_page(page);
+                       VM_BUG_ON(HPageRestoreReserve(&folio->page));
+                       remove_huge_page(&folio->page);
                        freed++;
                        if (!truncate_op) {
                                if (unlikely(hugetlb_unreserve_pages(inode,
                                        hugetlb_fix_reserve_counts(inode);
                        }
  
-                       unlock_page(page);
+                       folio_unlock(folio);
                        if (!truncate_op)
                                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                }
-               huge_pagevec_release(&pvec);
+               folio_batch_release(&fbatch);
                cond_resched();
        }
  
@@@ -600,79 -584,41 +584,79 @@@ static void hugetlb_vmtruncate(struct i
        remove_inode_hugepages(inode, offset, LLONG_MAX);
  }
  
 +static void hugetlbfs_zero_partial_page(struct hstate *h,
 +                                      struct address_space *mapping,
 +                                      loff_t start,
 +                                      loff_t end)
 +{
 +      pgoff_t idx = start >> huge_page_shift(h);
 +      struct folio *folio;
 +
 +      folio = filemap_lock_folio(mapping, idx);
 +      if (!folio)
 +              return;
 +
 +      start = start & ~huge_page_mask(h);
 +      end = end & ~huge_page_mask(h);
 +      if (!end)
 +              end = huge_page_size(h);
 +
 +      folio_zero_segment(folio, (size_t)start, (size_t)end);
 +
 +      folio_unlock(folio);
 +      folio_put(folio);
 +}
 +
  static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
  {
 +      struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 +      struct address_space *mapping = inode->i_mapping;
        struct hstate *h = hstate_inode(inode);
        loff_t hpage_size = huge_page_size(h);
        loff_t hole_start, hole_end;
  
        /*
 -       * For hole punch round up the beginning offset of the hole and
 -       * round down the end.
 +       * hole_start and hole_end indicate the full pages within the hole.
         */
        hole_start = round_up(offset, hpage_size);
        hole_end = round_down(offset + len, hpage_size);
  
 -      if (hole_end > hole_start) {
 -              struct address_space *mapping = inode->i_mapping;
 -              struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 +      inode_lock(inode);
 +
 +      /* protected by i_rwsem */
 +      if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
 +              inode_unlock(inode);
 +              return -EPERM;
 +      }
  
 -              inode_lock(inode);
 +      i_mmap_lock_write(mapping);
  
 -              /* protected by i_rwsem */
 -              if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
 -                      inode_unlock(inode);
 -                      return -EPERM;
 -              }
 +      /* If range starts before first full page, zero partial page. */
 +      if (offset < hole_start)
 +              hugetlbfs_zero_partial_page(h, mapping,
 +                              offset, min(offset + len, hole_start));
  
 -              i_mmap_lock_write(mapping);
 +      /* Unmap users of full pages in the hole. */
 +      if (hole_end > hole_start) {
                if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
                        hugetlb_vmdelete_list(&mapping->i_mmap,
                                              hole_start >> PAGE_SHIFT,
                                              hole_end >> PAGE_SHIFT, 0);
 -              i_mmap_unlock_write(mapping);
 -              remove_inode_hugepages(inode, hole_start, hole_end);
 -              inode_unlock(inode);
        }
  
 +      /* If range extends beyond last full page, zero partial page. */
 +      if ((offset + len) > hole_end && (offset + len) > hole_start)
 +              hugetlbfs_zero_partial_page(h, mapping,
 +                              hole_end, offset + len);
 +
 +      i_mmap_unlock_write(mapping);
 +
 +      /* Remove full pages from the file. */
 +      if (hole_end > hole_start)
 +              remove_inode_hugepages(inode, hole_start, hole_end);
 +
 +      inode_unlock(inode);
 +
        return 0;
  }
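
The reworked hole punch above keys everything off hole_start and hole_end, the range of huge pages fully contained in the hole: round the start up, round the end down, zero the partial pages on either side, and unmap/remove only when hole_end > hole_start. A small worked sketch of the rounding, with invented names and example values:

#include <linux/math.h>
#include <linux/types.h>

static loff_t example_full_hole_bytes(loff_t offset, loff_t len,
				      unsigned long hpage_size)
{
	loff_t hole_start = round_up(offset, hpage_size);
	loff_t hole_end = round_down(offset + len, hpage_size);

	/* e.g. offset = 1 MiB, len = 5 MiB, hpage_size = 2 MiB:
	 * hole_start = 2 MiB, hole_end = 6 MiB, so 4 MiB of full huge
	 * pages are removed and only [1 MiB, 2 MiB) needs zeroing */
	return hole_end > hole_start ? hole_end - hole_start : 0;
}
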
  
@@@ -797,7 -743,7 +781,7 @@@ static long hugetlbfs_fallocate(struct 
  
                SetHPageMigratable(page);
                /*
-                * unlock_page because locked by add_to_page_cache()
+                * unlock_page because locked by huge_add_to_page_cache()
                 * put_page() due to reference from alloc_huge_page()
                 */
                unlock_page(page);
@@@ -1008,28 -954,33 +992,33 @@@ static int hugetlbfs_symlink(struct use
        return error;
  }
  
- static int hugetlbfs_migrate_page(struct address_space *mapping,
-                               struct page *newpage, struct page *page,
+ #ifdef CONFIG_MIGRATION
+ static int hugetlbfs_migrate_folio(struct address_space *mapping,
+                               struct folio *dst, struct folio *src,
                                enum migrate_mode mode)
  {
        int rc;
  
-       rc = migrate_huge_page_move_mapping(mapping, newpage, page);
+       rc = migrate_huge_page_move_mapping(mapping, dst, src);
        if (rc != MIGRATEPAGE_SUCCESS)
                return rc;
  
-       if (hugetlb_page_subpool(page)) {
-               hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
-               hugetlb_set_page_subpool(page, NULL);
+       if (hugetlb_page_subpool(&src->page)) {
+               hugetlb_set_page_subpool(&dst->page,
+                                       hugetlb_page_subpool(&src->page));
+               hugetlb_set_page_subpool(&src->page, NULL);
        }
  
        if (mode != MIGRATE_SYNC_NO_COPY)
-               migrate_page_copy(newpage, page);
+               folio_migrate_copy(dst, src);
        else
-               migrate_page_states(newpage, page);
+               folio_migrate_flags(dst, src);
  
        return MIGRATEPAGE_SUCCESS;
  }
+ #else
+ #define hugetlbfs_migrate_folio NULL
+ #endif
  
  static int hugetlbfs_error_remove_page(struct address_space *mapping,
                                struct page *page)
@@@ -1196,7 -1147,7 +1185,7 @@@ static const struct address_space_opera
        .write_begin    = hugetlbfs_write_begin,
        .write_end      = hugetlbfs_write_end,
        .dirty_folio    = noop_dirty_folio,
-       .migratepage    = hugetlbfs_migrate_page,
+       .migrate_folio  = hugetlbfs_migrate_folio,
        .error_remove_page      = hugetlbfs_error_remove_page,
  };
  
diff --combined fs/inode.c
@@@ -604,7 -604,7 +604,7 @@@ void clear_inode(struct inode *inode
  {
        /*
         * We have to cycle the i_pages lock here because reclaim can be in the
-        * process of removing the last page (in __delete_from_page_cache())
+        * process of removing the last page (in __filemap_remove_folio())
         * and we must not free the mapping under it.
         */
        xa_lock_irq(&inode->i_data.i_pages);
@@@ -2010,57 -2010,67 +2010,57 @@@ static int __remove_privs(struct user_n
        return notify_change(mnt_userns, dentry, &newattrs, NULL);
  }
  
 -/*
 - * Remove special file priviledges (suid, capabilities) when file is written
 - * to or truncated.
 - */
 -int file_remove_privs(struct file *file)
 +static int __file_remove_privs(struct file *file, unsigned int flags)
  {
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = file_inode(file);
 +      int error;
        int kill;
 -      int error = 0;
  
 -      /*
 -       * Fast path for nothing security related.
 -       * As well for non-regular files, e.g. blkdev inodes.
 -       * For example, blkdev_write_iter() might get here
 -       * trying to remove privs which it is not allowed to.
 -       */
        if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
                return 0;
  
        kill = dentry_needs_remove_privs(dentry);
 -      if (kill < 0)
 +      if (kill <= 0)
                return kill;
 -      if (kill)
 -              error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
 +
 +      if (flags & IOCB_NOWAIT)
 +              return -EAGAIN;
 +
 +      error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
        if (!error)
                inode_has_no_xattr(inode);
  
        return error;
  }
 -EXPORT_SYMBOL(file_remove_privs);
  
  /**
 - *    file_update_time        -       update mtime and ctime time
 - *    @file: file accessed
 - *
 - *    Update the mtime and ctime members of an inode and mark the inode
 - *    for writeback.  Note that this function is meant exclusively for
 - *    usage in the file write path of filesystems, and filesystems may
 - *    choose to explicitly ignore update via this function with the
 - *    S_NOCMTIME inode flag, e.g. for network filesystem where these
 - *    timestamps are handled by the server.  This can return an error for
 - *    file systems who need to allocate space in order to update an inode.
 + * file_remove_privs - remove special file privileges (suid, capabilities)
 + * @file: file to remove privileges from
 + *
 + * When file is modified by a write or truncation ensure that special
 + * file privileges are removed.
 + *
 + * Return: 0 on success, negative errno on failure.
   */
 +int file_remove_privs(struct file *file)
 +{
 +      return __file_remove_privs(file, 0);
 +}
 +EXPORT_SYMBOL(file_remove_privs);
  
 -int file_update_time(struct file *file)
 +static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
  {
 -      struct inode *inode = file_inode(file);
 -      struct timespec64 now;
        int sync_it = 0;
 -      int ret;
  
        /* First try to exhaust all avenues to not sync */
        if (IS_NOCMTIME(inode))
                return 0;
  
 -      now = current_time(inode);
 -      if (!timespec64_equal(&inode->i_mtime, &now))
 +      if (!timespec64_equal(&inode->i_mtime, now))
                sync_it = S_MTIME;
  
 -      if (!timespec64_equal(&inode->i_ctime, &now))
 +      if (!timespec64_equal(&inode->i_ctime, now))
                sync_it |= S_CTIME;
  
        if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
        if (!sync_it)
                return 0;
  
 -      /* Finally allowed to write? Takes lock. */
 -      if (__mnt_want_write_file(file))
 -              return 0;
 +      return sync_it;
 +}
  
 -      ret = inode_update_time(inode, &now, sync_it);
 -      __mnt_drop_write_file(file);
 +static int __file_update_time(struct file *file, struct timespec64 *now,
 +                      int sync_mode)
 +{
 +      int ret = 0;
 +      struct inode *inode = file_inode(file);
 +
 +      /* try to update time settings */
 +      if (!__mnt_want_write_file(file)) {
 +              ret = inode_update_time(inode, now, sync_mode);
 +              __mnt_drop_write_file(file);
 +      }
  
        return ret;
  }
 +
 +/**
 + * file_update_time - update mtime and ctime time
 + * @file: file accessed
 + *
 + * Update the mtime and ctime members of an inode and mark the inode for
 + * writeback. Note that this function is meant exclusively for usage in
 + * the file write path of filesystems, and filesystems may choose to
 + * explicitly ignore updates via this function with the S_NOCMTIME inode
 + * flag, e.g. for network filesystem where these timestamps are handled
 + * by the server. This can return an error for file systems who need to
 + * allocate space in order to update an inode.
 + *
 + * Return: 0 on success, negative errno on failure.
 + */
 +int file_update_time(struct file *file)
 +{
 +      int ret;
 +      struct inode *inode = file_inode(file);
 +      struct timespec64 now = current_time(inode);
 +
 +      ret = inode_needs_update_time(inode, &now);
 +      if (ret <= 0)
 +              return ret;
 +
 +      return __file_update_time(file, &now, ret);
 +}
  EXPORT_SYMBOL(file_update_time);
  
 -/* Caller must hold the file's inode lock */
 -int file_modified(struct file *file)
 +/**
 + * file_modified_flags - handle mandated vfs changes when modifying a file
 + * @file: file that was modified
 + * @flags: kiocb flags
 + *
 + * When file has been modified ensure that special
 + * file privileges are removed and time settings are updated.
 + *
 + * If IOCB_NOWAIT is set, special file privileges will not be removed and
 + * time settings will not be updated. It will return -EAGAIN.
 + *
 + * Context: Caller must hold the file's inode lock.
 + *
 + * Return: 0 on success, negative errno on failure.
 + */
 +static int file_modified_flags(struct file *file, int flags)
  {
 -      int err;
 +      int ret;
 +      struct inode *inode = file_inode(file);
 +      struct timespec64 now = current_time(inode);
  
        /*
         * Clear the security bits if the process is not being run by root.
         * This keeps people from modifying setuid and setgid binaries.
         */
 -      err = file_remove_privs(file);
 -      if (err)
 -              return err;
 +      ret = __file_remove_privs(file, flags);
 +      if (ret)
 +              return ret;
  
        if (unlikely(file->f_mode & FMODE_NOCMTIME))
                return 0;
  
 -      return file_update_time(file);
 +      ret = inode_needs_update_time(inode, &now);
 +      if (ret <= 0)
 +              return ret;
 +      if (flags & IOCB_NOWAIT)
 +              return -EAGAIN;
 +
 +      return __file_update_time(file, &now, ret);
 +}
 +
 +/**
 + * file_modified - handle mandated vfs changes when modifying a file
 + * @file: file that was modified
 + *
 + * When file has been modified ensure that special
 + * file privileges are removed and time settings are updated.
 + *
 + * Context: Caller must hold the file's inode lock.
 + *
 + * Return: 0 on success, negative errno on failure.
 + */
 +int file_modified(struct file *file)
 +{
 +      return file_modified_flags(file, 0);
  }
  EXPORT_SYMBOL(file_modified);
  
 +/**
 + * kiocb_modified - handle mandated vfs changes when modifying a file
 + * @iocb: iocb that was modified
 + *
 + * When file has been modified ensure that special
 + * file privileges are removed and time settings are updated.
 + *
 + * Context: Caller must hold the file's inode lock.
 + *
 + * Return: 0 on success, negative errno on failure.
 + */
 +int kiocb_modified(struct kiocb *iocb)
 +{
 +      return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
 +}
 +EXPORT_SYMBOL_GPL(kiocb_modified);
 +
  int inode_needs_sync(struct inode *inode)
  {
        if (IS_SYNC(inode))
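
The fs/inode.c rework above exists so async buffered writers can call a single helper that strips privileges and updates timestamps without blocking. A sketch of the intended call site; example_write_iter is invented, kiocb_modified() and its -EAGAIN behaviour under IOCB_NOWAIT are from the hunks above, and a real RWF_NOWAIT path would take the lock with inode_trylock():

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t example_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	inode_lock(inode);
	ret = kiocb_modified(iocb);	/* privs + c/mtime, honours IOCB_NOWAIT */
	if (ret)
		goto out_unlock;

	/* ... do the buffered write here, e.g. via iomap_file_buffered_write() ... */
	ret = iov_iter_count(from);
out_unlock:
	inode_unlock(inode);
	return ret;
}
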
diff --combined fs/iomap/buffered-io.c
@@@ -44,28 -44,20 +44,28 @@@ static inline struct iomap_page *to_iom
  static struct bio_set iomap_ioend_bioset;
  
  static struct iomap_page *
 -iomap_page_create(struct inode *inode, struct folio *folio)
 +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags)
  {
        struct iomap_page *iop = to_iomap_page(folio);
        unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
 +      gfp_t gfp;
  
        if (iop || nr_blocks <= 1)
                return iop;
  
 +      if (flags & IOMAP_NOWAIT)
 +              gfp = GFP_NOWAIT;
 +      else
 +              gfp = GFP_NOFS | __GFP_NOFAIL;
 +
        iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
 -                      GFP_NOFS | __GFP_NOFAIL);
 -      spin_lock_init(&iop->uptodate_lock);
 -      if (folio_test_uptodate(folio))
 -              bitmap_fill(iop->uptodate, nr_blocks);
 -      folio_attach_private(folio, iop);
 +                    gfp);
 +      if (iop) {
 +              spin_lock_init(&iop->uptodate_lock);
 +              if (folio_test_uptodate(folio))
 +                      bitmap_fill(iop->uptodate, nr_blocks);
 +              folio_attach_private(folio, iop);
 +      }
        return iop;
  }
  
@@@ -162,9 -154,6 +162,6 @@@ static void iomap_iop_set_range_uptodat
  static void iomap_set_range_uptodate(struct folio *folio,
                struct iomap_page *iop, size_t off, size_t len)
  {
-       if (folio_test_error(folio))
-               return;
        if (iop)
                iomap_iop_set_range_uptodate(folio, iop, off, len);
        else
@@@ -234,7 -223,7 +231,7 @@@ static int iomap_read_inline_data(cons
        if (WARN_ON_ONCE(size > iomap->length))
                return -EIO;
        if (offset > 0)
 -              iop = iomap_page_create(iter->inode, folio);
 +              iop = iomap_page_create(iter->inode, folio, iter->flags);
        else
                iop = to_iomap_page(folio);
  
@@@ -272,7 -261,7 +269,7 @@@ static loff_t iomap_readpage_iter(cons
                return iomap_read_inline_data(iter, folio);
  
        /* zero post-eof blocks as the page may be mapped */
 -      iop = iomap_page_create(iter->inode, folio);
 +      iop = iomap_page_create(iter->inode, folio, iter->flags);
        iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
        if (plen == 0)
                goto done;
@@@ -500,31 -489,6 +497,6 @@@ void iomap_invalidate_folio(struct foli
  }
  EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
  
- #ifdef CONFIG_MIGRATION
- int
- iomap_migrate_page(struct address_space *mapping, struct page *newpage,
-               struct page *page, enum migrate_mode mode)
- {
-       struct folio *folio = page_folio(page);
-       struct folio *newfolio = page_folio(newpage);
-       int ret;
-       ret = folio_migrate_mapping(mapping, newfolio, folio, 0);
-       if (ret != MIGRATEPAGE_SUCCESS)
-               return ret;
-       if (folio_test_private(folio))
-               folio_attach_private(newfolio, folio_detach_private(folio));
-       if (mode != MIGRATE_SYNC_NO_COPY)
-               folio_migrate_copy(newfolio, folio);
-       else
-               folio_migrate_flags(newfolio, folio);
-       return MIGRATEPAGE_SUCCESS;
- }
- EXPORT_SYMBOL_GPL(iomap_migrate_page);
- #endif /* CONFIG_MIGRATION */
  static void
  iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
  {
@@@ -555,11 -519,10 +527,11 @@@ static int __iomap_write_begin(const st
                size_t len, struct folio *folio)
  {
        const struct iomap *srcmap = iomap_iter_srcmap(iter);
 -      struct iomap_page *iop = iomap_page_create(iter->inode, folio);
 +      struct iomap_page *iop;
        loff_t block_size = i_blocksize(iter->inode);
        loff_t block_start = round_down(pos, block_size);
        loff_t block_end = round_up(pos + len, block_size);
 +      unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio);
        size_t from = offset_in_folio(folio, pos), to = from + len;
        size_t poff, plen;
  
                return 0;
        folio_clear_error(folio);
  
 +      iop = iomap_page_create(iter->inode, folio, iter->flags);
 +      if ((iter->flags & IOMAP_NOWAIT) && !iop && nr_blocks > 1)
 +              return -EAGAIN;
 +
        do {
                iomap_adjust_read_range(iter->inode, folio, &block_start,
                                block_end - block_start, &poff, &plen);
                                return -EIO;
                        folio_zero_segments(folio, poff, from, to, poff + plen);
                } else {
 -                      int status = iomap_read_folio_sync(block_start, folio,
 +                      int status;
 +
 +                      if (iter->flags & IOMAP_NOWAIT)
 +                              return -EAGAIN;
 +
 +                      status = iomap_read_folio_sync(block_start, folio,
                                        poff, plen, srcmap);
                        if (status)
                                return status;
@@@ -621,9 -575,6 +593,9 @@@ static int iomap_write_begin(const stru
        unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
        int status = 0;
  
 +      if (iter->flags & IOMAP_NOWAIT)
 +              fgp |= FGP_NOWAIT;
 +
        BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
        if (srcmap != &iter->iomap)
                BUG_ON(pos + len > srcmap->offset + srcmap->length);
        folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
                        fgp, mapping_gfp_mask(iter->inode->i_mapping));
        if (!folio) {
 -              status = -ENOMEM;
 +              status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
                goto out_no_page;
        }
        if (pos + len > folio_pos(folio) + folio_size(folio))
@@@ -761,8 -712,6 +733,8 @@@ static loff_t iomap_write_iter(struct i
        loff_t pos = iter->pos;
        ssize_t written = 0;
        long status = 0;
 +      struct address_space *mapping = iter->inode->i_mapping;
 +      unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
  
        do {
                struct folio *folio;
                bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_count(i));
  again:
 +              status = balance_dirty_pages_ratelimited_flags(mapping,
 +                                                             bdp_flags);
 +              if (unlikely(status))
 +                      break;
 +
                if (bytes > length)
                        bytes = length;
  
                 * Otherwise there's a nasty deadlock on copying from the
                 * same page as we're writing to, without it being marked
                 * up-to-date.
 +               *
 +               * For async buffered writes the assumption is that the user
 +               * page has already been faulted in. This can be optimized by
 +               * faulting the user page.
                 */
                if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
                        status = -EFAULT;
                        break;
  
                page = folio_file_page(folio, pos >> PAGE_SHIFT);
 -              if (mapping_writably_mapped(iter->inode->i_mapping))
 +              if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
  
                copied = copy_page_from_iter_atomic(page, offset, bytes, i);
                pos += status;
                written += status;
                length -= status;
 -
 -              balance_dirty_pages_ratelimited(iter->inode->i_mapping);
        } while (iov_iter_count(i) && length);
  
 +      if (status == -EAGAIN) {
 +              iov_iter_revert(i, written);
 +              return -EAGAIN;
 +      }
        return written ? written : status;
  }
  
@@@ -849,9 -787,6 +821,9 @@@ iomap_file_buffered_write(struct kiocb 
        };
        int ret;
  
 +      if (iocb->ki_flags & IOCB_NOWAIT)
 +              iter.flags |= IOMAP_NOWAIT;
 +
        while ((ret = iomap_iter(&iter, ops)) > 0)
                iter.processed = iomap_write_iter(&iter, i);
        if (iter.pos == iocb->ki_pos)
@@@ -1366,7 -1301,7 +1338,7 @@@ iomap_writepage_map(struct iomap_writep
                struct writeback_control *wbc, struct inode *inode,
                struct folio *folio, u64 end_pos)
  {
 -      struct iomap_page *iop = iomap_page_create(inode, folio);
 +      struct iomap_page *iop = iomap_page_create(inode, folio, 0);
        struct iomap_ioend *ioend, *next;
        unsigned len = i_blocksize(inode);
        unsigned nblocks = i_blocks_per_folio(inode, folio);
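
The iomap_page_create() change above is the allocation half of IOMAP_NOWAIT support: non-blocking callers get GFP_NOWAIT and must tolerate failure (surfacing as -EAGAIN), everyone else keeps GFP_NOFS | __GFP_NOFAIL. The policy in isolation, with an invented function name:

#include <linux/gfp.h>
#include <linux/iomap.h>

static gfp_t example_iop_gfp(unsigned int iter_flags)
{
	if (iter_flags & IOMAP_NOWAIT)
		return GFP_NOWAIT;		/* may fail; caller returns -EAGAIN */
	return GFP_NOFS | __GFP_NOFAIL;		/* never fails, may block */
}
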
diff --combined fs/mpage.c
@@@ -75,26 -75,28 +75,28 @@@ static struct bio *mpage_bio_submit(str
   * them.  So when the buffer is up to date and the page size == block size,
   * this marks the page up to date instead of adding new buffers.
   */
- static void 
- map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) 
+ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh,
+               int page_block)
  {
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = folio->mapping->host;
        struct buffer_head *page_bh, *head;
        int block = 0;
  
-       if (!page_has_buffers(page)) {
+       head = folio_buffers(folio);
+       if (!head) {
                /*
                 * don't make any buffers if there is only one buffer on
-                * the page and the page just needs to be set up to date
+                * the folio and the folio just needs to be set up to date
                 */
                if (inode->i_blkbits == PAGE_SHIFT &&
                    buffer_uptodate(bh)) {
-                       SetPageUptodate(page);    
+                       folio_mark_uptodate(folio);
                        return;
                }
-               create_empty_buffers(page, i_blocksize(inode), 0);
+               create_empty_buffers(&folio->page, i_blocksize(inode), 0);
+               head = folio_buffers(folio);
        }
-       head = page_buffers(page);
        page_bh = head;
        do {
                if (block == page_block) {
  
  struct mpage_readpage_args {
        struct bio *bio;
-       struct page *page;
+       struct folio *folio;
        unsigned int nr_pages;
        bool is_readahead;
        sector_t last_block_in_bio;
   */
  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
  {
-       struct page *page = args->page;
-       struct inode *inode = page->mapping->host;
+       struct folio *folio = args->folio;
+       struct inode *inode = folio->mapping->host;
        const unsigned blkbits = inode->i_blkbits;
        const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
        const unsigned blocksize = 1 << blkbits;
        struct block_device *bdev = NULL;
        int length;
        int fully_mapped = 1;
 -      int op = REQ_OP_READ;
 +      blk_opf_t opf = REQ_OP_READ;
        unsigned nblocks;
        unsigned relative_block;
-       gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+       gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
+       /* MAX_BUF_PER_PAGE, for example */
+       VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
  
        if (args->is_readahead) {
 -              op |= REQ_RAHEAD;
 +              opf |= REQ_RAHEAD;
                gfp |= __GFP_NORETRY | __GFP_NOWARN;
        }
  
-       if (page_has_buffers(page))
+       if (folio_buffers(folio))
                goto confused;
  
-       block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+       block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
        last_block = block_in_file + args->nr_pages * blocks_per_page;
        last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
        if (last_block > last_block_in_file)
        }
  
        /*
-        * Then do more get_blocks calls until we are done with this page.
+        * Then do more get_blocks calls until we are done with this folio.
         */
-       map_bh->b_page = page;
+       map_bh->b_page = &folio->page;
        while (page_block < blocks_per_page) {
                map_bh->b_state = 0;
                map_bh->b_size = 0;
  
                /* some filesystems will copy data into the page during
                 * the get_block call, in which case we don't want to
-                * read it again.  map_buffer_to_page copies the data
-                * we just collected from get_block into the page's buffers
-                * so readpage doesn't have to repeat the get_block call
+                * read it again.  map_buffer_to_folio copies the data
+                * we just collected from get_block into the folio's buffers
+                * so read_folio doesn't have to repeat the get_block call
                 */
                if (buffer_uptodate(map_bh)) {
-                       map_buffer_to_page(page, map_bh, page_block);
+                       map_buffer_to_folio(folio, map_bh, page_block);
                        goto confused;
                }
        
        }
  
        if (first_hole != blocks_per_page) {
-               zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
+               folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
                if (first_hole == 0) {
-                       SetPageUptodate(page);
-                       unlock_page(page);
+                       folio_mark_uptodate(folio);
+                       folio_unlock(folio);
                        goto out;
                }
        } else if (fully_mapped) {
-               SetPageMappedToDisk(page);
+               folio_set_mappedtodisk(folio);
        }
  
        /*
-        * This page will go to BIO.  Do we need to send this BIO off first?
+        * This folio will go to BIO.  Do we need to send this BIO off first?
         */
        if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
                args->bio = mpage_bio_submit(args->bio);
@@@ -266,10 -271,10 +271,10 @@@ alloc_new
        if (args->bio == NULL) {
                if (first_hole == blocks_per_page) {
                        if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
-                                                               page))
+                                                               &folio->page))
                                goto out;
                }
 -              args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), op,
 +              args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf,
                                      gfp);
                if (args->bio == NULL)
                        goto confused;
        }
  
        length = first_hole << blkbits;
-       if (bio_add_page(args->bio, page, length, 0) < length) {
+       if (!bio_add_folio(args->bio, folio, length, 0)) {
                args->bio = mpage_bio_submit(args->bio);
                goto alloc_new;
        }
@@@ -295,10 -300,10 +300,10 @@@ out
  confused:
        if (args->bio)
                args->bio = mpage_bio_submit(args->bio);
-       if (!PageUptodate(page))
-               block_read_full_folio(page_folio(page), args->get_block);
+       if (!folio_test_uptodate(folio))
+               block_read_full_folio(folio, args->get_block);
        else
-               unlock_page(page);
+               folio_unlock(folio);
        goto out;
  }
  
   */
  void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
  {
-       struct page *page;
+       struct folio *folio;
        struct mpage_readpage_args args = {
                .get_block = get_block,
                .is_readahead = true,
        };
  
-       while ((page = readahead_page(rac))) {
-               prefetchw(&page->flags);
-               args.page = page;
+       while ((folio = readahead_folio(rac))) {
+               prefetchw(&folio->flags);
+               args.folio = folio;
                args.nr_pages = readahead_count(rac);
                args.bio = do_mpage_readpage(&args);
-               put_page(page);
        }
        if (args.bio)
                mpage_bio_submit(args.bio);
@@@ -367,13 -371,11 +371,11 @@@ EXPORT_SYMBOL(mpage_readahead)
  int mpage_read_folio(struct folio *folio, get_block_t get_block)
  {
        struct mpage_readpage_args args = {
-               .page = &folio->page,
+               .folio = folio,
                .nr_pages = 1,
                .get_block = get_block,
        };
  
-       VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
        args.bio = do_mpage_readpage(&args);
        if (args.bio)
                mpage_bio_submit(args.bio);
@@@ -402,7 -404,6 +404,6 @@@ struct mpage_data 
        struct bio *bio;
        sector_t last_block_in_bio;
        get_block_t *get_block;
-       unsigned use_writepage;
  };
  
  /*
@@@ -622,15 -623,10 +623,10 @@@ confused
        if (bio)
                bio = mpage_bio_submit(bio);
  
-       if (mpd->use_writepage) {
-               ret = mapping->a_ops->writepage(page, wbc);
-       } else {
-               ret = -EAGAIN;
-               goto out;
-       }
        /*
         * The caller has a ref on the inode, so *mapping is stable
         */
+       ret = block_write_full_page(page, mpd->get_block, wbc);
        mapping_set_error(mapping, ret);
  out:
        mpd->bio = bio;
   * @mapping: address space structure to write
   * @wbc: subtract the number of written pages from *@wbc->nr_to_write
   * @get_block: the filesystem's block mapper function.
-  *             If this is NULL then use a_ops->writepage.  Otherwise, go
-  *             direct-to-BIO.
   *
   * This is a library function, which implements the writepages()
   * address_space_operation.
@@@ -660,42 -654,17 +654,17 @@@ in
  mpage_writepages(struct address_space *mapping,
                struct writeback_control *wbc, get_block_t get_block)
  {
+       struct mpage_data mpd = {
+               .get_block      = get_block,
+       };
        struct blk_plug plug;
        int ret;
  
        blk_start_plug(&plug);
-       if (!get_block)
-               ret = generic_writepages(mapping, wbc);
-       else {
-               struct mpage_data mpd = {
-                       .bio = NULL,
-                       .last_block_in_bio = 0,
-                       .get_block = get_block,
-                       .use_writepage = 1,
-               };
-               ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
-               if (mpd.bio)
-                       mpage_bio_submit(mpd.bio);
-       }
-       blk_finish_plug(&plug);
-       return ret;
- }
- EXPORT_SYMBOL(mpage_writepages);
- int mpage_writepage(struct page *page, get_block_t get_block,
-       struct writeback_control *wbc)
- {
-       struct mpage_data mpd = {
-               .bio = NULL,
-               .last_block_in_bio = 0,
-               .get_block = get_block,
-               .use_writepage = 0,
-       };
-       int ret = __mpage_writepage(page, wbc, &mpd);
+       ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
        if (mpd.bio)
                mpage_bio_submit(mpd.bio);
+       blk_finish_plug(&plug);
        return ret;
  }
- EXPORT_SYMBOL(mpage_writepage);
+ EXPORT_SYMBOL(mpage_writepages);
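
With mpage_writepage() gone and the get_block == NULL fallback removed above, mpage_writepages() always goes direct-to-BIO through write_cache_pages(). The caller side stays a one-liner; a sketch with invented example_* names (mpage_writepages() and get_block_t are the real interfaces):

#include <linux/mpage.h>
#include <linux/writeback.h>

static int example_get_block(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh_result, int create);

static int example_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	/* get_block may no longer be NULL after this series */
	return mpage_writepages(mapping, wbc, example_get_block);
}
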
diff --combined fs/ntfs/aops.c
@@@ -342,7 -342,7 +342,7 @@@ handle_zblock
                for (i = 0; i < nr; i++) {
                        tbh = arr[i];
                        if (likely(!buffer_uptodate(tbh)))
 -                              submit_bh(REQ_OP_READ, 0, tbh);
 +                              submit_bh(REQ_OP_READ, tbh);
                        else
                                ntfs_end_buffer_async_read(tbh, 1);
                }
@@@ -859,7 -859,7 +859,7 @@@ lock_retry_remap
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
 -                      submit_bh(REQ_OP_WRITE, 0, bh);
 +                      submit_bh(REQ_OP_WRITE, bh);
                        need_end_writeback = false;
                }
                bh = next;
@@@ -1187,7 -1187,7 +1187,7 @@@ lock_retry_remap
                BUG_ON(!buffer_mapped(tbh));
                get_bh(tbh);
                tbh->b_end_io = end_buffer_write_sync;
 -              submit_bh(REQ_OP_WRITE, 0, tbh);
 +              submit_bh(REQ_OP_WRITE, tbh);
        }
        /* Synchronize the mft mirror now if not @sync. */
        if (is_mft && !sync)
@@@ -1659,7 -1659,7 +1659,7 @@@ const struct address_space_operations n
        .dirty_folio    = block_dirty_folio,
  #endif /* NTFS_RW */
        .bmap           = ntfs_bmap,
-       .migratepage    = buffer_migrate_page,
+       .migrate_folio  = buffer_migrate_folio,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
  };
@@@ -1673,7 -1673,7 +1673,7 @@@ const struct address_space_operations n
        .writepage      = ntfs_writepage,
        .dirty_folio    = block_dirty_folio,
  #endif /* NTFS_RW */
-       .migratepage    = buffer_migrate_page,
+       .migrate_folio  = buffer_migrate_folio,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
  };
@@@ -1688,7 -1688,7 +1688,7 @@@ const struct address_space_operations n
        .writepage      = ntfs_writepage,       /* Write dirty page to disk. */
        .dirty_folio    = filemap_dirty_folio,
  #endif /* NTFS_RW */
-       .migratepage    = buffer_migrate_page,
+       .migrate_folio  = buffer_migrate_folio,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
  };
diff --combined fs/ntfs/file.c
@@@ -219,11 -219,6 +219,6 @@@ do_non_resident_extend
                        err = PTR_ERR(page);
                        goto init_err_out;
                }
-               if (unlikely(PageError(page))) {
-                       put_page(page);
-                       err = -EIO;
-                       goto init_err_out;
-               }
                /*
                 * Update the initialized size in the ntfs inode.  This is
                 * enough to make ntfs_writepage() work.
@@@ -537,7 -532,7 +532,7 @@@ static inline int ntfs_submit_bh_for_re
        lock_buffer(bh);
        get_bh(bh);
        bh->b_end_io = end_buffer_read_sync;
 -      return submit_bh(REQ_OP_READ, 0, bh);
 +      return submit_bh(REQ_OP_READ, bh);
  }
  
  /**
diff --combined fs/ntfs3/inode.c
@@@ -629,7 -629,7 +629,7 @@@ static noinline int ntfs_get_block_vbo(
                        bh->b_size = block_size;
                        off = vbo & (PAGE_SIZE - 1);
                        set_bh_page(bh, page, off);
 -                      ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 +                      ll_rw_block(REQ_OP_READ, 1, &bh);
                        wait_on_buffer(bh);
                        if (!buffer_uptodate(bh)) {
                                err = -EIO;
@@@ -851,12 -851,10 +851,10 @@@ static int ntfs_writepage(struct page *
  static int ntfs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
  {
-       struct inode *inode = mapping->host;
-       struct ntfs_inode *ni = ntfs_i(inode);
        /* Redirect call to 'ntfs_writepage' for resident files. */
-       get_block_t *get_block = is_resident(ni) ? NULL : &ntfs_get_block;
-       return mpage_writepages(mapping, wbc, get_block);
+       if (is_resident(ntfs_i(mapping->host)))
+               return generic_writepages(mapping, wbc);
+       return mpage_writepages(mapping, wbc, ntfs_get_block);
  }
  
  static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn,
diff --combined fs/ntfs3/ntfs_fs.h
@@@ -617,7 -617,7 +617,7 @@@ int ntfs_write_bh(struct ntfs_sb_info *
                  struct ntfs_buffers *nb, int sync);
  int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
                   struct page **pages, u32 nr_pages, u64 vbo, u32 bytes,
 -                 u32 op);
 +                 enum req_op op);
  int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run);
  int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
                    u64 vbo, u64 *lbo, u64 *bytes);
@@@ -896,13 -896,8 +896,8 @@@ static inline struct page *ntfs_map_pag
  {
        struct page *page = read_mapping_page(mapping, index, NULL);
  
-       if (!IS_ERR(page)) {
+       if (!IS_ERR(page))
                kmap(page);
-               if (!PageError(page))
-                       return page;
-               ntfs_unmap_page(page);
-               return ERR_PTR(-EIO);
-       }
        return page;
  }
  
diff --combined fs/ocfs2/aops.c
@@@ -277,16 -277,14 +277,14 @@@ out
  
  static int ocfs2_read_folio(struct file *file, struct folio *folio)
  {
-       struct page *page = &folio->page;
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = folio->mapping->host;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
-       loff_t start = (loff_t)page->index << PAGE_SHIFT;
+       loff_t start = folio_pos(folio);
        int ret, unlock = 1;
  
-       trace_ocfs2_readpage((unsigned long long)oi->ip_blkno,
-                            (page ? page->index : 0));
+       trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, folio->index);
  
-       ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
+       ret = ocfs2_inode_lock_with_page(inode, NULL, 0, &folio->page);
        if (ret != 0) {
                if (ret == AOP_TRUNCATED_PAGE)
                        unlock = 0;
  
        if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
                /*
-                * Unlock the page and cycle ip_alloc_sem so that we don't
+                * Unlock the folio and cycle ip_alloc_sem so that we don't
                 * busyloop waiting for ip_alloc_sem to unlock
                 */
                ret = AOP_TRUNCATED_PAGE;
-               unlock_page(page);
+               folio_unlock(folio);
                unlock = 0;
                down_read(&oi->ip_alloc_sem);
                up_read(&oi->ip_alloc_sem);
         * block_read_full_folio->get_block freaks out if it is asked to read
         * beyond the end of a file, so we check here.  Callers
         * (generic_file_read, vm_ops->fault) are clever enough to check i_size
-        * and notice that the page they just read isn't needed.
+        * and notice that the folio they just read isn't needed.
         *
         * XXX sys_readahead() seems to get that wrong?
         */
        if (start >= i_size_read(inode)) {
-               zero_user(page, 0, PAGE_SIZE);
-               SetPageUptodate(page);
+               folio_zero_segment(folio, 0, folio_size(folio));
+               folio_mark_uptodate(folio);
                ret = 0;
                goto out_alloc;
        }
  
        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
-               ret = ocfs2_readpage_inline(inode, page);
+               ret = ocfs2_readpage_inline(inode, &folio->page);
        else
-               ret = block_read_full_folio(page_folio(page), ocfs2_get_block);
+               ret = block_read_full_folio(folio, ocfs2_get_block);
        unlock = 0;
  
  out_alloc:
@@@ -336,7 -334,7 +334,7 @@@ out_inode_unlock
        ocfs2_inode_unlock(inode, 0);
  out:
        if (unlock)
-               unlock_page(page);
+               folio_unlock(folio);
        return ret;
  }
  
@@@ -638,7 -636,7 +636,7 @@@ int ocfs2_map_page_blocks(struct page *
                           !buffer_new(bh) &&
                           ocfs2_should_read_blk(inode, page, block_start) &&
                           (block_start < from || block_end > to)) {
 -                      ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 +                      ll_rw_block(REQ_OP_READ, 1, &bh);
                        *wait_bh++=bh;
                }
  
@@@ -2464,7 -2462,7 +2462,7 @@@ const struct address_space_operations o
        .direct_IO              = ocfs2_direct_IO,
        .invalidate_folio       = block_invalidate_folio,
        .release_folio          = ocfs2_release_folio,
-       .migratepage            = buffer_migrate_page,
+       .migrate_folio          = buffer_migrate_folio,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
  };
diff --combined fs/remap_range.c
@@@ -71,8 -71,7 +71,8 @@@ static int generic_remap_checks(struct 
         * Otherwise, make sure the count is also block-aligned, having
         * already confirmed the starting offsets' block alignment.
         */
 -      if (pos_in + count == size_in) {
 +      if (pos_in + count == size_in &&
 +          (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
                bcount = ALIGN(size_in, bs) - pos_in;
        } else {
                if (!IS_ALIGNED(count, bs))
@@@ -149,16 -148,7 +149,7 @@@ static int generic_remap_check_len(stru
  /* Read a page's worth of file data into the page cache. */
  static struct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos)
  {
-       struct folio *folio;
-       folio = read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file);
-       if (IS_ERR(folio))
-               return folio;
-       if (!folio_test_uptodate(folio)) {
-               folio_put(folio);
-               return ERR_PTR(-EIO);
-       }
-       return folio;
+       return read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file);
  }
  
  /*
diff --combined fs/zonefs/super.c
@@@ -60,7 -60,8 +60,7 @@@ static void zonefs_account_active(struc
        }
  }
  
 -static inline int zonefs_zone_mgmt(struct inode *inode,
 -                                 enum req_opf op)
 +static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op)
  {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        int ret;
@@@ -270,7 -271,7 +270,7 @@@ static const struct address_space_opera
        .dirty_folio            = filemap_dirty_folio,
        .release_folio          = iomap_release_folio,
        .invalidate_folio       = iomap_invalidate_folio,
-       .migratepage            = iomap_migrate_page,
+       .migrate_folio          = filemap_migrate_folio,
        .is_partially_uptodate  = iomap_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
        .direct_IO              = noop_direct_IO,
@@@ -524,7 -525,7 +524,7 @@@ static int zonefs_file_truncate(struct 
  {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        loff_t old_isize;
 -      enum req_opf op;
 +      enum req_op op;
        int ret = 0;
  
        /*
@@@ -615,7 -616,7 +615,7 @@@ static int zonefs_inode_setattr(struct 
             !uid_eq(iattr->ia_uid, inode->i_uid)) ||
            ((iattr->ia_valid & ATTR_GID) &&
             !gid_eq(iattr->ia_gid, inode->i_gid))) {
 -              ret = dquot_transfer(inode, iattr);
 +              ret = dquot_transfer(mnt_userns, inode, iattr);
                if (ret)
                        return ret;
        }
@@@ -1393,7 -1394,7 +1393,7 @@@ static void zonefs_init_dir_inode(struc
  {
        struct super_block *sb = parent->i_sb;
  
 -      inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1;
 +      inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1;
        inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
        inode->i_op = &zonefs_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
@@@ -1539,7 -1540,7 +1539,7 @@@ static int zonefs_create_zgroup(struct 
        /*
         * The first zone contains the super block: skip it.
         */
 -      end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk);
 +      end = zd->zones + bdev_nr_zones(sb->s_bdev);
        for (zone = &zd->zones[1]; zone < end; zone = next) {
  
                next = zone + 1;
@@@ -1634,8 -1635,8 +1634,8 @@@ static int zonefs_get_zone_info(struct 
        struct block_device *bdev = zd->sb->s_bdev;
        int ret;
  
 -      zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk),
 -                           sizeof(struct blk_zone), GFP_KERNEL);
 +      zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone),
 +                           GFP_KERNEL);
        if (!zd->zones)
                return -ENOMEM;
  
                return ret;
        }
  
 -      if (ret != blkdev_nr_zones(bdev->bd_disk)) {
 +      if (ret != bdev_nr_zones(bdev)) {
                zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
 -                         ret, blkdev_nr_zones(bdev->bd_disk));
 +                         ret, bdev_nr_zones(bdev));
                return -EIO;
        }
  
@@@ -1815,7 -1816,8 +1815,7 @@@ static int zonefs_fill_super(struct sup
        if (ret)
                goto cleanup;
  
 -      zonefs_info(sb, "Mounting %u zones",
 -                  blkdev_nr_zones(sb->s_bdev->bd_disk));
 +      zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));
  
        if (!sbi->s_max_wro_seq_files &&
            !sbi->s_max_active_seq_files &&
        if (!inode)
                goto cleanup;
  
 -      inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk);
 +      inode->i_ino = bdev_nr_zones(sb->s_bdev);
        inode->i_mode = S_IFDIR | 0555;
        inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
        inode->i_op = &zonefs_dir_inode_operations;
@@@ -9,7 -9,6 +9,7 @@@
  #define _LINUX_BUFFER_HEAD_H
  
  #include <linux/types.h>
 +#include <linux/blk_types.h>
  #include <linux/fs.h>
  #include <linux/linkage.h>
  #include <linux/pagemap.h>
@@@ -202,11 -201,11 +202,11 @@@ struct buffer_head *alloc_buffer_head(g
  void free_buffer_head(struct buffer_head * bh);
  void unlock_buffer(struct buffer_head *bh);
  void __lock_buffer(struct buffer_head *bh);
 -void ll_rw_block(int, int, int, struct buffer_head * bh[]);
 +void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]);
  int sync_dirty_buffer(struct buffer_head *bh);
 -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags);
 -void write_dirty_buffer(struct buffer_head *bh, int op_flags);
 -int submit_bh(int, int, struct buffer_head *);
 +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
 +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
 +int submit_bh(blk_opf_t, struct buffer_head *);
  void write_boundary_block(struct block_device *bdev,
                        sector_t bblock, unsigned blocksize);
  int bh_uptodate_or_lock(struct buffer_head *bh);
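
The request operation and flags now travel together as a single blk_opf_t, so submit_bh() and ll_rw_block() lose their separate flags argument. A minimal sketch of a synchronous single-buffer read against the new signature (example_read_bh is hypothetical; it mirrors the ntfs_submit_bh_for_read() pattern earlier in this diff):

    static int example_read_bh(struct buffer_head *bh)
    {
            lock_buffer(bh);
            if (buffer_uptodate(bh)) {
                    unlock_buffer(bh);
                    return 0;
            }
            get_bh(bh);
            bh->b_end_io = end_buffer_read_sync;
            submit_bh(REQ_OP_READ, bh);     /* flags argument is gone */
            wait_on_buffer(bh);
            return buffer_uptodate(bh) ? 0 : -EIO;
    }
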
@@@ -259,14 -258,16 +259,16 @@@ static inline vm_fault_t block_page_mkw
  }
  sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
  int block_truncate_page(struct address_space *, loff_t, get_block_t *);
- int nobh_write_begin(struct address_space *, loff_t, unsigned len,
-                               struct page **, void **, get_block_t*);
- int nobh_write_end(struct file *, struct address_space *,
-                               loff_t, unsigned, unsigned,
-                               struct page *, void *);
- int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
- int nobh_writepage(struct page *page, get_block_t *get_block,
-                         struct writeback_control *wbc);
+ #ifdef CONFIG_MIGRATION
+ extern int buffer_migrate_folio(struct address_space *,
+               struct folio *dst, struct folio *src, enum migrate_mode);
+ extern int buffer_migrate_folio_norefs(struct address_space *,
+               struct folio *dst, struct folio *src, enum migrate_mode);
+ #else
+ #define buffer_migrate_folio NULL
+ #define buffer_migrate_folio_norefs NULL
+ #endif
  
  void buffer_init(void);
  
diff --combined include/linux/fs.h
@@@ -180,9 -180,6 +180,9 @@@ typedef int (dio_iodone_t)(struct kioc
  /* File supports async buffered reads */
  #define FMODE_BUF_RASYNC      ((__force fmode_t)0x40000000)
  
 +/* File supports async nowait buffered writes */
 +#define FMODE_BUF_WASYNC      ((__force fmode_t)0x80000000)
 +
  /*
   * Attribute flags.  These should be or-ed together to figure out what
   * has been changed!
  struct iattr {
        unsigned int    ia_valid;
        umode_t         ia_mode;
 -      kuid_t          ia_uid;
 -      kgid_t          ia_gid;
 +      /*
 +       * The two anonymous unions wrap structures with the same member.
 +       *
 +       * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
 +       * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id, which
 +       * are dedicated types requiring the filesystem to use the dedicated
 +       * helpers. Other filesystems can continue to use ia_{g,u}id until they
 +       *
 +       * They always contain the same value. In other words FS_ALLOW_IDMAP
 +       * pass down the same value on idmapped mounts as they would on regular
 +       * mounts.
 +       */
 +      union {
 +              kuid_t          ia_uid;
 +              vfsuid_t        ia_vfsuid;
 +      };
 +      union {
 +              kgid_t          ia_gid;
 +              vfsgid_t        ia_vfsgid;
 +      };
        loff_t          ia_size;
        struct timespec64 ia_atime;
        struct timespec64 ia_mtime;
@@@ -383,13 -362,11 +383,11 @@@ struct address_space_operations 
        void (*free_folio)(struct folio *folio);
        ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
        /*
-        * migrate the contents of a page to the specified target. If
+        * migrate the contents of a folio to the specified target. If
         * migrate_mode is MIGRATE_ASYNC, it must not block.
         */
-       int (*migratepage) (struct address_space *,
-                       struct page *, struct page *, enum migrate_mode);
-       bool (*isolate_page)(struct page *, isolate_mode_t);
-       void (*putback_page)(struct page *);
+       int (*migrate_folio)(struct address_space *, struct folio *dst,
+                       struct folio *src, enum migrate_mode);
        int (*launder_folio)(struct folio *);
        bool (*is_partially_uptodate) (struct folio *, size_t from,
                        size_t count);
@@@ -1621,68 -1598,13 +1619,68 @@@ static inline void i_gid_write(struct i
   * @mnt_userns: user namespace of the mount the inode was found from
   * @inode: inode to map
   *
 + * Note, this will eventually be removed completely in favor of the type-safe
 + * i_uid_into_vfsuid().
 + *
   * Return: the inode's i_uid mapped down according to @mnt_userns.
   * If the inode's i_uid has no mapping INVALID_UID is returned.
   */
  static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
                                    const struct inode *inode)
  {
 -      return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
 +      return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid));
 +}
 +
 +/**
 + * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns
 + * @mnt_userns: user namespace of the mount the inode was found from
 + * @inode: inode to map
 + *
 + * Return: the inode's i_uid mapped down according to @mnt_userns.
 + * If the inode's i_uid has no mapping INVALID_VFSUID is returned.
 + */
 +static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns,
 +                                       const struct inode *inode)
 +{
 +      return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid);
 +}
 +
 +/**
 + * i_uid_needs_update - check whether inode's i_uid needs to be updated
 + * @mnt_userns: user namespace of the mount the inode was found from
 + * @attr: the new attributes of @inode
 + * @inode: the inode to update
 + *
 + * Check whether the $inode's i_uid field needs to be updated taking idmapped
 + * mounts into account if the filesystem supports it.
 + *
 + * Return: true if @inode's i_uid field needs to be updated, false if not.
 + */
 +static inline bool i_uid_needs_update(struct user_namespace *mnt_userns,
 +                                    const struct iattr *attr,
 +                                    const struct inode *inode)
 +{
 +      return ((attr->ia_valid & ATTR_UID) &&
 +              !vfsuid_eq(attr->ia_vfsuid,
 +                         i_uid_into_vfsuid(mnt_userns, inode)));
 +}
 +
 +/**
 + * i_uid_update - update @inode's i_uid field
 + * @mnt_userns: user namespace of the mount the inode was found from
 + * @attr: the new attributes of @inode
 + * @inode: the inode to update
 + *
 + * Safely update @inode's i_uid field translating the vfsuid of any idmapped
 + * mount into the filesystem kuid.
 + */
 +static inline void i_uid_update(struct user_namespace *mnt_userns,
 +                              const struct iattr *attr,
 +                              struct inode *inode)
 +{
 +      if (attr->ia_valid & ATTR_UID)
 +              inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode),
 +                                         attr->ia_vfsuid);
  }
  
  /**
   * @mnt_userns: user namespace of the mount the inode was found from
   * @inode: inode to map
   *
 + * Note, this will eventually be removed completely in favor of the type-safe
 + * i_gid_into_vfsgid().
 + *
   * Return: the inode's i_gid mapped down according to @mnt_userns.
   * If the inode's i_gid has no mapping INVALID_GID is returned.
   */
  static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
                                    const struct inode *inode)
  {
 -      return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
 +      return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid));
 +}
 +
 +/**
 + * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns
 + * @mnt_userns: user namespace of the mount the inode was found from
 + * @inode: inode to map
 + *
 + * Return: the inode's i_gid mapped down according to @mnt_userns.
 + * If the inode's i_gid has no mapping INVALID_VFSGID is returned.
 + */
 +static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns,
 +                                       const struct inode *inode)
 +{
 +      return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid);
 +}
 +
 +/**
 + * i_gid_needs_update - check whether inode's i_gid needs to be updated
 + * @mnt_userns: user namespace of the mount the inode was found from
 + * @attr: the new attributes of @inode
 + * @inode: the inode to update
 + *
 + * Check whether the @inode's i_gid field needs to be updated taking idmapped
 + * mounts into account if the filesystem supports it.
 + *
 + * Return: true if @inode's i_gid field needs to be updated, false if not.
 + */
 +static inline bool i_gid_needs_update(struct user_namespace *mnt_userns,
 +                                    const struct iattr *attr,
 +                                    const struct inode *inode)
 +{
 +      return ((attr->ia_valid & ATTR_GID) &&
 +              !vfsgid_eq(attr->ia_vfsgid,
 +                         i_gid_into_vfsgid(mnt_userns, inode)));
 +}
 +
 +/**
 + * i_gid_update - update @inode's i_gid field
 + * @mnt_userns: user namespace of the mount the inode was found from
 + * @attr: the new attributes of @inode
 + * @inode: the inode to update
 + *
 + * Safely update @inode's i_gid field translating the vfsgid of any idmapped
 + * mount into the filesystem kgid.
 + */
 +static inline void i_gid_update(struct user_namespace *mnt_userns,
 +                              const struct iattr *attr,
 +                              struct inode *inode)
 +{
 +      if (attr->ia_valid & ATTR_GID)
 +              inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode),
 +                                         attr->ia_vfsgid);
  }
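
The i_{u,g}id_needs_update() and i_{u,g}id_update() helpers let a setattr path compare and assign ownership without open-coding the vfsuid/vfsgid translation. A minimal sketch of the intended calling convention (example_chown is hypothetical, not taken from any filesystem in this merge):

    static void example_chown(struct user_namespace *mnt_userns,
                              struct inode *inode, struct iattr *attr)
    {
            /* only touch i_uid/i_gid when the requested ids actually differ */
            if (i_uid_needs_update(mnt_userns, attr, inode) ||
                i_gid_needs_update(mnt_userns, attr, inode)) {
                    i_uid_update(mnt_userns, attr, inode);
                    i_gid_update(mnt_userns, attr, inode);
                    mark_inode_dirty(inode);
            }
    }
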
  
  /**
@@@ -2326,8 -2193,8 +2324,8 @@@ static inline bool sb_rdonly(const stru
  static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
                                   struct inode *inode)
  {
 -      return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
 -             !gid_valid(i_gid_into_mnt(mnt_userns, inode));
 +      return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
 +             !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode));
  }
  
  static inline int iocb_flags(struct file *file);
@@@ -2518,7 -2385,6 +2516,7 @@@ static inline void file_accessed(struc
  }
  
  extern int file_modified(struct file *file);
 +int kiocb_modified(struct kiocb *iocb);
  
  int sync_inode_metadata(struct inode *inode, int wait);
  
@@@ -3347,18 -3213,6 +3345,6 @@@ extern int generic_check_addressable(un
  
  extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
  
- #ifdef CONFIG_MIGRATION
- extern int buffer_migrate_page(struct address_space *,
-                               struct page *, struct page *,
-                               enum migrate_mode);
- extern int buffer_migrate_page_norefs(struct address_space *,
-                               struct page *, struct page *,
-                               enum migrate_mode);
- #else
- #define buffer_migrate_page NULL
- #define buffer_migrate_page_norefs NULL
- #endif
  int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
                unsigned int ia_valid);
  int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
diff --combined include/linux/netfs.h
@@@ -214,7 -214,7 +214,7 @@@ struct netfs_request_ops 
        void (*issue_read)(struct netfs_io_subrequest *subreq);
        bool (*is_still_valid)(struct netfs_io_request *rreq);
        int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
 -                               struct folio *folio, void **_fsdata);
 +                               struct folio **foliop, void **_fsdata);
        void (*done)(struct netfs_io_request *rreq);
  };
  
@@@ -276,19 -276,18 +276,18 @@@ struct netfs_cache_ops 
  };
  
  struct readahead_control;
- extern void netfs_readahead(struct readahead_control *);
+ void netfs_readahead(struct readahead_control *);
  int netfs_read_folio(struct file *, struct folio *);
- extern int netfs_write_begin(struct netfs_inode *,
-                            struct file *, struct address_space *,
-                            loff_t, unsigned int, struct folio **,
-                            void **);
- extern void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
- extern void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
-                                enum netfs_sreq_ref_trace what);
- extern void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
-                                bool was_async, enum netfs_sreq_ref_trace what);
- extern void netfs_stats_show(struct seq_file *);
+ int netfs_write_begin(struct netfs_inode *, struct file *,
+               struct address_space *, loff_t pos, unsigned int len,
+               struct folio **, void **fsdata);
+ void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
+ void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
+                         enum netfs_sreq_ref_trace what);
+ void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
+                         bool was_async, enum netfs_sreq_ref_trace what);
+ void netfs_stats_show(struct seq_file *);
  
  /**
   * netfs_inode - Get the netfs inode context from the inode
diff --combined mm/filemap.c
@@@ -929,26 -929,6 +929,6 @@@ error
  }
  ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
  
- /**
-  * add_to_page_cache_locked - add a locked page to the pagecache
-  * @page:     page to add
-  * @mapping:  the page's address_space
-  * @offset:   page index
-  * @gfp_mask: page allocation mode
-  *
-  * This function is used to add a page to the pagecache. It must be locked.
-  * This function does not add the page to the LRU.  The caller must do that.
-  *
-  * Return: %0 on success, negative error code otherwise.
-  */
- int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
-               pgoff_t offset, gfp_t gfp_mask)
- {
-       return __filemap_add_folio(mapping, page_folio(page), offset,
-                                         gfp_mask, NULL);
- }
- EXPORT_SYMBOL(add_to_page_cache_locked);
  int filemap_add_folio(struct address_space *mapping, struct folio *folio,
                                pgoff_t index, gfp_t gfp)
  {
@@@ -1988,10 -1968,6 +1968,10 @@@ no_page
                        gfp |= __GFP_WRITE;
                if (fgp_flags & FGP_NOFS)
                        gfp &= ~__GFP_FS;
 +              if (fgp_flags & FGP_NOWAIT) {
 +                      gfp &= ~GFP_KERNEL;
 +                      gfp |= GFP_NOWAIT | __GFP_NOWARN;
 +              }
  
                folio = filemap_alloc_folio(gfp, 0);
                if (!folio)
        return folio_batch_count(fbatch);
  }
  
- static inline
- bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
- {
-       if (!folio_test_large(folio) || folio_test_hugetlb(folio))
-               return false;
-       if (index >= max)
-               return false;
-       return index < folio->index + folio_nr_pages(folio) - 1;
- }
  /**
-  * find_get_pages_range - gang pagecache lookup
+  * filemap_get_folios - Get a batch of folios
   * @mapping:  The address_space to search
   * @start:    The starting page index
   * @end:      The final page index (inclusive)
-  * @nr_pages: The maximum number of pages
-  * @pages:    Where the resulting pages are placed
+  * @fbatch:   The batch to fill.
   *
-  * find_get_pages_range() will search for and return a group of up to @nr_pages
-  * pages in the mapping starting at index @start and up to index @end
-  * (inclusive).  The pages are placed at @pages.  find_get_pages_range() takes
-  * a reference against the returned pages.
+  * Search for and return a batch of folios in the mapping starting at
+  * index @start and up to index @end (inclusive).  The folios are returned
+  * in @fbatch with an elevated reference count.
   *
-  * The search returns a group of mapping-contiguous pages with ascending
-  * indexes.  There may be holes in the indices due to not-present pages.
-  * We also update @start to index the next page for the traversal.
+  * The first folio may start before @start; if it does, it will contain
+  * @start.  The final folio may extend beyond @end; if it does, it will
+  * contain @end.  The folios have ascending indices.  There may be gaps
+  * between the folios if there are indices which have no folio in the
+  * page cache.  If folios are added to or removed from the page cache
+  * while this is running, they may or may not be found by this call.
   *
-  * Return: the number of pages which were found. If this number is
-  * smaller than @nr_pages, the end of specified range has been
-  * reached.
+  * Return: The number of folios which were found.
+  * We also update @start to index the next folio for the traversal.
   */
- unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
-                             pgoff_t end, unsigned int nr_pages,
-                             struct page **pages)
+ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
+               pgoff_t end, struct folio_batch *fbatch)
  {
        XA_STATE(xas, &mapping->i_pages, *start);
        struct folio *folio;
-       unsigned ret = 0;
-       if (unlikely(!nr_pages))
-               return 0;
  
        rcu_read_lock();
-       while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
+       while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
                /* Skip over shadow, swap and DAX entries */
                if (xa_is_value(folio))
                        continue;
+               if (!folio_batch_add(fbatch, folio)) {
+                       unsigned long nr = folio_nr_pages(folio);
  
- again:
-               pages[ret] = folio_file_page(folio, xas.xa_index);
-               if (++ret == nr_pages) {
-                       *start = xas.xa_index + 1;
+                       if (folio_test_hugetlb(folio))
+                               nr = 1;
+                       *start = folio->index + nr;
                        goto out;
                }
-               if (folio_more_pages(folio, xas.xa_index, end)) {
-                       xas.xa_index++;
-                       folio_ref_inc(folio);
-                       goto again;
-               }
        }
  
        /*
  out:
        rcu_read_unlock();
  
-       return ret;
+       return folio_batch_count(fbatch);
+ }
+ EXPORT_SYMBOL(filemap_get_folios);
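
filemap_get_folios() replaces find_get_pages_range(): callers fill a folio_batch rather than a page array and no longer have to walk tail pages by hand. The shmem_unlock_mapping() conversion later in this diff is a real user; a minimal sketch of the calling convention (example_scan_mapping is hypothetical):

    static void example_scan_mapping(struct address_space *mapping)
    {
            struct folio_batch fbatch;
            pgoff_t index = 0;
            unsigned int i;

            folio_batch_init(&fbatch);
            while (filemap_get_folios(mapping, &index, ~0UL, &fbatch)) {
                    for (i = 0; i < folio_batch_count(&fbatch); i++) {
                            struct folio *folio = fbatch.folios[i];

                            /* the first folio may begin before the old index */
                            folio_mark_accessed(folio);
                    }
                    folio_batch_release(&fbatch);
                    cond_resched();
            }
    }
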
+ static inline
+ bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
+ {
+       if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+               return false;
+       if (index >= max)
+               return false;
+       return index < folio->index + folio_nr_pages(folio) - 1;
  }
  
  /**
@@@ -2413,7 -2381,7 +2385,7 @@@ retry
        rcu_read_unlock();
  }
  
- static int filemap_read_folio(struct file *file, struct address_space *mapping,
+ static int filemap_read_folio(struct file *file, filler_t filler,
                struct folio *folio)
  {
        int error;
         */
        folio_clear_error(folio);
        /* Start the actual read. The read will unlock the page. */
-       error = mapping->a_ops->read_folio(file, folio);
+       error = filler(file, folio);
        if (error)
                return error;
  
                return error;
        if (folio_test_uptodate(folio))
                return 0;
-       shrink_readahead_size_eio(&file->f_ra);
+       if (file)
+               shrink_readahead_size_eio(&file->f_ra);
        return -EIO;
  }
  
@@@ -2507,7 -2476,8 +2480,8 @@@ static int filemap_update_page(struct k
        if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
                goto unlock;
  
-       error = filemap_read_folio(iocb->ki_filp, mapping, folio);
+       error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio,
+                       folio);
        goto unlock_mapping;
  unlock:
        folio_unlock(folio);
@@@ -2550,7 -2520,7 +2524,7 @@@ static int filemap_create_folio(struct 
        if (error)
                goto error;
  
-       error = filemap_read_folio(file, mapping, folio);
+       error = filemap_read_folio(file, mapping->a_ops->read_folio, folio);
        if (error)
                goto error;
  
@@@ -3234,7 -3204,7 +3208,7 @@@ page_not_uptodate
         * and we need to check for errors.
         */
        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       error = filemap_read_folio(file, mapping, folio);
+       error = filemap_read_folio(file, mapping->a_ops->read_folio, folio);
        if (fpin)
                goto out_retry;
        folio_put(folio);
@@@ -3524,20 -3494,7 +3498,7 @@@ repeat
                        return ERR_PTR(err);
                }
  
- filler:
-               err = filler(file, folio);
-               if (err < 0) {
-                       folio_put(folio);
-                       return ERR_PTR(err);
-               }
-               folio_wait_locked(folio);
-               if (!folio_test_uptodate(folio)) {
-                       folio_put(folio);
-                       return ERR_PTR(-EIO);
-               }
-               goto out;
+               goto filler;
        }
        if (folio_test_uptodate(folio))
                goto out;
                goto out;
        }
  
-       /*
-        * A previous I/O error may have been due to temporary
-        * failures.
-        * Clear page error before actual read, PG_error will be
-        * set again if read page fails.
-        */
-       folio_clear_error(folio);
-       goto filler;
+ filler:
+       err = filemap_read_folio(file, filler, folio);
+       if (err) {
+               folio_put(folio);
+               if (err == AOP_TRUNCATED_PAGE)
+                       goto repeat;
+               return ERR_PTR(err);
+       }
  
  out:
        folio_mark_accessed(folio);
diff --combined mm/hugetlb.c
@@@ -4788,13 -4788,8 +4788,13 @@@ again
                         * sharing with another vma.
                         */
                        ;
 -              } else if (unlikely(is_hugetlb_entry_migration(entry) ||
 -                                  is_hugetlb_entry_hwpoisoned(entry))) {
 +              } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
 +                      bool uffd_wp = huge_pte_uffd_wp(entry);
 +
 +                      if (!userfaultfd_wp(dst_vma) && uffd_wp)
 +                              entry = huge_pte_clear_uffd_wp(entry);
 +                      set_huge_pte_at(dst, addr, dst_pte, entry);
 +              } else if (unlikely(is_hugetlb_entry_migration(entry))) {
                        swp_entry_t swp_entry = pte_to_swp_entry(entry);
                        bool uffd_wp = huge_pte_uffd_wp(entry);
  
@@@ -5419,19 -5414,25 +5419,25 @@@ static bool hugetlbfs_pagecache_present
  int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                           pgoff_t idx)
  {
+       struct folio *folio = page_folio(page);
        struct inode *inode = mapping->host;
        struct hstate *h = hstate_inode(inode);
-       int err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+       int err;
  
-       if (err)
+       __folio_set_locked(folio);
+       err = __filemap_add_folio(mapping, folio, idx, GFP_KERNEL, NULL);
+       if (unlikely(err)) {
+               __folio_clear_locked(folio);
                return err;
+       }
        ClearHPageRestoreReserve(page);
  
        /*
-        * set page dirty so that it will not be removed from cache/file
+        * mark folio dirty so that it will not be removed from cache/file
         * by non-hugetlbfs specific code paths.
         */
-       set_page_dirty(page);
+       folio_mark_dirty(folio);
  
        spin_lock(&inode->i_lock);
        inode->i_blocks += blocks_per_huge_page(h);
@@@ -5952,7 -5953,6 +5958,7 @@@ int hugetlb_mcopy_atomic_pte(struct mm_
  
                page = alloc_huge_page(dst_vma, dst_addr, 0);
                if (IS_ERR(page)) {
 +                      put_page(*pagep);
                        ret = -ENOMEM;
                        *pagep = NULL;
                        goto out;
diff --combined mm/memory-failure.c
@@@ -69,8 -69,6 +69,8 @@@ int sysctl_memory_failure_recovery __re
  
  atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
  
 +static bool hw_memory_failure __read_mostly = false;
 +
  static bool __page_handle_poison(struct page *page)
  {
        int ret;
@@@ -1770,9 -1768,6 +1770,9 @@@ int memory_failure(unsigned long pfn, i
  
        mutex_lock(&mf_mutex);
  
 +      if (!(flags & MF_SW_SIMULATED))
 +              hw_memory_failure = true;
 +
        p = pfn_to_online_page(pfn);
        if (!p) {
                res = arch_memory_failure(pfn, flags);
@@@ -1940,7 -1935,7 +1940,7 @@@ try_again
  
        /*
         * Now take care of user space mappings.
-        * Abort on fail: __delete_from_page_cache() assumes unmapped page.
+        * Abort on fail: __filemap_remove_folio() assumes unmapped page.
         */
        if (!hwpoison_user_mappings(p, pfn, flags, p)) {
                action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
@@@ -2108,13 -2103,6 +2108,13 @@@ int unpoison_memory(unsigned long pfn
  
        mutex_lock(&mf_mutex);
  
 +      if (hw_memory_failure) {
 +              unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n",
 +                               pfn, &unpoison_rs);
 +              ret = -EOPNOTSUPP;
 +              goto unlock_mutex;
 +      }
 +
        if (!PageHWPoison(p)) {
                unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
                                 pfn, &unpoison_rs);
diff --combined mm/secretmem.c
@@@ -55,28 -55,22 +55,28 @@@ static vm_fault_t secretmem_fault(struc
        gfp_t gfp = vmf->gfp_mask;
        unsigned long addr;
        struct page *page;
 +      vm_fault_t ret;
        int err;
  
        if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
                return vmf_error(-EINVAL);
  
 +      filemap_invalidate_lock_shared(mapping);
 +
  retry:
        page = find_lock_page(mapping, offset);
        if (!page) {
                page = alloc_page(gfp | __GFP_ZERO);
 -              if (!page)
 -                      return VM_FAULT_OOM;
 +              if (!page) {
 +                      ret = VM_FAULT_OOM;
 +                      goto out;
 +              }
  
                err = set_direct_map_invalid_noflush(page);
                if (err) {
                        put_page(page);
 -                      return vmf_error(err);
 +                      ret = vmf_error(err);
 +                      goto out;
                }
  
                __SetPageUptodate(page);
@@@ -92,8 -86,7 +92,8 @@@
                        if (err == -EEXIST)
                                goto retry;
  
 -                      return vmf_error(err);
 +                      ret = vmf_error(err);
 +                      goto out;
                }
  
                addr = (unsigned long)page_address(page);
        }
  
        vmf->page = page;
 -      return VM_FAULT_LOCKED;
 +      ret = VM_FAULT_LOCKED;
 +
 +out:
 +      filemap_invalidate_unlock_shared(mapping);
 +      return ret;
  }
  
  static const struct vm_operations_struct secretmem_vm_ops = {
@@@ -144,14 -133,8 +144,8 @@@ static const struct file_operations sec
        .mmap           = secretmem_mmap,
  };
  
- static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode)
- {
-       return false;
- }
- static int secretmem_migratepage(struct address_space *mapping,
-                                struct page *newpage, struct page *page,
-                                enum migrate_mode mode)
+ static int secretmem_migrate_folio(struct address_space *mapping,
+               struct folio *dst, struct folio *src, enum migrate_mode mode)
  {
        return -EBUSY;
  }
@@@ -165,28 -148,19 +159,27 @@@ static void secretmem_free_folio(struc
  const struct address_space_operations secretmem_aops = {
        .dirty_folio    = noop_dirty_folio,
        .free_folio     = secretmem_free_folio,
-       .migratepage    = secretmem_migratepage,
-       .isolate_page   = secretmem_isolate_page,
+       .migrate_folio  = secretmem_migrate_folio,
  };
  
  static int secretmem_setattr(struct user_namespace *mnt_userns,
                             struct dentry *dentry, struct iattr *iattr)
  {
        struct inode *inode = d_inode(dentry);
 +      struct address_space *mapping = inode->i_mapping;
        unsigned int ia_valid = iattr->ia_valid;
 +      int ret;
 +
 +      filemap_invalidate_lock(mapping);
  
        if ((ia_valid & ATTR_SIZE) && inode->i_size)
 -              return -EINVAL;
 +              ret = -EINVAL;
 +      else
 +              ret = simple_setattr(mnt_userns, dentry, iattr);
  
 -      return simple_setattr(mnt_userns, dentry, iattr);
 +      filemap_invalidate_unlock(mapping);
 +
 +      return ret;
  }
  
  static const struct inode_operations secretmem_iops = {
@@@ -199,20 -173,11 +192,20 @@@ static struct file *secretmem_file_crea
  {
        struct file *file = ERR_PTR(-ENOMEM);
        struct inode *inode;
 +      const char *anon_name = "[secretmem]";
 +      const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
 +      int err;
  
        inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
  
 +      err = security_inode_init_security_anon(inode, &qname, NULL);
 +      if (err) {
 +              file = ERR_PTR(err);
 +              goto err_free_inode;
 +      }
 +
        file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
                                 O_RDWR, &secretmem_fops);
        if (IS_ERR(file))
diff --combined mm/shmem.c
@@@ -392,7 -392,7 +392,7 @@@ void shmem_uncharge(struct inode *inode
        struct shmem_inode_info *info = SHMEM_I(inode);
        unsigned long flags;
  
-       /* nrpages adjustment done by __delete_from_page_cache() or caller */
+       /* nrpages adjustment done by __filemap_remove_folio() or caller */
  
        spin_lock_irqsave(&info->lock, flags);
        info->alloced -= pages;
@@@ -693,7 -693,7 +693,7 @@@ static unsigned long shmem_unused_huge_
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  
  /*
-  * Like add_to_page_cache_locked, but error if expected item has gone.
+  * Like filemap_add_folio, but error if expected item has gone.
   */
  static int shmem_add_to_page_cache(struct folio *folio,
                                   struct address_space *mapping,
@@@ -867,18 -867,17 +867,17 @@@ unsigned long shmem_swap_usage(struct v
   */
  void shmem_unlock_mapping(struct address_space *mapping)
  {
-       struct pagevec pvec;
+       struct folio_batch fbatch;
        pgoff_t index = 0;
  
-       pagevec_init(&pvec);
+       folio_batch_init(&fbatch);
        /*
         * Minor point, but we might as well stop if someone else SHM_LOCKs it.
         */
-       while (!mapping_unevictable(mapping)) {
-               if (!pagevec_lookup(&pvec, mapping, &index))
-                       break;
-               check_move_unevictable_pages(&pvec);
-               pagevec_release(&pvec);
+       while (!mapping_unevictable(mapping) &&
+              filemap_get_folios(mapping, &index, ~0UL, &fbatch)) {
+               check_move_unevictable_folios(&fbatch);
+               folio_batch_release(&fbatch);
                cond_resched();
        }
  }
@@@ -3392,7 -3391,7 +3391,7 @@@ static int shmem_parse_one(struct fs_co
                break;
        case Opt_nr_blocks:
                ctx->blocks = memparse(param->string, &rest);
 -              if (*rest)
 +              if (*rest || ctx->blocks > S64_MAX)
                        goto bad_value;
                ctx->seen |= SHMEM_SEEN_BLOCKS;
                break;
@@@ -3514,7 -3513,10 +3513,7 @@@ static int shmem_reconfigure(struct fs_
  
        raw_spin_lock(&sbinfo->stat_lock);
        inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 -      if (ctx->blocks > S64_MAX) {
 -              err = "Number of blocks too large";
 -              goto out;
 -      }
 +
        if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
                if (!sbinfo->max_blocks) {
                        err = "Cannot retroactively limit size";
@@@ -3799,7 -3801,7 +3798,7 @@@ const struct address_space_operations s
        .write_end      = shmem_write_end,
  #endif
  #ifdef CONFIG_MIGRATION
-       .migratepage    = migrate_page,
+       .migrate_folio  = migrate_folio,
  #endif
        .error_remove_page = shmem_error_remove_page,
  };
diff --combined mm/swap.c
+++ b/mm/swap.c
@@@ -881,7 -881,7 +881,7 @@@ void lru_cache_disable(void
         * lru_disable_count = 0 will have exited the critical
         * section when synchronize_rcu() returns.
         */
 -      synchronize_rcu();
 +      synchronize_rcu_expedited();
  #ifdef CONFIG_SMP
        __lru_add_drain_all(true);
  #else
@@@ -1086,35 -1086,6 +1086,6 @@@ void folio_batch_remove_exceptionals(st
        fbatch->nr = j;
  }
  
- /**
-  * pagevec_lookup_range - gang pagecache lookup
-  * @pvec:     Where the resulting pages are placed
-  * @mapping:  The address_space to search
-  * @start:    The starting page index
-  * @end:      The final page index
-  *
-  * pagevec_lookup_range() will search for & return a group of up to PAGEVEC_SIZE
-  * pages in the mapping starting from index @start and upto index @end
-  * (inclusive).  The pages are placed in @pvec.  pagevec_lookup() takes a
-  * reference against the pages in @pvec.
-  *
-  * The search returns a group of mapping-contiguous pages with ascending
-  * indexes.  There may be holes in the indices due to not-present pages. We
-  * also update @start to index the next page for the traversal.
-  *
-  * pagevec_lookup_range() returns the number of pages which were found. If this
-  * number is smaller than PAGEVEC_SIZE, the end of specified range has been
-  * reached.
-  */
- unsigned pagevec_lookup_range(struct pagevec *pvec,
-               struct address_space *mapping, pgoff_t *start, pgoff_t end)
- {
-       pvec->nr = find_get_pages_range(mapping, start, end, PAGEVEC_SIZE,
-                                       pvec->pages);
-       return pagevec_count(pvec);
- }
- EXPORT_SYMBOL(pagevec_lookup_range);
  unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
                struct address_space *mapping, pgoff_t *index, pgoff_t end,
                xa_mark_t tag)