Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 23 Mar 2022 00:03:12 +0000 (17:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 23 Mar 2022 00:03:12 +0000 (17:03 -0700)
Pull folio updates from Matthew Wilcox:

 - Rewrite how munlock works to massively reduce the contention on
   i_mmap_rwsem (Hugh Dickins):

     https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/

 - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
   Hellwig):

     https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/

 - Convert GUP to use folios and make pincount available for order-1
   pages. (Matthew Wilcox)

 - Convert a few more truncation functions to use folios (Matthew
   Wilcox)

 - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
   Wilcox)

 - Convert rmap_walk to use folios (Matthew Wilcox); a short caller
   sketch follows this list

 - Convert most of shrink_page_list() to use a folio (Matthew Wilcox)

 - Add support for creating large folios in readahead (Matthew Wilcox)

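As a rough illustration of the converted rmap API (a sketch, not code taken
from these patches), a minimal rmap_walk() caller after this series looks
like the following; it is modeled on the __damon_pa_mkold() hunk further
down, and the names sketch_one()/sketch_walk() and the empty walk body are
hypothetical:

    /* A minimal sketch, assuming only the .rmap_one callback is needed. */
    #include <linux/rmap.h>

    static bool sketch_one(struct folio *folio, struct vm_area_struct *vma,
                    unsigned long addr, void *arg)
    {
            /* Walk every PTE/PMD that maps this folio within @vma. */
            DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

            while (page_vma_mapped_walk(&pvmw)) {
                    /* pvmw.pte or pvmw.pmd points at the mapping entry. */
            }
            return true;    /* true: keep walking the remaining VMAs */
    }

    static void sketch_walk(struct folio *folio)
    {
            struct rmap_walk_control rwc = {
                    .rmap_one = sketch_one,
            };

            rmap_walk(folio, &rwc);
    }
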
* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
  mm/damon: minor cleanup for damon_pa_young
  selftests/vm/transhuge-stress: Support file-backed PMD folios
  mm/filemap: Support VM_HUGEPAGE for file mappings
  mm/readahead: Switch to page_cache_ra_order
  mm/readahead: Align file mappings for non-DAX
  mm/readahead: Add large folio readahead
  mm: Support arbitrary THP sizes
  mm: Make large folios depend on THP
  mm: Fix READ_ONLY_THP warning
  mm/filemap: Allow large folios to be added to the page cache
  mm: Turn can_split_huge_page() into can_split_folio()
  mm/vmscan: Convert pageout() to take a folio
  mm/vmscan: Turn page_check_references() into folio_check_references()
  mm/vmscan: Account large folios correctly
  mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
  mm/vmscan: Free non-shmem folios without splitting them
  mm/rmap: Constify the rmap_walk_control argument
  mm/rmap: Convert rmap_walk() to take a folio
  mm: Turn page_anon_vma() into folio_anon_vma()
  mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
  ...

41 files changed:
arch/arm64/mm/mmu.c
arch/parisc/include/asm/pgtable.h
drivers/nvme/host/pci.c
drivers/nvme/target/io-cmd-bdev.c
fs/Kconfig
fs/nfsd/filecache.c
fs/nfsd/vfs.c
include/linux/fs.h
include/linux/hugetlb.h
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mm_types.h
include/linux/pagemap.h
include/linux/swap.h
mm/Kconfig
mm/damon/paddr.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/ksm.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/memremap.c
mm/migrate.c
mm/mlock.c
mm/mmap.c
mm/mmzone.c
mm/oom_kill.c
mm/page_alloc.c
mm/readahead.c
mm/rmap.c
mm/swap.c
mm/userfaultfd.c
mm/util.c
mm/vmscan.c
mm/workingset.c

Simple merge
Simple merge
Simple merge
Simple merge
diff --cc fs/Kconfig
Simple merge
Simple merge
diff --cc fs/nfsd/vfs.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc mm/Kconfig
Simple merge
diff --cc mm/damon/paddr.c
  #include <linux/swap.h>
  
  #include "../internal.h"
 -#include "prmtv-common.h"
 +#include "ops-common.h"
  
- static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
+ static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
                unsigned long addr, void *arg)
  {
-       struct page_vma_mapped_walk pvmw = {
-               .page = page,
-               .vma = vma,
-               .address = addr,
-       };
+       DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
  
        while (page_vma_mapped_walk(&pvmw)) {
                addr = pvmw.address;
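
For reference, page_vma_mapped_walk() is now keyed by a PFN range rather
than a struct page, so the DEFINE_FOLIO_VMA_WALK() initializer above
roughly amounts to the following (an approximation of the expansion,
using the field names introduced by the PFN conversion in this series):

    /* Approximate expansion of DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0). */
    struct page_vma_mapped_walk pvmw = {
            .pfn = folio_pfn(folio),
            .nr_pages = folio_nr_pages(folio),
            .pgoff = folio_pgoff(folio),
            .vma = vma,
            .address = addr,
            .flags = 0,
    };
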
diff --cc mm/filemap.c
Simple merge
diff --cc mm/gup.c
Simple merge
diff --cc mm/huge_memory.c
@@@ -3210,12 -3148,9 +3158,10 @@@ void remove_migration_pmd(struct page_v
        if (PageAnon(new))
                page_add_anon_rmap(new, vma, mmun_start, true);
        else
-               page_add_file_rmap(new, true);
+               page_add_file_rmap(new, vma, true);
        set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
-       if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
-               mlock_vma_page(new);
 +
 +      /* No need to invalidate - it was non-present before */
        update_mmu_cache_pmd(vma, address, pvmw->pmd);
  }
  #endif
diff --cc mm/hugetlb.c
Simple merge
diff --cc mm/internal.h
@@@ -155,16 -155,12 +155,18 @@@ extern unsigned long highest_memmap_pfn
  #define MAX_RECLAIM_RETRIES 16
  
  /*
 + * in mm/early_ioremap.c
 + */
 +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
 +                                      unsigned long size, pgprot_t prot);
 +
 +/*
   * in mm/vmscan.c:
   */
- extern int isolate_lru_page(struct page *page);
- extern void putback_lru_page(struct page *page);
+ int isolate_lru_page(struct page *page);
+ int folio_isolate_lru(struct folio *folio);
+ void putback_lru_page(struct page *page);
+ void folio_putback_lru(struct folio *folio);
  extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
  
  /*
@@@ -713,6 -752,11 +747,13 @@@ void vunmap_range_noflush(unsigned lon
  int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
                      unsigned long addr, int page_nid, int *flags);
  
+ void free_zone_device_page(struct page *page);
+ /*
+  * mm/gup.c
+  */
+ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
 +DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
 +
  #endif        /* __MM_INTERNAL_H */
diff --cc mm/ksm.c
Simple merge
diff --cc mm/madvise.c
Simple merge
diff --cc mm/memcontrol.c
@@@ -7111,10 -7174,9 +7106,10 @@@ void mem_cgroup_swapout(struct folio *f
         * important here to have the interrupts disabled because it is the
         * only synchronisation we have for updating the per-CPU variables.
         */
 -      VM_BUG_ON(!irqs_disabled());
 +      memcg_stats_lock();
        mem_cgroup_charge_statistics(memcg, -nr_entries);
-       memcg_check_events(memcg, page_to_nid(page));
 +      memcg_stats_unlock();
+       memcg_check_events(memcg, folio_nid(folio));
  
        css_put(&memcg->css);
  }
diff --cc mm/memory-failure.c
@@@ -1411,22 -1413,26 +1413,22 @@@ static bool hwpoison_user_mappings(stru
        if (kill)
                collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
  
 -      if (!PageHuge(hpage)) {
 -              try_to_unmap(folio, ttu);
 +      if (PageHuge(hpage) && !PageAnon(hpage)) {
 +              /*
 +               * For hugetlb pages in shared mappings, try_to_unmap
 +               * could potentially call huge_pmd_unshare.  Because of
 +               * this, take semaphore in write mode here and set
 +               * TTU_RMAP_LOCKED to indicate we have taken the lock
 +               * at this higher level.
 +               */
 +              mapping = hugetlb_page_mapping_lock_write(hpage);
 +              if (mapping) {
-                       try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
++                      try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 +                      i_mmap_unlock_write(mapping);
 +              } else
 +                      pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
        } else {
-               try_to_unmap(hpage, ttu);
 -              if (!PageAnon(hpage)) {
 -                      /*
 -                       * For hugetlb pages in shared mappings, try_to_unmap
 -                       * could potentially call huge_pmd_unshare.  Because of
 -                       * this, take semaphore in write mode here and set
 -                       * TTU_RMAP_LOCKED to indicate we have taken the lock
 -                       * at this higher level.
 -                       */
 -                      mapping = hugetlb_page_mapping_lock_write(hpage);
 -                      if (mapping) {
 -                              try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 -                              i_mmap_unlock_write(mapping);
 -                      } else
 -                              pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
 -              } else {
 -                      try_to_unmap(folio, ttu);
 -              }
++              try_to_unmap(folio, ttu);
        }
  
        unmap_success = !page_mapped(hpage);
diff --cc mm/memory.c
@@@ -1403,32 -1388,32 +1400,32 @@@ again
                entry = pte_to_swp_entry(ptent);
                if (is_device_private_entry(entry) ||
                    is_device_exclusive_entry(entry)) {
 -                      struct page *page = pfn_swap_entry_to_page(entry);
 -
 -                      if (unlikely(zap_skip_check_mapping(details, page)))
 +                      page = pfn_swap_entry_to_page(entry);
 +                      if (unlikely(!should_zap_page(details, page)))
                                continue;
 -                      pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
                        rss[mm_counter(page)]--;
                        if (is_device_private_entry(entry))
-                               page_remove_rmap(page, false);
+                               page_remove_rmap(page, vma, false);
                        put_page(page);
 -                      continue;
 -              }
 -
 -              /* If details->check_mapping, we leave swap entries. */
 -              if (unlikely(details))
 -                      continue;
 -
 -              if (!non_swap_entry(entry))
 +              } else if (!non_swap_entry(entry)) {
 +                      /* Genuine swap entry, hence a private anon page */
 +                      if (!should_zap_cows(details))
 +                              continue;
                        rss[MM_SWAPENTS]--;
 -              else if (is_migration_entry(entry)) {
 -                      struct page *page;
 -
 +                      if (unlikely(!free_swap_and_cache(entry)))
 +                              print_bad_pte(vma, addr, ptent, NULL);
 +              } else if (is_migration_entry(entry)) {
                        page = pfn_swap_entry_to_page(entry);
 +                      if (!should_zap_page(details, page))
 +                              continue;
                        rss[mm_counter(page)]--;
 +              } else if (is_hwpoison_entry(entry)) {
 +                      if (!should_zap_cows(details))
 +                              continue;
 +              } else {
 +                      /* We should have covered all the swap entry types */
 +                      WARN_ON_ONCE(1);
                }
 -              if (unlikely(!free_swap_and_cache(entry)))
 -                      print_bad_pte(vma, addr, ptent, NULL);
                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
        } while (pte++, addr += PAGE_SIZE, addr != end);
  
Simple merge
diff --cc mm/memremap.c
Simple merge
diff --cc mm/migrate.c
Simple merge
diff --cc mm/mlock.c
Simple merge
diff --cc mm/mmap.c
Simple merge
diff --cc mm/mmzone.c
Simple merge
diff --cc mm/oom_kill.c
Simple merge
diff --cc mm/page_alloc.c
Simple merge
diff --cc mm/readahead.c
Simple merge
diff --cc mm/rmap.c
Simple merge
diff --cc mm/swap.c
Simple merge
Simple merge
diff --cc mm/util.c
Simple merge
diff --cc mm/vmscan.c
@@@ -986,12 -985,23 +986,12 @@@ static inline int is_page_cache_freeabl
         * that isolated the page, the page cache and optional buffer
         * heads at page->private.
         */
-       int page_cache_pins = thp_nr_pages(page);
-       return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
+       return folio_ref_count(folio) - folio_test_private(folio) ==
+               1 + folio_nr_pages(folio);
  }
  
 -static int may_write_to_inode(struct inode *inode)
 -{
 -      if (current->flags & PF_SWAPWRITE)
 -              return 1;
 -      if (!inode_write_congested(inode))
 -              return 1;
 -      if (inode_to_bdi(inode) == current->backing_dev_info)
 -              return 1;
 -      return 0;
 -}
 -
  /*
-  * We detected a synchronous write error writing a page out.  Probably
+  * We detected a synchronous write error writing a folio out.  Probably
   * -ENOSPC.  We need to propagate that into the address_space for a subsequent
   * fsync(), msync() or close().
   *
@@@ -1191,8 -1201,10 +1191,8 @@@ static pageout_t pageout(struct folio *
        }
        if (mapping->a_ops->writepage == NULL)
                return PAGE_ACTIVATE;
 -      if (!may_write_to_inode(mapping->host))
 -              return PAGE_KEEP;
  
-       if (clear_page_dirty_for_io(page)) {
+       if (folio_clear_dirty_for_io(folio)) {
                int res;
                struct writeback_control wbc = {
                        .sync_mode = WB_SYNC_NONE,
@@@ -1384,9 -1402,9 +1390,9 @@@ static enum page_references folio_check
  
        if (referenced_ptes) {
                /*
-                * All mapped pages start out with page table
+                * All mapped folios start out with page table
                 * references from the instantiating fault, so we need
-                * to look twice if a mapped file/anon page is used more
 -               * to look twice if a mapped file folio is used more
++               * to look twice if a mapped file/anon folio is used more
                 * than once.
                 *
                 * Mark it and spare it for another trip around the
@@@ -1566,8 -1586,10 +1574,8 @@@ retry
                 * end of the LRU a second time.
                 */
                mapping = page_mapping(page);
 -              if (((dirty || writeback) && mapping &&
 -                   inode_write_congested(mapping->host)) ||
 -                  (writeback && PageReclaim(page)))
 +              if (writeback && PageReclaim(page))
-                       stat->nr_congested++;
+                       stat->nr_congested += nr_pages;
  
                /*
                 * If a page at the tail of the LRU is under writeback, there
diff --cc mm/workingset.c
Simple merge