Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 26 May 2022 19:32:41 +0000 (12:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 26 May 2022 19:32:41 +0000 (12:32 -0700)
Pull MM updates from Andrew Morton:
 "Almost all of MM here. A few things are still getting finished off,
  reviewed, etc.

   - Yang Shi has improved the behaviour of khugepaged collapsing of
     readonly file-backed transparent hugepages.

   - Johannes Weiner has arranged for zswap memory use to be tracked and
     managed on a per-cgroup basis.

   - Muchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for
     runtime enablement of the recent huge page vmemmap optimization
     feature.

   - Baolin Wang contributes a series to fix some issues around hugetlb
     pagetable invalidation.

   - Zhenwei Pi has fixed some interactions between hwpoisoned pages and
     virtualization.

   - Tong Tiangen has enabled the use of the presently x86-only
     page_table_check debugging feature on arm64 and riscv.

   - David Vernet has done some fixup work on the memcg selftests.

   - Peter Xu has taught userfaultfd to handle write protection faults
     against shmem- and hugetlbfs-backed files (see the sketch below).

   - More DAMON development from SeongJae Park - adding online tuning of
     the feature and support for monitoring of fixed virtual address
     ranges. Also easier discovery of which monitoring operations are
     available.

   - Nadav Amit has done some optimization of TLB flushing during
     mprotect().

   - Neil Brown continues to labor away at improving our swap-over-NFS
     support.

   - David Hildenbrand has some fixes to anon page COWing versus
     get_user_pages().

   - Peng Liu fixed some errors in the core hugetlb code.

   - Joao Martins has reduced the amount of memory consumed by
     device-dax's compound devmaps.

   - Some cleanups of the arch-specific pagemap code from Anshuman
     Khandual.

   - Muchun Song has found and fixed some errors in the TLB flushing of
     transparent hugepages.

   - Roman Gushchin has done more work on the memcg selftests.

  ... and, of course, many smaller fixes and cleanups. Notably, the
  customary million cleanup series from Miaohe Lin"
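
The userfaultfd write-protect support for shmem and hugetlbfs mentioned
above can be exercised roughly as follows. This is a minimal editorial
sketch, not code from the merge: error handling is omitted, and the
feature bit UFFD_FEATURE_WP_HUGETLBFS_SHMEM is the name this series is
understood to add -- check include/uapi/linux/userfaultfd.h in the tree.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            size_t len = 2UL << 20;
            /* shmem-backed mapping via memfd */
            int memfd = memfd_create("uffd-wp-demo", 0);
            ftruncate(memfd, len);
            char *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
                              MAP_SHARED, memfd, 0);

            int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
            struct uffdio_api api = {
                    .api = UFFD_API,
                    .features = UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
            };
            ioctl(uffd, UFFDIO_API, &api);

            /* register the range for write-protect mode */
            struct uffdio_register reg = {
                    .range = { .start = (unsigned long)area, .len = len },
                    .mode  = UFFDIO_REGISTER_MODE_WP,
            };
            ioctl(uffd, UFFDIO_REGISTER, &reg);

            /* arm write protection; later writes generate uffd-wp events */
            struct uffdio_writeprotect wp = {
                    .range = { .start = (unsigned long)area, .len = len },
                    .mode  = UFFDIO_WRITEPROTECT_MODE_WP,
            };
            ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);

            /* ... read fault events from uffd and resolve them ... */
            return 0;
    }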

* tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits)
  mm: kfence: use PAGE_ALIGNED helper
  selftests: vm: add the "settings" file with timeout variable
  selftests: vm: add "test_hmm.sh" to TEST_FILES
  selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests
  selftests: vm: add migration to the .gitignore
  selftests/vm/pkeys: fix typo in comment
  ksm: fix typo in comment
  selftests: vm: add process_mrelease tests
  Revert "mm/vmscan: never demote for memcg reclaim"
  mm/kfence: print disabling or re-enabling message
  include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace"
  include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion"
  mm: fix a potential infinite loop in start_isolate_page_range()
  MAINTAINERS: add Muchun as co-maintainer for HugeTLB
  zram: fix Kconfig dependency warning
  mm/shmem: fix shmem folio swapoff hang
  cgroup: fix an error handling path in alloc_pagecache_max_30M()
  mm: damon: use HPAGE_PMD_SIZE
  tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
  nodemask.h: fix compilation error with GCC12
  ...

55 files changed:
Documentation/admin-guide/cgroup-v2.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/filesystems/locking.rst
Documentation/filesystems/proc.rst
Documentation/filesystems/vfs.rst
Documentation/vm/index.rst
MAINTAINERS
arch/arm64/Kconfig
arch/arm64/include/asm/hugetlb.h
arch/arm64/include/asm/pgtable.h
arch/arm64/mm/hugetlbpage.c
arch/powerpc/Kconfig
arch/riscv/Kconfig
arch/x86/Kconfig
arch/x86/include/asm/pgtable_types.h
arch/x86/mm/Makefile
arch/x86/mm/init_64.c
drivers/block/loop.c
drivers/block/zram/zram_drv.c
drivers/gpu/drm/ttm/ttm_bo_vm.c
fs/cifs/file.c
fs/hugetlbfs/inode.c
fs/nfs/file.c
include/linux/fs.h
include/linux/highmem-internal.h
include/linux/mm.h
include/linux/nfs_fs.h
include/linux/page-flags.h
include/linux/printk.h
include/linux/slab.h
include/trace/events/btrfs.h
init/Kconfig
kernel/events/uprobes.c
kernel/fork.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/kfence/core.c
mm/kfence/kfence_test.c
mm/memory-failure.c
mm/memory.c
mm/migrate.c
mm/mremap.c
mm/page-writeback.c
mm/page_io.c
mm/page_owner.c
mm/shmem.c
mm/slab.c
mm/slab_common.c
mm/swapfile.c
mm/util.c
mm/vmscan.c
tools/testing/selftests/cgroup/cgroup_util.c
tools/testing/selftests/cgroup/cgroup_util.h
tools/testing/selftests/vm/Makefile

                        Documentation/admin-guide/mm/hugetlbpage.rst.
                        Format: size[KMG]
  
 +      hugetlb_cma=    [HW,CMA] The size of a CMA area used for allocation
 +                      of gigantic hugepages. Or using node format, the size
 +                      of a CMA area per node can be specified.
 +                      Format: nn[KMGTPE] or (node format)
 +                              <node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
 +
 +                      Reserve a CMA area of given size and allocate gigantic
 +                      hugepages using the CMA allocator. If enabled, the
 +                      boot-time allocation of gigantic hugepages is skipped.
 +
        hugetlb_free_vmemmap=
-                       [KNL] Reguires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+                       [KNL] Reguires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
                        enabled.
                        Allows heavy hugetlb users to free up some more
                        memory (7 * PAGE_SIZE for each 2MB hugetlb page).
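
An illustrative boot command line combining the options above (the sizes
are arbitrary, and the node syntax follows the Format line shown in the
hunk) might be:

    hugetlb_cma=0:2G,1:2G hugetlb_free_vmemmap=on

The hugetlb_optimize_vmemmap knob mentioned in the merge message is the
runtime counterpart for toggling the vmemmap optimization after boot.
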
@@@ -258,19 -258,20 +258,20 @@@ prototypes:
        int (*launder_folio)(struct folio *);
        bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
        int (*error_remove_page)(struct address_space *, struct page *);
-       int (*swap_activate)(struct file *);
+       int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
        int (*swap_deactivate)(struct file *);
+       int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
  
  locking rules:
 -      All except dirty_folio and freepage may block
 +      All except dirty_folio and free_folio may block
  
  ======================        ======================== =========      ===============
 -ops                   PageLocked(page)         i_rwsem        invalidate_lock
 +ops                   folio locked             i_rwsem        invalidate_lock
  ======================        ======================== =========      ===============
  writepage:            yes, unlocks (see below)
 -readpage:             yes, unlocks                            shared
 +read_folio:           yes, unlocks                            shared
  writepages:
 -dirty_folio           maybe
 +dirty_folio:          maybe
  readahead:            yes, unlocks                            shared
  write_begin:          locks the page           exclusive
  write_end:            yes, unlocks             exclusive
@@@ -287,15 -288,16 +288,16 @@@ is_partially_uptodate:  ye
  error_remove_page:    yes
  swap_activate:                no
  swap_deactivate:      no
+ swap_rw:              yes, unlocks
  ======================        ======================== =========      ===============
  
 -->write_begin(), ->write_end() and ->readpage() may be called from
 +->write_begin(), ->write_end() and ->read_folio() may be called from
  the request handler (/dev/loop).
  
 -->readpage() unlocks the page, either synchronously or via I/O
 +->read_folio() unlocks the folio, either synchronously or via I/O
  completion.
  
 -->readahead() unlocks the pages that I/O is attempted on like ->readpage().
 +->readahead() unlocks the folios that I/O is attempted on like ->read_folio().
  
  ->writepage() is used for two purposes: for "memory cleansing" and for
  "sync".  These are quite different operations and the behaviour may differ
Simple merge
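
The reworked swap hooks in the locking table above can be wired up along
these lines. This is a hypothetical sketch for an imaginary "myfs"; only
the prototypes and the locking note ("swap_rw: yes, unlocks") are taken
from the table.

    #include <linux/fs.h>
    #include <linux/swap.h>
    #include <linux/uio.h>

    /* Hypothetical filesystem adopting the new swap entry points. */
    static int myfs_swap_activate(struct swap_info_struct *sis,
                                  struct file *file, sector_t *span)
    {
            /* validate the swapfile, record its extents, set *span */
            return 0;
    }

    static int myfs_swap_deactivate(struct file *file)
    {
            /* undo whatever swap_activate set up */
            return 0;
    }

    static int myfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
    {
            /* called with the folio locked; unlocked on I/O completion */
            return 0;
    }

    static const struct address_space_operations myfs_aops = {
            /* ... read_folio, dirty_folio, writepages, ... */
            .swap_activate   = myfs_swap_activate,
            .swap_deactivate = myfs_swap_deactivate,
            .swap_rw         = myfs_swap_rw,
    };
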
@@@ -747,10 -747,11 +747,11 @@@ cache in your filesystem.  The followin
  
                bool (*is_partially_uptodate) (struct folio *, size_t from,
                                               size_t count);
 -              void (*is_dirty_writeback) (struct page *, bool *, bool *);
 +              void (*is_dirty_writeback)(struct folio *, bool *, bool *);
                int (*error_remove_page) (struct mapping *mapping, struct page *page);
-               int (*swap_activate)(struct file *);
+               int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
                int (*swap_deactivate)(struct file *);
+               int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
        };
  
  ``writepage``
Simple merge
diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -38,8 -38,8 +38,9 @@@ config RISC
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
        select ARCH_SUPPORTS_HUGETLBFS if MMU
+       select ARCH_SUPPORTS_PAGE_TABLE_CHECK
        select ARCH_USE_MEMTEST
 +      select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_GENERAL_HUGETLB
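
With ARCH_SUPPORTS_PAGE_TABLE_CHECK now selected here (and, per the merge
message, on arm64 as well), the page_table_check debugging feature can be
used on these architectures. As a reminder of the usual recipe -- the
option and parameter names below come from the feature's documentation,
not from this hunk -- it is built in and armed at boot roughly like this:

    CONFIG_PAGE_TABLE_CHECK=y
    # kernel command line:
    page_table_check=on
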
Simple merge
Simple merge
@@@ -20,7 -20,7 +20,7 @@@ CFLAGS_REMOVE_mem_encrypt_identity.o  = 
  endif
  
  obj-y                         :=  init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
-                                   pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o
 -                                  pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o pgprot.o
++                                  pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o pgprot.o
  
  obj-y                         += pat/
  
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc fs/cifs/file.c
Simple merge
Simple merge
diff --cc fs/nfs/file.c
@@@ -522,8 -537,7 +534,7 @@@ const struct address_space_operations n
        .write_begin = nfs_write_begin,
        .write_end = nfs_write_end,
        .invalidate_folio = nfs_invalidate_folio,
 -      .releasepage = nfs_release_page,
 +      .release_folio = nfs_release_folio,
-       .direct_IO = nfs_direct_IO,
  #ifdef CONFIG_MIGRATION
        .migratepage = nfs_migrate_page,
  #endif
Simple merge
@@@ -239,16 -234,23 +239,28 @@@ static inline void __kunmap_atomic(voi
  static inline unsigned int nr_free_highpages(void) { return 0; }
  static inline unsigned long totalhigh_pages(void) { return 0UL; }
  
 +static inline bool is_kmap_addr(const void *x)
 +{
 +      return false;
 +}
 +
  #endif /* CONFIG_HIGHMEM */
  
- /*
-  * Prevent people trying to call kunmap_atomic() as if it were kunmap()
-  * kunmap_atomic() should get the return value of kmap_atomic, not the page.
+ /**
+  * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic() - deprecated!
+  * @__addr:       Virtual address to be unmapped
+  *
+  * Unmaps an address previously mapped by kmap_atomic() and re-enables
+  * pagefaults. Depending on PREEMP_RT configuration, re-enables also
+  * migration and preemption. Users should not count on these side effects.
+  *
+  * Mappings should be unmapped in the reverse order that they were mapped.
+  * See kmap_local_page() for details on nesting.
+  *
+  * @__addr can be any address within the mapped page, so there is no need
+  * to subtract any offset that has been added. In contrast to kunmap(),
+  * this function takes the address returned from kmap_atomic(), not the
+  * page passed to it. The compiler will warn you if you pass the page.
   */
  #define kunmap_atomic(__addr)                                 \
  do {                                                          \
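
A minimal illustrative user of the pair documented above (new code would
normally prefer kmap_local_page()/kunmap_local()); per the comment, the
address rather than the page is passed back, and mappings are undone in
reverse order:

    #include <linux/highmem.h>
    #include <linux/string.h>

    /* Illustrative only: copy one page through atomic kmaps. */
    static void demo_copy_page(struct page *dst, struct page *src)
    {
            void *vdst = kmap_atomic(dst);
            void *vsrc = kmap_atomic(src);

            memcpy(vdst, vsrc, PAGE_SIZE);

            kunmap_atomic(vsrc);    /* mapped last, unmapped first */
            kunmap_atomic(vdst);
    }
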
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -217,9 -210,21 +217,21 @@@ void kmem_dump_obj(void *object)
  #endif
  
  /*
 - * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
 - * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
 - * aligned pointers.
+  * Arches can define this function if they want to decide the minimum slab
+  * alignment at runtime. The value returned by the function must be a power
+  * of two and >= ARCH_SLAB_MINALIGN.
+  */
+ #ifndef arch_slab_minalign
+ static inline unsigned int arch_slab_minalign(void)
+ {
+       return ARCH_SLAB_MINALIGN;
+ }
+ #endif
+ /*
 + * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
 + * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
 + * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
   */
  #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
  #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
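
Per the new comment, an architecture that wants a runtime-determined
minimum slab alignment supplies its own arch_slab_minalign(). A
hypothetical sketch (the helper and the value are made up; the contract
is a power of two that is >= ARCH_SLAB_MINALIGN):

    /* In a hypothetical arch header, e.g. arch/foo/include/asm/cache.h */
    #define arch_slab_minalign arch_slab_minalign
    static inline unsigned int arch_slab_minalign(void)
    {
            /* align more strictly only when the hardware needs it */
            return foo_needs_strict_alignment() ? 16 : ARCH_SLAB_MINALIGN;
    }
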
Simple merge
diff --cc init/Kconfig
Simple merge
Simple merge
diff --cc kernel/fork.c
Simple merge
diff --cc mm/filemap.c
Simple merge
diff --cc mm/gup.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc mm/memory.c
Simple merge
diff --cc mm/migrate.c
Simple merge
diff --cc mm/mremap.c
Simple merge
Simple merge
diff --cc mm/page_io.c
Simple merge
diff --cc mm/page_owner.c
Simple merge
diff --cc mm/shmem.c
Simple merge
diff --cc mm/slab.c
Simple merge
Simple merge
diff --cc mm/swapfile.c
Simple merge
diff --cc mm/util.c
Simple merge
diff --cc mm/vmscan.c
Simple merge
@@@ -180,28 -176,13 +176,25 @@@ long cg_read_lc(const char *cgroup, con
  int cg_write(const char *cgroup, const char *control, char *buf)
  {
        char path[PATH_MAX];
-       ssize_t len = strlen(buf);
+       ssize_t len = strlen(buf), ret;
  
        snprintf(path, sizeof(path), "%s/%s", cgroup, control);
-       if (write_text(path, buf, len) == len)
-               return 0;
-       return -1;
+       ret = write_text(path, buf, len);
+       return ret == len ? 0 : ret;
  }
  
 +int cg_write_numeric(const char *cgroup, const char *control, long value)
 +{
 +      char buf[64];
 +      int ret;
 +
 +      ret = sprintf(buf, "%lu", value);
 +      if (ret < 0)
 +              return ret;
 +
 +      return cg_write(cgroup, control, buf);
 +}
 +
  int cg_find_unified_root(char *root, size_t len)
  {
        char buf[10 * PAGE_SIZE];
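
A hypothetical use of the new helper in a test, for instance to set the
per-cgroup zswap limit mentioned in the merge message (the control file
name memory.zswap.max is assumed, not shown in this hunk); note that any
error from write_text() now propagates through cg_write() instead of a
bare -1:

    static int set_zswap_limit(const char *cgroup, long bytes)
    {
            return cg_write_numeric(cgroup, "memory.zswap.max", bytes);
    }
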
Simple merge