Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Oct 2022 00:53:04 +0000 (17:53 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Oct 2022 00:53:04 +0000 (17:53 -0700)
Pull MM updates from Andrew Morton:

 - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in
   linux-next for a couple of months without, to my knowledge, any
   negative reports (or any positive ones, come to that).

 - Also the Maple Tree from Liam Howlett. An overlapping range-based
   tree for vmas. It is apparently slightly more efficient in its own
   right, but is mainly targeted at enabling work to reduce mmap_lock
   contention.

   Liam has identified a number of other tree users in the kernel which
   could be beneficially converted to maple trees; a sketch of the API
   follows this list.

   Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat
   at [1]. This has yet to be addressed due to Liam's unfortunately
   timed vacation. He is now back and we'll get this fixed up.

 - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses
   clang-generated instrumentation to detect use-of-uninitialized-memory
   bugs down to the single-bit level (an illustrative example follows
   this list).

   KMSAN keeps finding bugs. New ones, as well as the legacy ones.

 - Yang Shi adds a userspace mechanism (madvise) to induce a collapse of
   memory into THPs; a userspace sketch follows this list.

 - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to
   support file/shmem-backed pages.

 - userfaultfd updates from Axel Rasmussen

 - zsmalloc cleanups from Alexey Romanov

 - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and
   memory-failure

 - Huang Ying adds enhancements to NUMA balancing memory tiering mode's
   page promotion, with a new way of detecting hot pages.

 - memcg updates from Shakeel Butt: charging optimizations and reduced
   memory consumption.

 - memcg cleanups from Kairui Song.

 - memcg fixes and cleanups from Johannes Weiner.

 - Vishal Moola provides more folio conversions

 - Zhang Yi removed ll_rw_block() :(

 - migration enhancements from Peter Xu

 - migration error-path bugfixes from Huang Ying

 - Aneesh Kumar added the ability for a device driver to alter the
   memory tiering promotion paths, for use by PMEM drivers, DRM
   drivers, etc.

 - vma merging improvements from Jakub Matěna.

 - NUMA hinting cleanups from David Hildenbrand.

 - xu xin added additional userspace visibility into KSM merging
   activity.

 - THP & KSM code consolidation from Qi Zheng.

 - more folio work from Matthew Wilcox.

 - KASAN updates from Andrey Konovalov.

 - DAMON cleanups from Kaixu Xia.

 - DAMON work from SeongJae Park: fixes and cleanups.

 - hugetlb sysfs cleanups from Muchun Song.

 - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core.

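A minimal sketch of the maple tree's external API (from
include/linux/maple_tree.h), storing one entry over an index range the
way the VMA code keys vmas by address range. The tree name, payload
and function here are hypothetical, invented for illustration:

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/maple_tree.h>

    static DEFINE_MTREE(example_mt);    /* empty tree, internal spinlock */
    static int payload;                 /* hypothetical stand-in entry */

    static int maple_tree_demo(void)
    {
            void *entry;
            int ret;

            /* Store one entry covering the index range [0x1000, 0x1fff]. */
            ret = mtree_store_range(&example_mt, 0x1000, 0x1fff,
                                    &payload, GFP_KERNEL);
            if (ret)
                    return ret;

            /* Any index inside the stored range returns that entry. */
            entry = mtree_load(&example_mt, 0x1800);

            /* Erase via any index in the range, then free tree nodes. */
            mtree_erase(&example_mt, 0x1000);
            mtree_destroy(&example_mt);
            return entry == &payload ? 0 : -ENOENT;
    }
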
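An example of the bug class KMSAN flags: a partially initialized
struct copied to userspace. The struct and helper are invented for
illustration; KMSAN's bit-granular shadow pinpoints the never-written
field when it crosses the kernel/user boundary:

    #include <linux/uaccess.h>

    struct demo_info {
            int id;
            int flags;      /* never written below */
    };

    static long demo_copy_out(void __user *uptr)
    {
            struct demo_info info;

            info.id = 42;
            /*
             * info.flags still holds uninitialized stack contents, so
             * this copy leaks it to userspace; KMSAN reports the use
             * of uninitialized memory at this call.
             */
            if (copy_to_user(uptr, &info, sizeof(info)))
                    return -EFAULT;
            return 0;
    }
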
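And a userspace sketch of the new madvise(MADV_COLLAPSE) interface on
anonymous memory. This assumes a 2 MiB PMD size and a libc whose
headers may not yet define the flag, hence the fallback to the uapi
value added by this series:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    #ifndef MADV_COLLAPSE
    #define MADV_COLLAPSE 25        /* uapi value from this series */
    #endif

    int main(void)
    {
            size_t pmd = 2UL << 20; /* assumed PMD size: 2 MiB */
            char *raw, *p;

            /* Over-allocate so one fully PMD-aligned extent exists. */
            raw = mmap(NULL, 2 * pmd, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (raw == MAP_FAILED)
                    return 1;
            p = (char *)(((uintptr_t)raw + pmd - 1) & ~(uintptr_t)(pmd - 1));

            memset(p, 1, pmd);      /* fault in base pages first */

            /* Synchronously collapse the range into a THP. */
            if (madvise(p, pmd, MADV_COLLAPSE))
                    perror("madvise(MADV_COLLAPSE)");
            return 0;
    }
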
Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com
* tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits)
  hugetlb: allocate vma lock for all sharable vmas
  hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer
  hugetlb: fix vma lock handling during split vma and range unmapping
  mglru: mm/vmscan.c: fix imprecise comments
  mm/mglru: don't sync disk for each aging cycle
  mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol
  mm: memcontrol: use do_memsw_account() in a few more places
  mm: memcontrol: deprecate swapaccounting=0 mode
  mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled
  mm/secretmem: remove reduntant return value
  mm/hugetlb: add available_huge_pages() func
  mm: remove unused inline functions from include/linux/mm_inline.h
  selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory
  selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd
  selftests/vm: add thp collapse shmem testing
  selftests/vm: add thp collapse file and tmpfs testing
  selftests/vm: modularize thp collapse memory operations
  selftests/vm: dedup THP helpers
  mm/khugepaged: add tracepoint to hpage_collapse_scan_file()
  mm/madvise: add file and shmem support to MADV_COLLAPSE
  ...

104 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/mm/damon/usage.rst
Documentation/core-api/index.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/arm/Kconfig
arch/arm/configs/imx_v6_v7_defconfig
arch/arm/configs/milbeaut_m10v_defconfig
arch/arm/configs/oxnas_v6_defconfig
arch/arm/configs/pxa_defconfig
arch/arm/configs/sama7_defconfig
arch/arm/configs/sp7021_defconfig
arch/arm64/Kconfig
arch/arm64/kernel/vdso.c
arch/loongarch/Kconfig
arch/m68k/Kconfig.cpu
arch/mips/Kconfig
arch/mips/configs/db1xxx_defconfig
arch/powerpc/Kconfig
arch/powerpc/configs/85xx/ge_imp3a_defconfig
arch/powerpc/configs/powernv_defconfig
arch/powerpc/configs/pseries_defconfig
arch/powerpc/kernel/vdso.c
arch/x86/Kconfig
arch/x86/entry/vdso/Makefile
arch/x86/include/asm/uaccess.h
arch/x86/kernel/Makefile
arch/x86/kernel/dumpstack.c
arch/x86/mm/fault.c
arch/x86/mm/init_64.c
block/bio.c
block/blk.h
crypto/Kconfig
drivers/base/node.c
drivers/block/zram/zram_drv.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/Makefile
drivers/net/Kconfig
drivers/virtio/virtio_ring.c
fs/btrfs/compression.c
fs/btrfs/extent_io.c
fs/btrfs/tests/extent-io-tests.c
fs/buffer.c
fs/coredump.c
fs/exec.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/namei.c
fs/ntfs3/inode.c
fs/posix_acl.c
fs/proc/base.c
fs/reiserfs/super.c
fs/userfaultfd.c
include/linux/buffer_head.h
include/linux/cgroup.h
include/linux/compiler-clang.h
include/linux/compiler_types.h
include/linux/fortify-string.h
include/linux/hugetlb.h
include/linux/memcontrol.h
include/linux/mmzone.h
include/linux/nodemask.h
include/linux/pagemap.h
include/linux/rmap.h
include/linux/sched.h
include/linux/slab.h
init/Kconfig
init/main.c
kernel/bpf/core.c
kernel/bpf/task_iter.c
kernel/cgroup/cgroup-internal.h
kernel/dma/mapping.c
kernel/events/core.c
kernel/exit.c
kernel/fork.c
kernel/sched/core.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/sched.h
kernel/sysctl.c
lib/Kconfig.debug
lib/Makefile
mm/Kconfig
mm/compaction.c
mm/damon/sysfs.c
mm/filemap.c
mm/kfence/core.c
mm/khugepaged.c
mm/memcontrol.c
mm/page_alloc.c
mm/pagewalk.c
mm/rmap.c
mm/slab.h
mm/slab_common.c
mm/slub.c
mm/swapfile.c
mm/util.c
mm/vmstat.c
scripts/Makefile.lib
security/Kconfig.hardening
tools/objtool/check.c
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/hmm-tests.c

Simple merge
diff --cc MAINTAINERS
Simple merge
diff --cc Makefile
Simple merge
diff --cc arch/Kconfig
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc block/bio.c
Simple merge
diff --cc block/blk.h
Simple merge
diff --cc crypto/Kconfig
@@@ -1391,28 -2158,6 +1391,30 @@@ endmen
  config CRYPTO_HASH_INFO
        bool
  
++if !KMSAN # avoid false positives from assembly
 +if ARM
 +source "arch/arm/crypto/Kconfig"
 +endif
 +if ARM64
 +source "arch/arm64/crypto/Kconfig"
 +endif
 +if MIPS
 +source "arch/mips/crypto/Kconfig"
 +endif
 +if PPC
 +source "arch/powerpc/crypto/Kconfig"
 +endif
 +if S390
 +source "arch/s390/crypto/Kconfig"
 +endif
 +if SPARC
 +source "arch/sparc/crypto/Kconfig"
 +endif
 +if X86
 +source "arch/x86/crypto/Kconfig"
 +endif
++endif
 +
  source "drivers/crypto/Kconfig"
  source "crypto/asymmetric_keys/Kconfig"
  source "certs/Kconfig"
Simple merge
Simple merge
@@@ -56,10 -55,9 +56,10 @@@ EXPORT_SYMBOL(efi)
  unsigned long __ro_after_init efi_rng_seed = EFI_INVALID_TABLE_ADDR;
  static unsigned long __initdata mem_reserve = EFI_INVALID_TABLE_ADDR;
  static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;
 +static unsigned long __initdata initrd = EFI_INVALID_TABLE_ADDR;
  
  struct mm_struct efi_mm = {
-       .mm_rb                  = RB_ROOT,
+       .mm_mt                  = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
        .mm_users               = ATOMIC_INIT(2),
        .mm_count               = ATOMIC_INIT(1),
        .write_protect_seq      = SEQCNT_ZERO(efi_mm.write_protect_seq),
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc fs/buffer.c
Simple merge
diff --cc fs/coredump.c
Simple merge
diff --cc fs/exec.c
+++ b/fs/exec.c
@@@ -1022,9 -1029,12 +1025,8 @@@ static int exec_mmap(struct mm_struct *
        activate_mm(active_mm, mm);
        if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
                local_irq_enable();
-       tsk->mm->vmacache_seqnum = 0;
-       vmacache_flush(tsk);
        task_unlock(tsk);
 -
 -      if (vfork)
 -              timens_on_fork(tsk->nsproxy, tsk);
 -
+       lru_gen_use_mm(mm);
        if (old_mm) {
                mmap_read_unlock(old_mm);
                BUG_ON(active_mm != old_mm);
Simple merge
Simple merge
diff --cc fs/namei.c
Simple merge
Simple merge
diff --cc fs/posix_acl.c
Simple merge
diff --cc fs/proc/base.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -325,11 -282,13 +325,13 @@@ __FORTIFY_INLINE void fortify_memset_ch
  })
  
  /*
 - * __builtin_object_size() must be captured here to avoid evaluating argument
 - * side-effects further into the macro layers.
 + * __struct_size() vs __member_size() must be captured here to avoid
 + * evaluating argument side-effects further into the macro layers.
   */
+ #ifndef CONFIG_KMSAN
  #define memset(p, c, s) __fortify_memset_chk(p, c, s,                 \
 -              __builtin_object_size(p, 0), __builtin_object_size(p, 1))
 +              __struct_size(p), __member_size(p))
+ #endif
  
  /*
   * To make sure the compiler can enforce protection against buffer overflows,
@@@ -16,8 -16,9 +16,9 @@@
  struct ctl_table;
  struct user_struct;
  struct mmu_gather;
+ struct node;
  
 -#ifndef is_hugepd
 +#ifndef CONFIG_ARCH_HAS_HUGEPD
  typedef struct { unsigned long pd; } hugepd_t;
  #define is_hugepd(hugepd) (0)
  #define __hugepd(x) ((hugepd_t) { (x) })
Simple merge
Simple merge
@@@ -504,11 -505,21 +505,20 @@@ static inline int num_node_state(enum n
  static inline int node_random(const nodemask_t *maskp)
  {
  #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1)
-       int w, bit = NUMA_NO_NODE;
+       int w, bit;
  
        w = nodes_weight(*maskp);
-       if (w)
+       switch (w) {
+       case 0:
+               bit = NUMA_NO_NODE;
+               break;
+       case 1:
+               bit = first_node(*maskp);
+               break;
+       default:
 -              bit = bitmap_ord_to_pos(maskp->bits,
 -                                      get_random_int() % w, MAX_NUMNODES);
 +              bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_int() % w);
+               break;
+       }
        return bit;
  #else
        return 0;
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc init/Kconfig
Simple merge
diff --cc init/main.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc kernel/exit.c
Simple merge
diff --cc kernel/fork.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc kernel/sysctl.c
Simple merge
Simple merge
diff --cc lib/Makefile
Simple merge
diff --cc mm/Kconfig
Simple merge
diff --cc mm/compaction.c
Simple merge
Simple merge
diff --cc mm/filemap.c
Simple merge
Simple merge
diff --cc mm/khugepaged.c
Simple merge
diff --cc mm/memcontrol.c
Simple merge
diff --cc mm/page_alloc.c
Simple merge
diff --cc mm/pagewalk.c
Simple merge
diff --cc mm/rmap.c
Simple merge
diff --cc mm/slab.h
Simple merge
@@@ -915,155 -886,6 +915,156 @@@ void __init create_kmalloc_caches(slab_
        /* Kmalloc array is now usable */
        slab_state = UP;
  }
 +
 +void free_large_kmalloc(struct folio *folio, void *object)
 +{
 +      unsigned int order = folio_order(folio);
 +
 +      if (WARN_ON_ONCE(order == 0))
 +              pr_warn_once("object pointer: 0x%p\n", object);
 +
 +      kmemleak_free(object);
 +      kasan_kfree_large(object);
++      kmsan_kfree_large(object);
 +
 +      mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
 +                            -(PAGE_SIZE << order));
 +      __free_pages(folio_page(folio, 0), order);
 +}
 +
 +static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
 +static __always_inline
 +void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 +{
 +      struct kmem_cache *s;
 +      void *ret;
 +
 +      if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 +              ret = __kmalloc_large_node(size, flags, node);
 +              trace_kmalloc(_RET_IP_, ret, size,
 +                            PAGE_SIZE << get_order(size), flags, node);
 +              return ret;
 +      }
 +
 +      s = kmalloc_slab(size, flags);
 +
 +      if (unlikely(ZERO_OR_NULL_PTR(s)))
 +              return s;
 +
 +      ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
 +      ret = kasan_kmalloc(s, ret, size, flags);
 +      trace_kmalloc(_RET_IP_, ret, size, s->size, flags, node);
 +      return ret;
 +}
 +
 +void *__kmalloc_node(size_t size, gfp_t flags, int node)
 +{
 +      return __do_kmalloc_node(size, flags, node, _RET_IP_);
 +}
 +EXPORT_SYMBOL(__kmalloc_node);
 +
 +void *__kmalloc(size_t size, gfp_t flags)
 +{
 +      return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
 +}
 +EXPORT_SYMBOL(__kmalloc);
 +
 +void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
 +                                int node, unsigned long caller)
 +{
 +      return __do_kmalloc_node(size, flags, node, caller);
 +}
 +EXPORT_SYMBOL(__kmalloc_node_track_caller);
 +
 +/**
 + * kfree - free previously allocated memory
 + * @object: pointer returned by kmalloc.
 + *
 + * If @object is NULL, no operation is performed.
 + *
 + * Don't free memory not originally allocated by kmalloc()
 + * or you will run into trouble.
 + */
 +void kfree(const void *object)
 +{
 +      struct folio *folio;
 +      struct slab *slab;
 +      struct kmem_cache *s;
 +
 +      trace_kfree(_RET_IP_, object);
 +
 +      if (unlikely(ZERO_OR_NULL_PTR(object)))
 +              return;
 +
 +      folio = virt_to_folio(object);
 +      if (unlikely(!folio_test_slab(folio))) {
 +              free_large_kmalloc(folio, (void *)object);
 +              return;
 +      }
 +
 +      slab = folio_slab(folio);
 +      s = slab->slab_cache;
 +      __kmem_cache_free(s, (void *)object, _RET_IP_);
 +}
 +EXPORT_SYMBOL(kfree);
 +
 +/**
 + * __ksize -- Report full size of underlying allocation
 + * @objp: pointer to the object
 + *
 + * This should only be used internally to query the true size of allocations.
 + * It is not meant to be a way to discover the usable size of an allocation
 + * after the fact. Instead, use kmalloc_size_roundup(). Using memory beyond
 + * the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS,
 + * and/or FORTIFY_SOURCE.
 + *
 + * Return: size of the actual memory used by @objp in bytes
 + */
 +size_t __ksize(const void *object)
 +{
 +      struct folio *folio;
 +
 +      if (unlikely(object == ZERO_SIZE_PTR))
 +              return 0;
 +
 +      folio = virt_to_folio(object);
 +
 +      if (unlikely(!folio_test_slab(folio))) {
 +              if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE))
 +                      return 0;
 +              if (WARN_ON(object != folio_address(folio)))
 +                      return 0;
 +              return folio_size(folio);
 +      }
 +
 +      return slab_ksize(folio_slab(folio)->slab_cache);
 +}
 +
 +#ifdef CONFIG_TRACING
 +void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 +{
 +      void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
 +                                          size, _RET_IP_);
 +
 +      trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
 +
 +      ret = kasan_kmalloc(s, ret, size, gfpflags);
 +      return ret;
 +}
 +EXPORT_SYMBOL(kmalloc_trace);
 +
 +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
 +                       int node, size_t size)
 +{
 +      void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
 +
 +      trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
 +
 +      ret = kasan_kmalloc(s, ret, size, gfpflags);
 +      return ret;
 +}
 +EXPORT_SYMBOL(kmalloc_node_trace);
 +#endif /* !CONFIG_TRACING */
  #endif /* !CONFIG_SLOB */
  
  gfp_t kmalloc_fix_flags(gfp_t flags)
@@@ -1100,33 -920,22 +1101,34 @@@ static void *__kmalloc_large_node(size_
                mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
                                      PAGE_SIZE << order);
        }
 -      ret = kasan_kmalloc_large(ret, size, flags);
 -      /* As ret might get tagged, call kmemleak hook after KASAN. */
 -      kmemleak_alloc(ret, size, 1, flags);
 +
 +      ptr = kasan_kmalloc_large(ptr, size, flags);
 +      /* As ptr might get tagged, call kmemleak hook after KASAN. */
 +      kmemleak_alloc(ptr, size, 1, flags);
++      kmsan_kmalloc_large(ptr, size, flags);
 +
 +      return ptr;
 +}
 +
 +void *kmalloc_large(size_t size, gfp_t flags)
 +{
 +      void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
 +
 +      trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
 +                    flags, NUMA_NO_NODE);
        return ret;
  }
 -EXPORT_SYMBOL(kmalloc_order);
 +EXPORT_SYMBOL(kmalloc_large);
  
 -#ifdef CONFIG_TRACING
 -void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 +void *kmalloc_large_node(size_t size, gfp_t flags, int node)
  {
 -      void *ret = kmalloc_order(size, flags, order);
 -      trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 +      void *ret = __kmalloc_large_node(size, flags, node);
 +
 +      trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
 +                    flags, node);
        return ret;
  }
 -EXPORT_SYMBOL(kmalloc_order_trace);
 -#endif
 +EXPORT_SYMBOL(kmalloc_large_node);
  
  #ifdef CONFIG_SLAB_FREELIST_RANDOM
  /* Randomize a generic freelist */
diff --cc mm/slub.c
+++ b/mm/slub.c
@@@ -5864,12 -5953,10 +5903,13 @@@ static char *create_unique_id(struct km
                *p++ = 'A';
        if (p != name + 1)
                *p++ = '-';
 -      p += sprintf(p, "%07u", s->size);
 +      p += snprintf(p, ID_STR_LENGTH - (p - name), "%07u", s->size);
  
 -      BUG_ON(p > name + ID_STR_LENGTH - 1);
 +      if (WARN_ON(p > name + ID_STR_LENGTH - 1)) {
 +              kfree(name);
 +              return ERR_PTR(-EINVAL);
 +      }
+       kmsan_unpoison_memory(name, p - name);
        return name;
  }
  
diff --cc mm/swapfile.c
Simple merge
diff --cc mm/util.c
Simple merge
diff --cc mm/vmstat.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge