From: Linus Torvalds
Date: Tue, 11 Oct 2022 00:53:04 +0000 (-0700)
Subject: Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git...
X-Git-Tag: v6.1-rc5~217
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=27bc50fc90647bbf7b734c3fc306a5e61350da53;p=platform%2Fkernel%2Flinux-starfive.git

Merge tag 'mm-stable-2022-10-08' of git://git./linux/kernel/git/akpm/mm

Pull MM updates from Andrew Morton:

 - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in
   linux-next for a couple of months without, to my knowledge, any
   negative reports (or any positive ones, come to that).

 - Also the Maple Tree from Liam Howlett. An overlapping range-based
   tree for vmas. It is apparently slightly more efficient in its own
   right, but is mainly targeted at enabling work to reduce mmap_lock
   contention.

   Liam has identified a number of other tree users in the kernel which
   could be beneficially converted to maple trees.

   Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat
   at [1]. This has yet to be addressed due to Liam's unfortunately
   timed vacation. He is now back and we'll get this fixed up.

 - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses
   clang-generated instrumentation to detect use-of-uninitialized bugs
   down to the single-bit level.

   KMSAN keeps finding bugs. New ones, as well as the legacy ones.

 - Yang Shi adds a userspace mechanism (madvise) to induce a collapse of
   memory into THPs.

 - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to
   support file/shmem-backed pages (a brief usage sketch follows this
   list).

 - userfaultfd updates from Axel Rasmussen

 - zsmalloc cleanups from Alexey Romanov

 - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and
   memory-failure

 - Huang Ying adds enhancements to NUMA balancing memory tiering mode's
   page promotion, with a new way of detecting hot pages.

 - memcg updates from Shakeel Butt: charging optimizations and reduced
   memory consumption.

 - memcg cleanups from Kairui Song.

 - memcg fixes and cleanups from Johannes Weiner.

 - Vishal Moola provides more folio conversions

 - Zhang Yi removed ll_rw_block() :(

 - migration enhancements from Peter Xu

 - migration error-path bugfixes from Huang Ying

 - Aneesh Kumar added the ability for a device driver to alter the
   memory tiering promotion paths. For optimizations by PMEM drivers,
   DRM drivers, etc.

 - vma merging improvements from Jakub Matěn.

 - NUMA hinting cleanups from David Hildenbrand.

 - xu xin added additional userspace visibility into KSM merging
   activity.

 - THP & KSM code consolidation from Qi Zheng.

 - more folio work from Matthew Wilcox.

 - KASAN updates from Andrey Konovalov.

 - DAMON cleanups from Kaixu Xia.

 - DAMON work from SeongJae Park: fixes and cleanups.

 - hugetlb sysfs cleanups from Muchun Song.

 - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core.
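For the two MADV_COLLAPSE items above, here is a minimal userspace sketch
(not part of this merge; the 4 MiB mapping size is arbitrary and the fallback
MADV_COLLAPSE value of 25 is an assumption taken from the 6.1 UAPI headers)
of how a process can request a synchronous collapse of a range into THPs:

/* Hedged illustration only -- not code from this pull request. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25		/* assumed UAPI value when libc headers lack it */
#endif

int main(void)
{
	size_t len = 4UL << 20;		/* 4 MiB, i.e. two 2 MiB PMD-sized units */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(p, 0xa5, len);		/* fault the range in with base pages first */

	/* Best-effort, synchronous collapse of the range into huge pages. */
	if (madvise(p, len, MADV_COLLAPSE))
		perror("madvise(MADV_COLLAPSE)");	/* fails on pre-6.1 kernels */

	munmap(p, len);
	return 0;
}

With the file/shmem support added in this pull, the same call can also be
pointed at file- or shmem-backed mappings, subject to the usual THP
eligibility checks.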
Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1]

* tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits)
  hugetlb: allocate vma lock for all sharable vmas
  hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer
  hugetlb: fix vma lock handling during split vma and range unmapping
  mglru: mm/vmscan.c: fix imprecise comments
  mm/mglru: don't sync disk for each aging cycle
  mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol
  mm: memcontrol: use do_memsw_account() in a few more places
  mm: memcontrol: deprecate swapaccounting=0 mode
  mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled
  mm/secretmem: remove reduntant return value
  mm/hugetlb: add available_huge_pages() func
  mm: remove unused inline functions from include/linux/mm_inline.h
  selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory
  selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd
  selftests/vm: add thp collapse shmem testing
  selftests/vm: add thp collapse file and tmpfs testing
  selftests/vm: modularize thp collapse memory operations
  selftests/vm: dedup THP helpers
  mm/khugepaged: add tracepoint to hpage_collapse_scan_file()
  mm/madvise: add file and shmem support to MADV_COLLAPSE
  ...
---

27bc50fc90647bbf7b734c3fc306a5e61350da53
diff --cc crypto/Kconfig
index 2589ad5,182fb81..d779667
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@@ -1391,28 -2158,6 +1391,30 @@@ endmen
  config CRYPTO_HASH_INFO
  	bool
++if !KMSAN # avoid false positives from assembly
 +if ARM
 +source "arch/arm/crypto/Kconfig"
 +endif
 +if ARM64
 +source "arch/arm64/crypto/Kconfig"
 +endif
 +if MIPS
 +source "arch/mips/crypto/Kconfig"
 +endif
 +if PPC
 +source "arch/powerpc/crypto/Kconfig"
 +endif
 +if S390
 +source "arch/s390/crypto/Kconfig"
 +endif
 +if SPARC
 +source "arch/sparc/crypto/Kconfig"
 +endif
 +if X86
 +source "arch/x86/crypto/Kconfig"
 +endif
++endif
 +
  source "drivers/crypto/Kconfig"
  source "crypto/asymmetric_keys/Kconfig"
  source "certs/Kconfig"
diff --cc drivers/firmware/efi/efi.c
index 11857af,042a3ef..9624735
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@@ -56,10 -55,9 +56,10 @@@ EXPORT_SYMBOL(efi)
  unsigned long __ro_after_init efi_rng_seed = EFI_INVALID_TABLE_ADDR;
  static unsigned long __initdata mem_reserve = EFI_INVALID_TABLE_ADDR;
  static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;
 +static unsigned long __initdata initrd = EFI_INVALID_TABLE_ADDR;
  struct mm_struct efi_mm = {
 -	.mm_rb = RB_ROOT,
 +	.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
  	.mm_users = ATOMIC_INIT(2),
  	.mm_count = ATOMIC_INIT(1),
  	.write_protect_seq = SEQCNT_ZERO(efi_mm.write_protect_seq),
diff --cc fs/exec.c
index de084e4,afe55d0..349a5da
--- a/fs/exec.c
+++ b/fs/exec.c
@@@ -1022,9 -1029,12 +1025,8 @@@ static int exec_mmap(struct mm_struct *
  	activate_mm(active_mm, mm);
  	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
  		local_irq_enable();
 -	tsk->mm->vmacache_seqnum = 0;
 -	vmacache_flush(tsk);
  	task_unlock(tsk);
 +	lru_gen_use_mm(mm);
- 
- 	if (vfork)
- 		timens_on_fork(tsk->nsproxy, tsk);
- 
  	if (old_mm) {
  		mmap_read_unlock(old_mm);
  		BUG_ON(active_mm != old_mm);
diff --cc include/linux/fortify-string.h
index b62c90c,6c8a1a2..4029fe3
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@@ -325,11 -282,13 +325,13 @@@ __FORTIFY_INLINE void fortify_memset_ch
  })
  /*
- * __builtin_object_size() must be captured here to avoid evaluating argument
- * side-effects further into the macro layers.
+ * __struct_size() vs __member_size() must be captured here to avoid
+ * evaluating argument side-effects further into the macro layers.
  */
+ #ifndef CONFIG_KMSAN
  #define memset(p, c, s) __fortify_memset_chk(p, c, s,			\
- 		__builtin_object_size(p, 0), __builtin_object_size(p, 1))
+ 		__struct_size(p), __member_size(p))
+ #endif
  /*
   * To make sure the compiler can enforce protection against buffer overflows,
diff --cc include/linux/hugetlb.h
index 1ec1535,7b70aa93..95fda85
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@@ -16,8 -16,9 +16,9 @@@
  struct ctl_table;
  struct user_struct;
  struct mmu_gather;
+ struct node;
 -#ifndef is_hugepd
 +#ifndef CONFIG_ARCH_HAS_HUGEPD
  typedef struct { unsigned long pd; } hugepd_t;
  #define is_hugepd(hugepd) (0)
  #define __hugepd(x) ((hugepd_t) { (x) })
diff --cc include/linux/nodemask.h
index 0c45fb0,e66742d..378956c
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@@ -504,11 -505,21 +505,20 @@@ static inline int num_node_state(enum n
  static inline int node_random(const nodemask_t *maskp)
  {
  #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1)
- 	int w, bit = NUMA_NO_NODE;
+ 	int w, bit;
  	w = nodes_weight(*maskp);
- 	if (w)
+ 	switch (w) {
+ 	case 0:
+ 		bit = NUMA_NO_NODE;
+ 		break;
+ 	case 1:
+ 		bit = first_node(*maskp);
+ 		break;
+ 	default:
 -		bit = bitmap_ord_to_pos(maskp->bits,
 -				get_random_int() % w, MAX_NUMNODES);
 +		bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_int() % w);
+ 		break;
+ 	}
  	return bit;
  #else
  	return 0;
diff --cc mm/slab_common.c
index 9ad97ae,1799664..33b1886
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@@ -915,155 -886,6 +915,156 @@@ void __init create_kmalloc_caches(slab_
  	/* Kmalloc array is now usable */
  	slab_state = UP;
  }
 +
 +void free_large_kmalloc(struct folio *folio, void *object)
 +{
 +	unsigned int order = folio_order(folio);
 +
 +	if (WARN_ON_ONCE(order == 0))
 +		pr_warn_once("object pointer: 0x%p\n", object);
 +
 +	kmemleak_free(object);
 +	kasan_kfree_large(object);
++	kmsan_kfree_large(object);
 +
 +	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
 +			      -(PAGE_SIZE << order));
 +	__free_pages(folio_page(folio, 0), order);
 +}
 +
 +static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
 +static __always_inline
 +void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 +{
 +	struct kmem_cache *s;
 +	void *ret;
 +
 +	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 +		ret = __kmalloc_large_node(size, flags, node);
 +		trace_kmalloc(_RET_IP_, ret, size,
 +			      PAGE_SIZE << get_order(size), flags, node);
 +		return ret;
 +	}
 +
 +	s = kmalloc_slab(size, flags);
 +
 +	if (unlikely(ZERO_OR_NULL_PTR(s)))
 +		return s;
 +
 +	ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
 +	ret = kasan_kmalloc(s, ret, size, flags);
 +	trace_kmalloc(_RET_IP_, ret, size, s->size, flags, node);
 +	return ret;
 +}
 +
 +void *__kmalloc_node(size_t size, gfp_t flags, int node)
 +{
 +	return __do_kmalloc_node(size, flags, node, _RET_IP_);
 +}
 +EXPORT_SYMBOL(__kmalloc_node);
 +
 +void *__kmalloc(size_t size, gfp_t flags)
 +{
 +	return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
 +}
 +EXPORT_SYMBOL(__kmalloc);
 +
 +void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
 +				  int node, unsigned long caller)
 +{
 +	return __do_kmalloc_node(size, flags, node, caller);
 +}
 +EXPORT_SYMBOL(__kmalloc_node_track_caller);
 +
 +/**
 + * kfree - free previously allocated memory
 + * @object: pointer returned by kmalloc.
 + *
 + * If @object is NULL, no operation is performed.
 + *
 + * Don't free memory not originally allocated by kmalloc()
 + * or you will run into trouble.
 + */
 +void kfree(const void *object)
 +{
 +	struct folio *folio;
 +	struct slab *slab;
 +	struct kmem_cache *s;
 +
 +	trace_kfree(_RET_IP_, object);
 +
 +	if (unlikely(ZERO_OR_NULL_PTR(object)))
 +		return;
 +
 +	folio = virt_to_folio(object);
 +	if (unlikely(!folio_test_slab(folio))) {
 +		free_large_kmalloc(folio, (void *)object);
 +		return;
 +	}
 +
 +	slab = folio_slab(folio);
 +	s = slab->slab_cache;
 +	__kmem_cache_free(s, (void *)object, _RET_IP_);
 +}
 +EXPORT_SYMBOL(kfree);
 +
 +/**
 + * __ksize -- Report full size of underlying allocation
 + * @objp: pointer to the object
 + *
 + * This should only be used internally to query the true size of allocations.
 + * It is not meant to be a way to discover the usable size of an allocation
 + * after the fact. Instead, use kmalloc_size_roundup(). Using memory beyond
 + * the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS,
 + * and/or FORTIFY_SOURCE.
 + *
 + * Return: size of the actual memory used by @objp in bytes
 + */
 +size_t __ksize(const void *object)
 +{
 +	struct folio *folio;
 +
 +	if (unlikely(object == ZERO_SIZE_PTR))
 +		return 0;
 +
 +	folio = virt_to_folio(object);
 +
 +	if (unlikely(!folio_test_slab(folio))) {
 +		if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE))
 +			return 0;
 +		if (WARN_ON(object != folio_address(folio)))
 +			return 0;
 +		return folio_size(folio);
 +	}
 +
 +	return slab_ksize(folio_slab(folio)->slab_cache);
 +}
 +
 +#ifdef CONFIG_TRACING
 +void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 +{
 +	void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
 +					    size, _RET_IP_);
 +
 +	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
 +
 +	ret = kasan_kmalloc(s, ret, size, gfpflags);
 +	return ret;
 +}
 +EXPORT_SYMBOL(kmalloc_trace);
 +
 +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
 +			 int node, size_t size)
 +{
 +	void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
 +
 +	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
 +
 +	ret = kasan_kmalloc(s, ret, size, gfpflags);
 +	return ret;
 +}
 +EXPORT_SYMBOL(kmalloc_node_trace);
 +#endif /* !CONFIG_TRACING */
  #endif /* !CONFIG_SLOB */
  gfp_t kmalloc_fix_flags(gfp_t flags)
@@@ -1100,33 -920,22 +1101,34 @@@ static void *__kmalloc_large_node(size_
  		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
  				      PAGE_SIZE << order);
  	}
- 	ret = kasan_kmalloc_large(ret, size, flags);
- 	/* As ret might get tagged, call kmemleak hook after KASAN. */
- 	kmemleak_alloc(ret, size, 1, flags);
 +
 +	ptr = kasan_kmalloc_large(ptr, size, flags);
 +	/* As ptr might get tagged, call kmemleak hook after KASAN. */
 +	kmemleak_alloc(ptr, size, 1, flags);
++	kmsan_kmalloc_large(ptr, size, flags);
 +
 +	return ptr;
 +}
 +
 +void *kmalloc_large(size_t size, gfp_t flags)
 +{
 +	void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
 +
 +	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
 +		      flags, NUMA_NO_NODE);
  	return ret;
  }
 -EXPORT_SYMBOL(kmalloc_order);
 +EXPORT_SYMBOL(kmalloc_large);
 -#ifdef CONFIG_TRACING
 -void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 +void *kmalloc_large_node(size_t size, gfp_t flags, int node)
  {
 -	void *ret = kmalloc_order(size, flags, order);
 -	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 +	void *ret = __kmalloc_large_node(size, flags, node);
 +
 +	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
 +		      flags, node);
  	return ret;
  }
 -EXPORT_SYMBOL(kmalloc_order_trace);
 -#endif
 +EXPORT_SYMBOL(kmalloc_large_node);
  #ifdef CONFIG_SLAB_FREELIST_RANDOM
  /* Randomize a generic freelist */
diff --cc mm/slub.c
index 2a6b3f3,ce8310e..96dd392
--- a/mm/slub.c
+++ b/mm/slub.c
@@@ -5864,12 -5953,10 +5903,13 @@@ static char *create_unique_id(struct km
  		*p++ = 'A';
  	if (p != name + 1)
  		*p++ = '-';
- 	p += sprintf(p, "%07u", s->size);
+ 	p += snprintf(p, ID_STR_LENGTH - (p - name), "%07u", s->size);
- 	BUG_ON(p > name + ID_STR_LENGTH - 1);
+ 	if (WARN_ON(p > name + ID_STR_LENGTH - 1)) {
+ 		kfree(name);
+ 		return ERR_PTR(-EINVAL);
+ 	}
 +	kmsan_unpoison_memory(name, p - name);
  	return name;
  }
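
The mm/slub.c hunk above swaps an unbounded sprintf() and a BUG_ON() for a
length-checked snprintf() and a recoverable error return. A minimal,
self-contained userspace sketch of that bounded-append idiom (the buffer
length and field values below are invented for illustration, standing in for
ID_STR_LENGTH and s->size) might look like this:

/* Hedged illustration of the bounded-append idiom only -- not kernel code. */
#include <stdio.h>

#define ID_LEN 32			/* stand-in for ID_STR_LENGTH */

int main(void)
{
	char name[ID_LEN];
	char *p = name;
	unsigned int size = 192;	/* stand-in for a cache's object size */

	*p++ = 'd';			/* flag characters, as create_unique_id() emits */
	*p++ = 'a';
	*p++ = '-';

	/* Append at most the space that is left; snprintf() NUL-terminates. */
	p += snprintf(p, ID_LEN - (p - name), "%07u", size);

	/* snprintf() returns the length it wanted, so p past the end means truncation. */
	if (p > name + ID_LEN - 1) {
		fprintf(stderr, "id truncated\n");	/* the kernel now returns ERR_PTR(-EINVAL) */
		return 1;
	}

	printf("%s\n", name);		/* prints "da-0000192" */
	return 0;
}

Because snprintf() reports the length it would have written, advancing p by
its return value and comparing against the end of the buffer detects
truncation after the fact; that is what lets the kernel version warn and fail
with ERR_PTR(-EINVAL) instead of overrunning the buffer or panicking.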