Merge branch 'akpm' (patches from Andrew)
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 12 Feb 2022 16:57:37 +0000 (08:57 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 12 Feb 2022 16:57:37 +0000 (08:57 -0800)
Merge misc fixes from Andrew Morton:
 "5 patches.

  Subsystems affected by this patch series: binfmt, procfs, and mm
  (vmscan, memcg, and kfence)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  kfence: make test case compatible with run time set sample interval
  mm: memcg: synchronize objcg lists with a dedicated spinlock
  mm: vmscan: remove deadlock due to throttling failing to make progress
  fs/proc: task_mmu.c: don't read mapcount for migration entry
  fs/binfmt_elf: fix PT_LOAD p_align values for loaders

fs/binfmt_elf.c
fs/proc/task_mmu.c
include/linux/kfence.h
include/linux/memcontrol.h
mm/kfence/core.c
mm/kfence/kfence_test.c
mm/memcontrol.c
mm/vmscan.c

fs/binfmt_elf.c
index 605017e..9e11e6f 100644
@@ -1117,7 +1117,7 @@ out_free_interp:
                         * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
                         */
                        alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
-                       if (alignment > ELF_MIN_ALIGN) {
+                       if (interpreter || alignment > ELF_MIN_ALIGN) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
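
For context on the condition above, here is a minimal, illustrative userspace sketch (not part of the patch) that prints the PT_LOAD p_align values the kernel's maximum_alignment() helper inspects. It assumes a 64-bit native-endian ELF and trims error handling; on x86-64, a binary linked with a 2 MiB max page size typically reports 0x200000, which exceeds ELF_MIN_ALIGN (normally the base page size) and so trips the alignment check.

/* Dump the PT_LOAD p_align values of an ELF given on the command line. */
#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	Elf64_Ehdr eh;
	Elf64_Phdr ph;
	FILE *f;
	int i;

	if (argc < 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	if (fread(&eh, sizeof(eh), 1, f) != 1)
		return 1;
	if (fseek(f, eh.e_phoff, SEEK_SET))
		return 1;
	for (i = 0; i < eh.e_phnum; i++) {
		if (fread(&ph, sizeof(ph), 1, f) != 1)
			return 1;
		if (ph.p_type == PT_LOAD)
			printf("PT_LOAD p_align = 0x%lx\n",
			       (unsigned long)ph.p_align);
	}
	return 0;
}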
fs/proc/task_mmu.c
index 18f8c3a..6e97ed7 100644
@@ -440,7 +440,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 }
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-               bool compound, bool young, bool dirty, bool locked)
+               bool compound, bool young, bool dirty, bool locked,
+               bool migration)
 {
        int i, nr = compound ? compound_nr(page) : 1;
        unsigned long size = nr * PAGE_SIZE;
@@ -467,8 +468,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
         * page_count(page) == 1 guarantees the page is mapped exactly once.
         * If any subpage of the compound page mapped with PTE it would elevate
         * page_count().
+        *
+        * The page_mapcount() is called to get a snapshot of the mapcount.
+        * Without holding the page lock this snapshot can be slightly wrong as
+        * we cannot always read the mapcount atomically.  It is not safe to
+        * call page_mapcount() even with PTL held if the page is not mapped,
+        * especially for migration entries.  Treat regular migration entries
+        * as mapcount == 1.
         */
-       if (page_count(page) == 1) {
+       if ((page_count(page) == 1) || migration) {
                smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
                        locked, true);
                return;
@@ -517,6 +525,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
        struct vm_area_struct *vma = walk->vma;
        bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
+       bool migration = false;
 
        if (pte_present(*pte)) {
                page = vm_normal_page(vma, addr, *pte);
@@ -536,8 +545,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                        } else {
                                mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
                        }
-               } else if (is_pfn_swap_entry(swpent))
+               } else if (is_pfn_swap_entry(swpent)) {
+                       if (is_migration_entry(swpent))
+                               migration = true;
                        page = pfn_swap_entry_to_page(swpent);
+               }
        } else {
                smaps_pte_hole_lookup(addr, walk);
                return;
@@ -546,7 +558,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
        if (!page)
                return;
 
-       smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
+       smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
+                     locked, migration);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -557,6 +570,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        struct vm_area_struct *vma = walk->vma;
        bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
+       bool migration = false;
 
        if (pmd_present(*pmd)) {
                /* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -564,8 +578,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
-               if (is_migration_entry(entry))
+               if (is_migration_entry(entry)) {
+                       migration = true;
                        page = pfn_swap_entry_to_page(entry);
+               }
        }
        if (IS_ERR_OR_NULL(page))
                return;
@@ -577,7 +593,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                /* pass */;
        else
                mss->file_thp += HPAGE_PMD_SIZE;
-       smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
+
+       smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
+                     locked, migration);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -1378,6 +1396,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 {
        u64 frame = 0, flags = 0;
        struct page *page = NULL;
+       bool migration = false;
 
        if (pte_present(pte)) {
                if (pm->show_pfn)
@@ -1399,13 +1418,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
                        frame = swp_type(entry) |
                                (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
                flags |= PM_SWAP;
+               migration = is_migration_entry(entry);
                if (is_pfn_swap_entry(entry))
                        page = pfn_swap_entry_to_page(entry);
        }
 
        if (page && !PageAnon(page))
                flags |= PM_FILE;
-       if (page && page_mapcount(page) == 1)
+       if (page && !migration && page_mapcount(page) == 1)
                flags |= PM_MMAP_EXCLUSIVE;
        if (vma->vm_flags & VM_SOFTDIRTY)
                flags |= PM_SOFT_DIRTY;
@@ -1421,8 +1441,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
        spinlock_t *ptl;
        pte_t *pte, *orig_pte;
        int err = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       bool migration = false;
+
        ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
                u64 flags = 0, frame = 0;
@@ -1461,11 +1482,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                        if (pmd_swp_uffd_wp(pmd))
                                flags |= PM_UFFD_WP;
                        VM_BUG_ON(!is_pmd_migration_entry(pmd));
+                       migration = is_migration_entry(entry);
                        page = pfn_swap_entry_to_page(entry);
                }
 #endif
 
-               if (page && page_mapcount(page) == 1)
+               if (page && !migration && page_mapcount(page) == 1)
                        flags |= PM_MMAP_EXCLUSIVE;
 
                for (; addr != end; addr += PAGE_SIZE) {
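
To see the PM_MMAP_EXCLUSIVE effect from userspace, here is a small illustrative reader (not part of the patch) that queries /proc/self/pagemap for one of its own stack pages. It assumes the bit layout documented in Documentation/admin-guide/mm/pagemap.rst: bit 63 is "present" and bit 56 is "exclusively mapped", the flag this fix now withholds for pages under migration because their mapcount cannot be read reliably.

/* Print the present/exclusive pagemap flags for a touched stack page. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	volatile char probe = 1;	/* any touched address will do */
	off_t off = ((uintptr_t)&probe / psize) * sizeof(uint64_t);
	uint64_t ent;
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0 || pread(fd, &ent, sizeof(ent), off) != sizeof(ent))
		return 1;
	printf("present=%d exclusive=%d\n",
	       (int)((ent >> 63) & 1), (int)((ent >> 56) & 1));
	return 0;
}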
include/linux/kfence.h
index 4b5e367..f49e642 100644
@@ -17,6 +17,8 @@
 #include <linux/atomic.h>
 #include <linux/static_key.h>
 
+extern unsigned long kfence_sample_interval;
+
 /*
  * We allocate an even number of pages, as it simplifies calculations to map
  * address to metadata indices; effectively, the very first page serves as an
include/linux/memcontrol.h
index b72d751..0abbd68 100644
@@ -219,7 +219,7 @@ struct obj_cgroup {
        struct mem_cgroup *memcg;
        atomic_t nr_charged_bytes;
        union {
-               struct list_head list;
+               struct list_head list; /* protected by objcg_lock */
                struct rcu_head rcu;
        };
 };
@@ -315,7 +315,8 @@ struct mem_cgroup {
 #ifdef CONFIG_MEMCG_KMEM
        int kmemcg_id;
        struct obj_cgroup __rcu *objcg;
-       struct list_head objcg_list; /* list of inherited objcgs */
+       /* list of inherited objcgs, protected by objcg_lock */
+       struct list_head objcg_list;
 #endif
 
        MEMCG_PADDING(_pad2_);
mm/kfence/core.c
index 5ad40e3..13128fa 100644
@@ -47,7 +47,8 @@
 
 static bool kfence_enabled __read_mostly;
 
-static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */
 
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
mm/kfence/kfence_test.c
index a22b1af..50dbb81 100644
@@ -268,13 +268,13 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
         * 100x the sample interval should be more than enough to ensure we get
         * a KFENCE allocation eventually.
         */
-       timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+       timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
        /*
         * Especially for non-preemption kernels, ensure the allocation-gate
         * timer can catch up: after @resched_after, every failed allocation
         * attempt yields, to ensure the allocation-gate timer is scheduled.
         */
-       resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL);
+       resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval);
        do {
                if (test_cache)
                        alloc = kmem_cache_alloc(test_cache, gfp);
@@ -608,7 +608,7 @@ static void test_gfpzero(struct kunit *test)
        int i;
 
        /* Skip if we think it'd take too long. */
-       KFENCE_TEST_REQUIRES(test, CONFIG_KFENCE_SAMPLE_INTERVAL <= 100);
+       KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100);
 
        setup_test_cache(test, size, 0, NULL);
        buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
@@ -739,7 +739,7 @@ static void test_memcache_alloc_bulk(struct kunit *test)
         * 100x the sample interval should be more than enough to ensure we get
         * a KFENCE allocation eventually.
         */
-       timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+       timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
        do {
                void *objects[100];
                int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
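
The extern declaration and export above exist so that the test, when built as a module, sizes its waits from the runtime sample interval (settable via the kfence.sample_interval boot parameter) rather than the compile-time CONFIG_KFENCE_SAMPLE_INTERVAL. As a hedged sketch, an out-of-tree consumer of the newly exported symbol would mirror the heuristic in kfence_test.c:

/* Deadline by which a KFENCE allocation should have happened. */
#include <linux/jiffies.h>
#include <linux/kfence.h>

static unsigned long kfence_alloc_deadline(void)
{
	/* 100x the sample interval, per the comment in test_alloc() above. */
	return jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
}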
mm/memcontrol.c
index 09d342c..36e9f38 100644
@@ -254,7 +254,7 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-extern spinlock_t css_set_lock;
+static DEFINE_SPINLOCK(objcg_lock);
 
 bool mem_cgroup_kmem_disabled(void)
 {
@@ -298,9 +298,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
        if (nr_pages)
                obj_cgroup_uncharge_pages(objcg, nr_pages);
 
-       spin_lock_irqsave(&css_set_lock, flags);
+       spin_lock_irqsave(&objcg_lock, flags);
        list_del(&objcg->list);
-       spin_unlock_irqrestore(&css_set_lock, flags);
+       spin_unlock_irqrestore(&objcg_lock, flags);
 
        percpu_ref_exit(ref);
        kfree_rcu(objcg, rcu);
@@ -332,7 +332,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
 
        objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
 
-       spin_lock_irq(&css_set_lock);
+       spin_lock_irq(&objcg_lock);
 
        /* 1) Ready to reparent active objcg. */
        list_add(&objcg->list, &memcg->objcg_list);
@@ -342,7 +342,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
        /* 3) Move already reparented objcgs to the parent's list */
        list_splice(&memcg->objcg_list, &parent->objcg_list);
 
-       spin_unlock_irq(&css_set_lock);
+       spin_unlock_irq(&objcg_lock);
 
        percpu_ref_kill(&objcg->refcnt);
 }
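
The new objcg_lock decouples objcg list manipulation from css_set_lock, the cgroup-core lock the previous code piggybacked on. Any future walker of memcg->objcg_list would follow the same locking pattern; the helper below is purely hypothetical and not part of the patch:

/* Hypothetical: count a memcg's inherited objcgs under the dedicated lock. */
static unsigned int count_reparented_objcgs(struct mem_cgroup *memcg)
{
	struct obj_cgroup *objcg;
	unsigned int n = 0;

	spin_lock_irq(&objcg_lock);
	list_for_each_entry(objcg, &memcg->objcg_list, list)
		n++;
	spin_unlock_irq(&objcg_lock);

	return n;
}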
mm/vmscan.c
index 090bfb6..59b14e0 100644
@@ -1066,8 +1066,10 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
         * forward progress (e.g. journalling workqueues or kthreads).
         */
        if (!current_is_kswapd() &&
-           current->flags & (PF_IO_WORKER|PF_KTHREAD))
+           current->flags & (PF_IO_WORKER|PF_KTHREAD)) {
+               cond_resched();
                return;
+       }
 
        /*
         * These figures are pulled out of thin air.