Merge branch 'akpm' (patches from Andrew)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Nov 2021 21:08:17 +0000 (14:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Nov 2021 21:08:17 +0000 (14:08 -0700)
Merge misc updates from Andrew Morton:
 "257 patches.

  Subsystems affected by this patch series: scripts, ocfs2, vfs, and
  mm (slab-generic, slab, slub, kconfig, dax, kasan, debug, pagecache,
  gup, swap, memcg, pagemap, mprotect, mremap, iomap, tracing, vmalloc,
  pagealloc, memory-failure, hugetlb, userfaultfd, vmscan, tools,
  memblock, oom-kill, hugetlbfs, migration, thp, readahead, nommu, ksm,
  vmstat, madvise, memory-hotplug, rmap, zsmalloc, highmem, zram,
  cleanups, kfence, and damon)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (257 commits)
  mm/damon: remove return value from before_terminate callback
  mm/damon: fix a few spelling mistakes in comments and a pr_debug message
  mm/damon: simplify stop mechanism
  Docs/admin-guide/mm/pagemap: wordsmith page flags descriptions
  Docs/admin-guide/mm/damon/start: simplify the content
  Docs/admin-guide/mm/damon/start: fix a wrong link
  Docs/admin-guide/mm/damon/start: fix wrong example commands
  mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on
  mm/damon: remove unnecessary variable initialization
  Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM
  mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM)
  selftests/damon: support watermarks
  mm/damon/dbgfs: support watermarks
  mm/damon/schemes: activate schemes based on a watermarks mechanism
  tools/selftests/damon: update for regions prioritization of schemes
  mm/damon/dbgfs: support prioritization weights
  mm/damon/vaddr,paddr: support pageout prioritization
  mm/damon/schemes: prioritize regions within the quotas
  mm/damon/selftests: support schemes quotas
  mm/damon/dbgfs: support quotas of schemes
  ...

77 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/translations/zh_CN/core-api/memory-hotplug.rst
MAINTAINERS
Makefile
arch/arm64/Kconfig
arch/arm64/mm/mmu.c
arch/mips/loongson64/init.c
arch/powerpc/configs/skiroot_defconfig
arch/powerpc/include/asm/machdep.h
arch/powerpc/kernel/setup-common.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/svm.c
arch/s390/kernel/uv.c
arch/sh/boards/mach-ecovec24/setup.c
arch/sh/boards/mach-se/7724/setup.c
arch/x86/Kconfig
arch/x86/xen/mmu_pv.c
drivers/block/zram/zram_drv.c
drivers/hwmon/occ/p9_sbe.c
drivers/macintosh/smu.c
drivers/virtio/Kconfig
drivers/xen/swiotlb-xen.c
fs/open.c
include/linux/backing-dev.h
include/linux/compiler-gcc.h
include/linux/compiler_types.h
include/linux/fs.h
include/linux/gfp.h
include/linux/highmem.h
include/linux/kasan.h
include/linux/kernel.h
include/linux/memcontrol.h
include/linux/migrate.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/page-flags.h
include/linux/swap.h
include/trace/events/writeback.h
init/main.c
kernel/dma/swiotlb.c
kernel/locking/lockdep.c
kernel/printk/printk.c
kernel/sched/topology.c
kernel/workqueue.c
lib/Kconfig.debug
lib/bootconfig.c
lib/test_kasan.c
mm/backing-dev.c
mm/compaction.c
mm/debug.c
mm/filemap.c
mm/gup.c
mm/highmem.c
mm/hugetlb.c
mm/internal.h
mm/kasan/kasan.h
mm/khugepaged.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/mempolicy.c
mm/migrate.c
mm/nommu.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/readahead.c
mm/rmap.c
mm/shmem.c
mm/slub.c
mm/swap.c
mm/swapfile.c
mm/userfaultfd.c
mm/vmscan.c
net/ipv4/tcp.c
net/netfilter/ipvs/ip_vs_ctl.c
security/Kconfig

diff --cc MAINTAINERS
Simple merge
diff --cc Makefile
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc arch/powerpc/include/asm/machdep.h
@@@ -29,9 -29,10 +29,9 @@@ struct machdep_calls
        char            *name;
  #ifdef CONFIG_PPC64
  #ifdef CONFIG_PM
 -      void            (*iommu_save)(void);
        void            (*iommu_restore)(void);
  #endif
- #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+ #ifdef CONFIG_MEMORY_HOTPLUG
        unsigned long   (*memory_block_size)(void);
  #endif
  #endif /* CONFIG_PPC64 */
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc arch/x86/Kconfig
@@@ -61,9 -61,8 +61,9 @@@ config X8
        select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_32BIT_OFF_T                 if X86_32
        select ARCH_CLOCKSOURCE_INIT
 +      select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
        select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
-       select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM)
+       select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64
        select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
        select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE)
        select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
Simple merge
Simple merge
diff --cc drivers/hwmon/occ/p9_sbe.c
@@@ -3,13 -3,10 +3,14 @@@
  
  #include <linux/device.h>
  #include <linux/errno.h>
+ #include <linux/slab.h>
  #include <linux/fsi-occ.h>
 +#include <linux/mm.h>
  #include <linux/module.h>
 +#include <linux/mutex.h>
  #include <linux/platform_device.h>
 +#include <linux/string.h>
 +#include <linux/sysfs.h>
  
  #include "common.h"
  
Simple merge
Simple merge
Simple merge
diff --cc fs/open.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc include/linux/migrate.h
@@@ -57,10 -40,8 +40,12 @@@ extern int migrate_huge_page_move_mappi
                                  struct page *newpage, struct page *page);
  extern int migrate_page_move_mapping(struct address_space *mapping,
                struct page *newpage, struct page *page, int extra_count);
 +void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 +void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
 +int folio_migrate_mapping(struct address_space *mapping,
 +              struct folio *newfolio, struct folio *folio, int extra_count);
+ extern bool numa_demotion_enabled;
  #else
  
  static inline void putback_movable_pages(struct list_head *l) {}
diff --cc include/linux/mm.h
@@@ -902,8 -873,10 +868,10 @@@ void __put_page(struct page *page)
  void put_pages_list(struct list_head *pages);
  
  void split_page(struct page *page, unsigned int order);
 -void copy_huge_page(struct page *dst, struct page *src);
 +void folio_copy(struct folio *dst, struct folio *src);
  
+ unsigned long nr_free_buffer_pages(void);
  /*
   * Compound pages have a destructor function.  Provide a
   * prototype for that function and accessor functions.
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc init/main.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc lib/bootconfig.c
@@@ -42,50 -34,6 +42,50 @@@ static int xbc_err_pos __initdata
  static int open_brace[XBC_DEPTH_MAX] __initdata;
  static int brace_index __initdata;
  
-       memblock_free_ptr(addr, size);
 +#ifdef __KERNEL__
 +static inline void * __init xbc_alloc_mem(size_t size)
 +{
 +      return memblock_alloc(size, SMP_CACHE_BYTES);
 +}
 +
 +static inline void __init xbc_free_mem(void *addr, size_t size)
 +{
++      memblock_free(addr, size);
 +}
 +
 +#else /* !__KERNEL__ */
 +
 +static inline void *xbc_alloc_mem(size_t size)
 +{
 +      return malloc(size);
 +}
 +
 +static inline void xbc_free_mem(void *addr, size_t size)
 +{
 +      free(addr);
 +}
 +#endif
 +/**
 + * xbc_get_info() - Get the information of loaded boot config
 + * @node_size: A pointer to store the number of nodes.
 + * @data_size: A pointer to store the size of bootconfig data.
 + *
 + * Get the number of used nodes in @node_size if it is not NULL,
 + * and the size of bootconfig data in @data_size if it is not NULL.
 + * Return 0 if the boot config is initialized, or return -ENODEV.
 + */
 +int __init xbc_get_info(int *node_size, size_t *data_size)
 +{
 +      if (!xbc_data)
 +              return -ENODEV;
 +
 +      if (node_size)
 +              *node_size = xbc_node_num;
 +      if (data_size)
 +              *data_size = xbc_data_size;
 +      return 0;
 +}
 +
  static int __init xbc_parse_error(const char *msg, const char *p)
  {
        xbc_err_msg = msg;
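
The xbc_get_info() helper added above only reports sizes; a minimal caller sketch (hypothetical, not part of this series) could log the loaded bootconfig footprint after setup:

	/* Hypothetical example: report how much bootconfig data was loaded. */
	int nodes;
	size_t data;

	if (!xbc_get_info(&nodes, &data))
		pr_info("bootconfig: %d nodes, %zu bytes of data\n", nodes, data);
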
Simple merge
Simple merge
diff --cc mm/compaction.c
Simple merge
diff --cc mm/debug.c
Simple merge
diff --cc mm/filemap.c
@@@ -1591,10 -1611,11 +1598,11 @@@ void folio_end_writeback(struct folio *
                BUG();
  
        smp_mb__after_atomic();
 -      wake_up_page(page, PG_writeback);
 -      acct_reclaim_writeback(page);
 -      put_page(page);
 +      folio_wake(folio, PG_writeback);
++      acct_reclaim_writeback(folio);
 +      folio_put(folio);
  }
 -EXPORT_SYMBOL(end_page_writeback);
 +EXPORT_SYMBOL(folio_end_writeback);
  
  /*
   * After completing I/O on a page, call this routine to update the page
diff --cc mm/gup.c
Simple merge
diff --cc mm/highmem.c
Simple merge
diff --cc mm/hugetlb.c
Simple merge
diff --cc mm/internal.h
  
  void page_writeback_init(void);
  
 -void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
 +static inline void *folio_raw_mapping(struct folio *folio)
 +{
 +      unsigned long mapping = (unsigned long)folio->mapping;
 +
 +      return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
 +}
 +
++void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+                                               int nr_throttled);
 -static inline void acct_reclaim_writeback(struct page *page)
++static inline void acct_reclaim_writeback(struct folio *folio)
+ {
 -      pg_data_t *pgdat = page_pgdat(page);
++      pg_data_t *pgdat = folio_pgdat(folio);
+       int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);
+       if (nr_throttled)
 -              __acct_reclaim_writeback(pgdat, page, nr_throttled);
++              __acct_reclaim_writeback(pgdat, folio, nr_throttled);
+ }
+ static inline void wake_throttle_isolated(pg_data_t *pgdat)
+ {
+       wait_queue_head_t *wqh;
+       wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
+       if (waitqueue_active(wqh))
+               wake_up(wqh);
+ }
  vm_fault_t do_swap_page(struct vm_fault *vmf);
 +void folio_rotate_reclaimable(struct folio *folio);
 +bool __folio_end_writeback(struct folio *folio);
  
  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                unsigned long floor, unsigned long ceiling);
Simple merge
diff --cc mm/khugepaged.c
Simple merge
diff --cc mm/memcontrol.c
@@@ -2746,11 -2769,10 +2783,10 @@@ static inline void cancel_charge(struc
        if (do_memsw_account())
                page_counter_uncharge(&memcg->memsw, nr_pages);
  }
- #endif
  
 -static void commit_charge(struct page *page, struct mem_cgroup *memcg)
 +static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
  {
 -      VM_BUG_ON_PAGE(page_memcg(page), page);
 +      VM_BUG_ON_FOLIO(folio_memcg(folio), folio);
        /*
         * Any of the following ensures page's memcg stability:
         *
Simple merge
diff --cc mm/memory.c
Simple merge
diff --cc mm/mempolicy.c
@@@ -2196,16 -2196,88 +2196,98 @@@ struct page *alloc_pages(gfp_t gfp, uns
  }
  EXPORT_SYMBOL(alloc_pages);
  
 +struct folio *folio_alloc(gfp_t gfp, unsigned order)
 +{
 +      struct page *page = alloc_pages(gfp | __GFP_COMP, order);
 +
 +      if (page && order > 1)
 +              prep_transhuge_page(page);
 +      return (struct folio *)page;
 +}
 +EXPORT_SYMBOL(folio_alloc);
 +
+ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
+               struct mempolicy *pol, unsigned long nr_pages,
+               struct page **page_array)
+ {
+       int nodes;
+       unsigned long nr_pages_per_node;
+       int delta;
+       int i;
+       unsigned long nr_allocated;
+       unsigned long total_allocated = 0;
+       nodes = nodes_weight(pol->nodes);
+       nr_pages_per_node = nr_pages / nodes;
+       delta = nr_pages - nodes * nr_pages_per_node;
+       for (i = 0; i < nodes; i++) {
+               if (delta) {
+                       nr_allocated = __alloc_pages_bulk(gfp,
+                                       interleave_nodes(pol), NULL,
+                                       nr_pages_per_node + 1, NULL,
+                                       page_array);
+                       delta--;
+               } else {
+                       nr_allocated = __alloc_pages_bulk(gfp,
+                                       interleave_nodes(pol), NULL,
+                                       nr_pages_per_node, NULL, page_array);
+               }
+               page_array += nr_allocated;
+               total_allocated += nr_allocated;
+       }
+       return total_allocated;
+ }
+ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
+               struct mempolicy *pol, unsigned long nr_pages,
+               struct page **page_array)
+ {
+       gfp_t preferred_gfp;
+       unsigned long nr_allocated = 0;
+       preferred_gfp = gfp | __GFP_NOWARN;
+       preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+       nr_allocated  = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
+                                          nr_pages, NULL, page_array);
+       if (nr_allocated < nr_pages)
+               nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL,
+                               nr_pages - nr_allocated, NULL,
+                               page_array + nr_allocated);
+       return nr_allocated;
+ }
+ /* alloc pages bulk and mempolicy should be considered at the
+  * same time in some situation such as vmalloc.
+  *
+  * It can accelerate memory allocation especially interleaving
+  * allocate memory.
+  */
+ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+               unsigned long nr_pages, struct page **page_array)
+ {
+       struct mempolicy *pol = &default_policy;
+       if (!in_interrupt() && !(gfp & __GFP_THISNODE))
+               pol = get_task_policy(current);
+       if (pol->mode == MPOL_INTERLEAVE)
+               return alloc_pages_bulk_array_interleave(gfp, pol,
+                                                        nr_pages, page_array);
+       if (pol->mode == MPOL_PREFERRED_MANY)
+               return alloc_pages_bulk_array_preferred_many(gfp,
+                               numa_node_id(), pol, nr_pages, page_array);
+       return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()),
+                                 policy_nodemask(gfp, pol), nr_pages, NULL,
+                                 page_array);
+ }
  int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
  {
        struct mempolicy *pol = mpol_dup(vma_policy(src));
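
alloc_pages_bulk_array_mempolicy() above applies the calling task's NUMA policy to a bulk allocation; a hypothetical caller sketch (variable names and error handling assumed, not taken from this diff):

	/* Hypothetical example: bulk-allocate nr pages under the task's mempolicy. */
	struct page **pages = kvcalloc(nr, sizeof(*pages), GFP_KERNEL);
	unsigned long allocated;

	if (!pages)
		return -ENOMEM;
	allocated = alloc_pages_bulk_array_mempolicy(GFP_KERNEL, nr, pages);
	/* The return value may be less than nr; callers must handle a partial batch. */
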
diff --cc mm/migrate.c
Simple merge
diff --cc mm/nommu.c
Simple merge
diff --cc mm/oom_kill.c
Simple merge
Simple merge
diff --cc mm/page_alloc.c
Simple merge
diff --cc mm/readahead.c
Simple merge
diff --cc mm/rmap.c
Simple merge
diff --cc mm/shmem.c
Simple merge
diff --cc mm/slub.c
Simple merge
diff --cc mm/swap.c
Simple merge
diff --cc mm/swapfile.c
Simple merge
Simple merge
diff --cc mm/vmscan.c
@@@ -1021,6 -1006,91 +1021,91 @@@ static void handle_write_error(struct a
        unlock_page(page);
  }
  
 -void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
+ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
+ {
+       wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
+       long timeout, ret;
+       DEFINE_WAIT(wait);
+       /*
+        * Do not throttle IO workers, kthreads other than kswapd or
+        * workqueues. They may be required for reclaim to make
+        * forward progress (e.g. journalling workqueues or kthreads).
+        */
+       if (!current_is_kswapd() &&
+           current->flags & (PF_IO_WORKER|PF_KTHREAD))
+               return;
+       /*
+        * These figures are pulled out of thin air.
+        * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many
+        * parallel reclaimers which is a short-lived event so the timeout is
+        * short. Failing to make progress or waiting on writeback are
+        * potentially long-lived events so use a longer timeout. This is shaky
+        * logic as a failure to make progress could be due to anything from
+        * writeback to a slow device to excessive references pages at the tail
+        * of the inactive LRU.
+        */
+       switch(reason) {
+       case VMSCAN_THROTTLE_WRITEBACK:
+               timeout = HZ/10;
+               if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+                       WRITE_ONCE(pgdat->nr_reclaim_start,
+                               node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+               }
+               break;
+       case VMSCAN_THROTTLE_NOPROGRESS:
+               timeout = HZ/2;
+               break;
+       case VMSCAN_THROTTLE_ISOLATED:
+               timeout = HZ/50;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               timeout = HZ;
+               break;
+       }
+       prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+       ret = schedule_timeout(timeout);
+       finish_wait(wqh, &wait);
+       if (reason == VMSCAN_THROTTLE_WRITEBACK)
+               atomic_dec(&pgdat->nr_writeback_throttled);
+       trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
+                               jiffies_to_usecs(timeout - ret),
+                               reason);
+ }
+ /*
+  * Account for pages written if tasks are throttled waiting on dirty
+  * pages to clean. If enough pages have been cleaned since throttling
+  * started then wakeup the throttled tasks.
+  */
 -      inc_node_page_state(page, NR_THROTTLED_WRITTEN);
++void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+                                                       int nr_throttled)
+ {
+       unsigned long nr_written;
++      node_stat_add_folio(folio, NR_THROTTLED_WRITTEN);
+       /*
+        * This is an inaccurate read as the per-cpu deltas may not
+        * be synchronised. However, given that the system is
+        * writeback throttled, it is not worth taking the penalty
+        * of getting an accurate count. At worst, the throttle
+        * timeout guarantees forward progress.
+        */
+       nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) -
+               READ_ONCE(pgdat->nr_reclaim_start);
+       if (nr_written > SWAP_CLUSTER_MAX * nr_throttled)
+               wake_up(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]);
+ }
  /* possible outcome of pageout() */
  typedef enum {
        /* failed to write page out, page is locked */
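
reclaim_throttle() above puts the caller to sleep for a reason-specific timeout; a hypothetical call-site sketch (the too_many_isolated() check is assumed and not shown in this hunk):

	/* Hypothetical example: back off when too many pages are already isolated. */
	if (too_many_isolated(pgdat))
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
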
diff --cc net/ipv4/tcp.c
Simple merge
Simple merge
Simple merge