mm: parallelize deferred_init_memmap()

author Daniel Jordan <daniel.m.jordan@oracle.com>

Wed, 3 Jun 2020 22:59:51 +0000 (15:59 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 4 Jun 2020 03:09:45 +0000 (20:09 -0700)
author Daniel Jordan <daniel.m.jordan@oracle.com>
Wed, 3 Jun 2020 22:59:51 +0000 (15:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 4 Jun 2020 03:09:45 +0000 (20:09 -0700)
diff --git a/mm/Kconfig b/mm/Kconfig

index 3af64646f343de2c559b20f5e65b3be5a72727df..e3490ecac839db0d9fcad94165887253ab06a570 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -747,13 +747,13 @@ config DEFERRED_STRUCT_PAGE_INIT
         depends on SPARSEMEM
         depends on !NEED_PER_CPU_KM
         depends on 64BIT
+       select PADATA
         help
           Ordinarily all struct pages are initialised during early boot in a
           single thread. On very large machines this can take a considerable
           amount of time. If this option is set, large machines will bring up
-         a subset of memmap at boot and then initialise the rest in parallel
-         by starting one-off "pgdatinitX" kernel thread for each node X. This
-         has a potential performance impact on processes running early in the
+         a subset of memmap at boot and then initialise the rest in parallel.
+         This has a potential performance impact on tasks running early in the
           lifetime of the system until these kthreads finish the
           initialisation.
  
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 89bd57241e08e5bcb6067010358aebf419522acf..27ec5dc4db33676bb34d93ee2d97c10d100959e0 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,6 +68,7 @@
  #include <linux/lockdep.h>
  #include <linux/nmi.h>
  #include <linux/psi.h>
+#include <linux/padata.h>
  
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
@@ -1815,6 +1816,26 @@ deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
         return nr_pages;
  }
  
+static void __init
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+                          void *arg)
+{
+       unsigned long spfn, epfn;
+       struct zone *zone = arg; /* the padata job's .fn_arg: the zone to init */
+       u64 i;
+
+       deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
+
+       /*
+        * This function is the .thread_fn of the padata_mt_job built in
+        * deferred_init_memmap(); each call handles the pfn chunk that starts
+        * at start_pfn and stops once spfn reaches end_pfn.
+        *
+        * NOTE(review): the call above presumably positions i, spfn and epfn
+        * on the first range of the zone at or after start_pfn, and
+        * deferred_init_maxorder() presumably advances spfn on every pass --
+        * the loop below relies on that to terminate.  Confirm against the
+        * helpers, which are not visible here.
+        *
+        * Initialize and free pages in MAX_ORDER sized increments so that we
+        * can avoid introducing any issues with the buddy allocator.
+        *
+        * cond_resched() is called after every increment, as the
+        * single-threaded loop this replaces also did.
+        */
+       while (spfn < end_pfn) {
+               deferred_init_maxorder(&i, zone, &spfn, &epfn);
+               cond_resched();
+       }
+}
+
  /* Initialise remaining memory on a node */
  static int __init deferred_init_memmap(void *data)
  {
@@ -1824,7 +1845,7 @@ static int __init deferred_init_memmap(void *data)
         unsigned long first_init_pfn, flags;
         unsigned long start = jiffies;
         struct zone *zone;
-       int zid;
+       int zid, max_threads;
         u64 i;
  
         /* Bind memory initialisation thread to a local node if possible */
@@ -1864,13 +1885,26 @@ static int __init deferred_init_memmap(void *data)
                 goto zone_empty;
  
         /*
-        * Initialize and free pages in MAX_ORDER sized increments so
-        * that we can avoid introducing any issues with the buddy
-        * allocator.
+        * More CPUs always led to greater speedups on tested systems, up to
+        * all the nodes' CPUs.  Use all since the system is otherwise idle now.
          */
+       max_threads = max(cpumask_weight(cpumask), 1u);
+
         while (spfn < epfn) {
-               deferred_init_maxorder(&i, zone, &spfn, &epfn);
-               cond_resched();
+               unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+               struct padata_mt_job job = {
+                       .thread_fn   = deferred_init_memmap_chunk,
+                       .fn_arg      = zone,
+                       .start       = spfn,
+                       .size        = epfn_align - spfn,
+                       .align       = PAGES_PER_SECTION,
+                       .min_chunk   = PAGES_PER_SECTION,
+                       .max_threads = max_threads,
+               };
+
+               padata_do_multithreaded(&job);
+               deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
+                                                   epfn_align);
         }
  zone_empty:
         /* Sanity check that the next zone really is unpopulated */
author	Daniel Jordan <daniel.m.jordan@oracle.com>
	Wed, 3 Jun 2020 22:59:51 +0000 (15:59 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 4 Jun 2020 03:09:45 +0000 (20:09 -0700)
mm/Kconfig		patch | blob | history
mm/page_alloc.c		patch | blob | history