arm64: support page mapping percpu first chunk allocator
author Kefeng Wang <wangkefeng.wang@huawei.com>
Fri, 5 Nov 2021 20:39:44 +0000 (13:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Nov 2021 20:30:37 +0000 (13:30 -0700)
The percpu embedded first chunk allocator is the first choice, but it
can fail on ARM64, e.g.,

  percpu: max_distance=0x5fcfdc640000 too large for vmalloc space 0x781fefff0000
  percpu: max_distance=0x600000540000 too large for vmalloc space 0x7dffb7ff0000
  percpu: max_distance=0x5fff9adb0000 too large for vmalloc space 0x5dffb7ff0000

after which we hit

  WARNING: CPU: 15 PID: 461 at vmalloc.c:3087 pcpu_get_vm_areas+0x488/0x838

and the system fails to boot.
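
This happens because the embed allocator refuses a NUMA layout whose
maximum unit distance would consume most of the vmalloc range; roughly
(paraphrased from pcpu_embed_first_chunk() in mm/percpu.c, not a
verbatim quote, and the exact types and message may differ by version):

  /* warn and bail out if the layout spans ~3/4 of vmalloc space */
  if (max_distance > VMALLOC_TOTAL * 3 / 4) {
          pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
                  max_distance, VMALLOC_TOTAL);
          return -EINVAL;  /* gives the caller a chance to fall back */
  }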

Implement the page mapping percpu first chunk allocator as a fallback to
the embedding allocator to increase the robustness of the system.
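
The first chunk allocator can also be selected explicitly on the kernel
command line via the existing percpu_alloc= parameter (documented in
Documentation/admin-guide/kernel-parameters.txt); with this patch both
supported values become usable on arm64:

  percpu_alloc=page    # force the page mapping first chunk allocator
  percpu_alloc=embed   # request the embedding allocator (the default)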

Link: https://lkml.kernel.org/r/20210910053354.26721-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Marco Elver <elver@google.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/arm64/Kconfig
drivers/base/arch_numa.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fee914c716aa262d12060cce3980471d510af283..67f24ee6cf572f070415277fb6e463c62ad95e67 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1042,6 +1042,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
        def_bool y
        depends on NUMA
 
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+       def_bool y
+       depends on NUMA
+
 source "kernel/Kconfig.hz"
 
 config ARCH_SPARSEMEM_ENABLE
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 00fb4120a5b3a8166882216b91316d86ab0c9950..28222688ace795c0e22f88abe94d05f6371617f0 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,6 +14,7 @@
 #include <linux/of.h>
 
 #include <asm/sections.h>
+#include <asm/pgalloc.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
@@ -168,22 +169,83 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
        memblock_free_early(__pa(ptr), size);
 }
 
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d)) {
+               pud_t *new;
+
+               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               p4d_populate(&init_mm, p4d, new);
+       }
+
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud)) {
+               pmd_t *new;
+
+               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pud_populate(&init_mm, pud, new);
+       }
+
+       pmd = pmd_offset(pud, addr);
+       if (!pmd_present(*pmd)) {
+               pte_t *new;
+
+               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pmd_populate_kernel(&init_mm, pmd, new);
+       }
+
+       return;
+
+err_alloc:
+       panic("%s: Failed to allocate %lu bytes align=%lx\n",
+             __func__, PAGE_SIZE, PAGE_SIZE);
+}
+#endif
+
 void __init setup_per_cpu_areas(void)
 {
        unsigned long delta;
        unsigned int cpu;
-       int rc;
+       int rc = -EINVAL;
+
+       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+               /*
+                * Always reserve area for module percpu variables.  That's
+                * what the legacy allocator did.
+                */
+               rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+                                           PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+                                           pcpu_cpu_distance,
+                                           pcpu_fc_alloc, pcpu_fc_free);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+               if (rc < 0)
+                       pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
+                                  pcpu_fc_names[pcpu_chosen_fc], rc);
+#endif
+       }
 
-       /*
-        * Always reserve area for module percpu variables.  That's
-        * what the legacy allocator did.
-        */
-       rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-                                   PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
-                                   pcpu_cpu_distance,
-                                   pcpu_fc_alloc, pcpu_fc_free);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+       if (rc < 0)
+               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+                                          pcpu_fc_alloc,
+                                          pcpu_fc_free,
+                                          pcpu_populate_pte);
+#endif
        if (rc < 0)
-               panic("Failed to initialize percpu areas.");
+               panic("Failed to initialize percpu areas (err=%d).", rc);
 
        delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
        for_each_possible_cpu(cpu)