From 7e8a6304d5419cbf056a59de92939e5eef039c57 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 21 Aug 2018 21:53:58 -0700 Subject: [PATCH] /proc/meminfo: add percpu populated pages count Currently, percpu memory only exposes allocation and utilization information via debugfs. This more or less is only really useful for understanding the fragmentation and allocation information at a per-chunk level with a few global counters. This is also gated behind a config. BPF and cgroup, for example, have seen an increase in use causing increased use of percpu memory. Let's make it easier for someone to identify how much memory is being used. This patch adds the "Percpu" stat to meminfo to more easily look up how much percpu memory is in use. This number includes the cost for all allocated backing pages and not just insight at the per a unit, per chunk level. Metadata is excluded. I think excluding metadata is fair because the backing memory scales with the numbere of cpus and can quickly outweigh the metadata. It also makes this calculation light. Link: http://lkml.kernel.org/r/20180807184723.74919-1-dennisszhou@gmail.com Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Acked-by: Roman Gushchin Reviewed-by: Andrew Morton Acked-by: David Rientjes Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Christoph Lameter Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 3 +++ fs/proc/meminfo.c | 2 ++ include/linux/percpu.h | 2 ++ mm/percpu.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 36 insertions(+) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 1605acbb..22b4b00 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -870,6 +870,7 @@ Committed_AS: 100056 kB VmallocTotal: 112216 kB VmallocUsed: 428 kB VmallocChunk: 111088 kB +Percpu: 62080 kB HardwareCorrupted: 0 kB AnonHugePages: 49152 kB ShmemHugePages: 0 kB @@ -962,6 +963,8 @@ Committed_AS: The amount of memory presently allocated on the system. VmallocTotal: total size of vmalloc memory area VmallocUsed: amount of vmalloc area which is used VmallocChunk: largest contiguous block of vmalloc area which is free + Percpu: Memory allocated to the percpu allocator used to back percpu + allocations. This stat excludes the cost of metadata. .............................................................................. diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 2fb0484..edda898 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -121,6 +122,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) (unsigned long)VMALLOC_TOTAL >> 10); show_val_kb(m, "VmallocUsed: ", 0ul); show_val_kb(m, "VmallocChunk: ", 0ul); + show_val_kb(m, "Percpu: ", pcpu_nr_pages()); #ifdef CONFIG_MEMORY_FAILURE seq_printf(m, "HardwareCorrupted: %5lu kB\n", diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 296bbe4..70b7123 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -149,4 +149,6 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) +extern unsigned long pcpu_nr_pages(void); + #endif /* __LINUX_PERCPU_H */ diff --git a/mm/percpu.c b/mm/percpu.c index 0b64809..a749d4d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -170,6 +170,14 @@ static LIST_HEAD(pcpu_map_extend_chunks); int pcpu_nr_empty_pop_pages; /* + * The number of populated pages in use by the allocator, protected by + * pcpu_lock. This number is kept per a unit per chunk (i.e. when a page gets + * allocated/deallocated, it is allocated/deallocated in all units of a chunk + * and increments/decrements this count by 1). + */ +static unsigned long pcpu_nr_populated; + +/* * Balance work is used to populate or destroy chunks asynchronously. We * try to keep the number of populated free pages between * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one @@ -1232,6 +1240,7 @@ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start, bitmap_set(chunk->populated, page_start, nr); chunk->nr_populated += nr; + pcpu_nr_populated += nr; if (!for_alloc) { chunk->nr_empty_pop_pages += nr; @@ -1260,6 +1269,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, chunk->nr_populated -= nr; chunk->nr_empty_pop_pages -= nr; pcpu_nr_empty_pop_pages -= nr; + pcpu_nr_populated -= nr; } /* @@ -2176,6 +2186,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; pcpu_chunk_relocate(pcpu_first_chunk, -1); + /* include all regions of the first chunk */ + pcpu_nr_populated += PFN_DOWN(size_sum); + pcpu_stats_chunk_alloc(); trace_percpu_create_chunk(base_addr); @@ -2746,6 +2759,22 @@ void __init setup_per_cpu_areas(void) #endif /* CONFIG_SMP */ /* + * pcpu_nr_pages - calculate total number of populated backing pages + * + * This reflects the number of pages populated to back chunks. Metadata is + * excluded in the number exposed in meminfo as the number of backing pages + * scales with the number of cpus and can quickly outweigh the memory used for + * metadata. It also keeps this calculation nice and simple. + * + * RETURNS: + * Total number of populated backing pages in use by the allocator. + */ +unsigned long pcpu_nr_pages(void) +{ + return pcpu_nr_populated * pcpu_nr_units; +} + +/* * Percpu allocator is initialized early during boot when neither slab or * workqueue is available. Plug async management until everything is up * and running. -- 2.7.4