mm: memcontrol: account pagetables per node
author		Shakeel Butt <shakeelb@google.com>
		Tue, 15 Dec 2020 03:07:17 +0000 (19:07 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
		Tue, 15 Dec 2020 20:13:40 +0000 (12:13 -0800)
For many workloads, pagetable consumption is significant, and it makes
sense to expose it in memory.stat for the memory cgroups.  However, at
the moment the pagetables are accounted per-zone.  Converting them to a
per-node counter and using the right interface will correctly account
for the memory cgroups as well.

[akpm@linux-foundation.org: export __mod_lruvec_page_state to modules for arch/mips/kvm/]

Link: https://lkml.kernel.org/r/20201130212541.2781790-3-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
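
At its core, the conversion swaps the NR_PAGETABLE call sites from the
zone-stat helpers over to the lruvec helpers, which charge the per-node
counter and, with CONFIG_MEMCG enabled, the owning cgroup as well.  A
minimal sketch of the pattern with hypothetical pt_page_alloc() and
pt_page_free() helpers (the export noted above is what lets modular code
such as arch/mips/kvm reach this path):

#include <linux/gfp.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>

/* Hypothetical module-side helpers: allocate/free one page used as a
 * software page table, accounted via the per-node lruvec API.  The
 * __GFP_ACCOUNT flag is what associates the page with a memcg. */
static struct page *pt_page_alloc(void)
{
	struct page *page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);

	if (!page)
		return NULL;
	/* was: inc_zone_page_state(page, NR_PAGETABLE); */
	inc_lruvec_page_state(page, NR_PAGETABLE);
	return page;
}

static void pt_page_free(struct page *page)
{
	dec_lruvec_page_state(page, NR_PAGETABLE);
	__free_page(page);
}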
Documentation/admin-guide/cgroup-v2.rst
arch/nds32/mm/mm-nds32.c
drivers/base/node.c
fs/proc/meminfo.c
include/linux/mm.h
include/linux/mmzone.h
mm/memcontrol.c
mm/page_alloc.c
mm/vmstat.c

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 515bb13..63521cd 100644
@@ -1274,6 +1274,9 @@ PAGE_SIZE multiple when read back.
          kernel_stack
                Amount of memory allocated to kernel stacks.
 
+         pagetables
+                Amount of memory allocated for page tables.
+
          percpu(npn)
                Amount of memory used for storing per-cpu kernel
                data structures.
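
For completeness, a tiny userspace sketch that surfaces the new entry;
the cgroup path "mygroup" is an assumption for illustration:

#include <stdio.h>
#include <string.h>

/* Print the "pagetables" line from a v2 cgroup's memory.stat.
 * The cgroup name "mygroup" is hypothetical. */
int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/fs/cgroup/mygroup/memory.stat", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "pagetables ", 11))
			fputs(line, stdout);
	fclose(f);
	return 0;
}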
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 55bec50..f2778f2 100644
@@ -34,8 +34,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
        cpu_dcache_wb_range((unsigned long)new_pgd,
                            (unsigned long)new_pgd +
                            PTRS_PER_PGD * sizeof(pgd_t));
-       inc_zone_page_state(virt_to_page((unsigned long *)new_pgd),
-                           NR_PAGETABLE);
+       inc_lruvec_page_state(virt_to_page((unsigned long *)new_pgd),
+                             NR_PAGETABLE);
 
        return new_pgd;
 }
@@ -59,7 +59,7 @@ void pgd_free(struct mm_struct *mm, pgd_t * pgd)
 
        pte = pmd_page(*pmd);
        pmd_clear(pmd);
-       dec_zone_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
+       dec_lruvec_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
        pte_free(mm, pte);
        mm_dec_nr_ptes(mm);
        pmd_free(mm, pmd);
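
For context, inc_lruvec_page_state() is a thin inline wrapper; roughly,
as in include/linux/memcontrol.h of this era (simplified, CONFIG_MEMCG
case), it masks interrupts and funnels into the newly exported
__mod_lruvec_page_state(), which updates both the node and the memcg
counters:

static inline void mod_lruvec_page_state(struct page *page,
					 enum node_stat_item idx, int val)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_lruvec_page_state(page, idx, val); /* node + memcg counter */
	local_irq_restore(flags);
}

static inline void inc_lruvec_page_state(struct page *page,
					 enum node_stat_item idx)
{
	mod_lruvec_page_state(page, idx, 1);
}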
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 6ffa470..04f71c7 100644
@@ -450,7 +450,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 #ifdef CONFIG_SHADOW_CALL_STACK
                             nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
-                            nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
+                            nid, K(node_page_state(pgdat, NR_PAGETABLE)),
                             nid, 0UL,
                             nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
                             nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
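
This one-line substitution is valid because NR_PAGETABLE moves from enum
zone_stat_item to enum node_stat_item (see the mmzone.h hunk below), and
the two accessors aggregate differently; their declarations from
include/linux/vmstat.h:

/* Per-zone item: sums the counter across every zone of the node. */
unsigned long sum_zone_node_page_state(int node, enum zone_stat_item item);

/* Per-node item: reads the node-wide counter directly. */
unsigned long node_page_state(struct pglist_data *pgdat,
			      enum node_stat_item item);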
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 887a553..d6fc746 100644
@@ -107,7 +107,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                   global_node_page_state(NR_KERNEL_SCS_KB));
 #endif
        show_val_kb(m, "PageTables:     ",
-                   global_zone_page_state(NR_PAGETABLE));
+                   global_node_page_state(NR_PAGETABLE));
 
        show_val_kb(m, "NFS_Unstable:   ", 0);
        show_val_kb(m, "Bounce:         ",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d..5bbbf4a 100644
@@ -2203,7 +2203,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page)
        if (!ptlock_init(page))
                return false;
        __SetPageTable(page);
-       inc_zone_page_state(page, NR_PAGETABLE);
+       inc_lruvec_page_state(page, NR_PAGETABLE);
        return true;
 }
 
@@ -2211,7 +2211,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
 {
        ptlock_free(page);
        __ClearPageTable(page);
-       dec_zone_page_state(page, NR_PAGETABLE);
+       dec_lruvec_page_state(page, NR_PAGETABLE);
 }
 
 #define pte_offset_map_lock(mm, pmd, address, ptlp)    \
@@ -2298,7 +2298,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
        if (!pmd_ptlock_init(page))
                return false;
        __SetPageTable(page);
-       inc_zone_page_state(page, NR_PAGETABLE);
+       inc_lruvec_page_state(page, NR_PAGETABLE);
        return true;
 }
 
@@ -2306,7 +2306,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
 {
        pmd_ptlock_free(page);
        __ClearPageTable(page);
-       dec_zone_page_state(page, NR_PAGETABLE);
+       dec_lruvec_page_state(page, NR_PAGETABLE);
 }
 
 /*
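
These ctors/dtors are what most architectures reach through their
pte/pmd allocation helpers, so the accounting switch takes effect at
allocation time.  A simplified call site, close to __pte_alloc_one() in
include/asm-generic/pgalloc.h (the helper name here is illustrative):

#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *pte_page_alloc(struct mm_struct *mm, gfp_t gfp)
{
	struct page *pte = alloc_page(gfp);

	if (!pte)
		return NULL;
	if (!pgtable_pte_page_ctor(pte)) { /* ptlock + NR_PAGETABLE inc */
		__free_page(pte);
		return NULL;
	}
	return pte;
}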
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb3bf69..cca2a44 100644
@@ -152,7 +152,6 @@ enum zone_stat_item {
        NR_ZONE_UNEVICTABLE,
        NR_ZONE_WRITE_PENDING,  /* Count of dirty, writeback and unstable pages */
        NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
-       NR_PAGETABLE,           /* used for pagetables */
        /* Second 128 byte cacheline */
        NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -207,6 +206,7 @@ enum node_stat_item {
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
        NR_KERNEL_SCS_KB,       /* measured in KiB */
 #endif
+       NR_PAGETABLE,           /* used for pagetables */
        NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 52837d6..b9419a3 100644
@@ -869,6 +869,7 @@ void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
        lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat);
        __mod_lruvec_state(lruvec, idx, val);
 }
+EXPORT_SYMBOL(__mod_lruvec_page_state);
 
 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 {
@@ -1493,6 +1494,7 @@ static struct memory_stat memory_stats[] = {
        { "anon", PAGE_SIZE, NR_ANON_MAPPED },
        { "file", PAGE_SIZE, NR_FILE_PAGES },
        { "kernel_stack", 1024, NR_KERNEL_STACK_KB },
+       { "pagetables", PAGE_SIZE, NR_PAGETABLE },
        { "percpu", 1, MEMCG_PERCPU_B },
        { "sock", PAGE_SIZE, MEMCG_SOCK },
        { "shmem", PAGE_SIZE, NR_SHMEM },
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa227a..743fb2b 100644
@@ -5465,7 +5465,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
                global_node_page_state(NR_FILE_MAPPED),
                global_node_page_state(NR_SHMEM),
-               global_zone_page_state(NR_PAGETABLE),
+               global_node_page_state(NR_PAGETABLE),
                global_zone_page_state(NR_BOUNCE),
                global_zone_page_state(NR_FREE_PAGES),
                free_pcp,
@@ -5497,6 +5497,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_SHADOW_CALL_STACK
                        " shadow_call_stack:%lukB"
 #endif
+                       " pagetables:%lukB"
                        " all_unreclaimable? %s"
                        "\n",
                        pgdat->node_id,
@@ -5522,6 +5523,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_SHADOW_CALL_STACK
                        node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
+                       K(node_page_state(pgdat, NR_PAGETABLE)),
                        pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
                                "yes" : "no");
        }
@@ -5553,7 +5555,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        " present:%lukB"
                        " managed:%lukB"
                        " mlocked:%lukB"
-                       " pagetables:%lukB"
                        " bounce:%lukB"
                        " free_pcp:%lukB"
                        " local_pcp:%ukB"
@@ -5574,7 +5575,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(zone->present_pages),
                        K(zone_managed_pages(zone)),
                        K(zone_page_state(zone, NR_MLOCK)),
-                       K(zone_page_state(zone, NR_PAGETABLE)),
                        K(zone_page_state(zone, NR_BOUNCE)),
                        K(free_pcp),
                        K(this_cpu_read(zone->pageset->pcp.count)),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 698bc0b..da36e3b 100644
@@ -1157,7 +1157,6 @@ const char * const vmstat_text[] = {
        "nr_zone_unevictable",
        "nr_zone_write_pending",
        "nr_mlock",
-       "nr_page_table_pages",
        "nr_bounce",
 #if IS_ENABLED(CONFIG_ZSMALLOC)
        "nr_zspages",
@@ -1215,6 +1214,7 @@ const char * const vmstat_text[] = {
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
        "nr_shadow_call_stack",
 #endif
+       "nr_page_table_pages",
 
        /* enum writeback_stat_item counters */
        "nr_dirty_threshold",