mm: memcg: convert vmstat slab counters to bytes
author	Roman Gushchin <guro@fb.com>
Fri, 7 Aug 2020 06:20:39 +0000 (23:20 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Aug 2020 18:33:24 +0000 (11:33 -0700)
In order to prepare for per-object slab memory accounting, convert
NR_SLAB_RECLAIMABLE and NR_SLAB_UNRECLAIMABLE vmstat items to bytes.

To make the new unit obvious, rename them to NR_SLAB_RECLAIMABLE_B and
NR_SLAB_UNRECLAIMABLE_B (following the example of NR_KERNEL_STACK_KB).

Internally, global and per-node counters are stored in pages, while memcg
and lruvec counters are stored in bytes.  This scheme may look odd, but it
is only temporary.  Once slab pages are shared between multiple cgroups,
global and node counters will still reflect the total number of slab
pages.  The memcg and lruvec counters, however, will be used for per-memcg
slab memory tracking, which takes individual kernel objects into account.
Keeping global and node counters in pages helps avoid additional overhead.

The size of slab memory shouldn't exceed 4GB on 32-bit machines, so the
byte-sized counters will fit into the atomic_long_t used for vmstats.
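
For illustration only, below is a minimal user-space sketch of the scheme
described above; the names (demo_mod_node_state, demo_mod_memcg_state,
DEMO_PAGE_SIZE) are hypothetical and are not the kernel's actual helpers.
Callers pass byte deltas in both cases; the page-backed node counter
converts back to pages internally, while the memcg counter keeps byte
precision:

	/*
	 * User-space model of the accounting scheme (illustrative only;
	 * the demo_* helpers do not exist in the kernel).
	 */
	#include <assert.h>
	#include <stdio.h>

	#define DEMO_PAGE_SHIFT 12
	#define DEMO_PAGE_SIZE  (1L << DEMO_PAGE_SHIFT)

	static long node_slab_reclaimable_pages;   /* global/node: pages  */
	static long memcg_slab_reclaimable_bytes;  /* memcg/lruvec: bytes */

	/* Node-level slab updates are expected to be whole pages. */
	static void demo_mod_node_state(long delta_bytes)
	{
		assert(delta_bytes % DEMO_PAGE_SIZE == 0);
		node_slab_reclaimable_pages += delta_bytes / DEMO_PAGE_SIZE;
	}

	/* Memcg-level counters keep byte precision for per-object accounting. */
	static void demo_mod_memcg_state(long delta_bytes)
	{
		memcg_slab_reclaimable_bytes += delta_bytes;
	}

	int main(void)
	{
		/* Charge an order-1 (two-page) slab allocation to the node... */
		demo_mod_node_state(DEMO_PAGE_SIZE << 1);
		/* ...but only a single 256-byte object to the memcg. */
		demo_mod_memcg_state(256);

		printf("node:  %ld pages\n", node_slab_reclaimable_pages);
		printf("memcg: %ld bytes\n", memcg_slab_reclaimable_bytes);
		return 0;
	}

Built with any C compiler, this prints "node:  2 pages" and
"memcg: 256 bytes", mirroring how a two-page slab charge and a single
256-byte object charge diverge between the two classes of counters.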

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-4-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
13 files changed:
drivers/base/node.c
fs/proc/meminfo.c
include/linux/mmzone.h
kernel/power/snapshot.c
mm/memcontrol.c
mm/oom_kill.c
mm/page_alloc.c
mm/slab.h
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/vmscan.c
mm/workingset.c

index e21e313592970ef51e10f4c1283c4b27586b5a89..0cf13e31603c9dcf71b6ce23b8da144595252459 100644 (file)
@@ -368,8 +368,8 @@ static ssize_t node_read_meminfo(struct device *dev,
        unsigned long sreclaimable, sunreclaimable;
 
        si_meminfo_node(&i, nid);
-       sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
-       sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
+       sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
+       sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
        n = sprintf(buf,
                       "Node %d MemTotal:       %8lu kB\n"
                       "Node %d MemFree:        %8lu kB\n"
index e9a6841fc25bcfdf460ad7a0818e85de53a908ae..38ea95fd919a5d65f3d7be2074ecb92fb2ecbf61 100644 (file)
@@ -52,8 +52,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
        available = si_mem_available();
-       sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE);
-       sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE);
+       sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
+       sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);
 
        show_val_kb(m, "MemTotal:       ", i.totalram);
        show_val_kb(m, "MemFree:        ", i.freeram);
index f16306e15b986c6557e381025f5ddc98a3371b4c..b79100edd2285e1211c1c5bfc280e0e17240d874 100644 (file)
@@ -174,8 +174,8 @@ enum node_stat_item {
        NR_INACTIVE_FILE,       /*  "     "     "   "       "         */
        NR_ACTIVE_FILE,         /*  "     "     "   "       "         */
        NR_UNEVICTABLE,         /*  "     "     "   "       "         */
-       NR_SLAB_RECLAIMABLE,
-       NR_SLAB_UNRECLAIMABLE,
+       NR_SLAB_RECLAIMABLE_B,
+       NR_SLAB_UNRECLAIMABLE_B,
        NR_ISOLATED_ANON,       /* Temporary isolated pages from anon lru */
        NR_ISOLATED_FILE,       /* Temporary isolated pages from file lru */
        WORKINGSET_NODES,
@@ -213,7 +213,17 @@ enum node_stat_item {
  */
 static __always_inline bool vmstat_item_in_bytes(int idx)
 {
-       return false;
+       /*
+        * Global and per-node slab counters track slab pages.
+        * It's expected that changes are multiples of PAGE_SIZE.
+        * Internally values are stored in pages.
+        *
+        * Per-memcg and per-lruvec counters track memory, consumed
+        * by individual slab objects. These counters are actually
+        * byte-precise.
+        */
+       return (idx == NR_SLAB_RECLAIMABLE_B ||
+               idx == NR_SLAB_UNRECLAIMABLE_B);
 }
 
 /*
index cef154261fe2f9c64b1775b5a09a78e0b9d73f6d..d25749bce7cf889444559b57c9aae83e4a77c184 100644 (file)
@@ -1663,7 +1663,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
 {
        unsigned long size;
 
-       size = global_node_page_state(NR_SLAB_RECLAIMABLE)
+       size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
                + global_node_page_state(NR_ACTIVE_ANON)
                + global_node_page_state(NR_INACTIVE_ANON)
                + global_node_page_state(NR_ACTIVE_FILE)
index 61ae6658d59fad1860f6c86b827147bb3c60e278..328b7e7bf9ab1b8d99c9fbc4c26bf6d54ea8ab2a 100644 (file)
@@ -1391,9 +1391,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
                       (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) *
                       1024);
        seq_buf_printf(&s, "slab %llu\n",
-                      (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) +
-                            memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE)) *
-                      PAGE_SIZE);
+                      (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
+                            memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B)));
        seq_buf_printf(&s, "sock %llu\n",
                       (u64)memcg_page_state(memcg, MEMCG_SOCK) *
                       PAGE_SIZE);
@@ -1423,11 +1422,9 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
                               PAGE_SIZE);
 
        seq_buf_printf(&s, "slab_reclaimable %llu\n",
-                      (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) *
-                      PAGE_SIZE);
+                      (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B));
        seq_buf_printf(&s, "slab_unreclaimable %llu\n",
-                      (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE) *
-                      PAGE_SIZE);
+                      (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B));
 
        /* Accumulated memory events */
 
index 6e94962893ee8432a65945a497fab1596c1a1895..d30ce75f23fb772e0a7a74eb6aa576bc9275ee6c 100644 (file)
@@ -184,7 +184,7 @@ static bool is_dump_unreclaim_slabs(void)
                 global_node_page_state(NR_ISOLATED_FILE) +
                 global_node_page_state(NR_UNEVICTABLE);
 
-       return (global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru);
+       return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru);
 }
 
 /**
index 901a21f61d68acb31633ac2cbdd32b36ceb8f530..f9ad093814d2560a57db05ec526c34c96d937405 100644 (file)
@@ -5220,8 +5220,8 @@ long si_mem_available(void)
         * items that are in use, and cannot be freed. Cap this estimate at the
         * low watermark.
         */
-       reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) +
-                       global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+       reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
+               global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
        available += reclaimable - min(reclaimable / 2, wmark_low);
 
        if (available < 0)
@@ -5364,8 +5364,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                global_node_page_state(NR_UNEVICTABLE),
                global_node_page_state(NR_FILE_DIRTY),
                global_node_page_state(NR_WRITEBACK),
-               global_node_page_state(NR_SLAB_RECLAIMABLE),
-               global_node_page_state(NR_SLAB_UNRECLAIMABLE),
+               global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
+               global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
                global_node_page_state(NR_FILE_MAPPED),
                global_node_page_state(NR_SHMEM),
                global_zone_page_state(NR_PAGETABLE),
index fceb4341ba910aaa0bfe1e0a284537f59c4a070f..09be3ca6fe878f1c90bf1fa86c9b14cfb146de82 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -273,7 +273,7 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 static inline int cache_vmstat_idx(struct kmem_cache *s)
 {
        return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
-               NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE;
+               NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
 }
 
 #ifdef CONFIG_SLUB_DEBUG
@@ -390,7 +390,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
 
        if (unlikely(!memcg || mem_cgroup_is_root(memcg))) {
                mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-                                   nr_pages);
+                                   nr_pages << PAGE_SHIFT);
                percpu_ref_get_many(&s->memcg_params.refcnt, nr_pages);
                return 0;
        }
@@ -400,7 +400,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
                goto out;
 
        lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
-       mod_lruvec_state(lruvec, cache_vmstat_idx(s), nr_pages);
+       mod_lruvec_state(lruvec, cache_vmstat_idx(s), nr_pages << PAGE_SHIFT);
 
        /* transer try_charge() page references to kmem_cache */
        percpu_ref_get_many(&s->memcg_params.refcnt, nr_pages);
@@ -425,11 +425,12 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
        memcg = READ_ONCE(s->memcg_params.memcg);
        if (likely(!mem_cgroup_is_root(memcg))) {
                lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
-               mod_lruvec_state(lruvec, cache_vmstat_idx(s), -nr_pages);
+               mod_lruvec_state(lruvec, cache_vmstat_idx(s),
+                                -(nr_pages << PAGE_SHIFT));
                memcg_kmem_uncharge(memcg, nr_pages);
        } else {
                mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-                                   -nr_pages);
+                                   -(nr_pages << PAGE_SHIFT));
        }
        rcu_read_unlock();
 
@@ -513,7 +514,7 @@ static __always_inline int charge_slab_page(struct page *page,
 {
        if (is_root_cache(s)) {
                mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-                                   1 << order);
+                                   PAGE_SIZE << order);
                return 0;
        }
 
@@ -525,7 +526,7 @@ static __always_inline void uncharge_slab_page(struct page *page, int order,
 {
        if (is_root_cache(s)) {
                mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-                                   -(1 << order));
+                                   -(PAGE_SIZE << order));
                return;
        }
 
index 616ec8a0d91a5187925a01c704dbf18a7f4d9373..a73f168b103535a88bdba8d15a710eadc5b05149 100644 (file)
@@ -1363,8 +1363,8 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
        page = alloc_pages(flags, order);
        if (likely(page)) {
                ret = page_address(page);
-               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
-                                   1 << order);
+               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+                                   PAGE_SIZE << order);
        }
        ret = kasan_kmalloc_large(ret, size, flags);
        /* As ret might get tagged, call kmemleak hook after KASAN. */
index ac2aecfbc7a828d4494272245809984bab340c3c..7cc9805c8091155f8c77be8d727ea630d2a3a2dc 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -202,8 +202,8 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
        if (!page)
                return NULL;
 
-       mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
-                           1 << order);
+       mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+                           PAGE_SIZE << order);
        return page_address(page);
 }
 
@@ -214,8 +214,8 @@ static void slob_free_pages(void *b, int order)
        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += 1 << order;
 
-       mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
-                           -(1 << order));
+       mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
+                           -(PAGE_SIZE << order));
        __free_pages(sp, order);
 }
 
@@ -552,8 +552,8 @@ void kfree(const void *block)
                slob_free(m, *m + align);
        } else {
                unsigned int order = compound_order(sp);
-               mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
-                                   -(1 << order));
+               mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
+                                   -(PAGE_SIZE << order));
                __free_pages(sp, order);
 
        }
index ae39eb392396f403ce7762f118162988b52e7fbc..2d73d677f7acd1728051440976f24034995a34d9 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3991,8 +3991,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
        page = alloc_pages_node(node, flags, order);
        if (page) {
                ptr = page_address(page);
-               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
-                                   1 << order);
+               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+                                   PAGE_SIZE << order);
        }
 
        return kmalloc_large_node_hook(ptr, size, flags);
@@ -4123,8 +4123,8 @@ void kfree(const void *x)
 
                BUG_ON(!PageCompound(page));
                kfree_hook(object);
-               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
-                                   -(1 << order));
+               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+                                   -(PAGE_SIZE << order));
                __free_pages(page, order);
                return;
        }
index 749d239c62b2b714fc73aac9db6044a4ab575065..2ac43664aba4bf8b54e9614a22fe8b9197bef982 100644 (file)
@@ -4222,7 +4222,8 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
         * unmapped file backed pages.
         */
        if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
-           node_page_state(pgdat, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
+           node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) <=
+           pgdat->min_slab_pages)
                return NODE_RECLAIM_FULL;
 
        /*
index 50b7937bab32fe6b16e2a53a2ddeada8d3710144..b199726924ddd094f56c8421d1a85cba7f2069c4 100644 (file)
@@ -486,8 +486,10 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
                for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
                        pages += lruvec_page_state_local(lruvec,
                                                         NR_LRU_BASE + i);
-               pages += lruvec_page_state_local(lruvec, NR_SLAB_RECLAIMABLE);
-               pages += lruvec_page_state_local(lruvec, NR_SLAB_UNRECLAIMABLE);
+               pages += lruvec_page_state_local(
+                       lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT;
+               pages += lruvec_page_state_local(
+                       lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT;
        } else
 #endif
                pages = node_present_pages(sc->nid);