mm: vmscan: split khugepaged stats from direct reclaim stats
author Johannes Weiner <hannes@cmpxchg.org>
Wed, 26 Oct 2022 18:01:33 +0000 (14:01 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 30 Nov 2022 23:58:41 +0000 (15:58 -0800)
Direct reclaim stats are useful for identifying a potential source for
application latency, as well as spotting issues with kswapd.  However,
khugepaged currently distorts the picture: as a kernel thread it doesn't
impose allocation latencies on userspace, and it explicitly opts out of
kswapd reclaim.  Its activity showing up in the direct reclaim stats is
misleading.  Counting it as kswapd reclaim could also cause confusion when
trying to understand actual kswapd behavior.

Break out khugepaged from the direct reclaim counters into new
pgsteal_khugepaged, pgdemote_khugepaged, pgscan_khugepaged counters.

Test with a huge executable (CONFIG_READ_ONLY_THP_FOR_FS):

pgsteal_kswapd 1342185
pgsteal_direct 0
pgsteal_khugepaged 3623
pgscan_kswapd 1345025
pgscan_direct 0
pgscan_khugepaged 3623

Link: https://lkml.kernel.org/r/20221026180133.377671-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Eric Bergen <ebergen@meta.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/admin-guide/cgroup-v2.rst
include/linux/khugepaged.h
include/linux/vm_event_item.h
mm/khugepaged.c
mm/memcontrol.c
mm/vmscan.c
mm/vmstat.c

index dc254a3cb95686e67a7335bad3101313e97eedd9..74cec76be9f2c0a336a89756327e43742bd24528 100644 (file)
@@ -1488,12 +1488,18 @@ PAGE_SIZE multiple when read back.
          pgscan_direct (npn)
                Amount of scanned pages directly  (in an inactive LRU list)
 
+         pgscan_khugepaged (npn)
+               Amount of scanned pages by khugepaged  (in an inactive LRU list)
+
          pgsteal_kswapd (npn)
                Amount of reclaimed pages by kswapd
 
          pgsteal_direct (npn)
                Amount of reclaimed pages directly
 
+         pgsteal_khugepaged (npn)
+               Amount of reclaimed pages by khugepaged
+
          pgfault (npn)
                Total number of page faults incurred
 
index 70162d707caf0c3713d03a801a8c5c54b39c64ab..f68865e19b0b0efad301c72b8749d8deec9d2431 100644 (file)
@@ -15,6 +15,7 @@ extern void __khugepaged_exit(struct mm_struct *mm);
 extern void khugepaged_enter_vma(struct vm_area_struct *vma,
                                 unsigned long vm_flags);
 extern void khugepaged_min_free_kbytes_update(void);
+extern bool current_is_khugepaged(void);
 #ifdef CONFIG_SHMEM
 extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
                                   bool install_pmd);
@@ -57,6 +58,11 @@ static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
 static inline void khugepaged_min_free_kbytes_update(void)
 {
 }
+
+static inline bool current_is_khugepaged(void)
+{
+       return false;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_KHUGEPAGED_H */
index 3518dba1e02f4bdf5f6217a9ea8af2fad38f13af..7f5d1caf5890e4c0cc97058640ff9e92bdc113aa 100644 (file)
@@ -40,10 +40,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                PGREUSE,
                PGSTEAL_KSWAPD,
                PGSTEAL_DIRECT,
+               PGSTEAL_KHUGEPAGED,
                PGDEMOTE_KSWAPD,
                PGDEMOTE_DIRECT,
+               PGDEMOTE_KHUGEPAGED,
                PGSCAN_KSWAPD,
                PGSCAN_DIRECT,
+               PGSCAN_KHUGEPAGED,
                PGSCAN_DIRECT_THROTTLE,
                PGSCAN_ANON,
                PGSCAN_FILE,
index 3703a56571c125e84cebc3058956c9d09681e06b..9c111273bbf91fe7f519ab1bcfbd2a32adbfc542 100644 (file)
@@ -2577,6 +2577,11 @@ void khugepaged_min_free_kbytes_update(void)
        mutex_unlock(&khugepaged_mutex);
 }
 
+bool current_is_khugepaged(void)
+{
+       return kthread_func(current) == khugepaged;
+}
+
 static int madvise_collapse_errno(enum scan_result r)
 {
        /*
index c95e2ed6e7fd0e3a1e296cfbb94a4ce2fd8906b5..23750cec0036ac8972ef9d992860c1fc19dbc974 100644 (file)
@@ -661,8 +661,10 @@ static const unsigned int memcg_vm_event_stat[] = {
        PGPGOUT,
        PGSCAN_KSWAPD,
        PGSCAN_DIRECT,
+       PGSCAN_KHUGEPAGED,
        PGSTEAL_KSWAPD,
        PGSTEAL_DIRECT,
+       PGSTEAL_KHUGEPAGED,
        PGFAULT,
        PGMAJFAULT,
        PGREFILL,
@@ -1574,10 +1576,12 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
        /* Accumulated memory events */
        seq_buf_printf(&s, "pgscan %lu\n",
                       memcg_events(memcg, PGSCAN_KSWAPD) +
-                      memcg_events(memcg, PGSCAN_DIRECT));
+                      memcg_events(memcg, PGSCAN_DIRECT) +
+                      memcg_events(memcg, PGSCAN_KHUGEPAGED));
        seq_buf_printf(&s, "pgsteal %lu\n",
                       memcg_events(memcg, PGSTEAL_KSWAPD) +
-                      memcg_events(memcg, PGSTEAL_DIRECT));
+                      memcg_events(memcg, PGSTEAL_DIRECT) +
+                      memcg_events(memcg, PGSTEAL_KHUGEPAGED));
 
        for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
                if (memcg_vm_event_stat[i] == PGPGIN ||
index 55a5b5d66d6815f4bde7d06394bf28a7be2e03a5..d7c71be6417d3366e6ef4fce1cc2226e8beecb07 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
+#include <linux/khugepaged.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1047,6 +1048,24 @@ void drop_slab(void)
                drop_slab_node(nid);
 }
 
+static int reclaimer_offset(void)
+{
+       BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
+                       PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
+       BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
+                       PGSCAN_DIRECT - PGSCAN_KSWAPD);
+       BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
+                       PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
+       BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
+                       PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
+
+       if (current_is_kswapd())
+               return 0;
+       if (current_is_khugepaged())
+               return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD;
+       return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
+}
+
 static inline int is_page_cache_freeable(struct folio *folio)
 {
        /*
@@ -1599,10 +1618,7 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
                      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
                      &nr_succeeded);
 
-       if (current_is_kswapd())
-               __count_vm_events(PGDEMOTE_KSWAPD, nr_succeeded);
-       else
-               __count_vm_events(PGDEMOTE_DIRECT, nr_succeeded);
+       __count_vm_events(PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded);
 
        return nr_succeeded;
 }
@@ -2475,7 +2491,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
                                     &nr_scanned, sc, lru);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
-       item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+       item = PGSCAN_KSWAPD + reclaimer_offset();
        if (!cgroup_reclaim(sc))
                __count_vm_events(item, nr_scanned);
        __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
@@ -2492,7 +2508,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
        move_folios_to_lru(lruvec, &folio_list);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
-       item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+       item = PGSTEAL_KSWAPD + reclaimer_offset();
        if (!cgroup_reclaim(sc))
                __count_vm_events(item, nr_reclaimed);
        __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
@@ -4871,7 +4887,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
                        break;
        }
 
-       item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+       item = PGSCAN_KSWAPD + reclaimer_offset();
        if (!cgroup_reclaim(sc)) {
                __count_vm_events(item, isolated);
                __count_vm_events(PGREFILL, sorted);
@@ -5049,7 +5065,7 @@ retry:
        if (walk && walk->batched)
                reset_batch_size(lruvec, walk);
 
-       item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+       item = PGSTEAL_KSWAPD + reclaimer_offset();
        if (!cgroup_reclaim(sc))
                __count_vm_events(item, reclaimed);
        __count_memcg_events(memcg, item, reclaimed);
index b2371d745e007f0ac5812c9727ffe636c66cb8c2..1ea6a5ce1c4161b5f41387a82e64c2446ad93a50 100644 (file)
@@ -1271,10 +1271,13 @@ const char * const vmstat_text[] = {
        "pgreuse",
        "pgsteal_kswapd",
        "pgsteal_direct",
+       "pgsteal_khugepaged",
        "pgdemote_kswapd",
        "pgdemote_direct",
+       "pgdemote_khugepaged",
        "pgscan_kswapd",
        "pgscan_direct",
+       "pgscan_khugepaged",
        "pgscan_direct_throttle",
        "pgscan_anon",
        "pgscan_file",