delayacct: track delays from write-protect copy
authorYang Yang <yang.yang29@zte.com.cn>
Wed, 1 Jun 2022 22:55:25 +0000 (15:55 -0700)
committerakpm <akpm@linux-foundation.org>
Wed, 1 Jun 2022 22:55:25 +0000 (15:55 -0700)
Delay accounting does not track the delay of write-protect copy.  When
tasks trigger many write-protect copys(include COW and unsharing of
anonymous pages[1]), it may spend a amount of time waiting for them.  To
get the delay of tasks in write-protect copy, could help users to evaluate
the impact of using KSM or fork() or GUP.

Also update tools/accounting/getdelays.c:

    / # ./getdelays -dl -p 231
    print delayacct stats ON
    listen forever
    PID     231

    CPU             count     real total  virtual total    delay total  delay average
                     6247     1859000000     2154070021     1674255063          0.268ms
    IO              count    delay total  delay average
                        0              0              0ms
    SWAP            count    delay total  delay average
                        0              0              0ms
    RECLAIM         count    delay total  delay average
                        0              0              0ms
    THRASHING       count    delay total  delay average
                        0              0              0ms
    COMPACT         count    delay total  delay average
                        3          72758              0ms
    WPCOPY          count    delay total  delay average
                     3635      271567604              0ms

[1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages")

Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Reviewed-by: wangyong <wang.yong12@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/accounting/delay-accounting.rst
include/linux/delayacct.h
include/uapi/linux/taskstats.h
kernel/delayacct.c
mm/hugetlb.c
mm/memory.c
tools/accounting/getdelays.c

index 197fe31..241d1a8 100644 (file)
@@ -15,6 +15,7 @@ c) swapping in pages
 d) memory reclaim
 e) thrashing page cache
 f) direct compact
+g) write-protect copy
 
 and makes these statistics available to userspace through
 the taskstats interface.
@@ -48,7 +49,7 @@ this structure. See
 for a description of the fields pertaining to delay accounting.
 It will generally be in the form of counters returning the cumulative
 delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
-cache, direct compact etc.
+cache, direct compact, write-protect copy etc.
 
 Taking the difference of two successive readings of a given
 counter (say cpu_delay_total) for a task will give the delay
@@ -117,6 +118,8 @@ Get sum of delays, since system boot, for all pids with tgid 5::
                            0              0              0ms
        COMPACT         count    delay total  delay average
                            0              0              0ms
+        WPCOPY          count    delay total  delay average
+                            0              0              0ms
 
 Get IO accounting for pid 1, it works only with -p::
 
index 6b16a69..58aea2d 100644 (file)
@@ -45,9 +45,13 @@ struct task_delay_info {
        u64 compact_start;
        u64 compact_delay;      /* wait for memory compact */
 
+       u64 wpcopy_start;
+       u64 wpcopy_delay;       /* wait for write-protect copy */
+
        u32 freepages_count;    /* total count of memory reclaim */
        u32 thrashing_count;    /* total count of thrash waits */
        u32 compact_count;      /* total count of memory compact */
+       u32 wpcopy_count;       /* total count of write-protect copy */
 };
 #endif
 
@@ -75,6 +79,8 @@ extern void __delayacct_swapin_start(void);
 extern void __delayacct_swapin_end(void);
 extern void __delayacct_compact_start(void);
 extern void __delayacct_compact_end(void);
+extern void __delayacct_wpcopy_start(void);
+extern void __delayacct_wpcopy_end(void);
 
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {
@@ -191,6 +197,24 @@ static inline void delayacct_compact_end(void)
                __delayacct_compact_end();
 }
 
+static inline void delayacct_wpcopy_start(void)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (current->delays)
+               __delayacct_wpcopy_start();
+}
+
+static inline void delayacct_wpcopy_end(void)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (current->delays)
+               __delayacct_wpcopy_end();
+}
+
 #else
 static inline void delayacct_init(void)
 {}
@@ -225,6 +249,10 @@ static inline void delayacct_compact_start(void)
 {}
 static inline void delayacct_compact_end(void)
 {}
+static inline void delayacct_wpcopy_start(void)
+{}
+static inline void delayacct_wpcopy_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
index 7361541..a7f5b11 100644 (file)
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION      12
+#define TASKSTATS_VERSION      13
 #define TS_COMM_LEN            32      /* should be >= TASK_COMM_LEN
                                         * in linux/sched.h */
 
@@ -194,6 +194,10 @@ struct taskstats {
        __u64   ac_exe_dev;     /* program binary device ID */
        __u64   ac_exe_inode;   /* program binary inode number */
        /* v12 end */
+
+       /* v13: Delay waiting for write-protect copy */
+       __u64    wpcopy_count;
+       __u64    wpcopy_delay_total;
 };
 
 
index 2c1e18f..164ed9e 100644 (file)
@@ -177,11 +177,14 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
        d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
        tmp = d->compact_delay_total + tsk->delays->compact_delay;
        d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
+       tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
+       d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
        d->blkio_count += tsk->delays->blkio_count;
        d->swapin_count += tsk->delays->swapin_count;
        d->freepages_count += tsk->delays->freepages_count;
        d->thrashing_count += tsk->delays->thrashing_count;
        d->compact_count += tsk->delays->compact_count;
+       d->wpcopy_count += tsk->delays->wpcopy_count;
        raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
        return 0;
@@ -249,3 +252,16 @@ void __delayacct_compact_end(void)
                      &current->delays->compact_delay,
                      &current->delays->compact_count);
 }
+
+void __delayacct_wpcopy_start(void)
+{
+       current->delays->wpcopy_start = local_clock();
+}
+
+void __delayacct_wpcopy_end(void)
+{
+       delayacct_end(&current->delays->lock,
+                     &current->delays->wpcopy_start,
+                     &current->delays->wpcopy_delay,
+                     &current->delays->wpcopy_count);
+}
index 7c468ac..a57e1be 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/cma.h>
 #include <linux/migrate.h>
 #include <linux/nospec.h>
+#include <linux/delayacct.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -5230,6 +5231,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
        pte = huge_ptep_get(ptep);
        old_page = pte_page(pte);
 
+       delayacct_wpcopy_start();
+
 retry_avoidcopy:
        /*
         * If no-one else is actually using this page, we're the exclusive
@@ -5240,6 +5243,8 @@ retry_avoidcopy:
                        page_move_anon_rmap(old_page, vma);
                if (likely(!unshare))
                        set_huge_ptep_writable(vma, haddr, ptep);
+
+               delayacct_wpcopy_end();
                return 0;
        }
        VM_BUG_ON_PAGE(PageAnon(old_page) && PageAnonExclusive(old_page),
@@ -5309,6 +5314,7 @@ retry_avoidcopy:
                         * race occurs while re-acquiring page table
                         * lock, and our job is done.
                         */
+                       delayacct_wpcopy_end();
                        return 0;
                }
 
@@ -5367,6 +5373,8 @@ out_release_old:
        put_page(old_page);
 
        spin_lock(ptl); /* Caller expects lock to be held */
+
+       delayacct_wpcopy_end();
        return ret;
 }
 
index 21dadf0..7a08914 100644 (file)
@@ -3090,6 +3090,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
        int page_copied = 0;
        struct mmu_notifier_range range;
 
+       delayacct_wpcopy_start();
+
        if (unlikely(anon_vma_prepare(vma)))
                goto oom;
 
@@ -3114,6 +3116,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                        put_page(new_page);
                        if (old_page)
                                put_page(old_page);
+
+                       delayacct_wpcopy_end();
                        return 0;
                }
        }
@@ -3220,12 +3224,16 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                        free_swap_cache(old_page);
                put_page(old_page);
        }
+
+       delayacct_wpcopy_end();
        return (page_copied && !unshare) ? VM_FAULT_WRITE : 0;
 oom_free_new:
        put_page(new_page);
 oom:
        if (old_page)
                put_page(old_page);
+
+       delayacct_wpcopy_end();
        return VM_FAULT_OOM;
 }
 
index 11e8673..e83e6e4 100644 (file)
@@ -207,6 +207,8 @@ static void print_delayacct(struct taskstats *t)
               "THRASHING%12s%15s%15s\n"
               "      %15llu%15llu%15llums\n"
               "COMPACT  %12s%15s%15s\n"
+              "      %15llu%15llu%15llums\n"
+              "WPCOPY   %12s%15s%15s\n"
               "      %15llu%15llu%15llums\n",
               "count", "real total", "virtual total",
               "delay total", "delay average",
@@ -234,7 +236,11 @@ static void print_delayacct(struct taskstats *t)
               "count", "delay total", "delay average",
               (unsigned long long)t->compact_count,
               (unsigned long long)t->compact_delay_total,
-              average_ms(t->compact_delay_total, t->compact_count));
+              average_ms(t->compact_delay_total, t->compact_count),
+              "count", "delay total", "delay average",
+              (unsigned long long)t->wpcopy_count,
+              (unsigned long long)t->wpcopy_delay_total,
+              average_ms(t->wpcopy_delay_total, t->wpcopy_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)