cgroup: add cpu.stat file to root cgroup

author Boris Burkov <boris@bur.io>

Wed, 27 May 2020 21:43:19 +0000 (14:43 -0700)

committer Tejun Heo <tj@kernel.org>

Thu, 28 May 2020 14:06:35 +0000 (10:06 -0400)
author Boris Burkov <boris@bur.io>
Wed, 27 May 2020 21:43:19 +0000 (14:43 -0700)
committer Tejun Heo <tj@kernel.org>
Thu, 28 May 2020 14:06:35 +0000 (10:06 -0400)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst

index bcc8026..341a6c2 100644 (file)
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -714,9 +714,7 @@ Conventions
  - Settings for a single feature should be contained in a single file.
  
  - The root cgroup should be exempt from resource control and thus
-  shouldn't have resource control interface files.  Also,
-  informational files on the root cgroup which end up showing global
-  information available elsewhere shouldn't exist.
+  shouldn't have resource control interface files.
  
  - The default time unit is microseconds.  If a different unit is ever
    used, an explicit unit suffix must be present.
@@ -985,7 +983,7 @@ CPU Interface Files
  All time durations are in microseconds.
  
    cpu.stat
-       A read-only flat-keyed file which exists on non-root cgroups.
+       A read-only flat-keyed file.
         This file exists whether the controller is enabled or not.
  
         It always reports the following three stats:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index 7a01674..51924eb 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4874,7 +4874,6 @@ static struct cftype cgroup_base_files[] = {
         },
         {
                 .name = "cpu.stat",
-               .flags = CFTYPE_NOT_ON_ROOT,
                 .seq_show = cpu_stat_show,
         },
  #ifdef CONFIG_PSI
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c

index 41ca996..b6397a1 100644 (file)
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -389,18 +389,62 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
         cgroup_base_stat_cputime_account_end(cgrp, rstatc);
  }
  
+/*
+ * compute the cputime for the root cgroup by getting the per cpu data
+ * at a global level, then categorizing the fields in a manner consistent
+ * with how it is done by __cgroup_account_cputime_field for each bit of
+ * cpu time attributed to a cgroup.
+ */
+static void root_cgroup_cputime(struct task_cputime *cputime)
+{
+       int i;
+
+       cputime->stime = 0;
+       cputime->utime = 0;
+       cputime->sum_exec_runtime = 0;
+       for_each_possible_cpu(i) {
+               struct kernel_cpustat kcpustat;
+               u64 *cpustat = kcpustat.cpustat;
+               u64 user = 0;
+               u64 sys = 0;
+
+               kcpustat_cpu_fetch(&kcpustat, i);
+               user += cpustat[CPUTIME_USER];
+               user += cpustat[CPUTIME_NICE];
+               cputime->utime += user;
+
+               sys += cpustat[CPUTIME_SYSTEM];
+               sys += cpustat[CPUTIME_IRQ];
+               sys += cpustat[CPUTIME_SOFTIRQ];
+               cputime->stime += sys;
+
+               /*
+                * GUEST and GUEST_NICE time is already included in USER/NICE.
+                */
+               cputime->sum_exec_runtime += user;
+               cputime->sum_exec_runtime += sys;
+               cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+       }
+}
+
  void cgroup_base_stat_cputime_show(struct seq_file *seq)
  {
         struct cgroup *cgrp = seq_css(seq)->cgroup;
         u64 usage, utime, stime;
-
-       if (!cgroup_parent(cgrp))
-               return;
-
-       cgroup_rstat_flush_hold(cgrp);
-       usage = cgrp->bstat.cputime.sum_exec_runtime;
-       cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
-       cgroup_rstat_flush_release();
+       struct task_cputime cputime;
+
+       if (cgroup_parent(cgrp)) {
+               cgroup_rstat_flush_hold(cgrp);
+               usage = cgrp->bstat.cputime.sum_exec_runtime;
+               cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
+                              &utime, &stime);
+               cgroup_rstat_flush_release();
+       } else {
+               root_cgroup_cputime(&cputime);
+               usage = cputime.sum_exec_runtime;
+               utime = cputime.utime;
+               stime = cputime.stime;
+       }
  
         do_div(usage, NSEC_PER_USEC);
         do_div(utime, NSEC_PER_USEC);
author	Boris Burkov <boris@bur.io>
	Wed, 27 May 2020 21:43:19 +0000 (14:43 -0700)
committer	Tejun Heo <tj@kernel.org>
	Thu, 28 May 2020 14:06:35 +0000 (10:06 -0400)
Documentation/admin-guide/cgroup-v2.rst		patch \| blob \| history
kernel/cgroup/cgroup.c		patch \| blob \| history
kernel/cgroup/rstat.c		patch \| blob \| history