cgroup: add pids.peak interface for pids controller
author: Josh Don <joshdon@google.com>
Wed, 31 Aug 2022 22:49:03 +0000 (15:49 -0700)
committer: Tejun Heo <tj@kernel.org>
Sun, 4 Sep 2022 19:26:51 +0000 (09:26 -1000)
pids.peak tracks the high watermark of usage for number of pids. This
helps give a better baseline on which to set pids.max. Polling
pids.current isn't really feasible, since it would potentially miss
short-lived spikes.

This interface is analogous to memory.peak.

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/pids.c

index 511af87..7695e60 100644 (file)
@@ -47,6 +47,7 @@ struct pids_cgroup {
         */
        atomic64_t                      counter;
        atomic64_t                      limit;
+       int64_t                         watermark;
 
        /* Handle for "pids.events" */
        struct cgroup_file              events_file;
@@ -85,6 +86,16 @@ static void pids_css_free(struct cgroup_subsys_state *css)
        kfree(css_pids(css));
 }
 
+static void pids_update_watermark(struct pids_cgroup *p, int64_t nr_pids)
+{
+       /*
+        * Raise p->watermark to nr_pids when nr_pids is larger. The check and
+        * the store are not one atomic step, so racing updaters may lose a
+        * peak; that is accepted here to avoid extra atomic overhead.
+        */
+       if (nr_pids > READ_ONCE(p->watermark))
+               WRITE_ONCE(p->watermark, nr_pids);
+}
+
 /**
  * pids_cancel - uncharge the local pid count
  * @pids: the pid cgroup state
@@ -128,8 +139,11 @@ static void pids_charge(struct pids_cgroup *pids, int num)
 {
        struct pids_cgroup *p;
 
-       for (p = pids; parent_pids(p); p = parent_pids(p))
-               atomic64_add(num, &p->counter);
+       for (p = pids; parent_pids(p); p = parent_pids(p)) {
+               int64_t new = atomic64_add_return(num, &p->counter);
+
+               pids_update_watermark(p, new);
+       }
 }
 
 /**
@@ -156,6 +170,12 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
                 */
                if (new > limit)
                        goto revert;
+
+               /*
+                * Not technically accurate if we go over limit somewhere up
+                * the hierarchy, but that's tolerable for the watermark.
+                */
+               pids_update_watermark(p, new);
        }
 
        return 0;
@@ -311,6 +331,14 @@ static s64 pids_current_read(struct cgroup_subsys_state *css,
        return atomic64_read(&pids->counter);
 }
 
+static s64 pids_peak_read(struct cgroup_subsys_state *css,
+                         struct cftype *cft)
+{
+       struct pids_cgroup *pids = css_pids(css);
+
+       return READ_ONCE(pids->watermark); /* peak count; no lock taken */
+}
+
 static int pids_events_show(struct seq_file *sf, void *v)
 {
        struct pids_cgroup *pids = css_pids(seq_css(sf));
@@ -332,6 +360,11 @@ static struct cftype pids_files[] = {
                .flags = CFTYPE_NOT_ON_ROOT,
        },
        {
+               .name = "peak",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_s64 = pids_peak_read,
+       },
+       {
                .name = "events",
                .seq_show = pids_events_show,
                .file_offset = offsetof(struct pids_cgroup, events_file),