cgroup: cgroup v2 freezer

author Roman Gushchin <guro@fb.com>

Fri, 19 Apr 2019 17:03:04 +0000 (10:03 -0700)

committer Tejun Heo <tj@kernel.org>

Fri, 19 Apr 2019 18:26:48 +0000 (11:26 -0700)
author Roman Gushchin <guro@fb.com>
Fri, 19 Apr 2019 17:03:04 +0000 (10:03 -0700)
committer Tejun Heo <tj@kernel.org>
Fri, 19 Apr 2019 18:26:48 +0000 (11:26 -0700)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h

index 7d57890..77258d2 100644 (file)
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -65,6 +65,12 @@ enum {
          * specified at mount time and thus is implemented here.
          */
         CGRP_CPUSET_CLONE_CHILDREN,
+
+       /* Control group has to be frozen. */
+       CGRP_FREEZE,
+
+       /* Cgroup is frozen. */
+       CGRP_FROZEN,
  };
  
  /* cgroup_root->flags */
@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
         struct cgroup *updated_next;            /* NULL iff not on the list */
  };
  
+struct cgroup_freezer_state {
+       /* cgroup.freeze setting: freeze this cgroup and its descendants. */
+       bool freeze;
+
+       /* Number of freeze requests from self and ancestors; freeze if > 0. */
+       int e_freeze;
+
+       /* Fields below are protected by css_set_lock */
+
+       /* Number of frozen descendant cgroups */
+       int nr_frozen_descendants;
+
+       /*
+        * Number of tasks which are counted as frozen:
+        * frozen, SIGSTOPped, and PTRACEd.
+        */
+       int nr_frozen_tasks;
+};
+
  struct cgroup {
         /* self css with NULL ->ss, points back to this cgroup */
         struct cgroup_subsys_state self;
@@ -453,6 +478,9 @@ struct cgroup {
         /* If there is block congestion on this cgroup. */
         atomic_t congestion_count;
  
+       /* Used to store internal freezer state */
+       struct cgroup_freezer_state freezer;
+
         /* ids of the ancestors at each level including self */
         int ancestor_ids[];
  };
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index 81f58b4..3e2efd4 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
                 free_cgroup_ns(ns);
  }
  
+#ifdef CONFIG_CGROUPS
+
+void cgroup_enter_frozen(void);
+void cgroup_leave_frozen(bool always_leave);
+void cgroup_update_frozen(struct cgroup *cgrp);
+void cgroup_freeze(struct cgroup *cgrp, bool freeze);
+void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
+                                struct cgroup *dst);
+void cgroup_freezer_frozen_exit(struct task_struct *task);
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+       bool ret;
+
+       if (task->flags & PF_KTHREAD)
+               return false;
+
+       rcu_read_lock();
+       ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+       return task->frozen;
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void cgroup_enter_frozen(void) { }
+static inline void cgroup_leave_frozen(bool always_leave) { }
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+       return false;
+}
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+       return false;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
  #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 1549584..45b2199 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -726,6 +726,8 @@ struct task_struct {
  #ifdef CONFIG_CGROUPS
         /* disallow userland-initiated cgroup migration */
         unsigned                        no_cgroup_migration:1;
+       /* task is frozen/stopped (used by the cgroup freezer) */
+       unsigned                        frozen:1;
  #endif
  #ifdef CONFIG_BLK_CGROUP
         /* to be used once the psi infrastructure lands upstream. */
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h

index 98228bd..fa067de 100644 (file)
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -18,6 +18,7 @@ struct task_struct;
  #define JOBCTL_TRAP_NOTIFY_BIT 20      /* trap for NOTIFY */
  #define JOBCTL_TRAPPING_BIT    21      /* switching to TRACED */
  #define JOBCTL_LISTENING_BIT   22      /* ptracer is listening for events */
+#define JOBCTL_TRAP_FREEZE_BIT 23      /* trap for cgroup freezer */
  
  #define JOBCTL_STOP_DEQUEUED   (1UL << JOBCTL_STOP_DEQUEUED_BIT)
  #define JOBCTL_STOP_PENDING    (1UL << JOBCTL_STOP_PENDING_BIT)
@@ -26,6 +27,7 @@ struct task_struct;
  #define JOBCTL_TRAP_NOTIFY     (1UL << JOBCTL_TRAP_NOTIFY_BIT)
  #define JOBCTL_TRAPPING                (1UL << JOBCTL_TRAPPING_BIT)
  #define JOBCTL_LISTENING       (1UL << JOBCTL_LISTENING_BIT)
+#define JOBCTL_TRAP_FREEZE     (1UL << JOBCTL_TRAP_FREEZE_BIT)
  
  #define JOBCTL_TRAP_MASK       (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
  #define JOBCTL_PENDING_MASK    (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile

index 8d5689c..5d7a76b 100644 (file)
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
  # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
  
  obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
  obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index 786ceef..6895464 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2435,8 +2435,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
                         get_css_set(to_cset);
                         to_cset->nr_tasks++;
                         css_set_move_task(task, from_cset, to_cset, true);
-                       put_css_set_locked(from_cset);
                         from_cset->nr_tasks--;
+                       /*
+                        * If the source or destination cgroup is frozen,
+                        * the task might require to change its state.
+                        */
+                       cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
+                                                   to_cset->dfl_cgrp);
+                       put_css_set_locked(from_cset);
+
                 }
         }
         spin_unlock_irq(&css_set_lock);
@@ -3477,8 +3484,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
  
  static int cgroup_events_show(struct seq_file *seq, void *v)
  {
-       seq_printf(seq, "populated %d\n",
-                  cgroup_is_populated(seq_css(seq)->cgroup));
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
+       seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
+
         return 0;
  }
  
@@ -3540,6 +3550,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
  }
  #endif
  
+static int cgroup_freeze_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       seq_printf(seq, "%d\n", cgrp->freezer.freeze);
+
+       return 0;
+}
+
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+                                  char *buf, size_t nbytes, loff_t off)
+{
+       struct cgroup *cgrp;
+       ssize_t ret;
+       int freeze;
+
+       ret = kstrtoint(strstrip(buf), 0, &freeze);
+       if (ret)
+               return ret;
+
+       if (freeze < 0 || freeze > 1)
+               return -ERANGE;
+
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENOENT;
+
+       cgroup_freeze(cgrp, freeze);
+
+       cgroup_kn_unlock(of->kn);
+
+       return nbytes;
+}
+
  static int cgroup_file_open(struct kernfs_open_file *of)
  {
         struct cftype *cft = of->kn->priv;
@@ -4684,6 +4728,12 @@ static struct cftype cgroup_base_files[] = {
                 .seq_show = cgroup_stat_show,
         },
         {
+               .name = "cgroup.freeze",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = cgroup_freeze_show,
+               .write = cgroup_freeze_write,
+       },
+       {
                 .name = "cpu.stat",
                 .flags = CFTYPE_NOT_ON_ROOT,
                 .seq_show = cpu_stat_show,
@@ -5033,12 +5083,29 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
         if (ret)
                 goto out_psi_free;
  
+       /* Inherit the effective freeze counter from the parent cgroup. */
+       cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+       if (cgrp->freezer.e_freeze) {
+               /* Tasks attached later must freeze; empty means frozen. */
+               set_bit(CGRP_FREEZE, &cgrp->flags);
+               set_bit(CGRP_FROZEN, &cgrp->flags);
+       }
+
         spin_lock_irq(&css_set_lock);
         for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
                 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
  
-               if (tcgrp != cgrp)
+               if (tcgrp != cgrp) {
                         tcgrp->nr_descendants++;
+
+                       /*
+                        * If the new cgroup is frozen, all ancestor cgroups
+                        * get a new frozen descendant, but their state can't
+                        * change because of this.
+                        */
+                       if (cgrp->freezer.e_freeze)
+                               tcgrp->freezer.nr_frozen_descendants++;
+               }
         }
         spin_unlock_irq(&css_set_lock);
  
@@ -5329,6 +5396,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
         for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
                 tcgrp->nr_descendants--;
                 tcgrp->nr_dying_descendants++;
+               /*
+                * If the dying cgroup is frozen, decrease frozen descendants
+                * counters of ancestor cgroups.
+                */
+               if (test_bit(CGRP_FROZEN, &cgrp->flags))
+                       tcgrp->freezer.nr_frozen_descendants--;
         }
         spin_unlock_irq(&css_set_lock);
  
@@ -5782,6 +5855,29 @@ void cgroup_post_fork(struct task_struct *child)
                         cset->nr_tasks++;
                         css_set_move_task(child, NULL, cset, false);
                 }
+
+               /*
+                * If the cgroup has to be frozen, the new task has too.
+                * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
+                * the task into the frozen state.
+                */
+               if (unlikely(cgroup_task_freeze(child))) {
+                       /*
+                        * Update jobctl under siglock, as cgroup_freeze_task() does.
+                        */
+                       spin_lock(&child->sighand->siglock);
+                       WARN_ON_ONCE(child->frozen);
+                       child->jobctl |= JOBCTL_TRAP_FREEZE;
+                       spin_unlock(&child->sighand->siglock);
+
+                       /*
+                        * Calling cgroup_update_frozen() isn't required here,
+                        * because it will be called anyway a bit later
+                        * from do_freezer_trap(). So we avoid cgroup's
+                        * transient switch from the frozen state and back.
+                        */
+               }
+
                 spin_unlock_irq(&css_set_lock);
         }
  
@@ -5830,6 +5926,12 @@ void cgroup_exit(struct task_struct *tsk)
                 spin_lock_irq(&css_set_lock);
                 css_set_move_task(tsk, cset, NULL, false);
                 cset->nr_tasks--;
+
+               if (unlikely(cgroup_task_frozen(tsk)))
+                       cgroup_freezer_frozen_exit(tsk);
+               else if (unlikely(cgroup_task_freeze(tsk)))
+                       cgroup_update_frozen(task_dfl_cgroup(tsk));
+
                 spin_unlock_irq(&css_set_lock);
         } else {
                 get_css_set(cset);
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c

new file mode 100644 (file)

index 0000000..9d8cda4
--- /dev/null
+++ b/kernel/cgroup/freezer.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cgroup.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/signal.h>
+
+#include "cgroup-internal.h"
+
+/*
+ * Propagate a change of the cgroup's frozen state up the cgroup tree.
+ */
+static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
+{
+       int desc = 1;
+
+       /*
+        * desc counts cgrp itself plus every ancestor whose state has
+        * changed so far. When freezing, an ancestor with CGRP_FREEZE set
+        * becomes frozen once all of its descendants are frozen.
+        * Otherwise, all ancestor cgroups are forced into the non-frozen state.
+        */
+       while ((cgrp = cgroup_parent(cgrp))) {
+               if (frozen) {
+                       cgrp->freezer.nr_frozen_descendants += desc;
+                       if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
+                           test_bit(CGRP_FREEZE, &cgrp->flags) &&
+                           cgrp->freezer.nr_frozen_descendants ==
+                           cgrp->nr_descendants) {
+                               set_bit(CGRP_FROZEN, &cgrp->flags);
+                               cgroup_file_notify(&cgrp->events_file);
+                               desc++;
+                       }
+               } else {
+                       cgrp->freezer.nr_frozen_descendants -= desc;
+                       if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
+                               clear_bit(CGRP_FROZEN, &cgrp->flags);
+                               cgroup_file_notify(&cgrp->events_file);
+                               desc++;
+                       }
+               }
+       }
+}
+
+/*
+ * Revisit the cgroup frozen state.
+ * Checks if the cgroup is really frozen and performs all state transitions.
+ */
+void cgroup_update_frozen(struct cgroup *cgrp)
+{
+       bool frozen;
+
+       lockdep_assert_held(&css_set_lock);
+
+       /*
+        * If the cgroup has to be frozen (CGRP_FREEZE bit set),
+        * and all tasks are frozen and/or stopped, let's consider
+        * the cgroup frozen. Otherwise it's not frozen.
+        */
+       frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
+               cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
+
+       if (frozen) {
+               /* Already there? */
+               if (test_bit(CGRP_FROZEN, &cgrp->flags))
+                       return;
+
+               set_bit(CGRP_FROZEN, &cgrp->flags);
+       } else {
+               /* Already there? */
+               if (!test_bit(CGRP_FROZEN, &cgrp->flags))
+                       return;
+
+               clear_bit(CGRP_FROZEN, &cgrp->flags);
+       }
+       cgroup_file_notify(&cgrp->events_file);
+
+       /* Update the state of ancestor cgroups. */
+       cgroup_propagate_frozen(cgrp, frozen);
+}
+
+/*
+ * Increment cgroup's nr_frozen_tasks.
+ */
+static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
+{
+       cgrp->freezer.nr_frozen_tasks++;
+}
+
+/*
+ * Decrement cgroup's nr_frozen_tasks.
+ */
+static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
+{
+       cgrp->freezer.nr_frozen_tasks--;
+       WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
+}
+
+/*
+ * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ */
+void cgroup_enter_frozen(void)
+{
+       struct cgroup *cgrp;
+
+       if (current->frozen)
+               return;
+
+       spin_lock_irq(&css_set_lock);
+       current->frozen = true;
+       cgrp = task_dfl_cgroup(current);
+       cgroup_inc_frozen_cnt(cgrp);
+       cgroup_update_frozen(cgrp);
+       spin_unlock_irq(&css_set_lock);
+}
+
+/*
+ * Conditionally leave frozen/stopped state. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ *
+ * If always_leave is not set, and the cgroup is freezing,
+ * we're racing with the cgroup freezing. In this case, we don't
+ * drop the frozen counter to avoid a transient switch to
+ * the unfrozen state.
+ */
+void cgroup_leave_frozen(bool always_leave)
+{
+       struct cgroup *cgrp;
+
+       spin_lock_irq(&css_set_lock);
+       cgrp = task_dfl_cgroup(current);
+       if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
+               cgroup_dec_frozen_cnt(cgrp);
+               cgroup_update_frozen(cgrp);
+               WARN_ON_ONCE(!current->frozen);
+               current->frozen = false;
+       }
+       spin_unlock_irq(&css_set_lock);
+
+       if (unlikely(current->frozen)) {
+               /*
+                * If the task remained in the frozen state,
+                * make sure it won't reach userspace without
+                * entering the signal handling loop.
+                */
+               spin_lock_irq(&current->sighand->siglock);
+               recalc_sigpending();
+               spin_unlock_irq(&current->sighand->siglock);
+       }
+}
+
+/*
+ * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
+ * jobctl bit.
+ */
+static void cgroup_freeze_task(struct task_struct *task, bool freeze)
+{
+       unsigned long flags;
+
+       /* If the task is about to die, don't bother with freezing it. */
+       if (!lock_task_sighand(task, &flags))
+               return;
+
+       if (freeze) {
+               task->jobctl |= JOBCTL_TRAP_FREEZE;
+               signal_wake_up(task, false);
+       } else {
+               task->jobctl &= ~JOBCTL_TRAP_FREEZE;
+               wake_up_process(task);
+       }
+
+       unlock_task_sighand(task, &flags);
+}
+
+/*
+ * Freeze or unfreeze all tasks in the given cgroup.
+ */
+static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       spin_lock_irq(&css_set_lock);
+       if (freeze)
+               set_bit(CGRP_FREEZE, &cgrp->flags);
+       else
+               clear_bit(CGRP_FREEZE, &cgrp->flags);
+       spin_unlock_irq(&css_set_lock);
+
+       css_task_iter_start(&cgrp->self, 0, &it);
+       while ((task = css_task_iter_next(&it))) {
+               /*
+                * Ignore kernel threads here. Freezing cgroups containing
+                * kthreads isn't supported.
+                */
+               if (task->flags & PF_KTHREAD)
+                       continue;
+               cgroup_freeze_task(task, freeze);
+       }
+       css_task_iter_end(&it);
+
+       /*
+        * Cgroup state should be revisited here to cover empty leaf cgroups
+        * and cgroups whose descendants are already in the desired state.
+        */
+       spin_lock_irq(&css_set_lock);
+       if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
+               cgroup_update_frozen(cgrp);
+       spin_unlock_irq(&css_set_lock);
+}
+
+/*
+ * Move a migrating task's frozen accounting from @src to @dst and force the
+ * task into @dst's freeze state. Called with css_set_lock held.
+ */
+void cgroup_freezer_migrate_task(struct task_struct *task,
+                                struct cgroup *src, struct cgroup *dst)
+{
+       lockdep_assert_held(&css_set_lock);
+
+       /*
+        * Kernel threads are not supposed to be frozen at all.
+        */
+       if (task->flags & PF_KTHREAD)
+               return;
+
+       /*
+        * A frozen task is counted in nr_frozen_tasks of its cgroup, so move
+        * that count from the source to the destination. This is done even
+        * if the destination is not freezing, to keep both counts balanced.
+        */
+       if (task->frozen) {
+               cgroup_inc_frozen_cnt(dst);
+               cgroup_dec_frozen_cnt(src);
+       }
+       cgroup_update_frozen(dst);
+       cgroup_update_frozen(src);
+
+       /*
+        * Force the task to the state required by the destination cgroup.
+        */
+       cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
+}
+
+void cgroup_freezer_frozen_exit(struct task_struct *task)
+{
+       struct cgroup *cgrp = task_dfl_cgroup(task);
+
+       lockdep_assert_held(&css_set_lock);
+
+       cgroup_dec_frozen_cnt(cgrp);
+       cgroup_update_frozen(cgrp);
+}
+
+void cgroup_freeze(struct cgroup *cgrp, bool freeze)
+{
+       struct cgroup_subsys_state *css;
+       struct cgroup *dsct;
+       bool applied = false;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       /*
+        * Nothing changed? Just exit.
+        */
+       if (cgrp->freezer.freeze == freeze)
+               return;
+
+       cgrp->freezer.freeze = freeze;
+
+       /*
+        * Propagate changes down the cgroup tree.
+        */
+       css_for_each_descendant_pre(css, &cgrp->self) {
+               dsct = css->cgroup;
+
+               if (cgroup_is_dead(dsct))
+                       continue;
+
+               if (freeze) {
+                       dsct->freezer.e_freeze++;
+                       /*
+                        * Already frozen because of ancestor's settings?
+                        */
+                       if (dsct->freezer.e_freeze > 1)
+                               continue;
+               } else {
+                       dsct->freezer.e_freeze--;
+                       /*
+                        * Still frozen because of ancestor's settings?
+                        */
+                       if (dsct->freezer.e_freeze > 0)
+                               continue;
+
+                       WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
+               }
+
+               /*
+                * Do change actual state: freeze or unfreeze.
+                */
+               cgroup_do_freeze(dsct, freeze);
+               applied = true;
+       }
+
+       /*
+        * Even if the actual state hasn't changed, let's notify a user.
+        * The state can be enforced by an ancestor cgroup: the cgroup
+        * can already be in the desired state or it can be locked in the
+        * opposite state, so that the transition will never happen.
+        * In both cases it's better to notify a user, that there is
+        * nothing to wait for.
+        */
+       if (!applied)
+               cgroup_file_notify(&cgrp->events_file);
+}
diff --git a/kernel/fork.c b/kernel/fork.c

index 9dcd18a..8097a0c 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1222,7 +1222,9 @@ static int wait_for_vfork_done(struct task_struct *child,
         int killed;
  
         freezer_do_not_count();
+       cgroup_enter_frozen();
         killed = wait_for_completion_killable(vfork);
+       cgroup_leave_frozen(false);
         freezer_count();
  
         if (killed) {
diff --git a/kernel/signal.c b/kernel/signal.c

index f98448c..095e0fc 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -43,6 +43,7 @@
  #include <linux/compiler.h>
  #include <linux/posix-timers.h>
  #include <linux/livepatch.h>
+#include <linux/cgroup.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/signal.h>
@@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
  
  static bool recalc_sigpending_tsk(struct task_struct *t)
  {
-       if ((t->jobctl & JOBCTL_PENDING_MASK) ||
+       if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
             PENDING(&t->pending, &t->blocked) ||
-           PENDING(&t->signal->shared_pending, &t->blocked)) {
+           PENDING(&t->signal->shared_pending, &t->blocked) ||
+           cgroup_task_frozen(t)) {
                 set_tsk_thread_flag(t, TIF_SIGPENDING);
                 return true;
         }
@@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
                 preempt_disable();
                 read_unlock(&tasklist_lock);
                 preempt_enable_no_resched();
+               cgroup_enter_frozen();
                 freezable_schedule();
         } else {
                 /*
@@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
                 }
  
                 /* Now we don't run again until woken by SIGCONT or SIGKILL */
+               cgroup_enter_frozen();
                 freezable_schedule();
                 return true;
         } else {
@@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
         }
  }
  
+/**
+ * do_freezer_trap - handle the freezer jobctl trap
+ *
+ * Puts the task into frozen state, unless other jobctl stop/trap bits are
+ * pending; then it just returns so that the caller can handle them first.
+ *
+ * CONTEXT:
+ * Must be called with @current->sighand->siglock held,
+ * which is always released before returning.
+ */
+static void do_freezer_trap(void)
+       __releases(&current->sighand->siglock)
+{
+       /*
+        * If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
+        * let's make another loop to give them a chance to be handled.
+        * In any case, we'll come back here.
+        */
+       if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
+            JOBCTL_TRAP_FREEZE) {
+               spin_unlock_irq(&current->sighand->siglock);
+               return;
+       }
+
+       /*
+        * Now we're sure that there is no pending fatal signal and no
+        * pending traps. Clear TIF_SIGPENDING to not get out of schedule()
+        * immediately (if there is a non-fatal signal pending), and
+        * put the task into sleep.
+        */
+       __set_current_state(TASK_INTERRUPTIBLE);
+       clear_thread_flag(TIF_SIGPENDING);
+       spin_unlock_irq(&current->sighand->siglock);
+       cgroup_enter_frozen();
+       freezable_schedule();
+}
+
  static int ptrace_signal(int signr, kernel_siginfo_t *info)
  {
         /*
@@ -2442,6 +2483,10 @@ relock:
                 ksig->info.si_signo = signr = SIGKILL;
                 sigdelset(&current->pending.signal, SIGKILL);
                 recalc_sigpending();
+               current->jobctl &= ~JOBCTL_TRAP_FREEZE;
+               spin_unlock_irq(&sighand->siglock);
+               if (unlikely(cgroup_task_frozen(current)))
+                       cgroup_leave_frozen(true);
                 goto fatal;
         }
  
@@ -2452,9 +2497,24 @@ relock:
                     do_signal_stop(0))
                         goto relock;
  
-               if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
-                       do_jobctl_trap();
+               if (unlikely(current->jobctl &
+                            (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
+                       if (current->jobctl & JOBCTL_TRAP_MASK) {
+                               do_jobctl_trap();
+                               spin_unlock_irq(&sighand->siglock);
+                       } else if (current->jobctl & JOBCTL_TRAP_FREEZE)
+                               do_freezer_trap();
+
+                       goto relock;
+               }
+
+               /*
+                * If the task is leaving the frozen state, let's update
+                * cgroup counters and reset the frozen bit.
+                */
+               if (unlikely(cgroup_task_frozen(current))) {
                         spin_unlock_irq(&sighand->siglock);
+                       cgroup_leave_frozen(true);
                         goto relock;
                 }
  
@@ -2548,8 +2608,8 @@ relock:
                         continue;
                 }
  
-       fatal:
                 spin_unlock_irq(&sighand->siglock);
+       fatal:
  
                 /*
                  * Anything else is fatal, maybe with a core dump.
author	Roman Gushchin <guro@fb.com>
	Fri, 19 Apr 2019 17:03:04 +0000 (10:03 -0700)
committer	Tejun Heo <tj@kernel.org>
	Fri, 19 Apr 2019 18:26:48 +0000 (11:26 -0700)
include/linux/cgroup-defs.h		patch \| blob \| history
include/linux/cgroup.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/sched/jobctl.h		patch \| blob \| history
kernel/cgroup/Makefile		patch \| blob \| history
kernel/cgroup/cgroup.c		patch \| blob \| history
kernel/cgroup/freezer.c	[new file with mode: 0644]	patch \| blob
kernel/fork.c		patch \| blob \| history
kernel/signal.c		patch \| blob \| history