cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc()
authorTejun Heo <tj@kernel.org>
Fri, 28 Apr 2017 19:14:55 +0000 (15:14 -0400)
committerTejun Heo <tj@kernel.org>
Fri, 28 Apr 2017 19:28:20 +0000 (15:28 -0400)
cgroup_get() expected to be called only on live cgroups and triggers
warning on a dead cgroup; however, cgroup_sk_alloc() may be called
while cloning a socket which is left in an empty and removed cgroup
and thus may legitimately duplicate its reference on a dead cgroup.
This currently triggers the following warning spuriously.

 WARNING: CPU: 14 PID: 0 at kernel/cgroup.c:490 cgroup_get+0x55/0x60
 ...
  [<ffffffff8107e123>] __warn+0xd3/0xf0
  [<ffffffff8107e20e>] warn_slowpath_null+0x1e/0x20
  [<ffffffff810ff465>] cgroup_get+0x55/0x60
  [<ffffffff81106061>] cgroup_sk_alloc+0x51/0xe0
  [<ffffffff81761beb>] sk_clone_lock+0x2db/0x390
  [<ffffffff817cce06>] inet_csk_clone_lock+0x16/0xc0
  [<ffffffff817e8173>] tcp_create_openreq_child+0x23/0x4b0
  [<ffffffff818601a1>] tcp_v6_syn_recv_sock+0x91/0x670
  [<ffffffff817e8b16>] tcp_check_req+0x3a6/0x4e0
  [<ffffffff81861ba3>] tcp_v6_rcv+0x693/0xa00
  [<ffffffff81837429>] ip6_input_finish+0x59/0x3e0
  [<ffffffff81837cb2>] ip6_input+0x32/0xb0
  [<ffffffff81837387>] ip6_rcv_finish+0x57/0xa0
  [<ffffffff81837ac8>] ipv6_rcv+0x318/0x4d0
  [<ffffffff817778c7>] __netif_receive_skb_core+0x2d7/0x9a0
  [<ffffffff81777fa6>] __netif_receive_skb+0x16/0x70
  [<ffffffff81778023>] netif_receive_skb_internal+0x23/0x80
  [<ffffffff817787d8>] napi_gro_frags+0x208/0x270
  [<ffffffff8168a9ec>] mlx4_en_process_rx_cq+0x74c/0xf40
  [<ffffffff8168b270>] mlx4_en_poll_rx_cq+0x30/0x90
  [<ffffffff81778b30>] net_rx_action+0x210/0x350
  [<ffffffff8188c426>] __do_softirq+0x106/0x2c7
  [<ffffffff81082bad>] irq_exit+0x9d/0xa0 [<ffffffff8188c0e4>] do_IRQ+0x54/0xd0
  [<ffffffff8188a63f>] common_interrupt+0x7f/0x7f <EOI>
  [<ffffffff8173d7e7>] cpuidle_enter+0x17/0x20
  [<ffffffff810bdfd9>] cpu_startup_entry+0x2a9/0x2f0
  [<ffffffff8103edd1>] start_secondary+0xf1/0x100

This patch renames the existing cgroup_get() with the dead cgroup
warning to cgroup_get_live() after cgroup_kn_lock_live() and
introduces the new cgroup_get() which doesn't check whether the cgroup
is live or dead.

All existing cgroup_get() users except for cgroup_sk_alloc() are
converted to use cgroup_get_live().

Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets")
Cc: stable@vger.kernel.org # v4.5+
Cc: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cgroup.c

index b1cc1c306668cc8058181a609e0a2836873aa4b9..10951d5e35d28e7be8e951da249bdc9e23a19e17 100644 (file)
@@ -437,6 +437,11 @@ out_unlock:
 }
 
 static void cgroup_get(struct cgroup *cgrp)
+{
+       css_get(&cgrp->self);
+}
+
+static void cgroup_get_live(struct cgroup *cgrp)
 {
        WARN_ON_ONCE(cgroup_is_dead(cgrp));
        css_get(&cgrp->self);
@@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
        list_add_tail(&link->cgrp_link, &cset->cgrp_links);
 
        if (cgroup_parent(cgrp))
-               cgroup_get(cgrp);
+               cgroup_get_live(cgrp);
 }
 
 /**
@@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                        return ERR_PTR(-EINVAL);
                }
                cgrp_dfl_visible = true;
-               cgroup_get(&cgrp_dfl_root.cgrp);
+               cgroup_get_live(&cgrp_dfl_root.cgrp);
 
                dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
                                         CGROUP2_SUPER_MAGIC, ns);
@@ -2575,7 +2580,7 @@ restart:
                        if (!css || !percpu_ref_is_dying(&css->refcnt))
                                continue;
 
-                       cgroup_get(dsct);
+                       cgroup_get_live(dsct);
                        prepare_to_wait(&dsct->offline_waitq, &wait,
                                        TASK_UNINTERRUPTIBLE);
 
@@ -3946,7 +3951,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 {
        lockdep_assert_held(&cgroup_mutex);
 
-       cgroup_get(cgrp);
+       cgroup_get_live(cgrp);
 
        memset(css, 0, sizeof(*css));
        css->cgroup = cgrp;
@@ -4122,7 +4127,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
        /* allocation complete, commit to creation */
        list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
        atomic_inc(&root->nr_cgrps);
-       cgroup_get(parent);
+       cgroup_get_live(parent);
 
        /*
         * @cgrp is now fully operational.  If something fails after this
@@ -4946,7 +4951,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
        if (kn) {
                if (kernfs_type(kn) == KERNFS_DIR) {
                        cgrp = kn->priv;
-                       cgroup_get(cgrp);
+                       cgroup_get_live(cgrp);
                } else {
                        cgrp = ERR_PTR(-ENOTDIR);
                }
@@ -5026,6 +5031,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 
        /* Socket clone path */
        if (skcd->val) {
+               /*
+                * We might be cloning a socket which is left in an empty
+                * cgroup and the cgroup might have already been rmdir'd.
+                * Don't use cgroup_get_live().
+                */
                cgroup_get(sock_cgroup_ptr(skcd));
                return;
        }