cgroup: fix cgroup_create() error handling path

[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index e0839bc..bcb1755 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
  static DEFINE_MUTEX(cgroup_root_mutex);
  
  /*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
   * Generate an array of cgroup subsystem pointers. At boot time, this is
   * populated with the built in subsystems, and modular subsystems are
   * registered after that. The mutable section of this array is protected by
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp);
  static int cgroup_destroy_locked(struct cgroup *cgrp);
  static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                               bool is_add);
+static int cgroup_file_release(struct inode *inode, struct file *file);
  
  /**
   * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
         struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
  
         INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-       schedule_work(&cgrp->destroy_work);
+       queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
  }
  
  static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -895,11 +904,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
         iput(inode);
  }
  
-static int cgroup_delete(const struct dentry *d)
-{
-       return 1;
-}
-
  static void remove_dir(struct dentry *d)
  {
         struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1490,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
  {
         static const struct dentry_operations cgroup_dops = {
                 .d_iput = cgroup_diput,
-               .d_delete = cgroup_delete,
+               .d_delete = always_delete_dentry,
         };
  
         struct inode *inode =
@@ -2426,7 +2430,7 @@ static const struct file_operations cgroup_seqfile_operations = {
         .read = seq_read,
         .write = cgroup_file_write,
         .llseek = seq_lseek,
-       .release = single_release,
+       .release = cgroup_file_release,
  };
  
  static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2487,6 +2491,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
                 ret = cft->release(inode, file);
         if (css->ss)
                 css_put(css);
+       if (file->f_op == &cgroup_seqfile_operations)
+               single_release(inode, file);
         return ret;
  }
  
@@ -4254,7 +4260,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
          * css_put().  dput() requires process context which we don't have.
          */
         INIT_WORK(&css->destroy_work, css_free_work_fn);
-       schedule_work(&css->destroy_work);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
  }
  
  static void css_release(struct percpu_ref *ref)
@@ -4420,14 +4426,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
         list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
         root->number_of_cgroups++;
  
-       /* each css holds a ref to the cgroup's dentry and the parent css */
-       for_each_root_subsys(root, ss) {
-               struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
-               dget(dentry);
-               css_get(css->parent);
-       }
-
         /* hold a ref to the parent's dentry */
         dget(parent->dentry);
  
@@ -4439,6 +4437,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                 if (err)
                         goto err_destroy;
  
+               /* each css holds a ref to the cgroup's dentry and parent css */
+               dget(dentry);
+               css_get(css->parent);
+
+               /* mark it consumed for error path */
+               css_ar[ss->subsys_id] = NULL;
+
                 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
                     parent->parent) {
                         pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@ -4485,6 +4490,14 @@ err_free_cgrp:
         return err;
  
  err_destroy:
+       for_each_root_subsys(root, ss) {
+               struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
+
+               if (css) {
+                       percpu_ref_cancel_init(&css->refcnt);
+                       ss->css_free(css);
+               }
+       }
         cgroup_destroy_locked(cgrp);
         mutex_unlock(&cgroup_mutex);
         mutex_unlock(&dentry->d_inode->i_mutex);
@@ -4544,7 +4557,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
                 container_of(ref, struct cgroup_subsys_state, refcnt);
  
         INIT_WORK(&css->destroy_work, css_killed_work_fn);
-       schedule_work(&css->destroy_work);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
  }
  
  /**
@@ -4646,8 +4659,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
          * will be invoked to perform the rest of destruction once the
          * percpu refs of all css's are confirmed to be killed.
          */
-       for_each_root_subsys(cgrp->root, ss)
-               kill_css(cgroup_css(cgrp, ss));
+       for_each_root_subsys(cgrp->root, ss) {
+               struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
+               if (css)
+                       kill_css(css);
+       }
  
         /*
          * Mark @cgrp dead.  This prevents further task migration and child
@@ -5068,6 +5085,22 @@ out:
         return err;
  }
  
+static int __init cgroup_wq_init(void)
+{
+       /*
+        * Allocate cgroup_destroy_wq.  There isn't much point in running
+        * the destruction path in parallel; a good chunk of it is
+        * serialized by cgroup_mutex anyway, so use 1 for @max_active.
+        *
+        * We would prefer to do this in cgroup_init() above, but that is
+        * called before init_workqueues(), so defer it to an initcall.
+        */
+       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+       BUG_ON(!cgroup_destroy_wq);
+       return 0;
+}
+core_initcall(cgroup_wq_init);
+
  /*
   * proc_cgroup_show()
   *  - Print task's cgroup paths into seq_file, one line for each hierarchy