cgroup: use a dedicated workqueue for cgroup destruction

author Tejun Heo <tj@kernel.org>

Fri, 22 Nov 2013 22:14:39 +0000 (17:14 -0500)

committer Tejun Heo <tj@kernel.org>

Fri, 22 Nov 2013 22:14:39 +0000 (17:14 -0500)
author Tejun Heo <tj@kernel.org>
Fri, 22 Nov 2013 22:14:39 +0000 (17:14 -0500)
committer Tejun Heo <tj@kernel.org>
Fri, 22 Nov 2013 22:14:39 +0000 (17:14 -0500)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 4c62513..a7b98ee 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
  static DEFINE_MUTEX(cgroup_root_mutex);
  
  /*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq,
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
   * Generate an array of cgroup subsystem pointers. At boot time, this is
   * populated with the built in subsystems, and modular subsystems are
   * registered after that. The mutable section of this array is protected by
@@ -871,7 +879,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
         struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
  
         INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-       schedule_work(&cgrp->destroy_work);
+       queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
  }
  
  static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -4249,7 +4257,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
          * css_put().  dput() requires process context which we don't have.
          */
         INIT_WORK(&css->destroy_work, css_free_work_fn);
-       schedule_work(&css->destroy_work);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
  }
  
  static void css_release(struct percpu_ref *ref)
@@ -4539,7 +4547,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
                 container_of(ref, struct cgroup_subsys_state, refcnt);
  
         INIT_WORK(&css->destroy_work, css_killed_work_fn);
-       schedule_work(&css->destroy_work);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
  }
  
  /**
@@ -5063,6 +5071,22 @@ out:
         return err;
  }
  
+static int __init cgroup_wq_init(void)
+{
+       /*
+        * Running the destruction path in parallel gains little, since
+        * a good chunk of it is serialized by cgroup_mutex anyway, so
+        * use 1 for @max_active.
+        *
+        * We would prefer to do this in cgroup_init() above, but that
+        * is called before init_workqueues(), so leave this until after.
+        */
+       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+       BUG_ON(!cgroup_destroy_wq);
+       return 0;
+}
+core_initcall(cgroup_wq_init);
+
  /*
   * proc_cgroup_show()
   *  - Print task's cgroup paths into seq_file, one line for each hierarchy
author	Tejun Heo <tj@kernel.org>
	Fri, 22 Nov 2013 22:14:39 +0000 (17:14 -0500)
committer	Tejun Heo <tj@kernel.org>
	Fri, 22 Nov 2013 22:14:39 +0000 (17:14 -0500)