#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
-#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/spinlock.h>
static DEFINE_MUTEX(cgroup_root_mutex);
+/*
+ * Lockdep check that the caller holds either cgroup_mutex or the RCU read
+ * lock. The expansion deliberately carries no trailing semicolon: call
+ * sites supply their own, so the macro acts as a single statement and
+ * stays safe inside an unbraced if/else.
+ */
+#define cgroup_assert_mutex_or_rcu_locked() \
+ rcu_lockdep_assert(rcu_read_lock_held() || \
+ lockdep_is_held(&cgroup_mutex), \
+ "cgroup_mutex or RCU read lock required")
+
+#ifdef CONFIG_LOCKDEP
+#define cgroup_assert_mutex_or_root_locked() \
+ WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&cgroup_mutex) && \
+ !lockdep_is_held(&cgroup_root_mutex)))
+#else /* !CONFIG_LOCKDEP: the check expands to an empty statement */
+#define cgroup_assert_mutex_or_root_locked() do { } while (0)
+#endif /* CONFIG_LOCKDEP */
+
/*
* cgroup destruction makes heavy use of work items and there can be a lot
* of concurrent destructions. Use a separate workqueue so that cgroup
/* dummy_top is a shorthand for the dummy hierarchy's top cgroup */
static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
-/*
- * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
- */
-struct cfent {
- struct list_head node;
- struct dentry *dentry;
- struct cftype *type;
- struct cgroup_subsys_state *css;
-
- /* file xattrs */
- struct simple_xattrs xattrs;
-};
-
/* The list of hierarchy roots */
static LIST_HEAD(cgroup_roots);
}
/**
+ * for_each_css - iterate all css's of a cgroup
+ * @css: the iteration cursor
+ * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
+ * @cgrp: the target cgroup to iterate css's of
+ *
+ * Walks every subsystem index of @cgrp and runs the loop body only for
+ * slots whose css pointer is non-NULL; empty slots are skipped.
+ *
+ * Should be called under cgroup_mutex.
+ */
+#define for_each_css(css, ssid, cgrp) \
+ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
+ if (!((css) = rcu_dereference_check( \
+ (cgrp)->subsys[(ssid)], \
+ lockdep_is_held(&cgroup_mutex)))) { } \
+ else
+
+/**
* for_each_subsys - iterate all loaded cgroup subsystems
* @ss: the iteration cursor
- * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
+ * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
*
- * Should be called under cgroup_mutex.
+ * Iterates through all loaded subsystems. Should be called under
+ * cgroup_mutex or cgroup_root_mutex.
+ *
+ * Slots of cgroup_subsys[] that are NULL are skipped. The locking
+ * assertion sits in the loop initializer, so it is evaluated once when
+ * the loop starts rather than on every iteration.
*/
-#define for_each_subsys(ss, i) \
- for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++) \
- if (({ lockdep_assert_held(&cgroup_mutex); \
- !((ss) = cgroup_subsys[i]); })) { } \
+#define for_each_subsys(ss, ssid) \
+ for (({ cgroup_assert_mutex_or_root_locked(); (ssid) = 0; }); \
+ (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
+ if (!((ss) = cgroup_subsys[(ssid)])) { } \
else
/**
for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \
(((ss) = cgroup_subsys[i]) || true); (i)++)
-/* iterate each subsystem attached to a hierarchy */
-#define for_each_root_subsys(root, ss) \
- list_for_each_entry((ss), &(root)->subsys_list, sibling)
-
/* iterate across the active hierarchies */
#define for_each_active_root(root) \
list_for_each_entry((root), &cgroup_roots, root_list)
cgroup_css(cgroup_dummy_top, ss));
cgroup_css(cgrp, ss)->cgroup = cgrp;
- list_move(&ss->sibling, &root->subsys_list);
ss->root = root;
if (ss->bind)
ss->bind(cgroup_css(cgrp, ss));
RCU_INIT_POINTER(cgrp->subsys[i], NULL);
cgroup_subsys[i]->root = &cgroup_dummy_root;
- list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
{
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
struct cgroup_subsys *ss;
+ int ssid;
mutex_lock(&cgroup_root_mutex);
- for_each_root_subsys(root, ss)
- seq_printf(seq, ",%s", ss->name);
+ for_each_subsys(ss, ssid)
+ if (root->subsys_mask & (1 << ssid))
+ seq_printf(seq, ",%s", ss->name);
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
seq_puts(seq, ",sane_behavior");
if (root->flags & CGRP_ROOT_NOPREFIX)
{
struct cgroup *cgrp = &root->top_cgroup;
- INIT_LIST_HEAD(&root->subsys_list);
INIT_LIST_HEAD(&root->root_list);
root->number_of_cgroups = 1;
cgrp->root = root;
bool threadgroup)
{
int retval, i, group_size;
- struct cgroup_subsys *ss, *failed_ss = NULL;
struct cgroupfs_root *root = cgrp->root;
+ struct cgroup_subsys_state *css, *failed_css = NULL;
/* threadgroup list cursor and array */
struct task_struct *leader = tsk;
struct task_and_cgroup *tc;
/*
* step 1: check that we can legitimately attach to the cgroup.
*/
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
- if (ss->can_attach) {
- retval = ss->can_attach(css, &tset);
+ for_each_css(css, i, cgrp) {
+ if (css->ss->can_attach) {
+ retval = css->ss->can_attach(css, &tset);
if (retval) {
- failed_ss = ss;
+ failed_css = css;
goto out_cancel_attach;
}
}
/*
* step 4: do subsystem attach callbacks.
*/
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
- if (ss->attach)
- ss->attach(css, &tset);
- }
+ for_each_css(css, i, cgrp)
+ if (css->ss->attach)
+ css->ss->attach(css, &tset);
/*
* step 5: success! and cleanup
}
out_cancel_attach:
if (retval) {
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
- if (ss == failed_ss)
+ for_each_css(css, i, cgrp) {
+ if (css == failed_css)
break;
- if (ss->cancel_attach)
- ss->cancel_attach(css, &tset);
+ if (css->ss->cancel_attach)
+ css->ss->cancel_attach(css, &tset);
}
}
out_free_group_list:
return 0;
}
-static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
- struct cftype *cft, struct seq_file *seq)
+static int cgroup_release_agent_show(struct seq_file *seq, void *v)
{
- struct cgroup *cgrp = css->cgroup;
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
return 0;
}
-static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
- struct cftype *cft, struct seq_file *seq)
+static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
{
- seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+ seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
return 0;
}
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
-static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
- struct cftype *cft, struct file *file,
- const char __user *userbuf, size_t nbytes,
- loff_t *unused_ppos)
+static ssize_t cgroup_file_write(struct file *file, const char __user *userbuf,
+ size_t nbytes, loff_t *ppos)
{
- char buffer[CGROUP_LOCAL_BUFFER_SIZE];
- int retval = 0;
- char *end;
+ struct cfent *cfe = __d_cfe(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_dentry);
+ struct cgroup_subsys_state *css = cfe->css;
+ size_t max_bytes = cft->max_write_len ?: CGROUP_LOCAL_BUFFER_SIZE - 1; /* default cap when the cftype sets no max_write_len */
+ char *buf;
+ int ret;
- if (!nbytes)
- return -EINVAL;
- if (nbytes >= sizeof(buffer))
+ if (nbytes >= max_bytes)
return -E2BIG;
- if (copy_from_user(buffer, userbuf, nbytes))
- return -EFAULT;
- buffer[nbytes] = 0; /* nul-terminate */
- if (cft->write_u64) {
- u64 val = simple_strtoull(strstrip(buffer), &end, 0);
- if (*end)
- return -EINVAL;
- retval = cft->write_u64(css, cft, val);
+ buf = kmalloc(nbytes + 1, GFP_KERNEL); /* +1 leaves room for the NUL stored below */
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, userbuf, nbytes)) {
+ ret = -EFAULT;
+ goto out_free;
+ }
+
+ buf[nbytes] = '\0';
+
+ if (cft->write_string) { /* handlers are tried in order; the first one present wins */
+ ret = cft->write_string(css, cft, strstrip(buf));
+ } else if (cft->write_u64) {
+ unsigned long long v;
+ ret = kstrtoull(buf, 0, &v);
+ if (!ret)
+ ret = cft->write_u64(css, cft, v);
+ } else if (cft->write_s64) {
+ long long v;
+ ret = kstrtoll(buf, 0, &v);
+ if (!ret)
+ ret = cft->write_s64(css, cft, v);
+ } else if (cft->trigger) {
+ ret = cft->trigger(css, (unsigned int)cft->private);
} else {
- s64 val = simple_strtoll(strstrip(buffer), &end, 0);
- if (*end)
- return -EINVAL;
- retval = cft->write_s64(css, cft, val);
+ ret = -EINVAL; /* the cftype provides no write handler at all */
}
- if (!retval)
- retval = nbytes;
- return retval;
+out_free:
+ kfree(buf);
+ return ret ?: nbytes; /* a zero result is reported as all nbytes consumed */
}
-static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
- struct cftype *cft, struct file *file,
- const char __user *userbuf, size_t nbytes,
- loff_t *unused_ppos)
+/*
+ * seq_file iterator callbacks for cgroup files.
+ *
+ * cgroup_seqfile_start - seq_file ->start callback
+ * @seq: the seq_file being read
+ * @ppos: current iteration position
+ *
+ * Defers to the cftype's seq_start() when it provides one. Otherwise
+ * behaves like single_open(): a non-NULL token is returned for position 0
+ * and NULL for any later position.
+ */
+
+static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
{
- char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
- int retval = 0;
- size_t max_bytes = cft->max_write_len;
- char *buffer = local_buffer;
+ struct cftype *cft = seq_cft(seq);
- if (!max_bytes)
- max_bytes = sizeof(local_buffer) - 1;
- if (nbytes >= max_bytes)
- return -E2BIG;
- /* Allocate a dynamic buffer if we need one */
- if (nbytes >= sizeof(local_buffer)) {
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (buffer == NULL)
- return -ENOMEM;
- }
- if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out;
+ if (cft->seq_start) {
+ return cft->seq_start(seq, ppos);
+ } else {
+ /*
+ * The same behavior as single_open(). Returns !NULL if pos
+ * is at the beginning; otherwise, NULL. The token is built
+ * with a cast instead of "NULL + 1" so that no arithmetic is
+ * performed on a null pointer.
+ */
+ return *ppos ? NULL : (void *)1;
}
-
- buffer[nbytes] = 0; /* nul-terminate */
- retval = cft->write_string(css, cft, strstrip(buffer));
- if (!retval)
- retval = nbytes;
-out:
- if (buffer != local_buffer)
- kfree(buffer);
- return retval;
}
-static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *ppos)
+static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
{
- struct cfent *cfe = __d_cfe(file->f_dentry);
- struct cftype *cft = __d_cft(file->f_dentry);
- struct cgroup_subsys_state *css = cfe->css;
+ struct cftype *cft = seq_cft(seq);
- if (cft->write)
- return cft->write(css, cft, file, buf, nbytes, ppos);
- if (cft->write_u64 || cft->write_s64)
- return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
- if (cft->write_string)
- return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
- if (cft->trigger) {
- int ret = cft->trigger(css, (unsigned int)cft->private);
- return ret ? ret : nbytes;
+ if (cft->seq_next) {
+ return cft->seq_next(seq, v, ppos);
+ } else {
+ /*
+ * The same behavior and code as single_open(), always
+ * terminate after the initial read.
+ */
+ ++*ppos;
+ return NULL;
}
- return -EINVAL;
}
-static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
- struct cftype *cft, struct file *file,
- char __user *buf, size_t nbytes, loff_t *ppos)
+static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
{
- char tmp[CGROUP_LOCAL_BUFFER_SIZE];
- u64 val = cft->read_u64(css, cft);
- int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+ struct cftype *cft = seq_cft(seq);
- return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+ if (cft->seq_stop)
+ cft->seq_stop(seq, v);
}
-static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
- struct cftype *cft, struct file *file,
- char __user *buf, size_t nbytes, loff_t *ppos)
+static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
- char tmp[CGROUP_LOCAL_BUFFER_SIZE];
- s64 val = cft->read_s64(css, cft);
- int len = sprintf(tmp, "%lld\n", (long long) val);
-
- return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
-}
+ struct cftype *cft = seq_cft(m);
+ struct cgroup_subsys_state *css = seq_css(m);
-static ssize_t cgroup_file_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- struct cfent *cfe = __d_cfe(file->f_dentry);
- struct cftype *cft = __d_cft(file->f_dentry);
- struct cgroup_subsys_state *css = cfe->css;
+ if (cft->seq_show)
+ return cft->seq_show(m, arg);
- if (cft->read)
- return cft->read(css, cft, file, buf, nbytes, ppos);
if (cft->read_u64)
- return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
- if (cft->read_s64)
- return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
- return -EINVAL;
-}
-
-/*
- * seqfile ops/methods for returning structured data. Currently just
- * supports string->u64 maps, but can be extended in future.
- */
-
-static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
-{
- struct seq_file *sf = cb->state;
- return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
-}
-
-static int cgroup_seqfile_show(struct seq_file *m, void *arg)
-{
- struct cfent *cfe = m->private;
- struct cftype *cft = cfe->type;
- struct cgroup_subsys_state *css = cfe->css;
-
- if (cft->read_map) {
- struct cgroup_map_cb cb = {
- .fill = cgroup_map_add,
- .state = m,
- };
- return cft->read_map(css, cft, &cb);
- }
- return cft->read_seq_string(css, cft, m);
+ seq_printf(m, "%llu\n", cft->read_u64(css, cft));
+ else if (cft->read_s64)
+ seq_printf(m, "%lld\n", cft->read_s64(css, cft));
+ else
+ return -EINVAL;
+ return 0;
}
-static const struct file_operations cgroup_seqfile_operations = {
- .read = seq_read,
- .write = cgroup_file_write,
- .llseek = seq_lseek,
- .release = cgroup_file_release,
+/*
+ * seq_file iterator callbacks handed to __seq_open_private() by
+ * cgroup_file_open(). The table is never modified, hence const.
+ */
+static const struct seq_operations cgroup_seq_operations = {
+ .start = cgroup_seqfile_start,
+ .next = cgroup_seqfile_next,
+ .stop = cgroup_seqfile_stop,
+ .show = cgroup_seqfile_show,
};
static int cgroup_file_open(struct inode *inode, struct file *file)
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
struct cgroup_subsys_state *css;
+ struct cgroup_open_file *of;
int err;
err = generic_file_open(inode, file);
WARN_ON_ONCE(cfe->css && cfe->css != css);
cfe->css = css;
- if (cft->read_map || cft->read_seq_string) {
- file->f_op = &cgroup_seqfile_operations;
- err = single_open(file, cgroup_seqfile_show, cfe);
- } else if (cft->open) {
- err = cft->open(inode, file);
+ of = __seq_open_private(file, &cgroup_seq_operations,
+ sizeof(struct cgroup_open_file));
+ if (of) {
+ of->cfe = cfe;
+ return 0;
}
- if (css->ss && err)
+ if (css->ss)
css_put(css);
- return err;
+ return -ENOMEM;
}
static int cgroup_file_release(struct inode *inode, struct file *file)
if (css->ss)
css_put(css);
- if (file->f_op == &cgroup_seqfile_operations)
- single_release(inode, file);
- return 0;
+ return seq_release_private(inode, file);
}
/*
}
static const struct file_operations cgroup_file_operations = {
- .read = cgroup_file_read,
+ .read = seq_read,
.write = cgroup_file_write,
.llseek = generic_file_llseek,
.open = cgroup_file_open,
if (cft->mode)
return cft->mode;
- if (cft->read || cft->read_u64 || cft->read_s64 ||
- cft->read_map || cft->read_seq_string)
+ if (cft->read_u64 || cft->read_s64 || cft->seq_show)
mode |= S_IRUGO;
- if (cft->write || cft->write_u64 || cft->write_s64 ||
- cft->write_string || cft->trigger)
+ if (cft->write_u64 || cft->write_s64 || cft->write_string ||
+ cft->trigger)
mode |= S_IWUSR;
return mode;
* @parent_css: css whose children to walk
*
* This function returns the next child of @parent_css and should be called
- * under RCU read lock. The only requirement is that @parent_css and
- * @pos_css are accessible. The next sibling is guaranteed to be returned
- * regardless of their states.
+ * under either cgroup_mutex or RCU read lock. The only requirement is
+ * that @parent_css and @pos_css are accessible. The next sibling is
+ * guaranteed to be returned regardless of their states.
*/
struct cgroup_subsys_state *
css_next_child(struct cgroup_subsys_state *pos_css,
struct cgroup *cgrp = parent_css->cgroup;
struct cgroup *next;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ cgroup_assert_mutex_or_rcu_locked();
/*
* @pos could already have been removed. Once a cgroup is removed,
* to visit for pre-order traversal of @root's descendants. @root is
* included in the iteration and the first node to be visited.
*
- * While this function requires RCU read locking, it doesn't require the
- * whole traversal to be contained in a single RCU critical section. This
- * function will return the correct next descendant as long as both @pos
- * and @root are accessible and @pos is a descendant of @root.
+ * While this function requires cgroup_mutex or RCU read locking, it
+ * doesn't require the whole traversal to be contained in a single critical
+ * section. This function will return the correct next descendant as long
+ * as both @pos and @root are accessible and @pos is a descendant of @root.
*/
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
{
struct cgroup_subsys_state *next;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ cgroup_assert_mutex_or_rcu_locked();
/* if first iteration, visit @root */
if (!pos)
* is returned. This can be used during pre-order traversal to skip
* subtree of @pos.
*
- * While this function requires RCU read locking, it doesn't require the
- * whole traversal to be contained in a single RCU critical section. This
- * function will return the correct rightmost descendant as long as @pos is
- * accessible.
+ * While this function requires cgroup_mutex or RCU read locking, it
+ * doesn't require the whole traversal to be contained in a single critical
+ * section. This function will return the correct rightmost descendant as
+ * long as @pos is accessible.
*/
struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos)
{
struct cgroup_subsys_state *last, *tmp;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ cgroup_assert_mutex_or_rcu_locked();
do {
last = pos;
* to visit for post-order traversal of @root's descendants. @root is
* included in the iteration and the last node to be visited.
*
- * While this function requires RCU read locking, it doesn't require the
- * whole traversal to be contained in a single RCU critical section. This
- * function will return the correct next descendant as long as both @pos
- * and @cgroup are accessible and @pos is a descendant of @cgroup.
+ * While this function requires cgroup_mutex or RCU read locking, it
+ * doesn't require the whole traversal to be contained in a single critical
+ * section. This function will return the correct next descendant as long
+ * as both @pos and @cgroup are accessible and @pos is a descendant of
+ * @cgroup.
*/
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
{
struct cgroup_subsys_state *next;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ cgroup_assert_mutex_or_rcu_locked();
/* if first iteration, visit leftmost descendant which may be @root */
if (!pos)
struct delayed_work destroy_dwork;
};
-/* seq_file->private points to the following */
-struct cgroup_pidlist_open_file {
- enum cgroup_filetype type;
- struct cgroup *cgrp;
- struct cgroup_pidlist *pidlist;
-};
-
/*
* The following two functions "fix" the issue where there are more pids
* than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
* after a seek to the start). Use a binary-search to find the
* next pid to display, if any
*/
- struct cgroup_pidlist_open_file *of = s->private;
- struct cgroup *cgrp = of->cgrp;
+ struct cgroup_open_file *of = s->private;
+ struct cgroup *cgrp = seq_css(s)->cgroup;
struct cgroup_pidlist *l;
+ enum cgroup_filetype type = seq_cft(s)->private;
int index = 0, pid = *pos;
int *iter, ret;
mutex_lock(&cgrp->pidlist_mutex);
/*
- * !NULL @of->pidlist indicates that this isn't the first start()
+ * !NULL @of->priv indicates that this isn't the first start()
* after open. If the matching pidlist is around, we can use that.
- * Look for it. Note that @of->pidlist can't be used directly. It
+ * Look for it. Note that @of->priv can't be used directly. It
* could already have been destroyed.
*/
- if (of->pidlist)
- of->pidlist = cgroup_pidlist_find(cgrp, of->type);
+ if (of->priv)
+ of->priv = cgroup_pidlist_find(cgrp, type);
/*
* Either this is the first start() after open or the matching
* pidlist has been destroyed inbetween. Create a new one.
*/
- if (!of->pidlist) {
- ret = pidlist_array_load(of->cgrp, of->type, &of->pidlist);
+ if (!of->priv) {
+ ret = pidlist_array_load(cgrp, type,
+ (struct cgroup_pidlist **)&of->priv);
if (ret)
return ERR_PTR(ret);
}
- l = of->pidlist;
+ l = of->priv;
if (pid) {
int end = l->length;
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
- struct cgroup_pidlist_open_file *of = s->private;
+ struct cgroup_open_file *of = s->private;
+ struct cgroup_pidlist *l = of->priv;
- if (of->pidlist)
- mod_delayed_work(cgroup_pidlist_destroy_wq,
- &of->pidlist->destroy_dwork,
+ if (l)
+ mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
CGROUP_PIDLIST_DESTROY_DELAY);
- mutex_unlock(&of->cgrp->pidlist_mutex);
+ mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
}
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct cgroup_pidlist_open_file *of = s->private;
- struct cgroup_pidlist *l = of->pidlist;
+ struct cgroup_open_file *of = s->private;
+ struct cgroup_pidlist *l = of->priv;
pid_t *p = v;
pid_t *end = l->list + l->length;
/*
if (p >= end) {
return NULL;
} else {
- *pos = cgroup_pid_fry(of->cgrp, *p);
+ *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p);
return p;
}
}
.show = cgroup_pidlist_show,
};
-static const struct file_operations cgroup_pidlist_operations = {
- .read = seq_read,
- .llseek = seq_lseek,
- .write = cgroup_file_write,
- .release = seq_release_private,
-};
-
-/*
- * The following functions handle opens on a file that displays a pidlist
- * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
- * in the cgroup.
- */
-/* helper function for the two below it */
-static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
-{
- struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- struct cgroup_pidlist_open_file *of;
-
- /* configure file information */
- file->f_op = &cgroup_pidlist_operations;
-
- of = __seq_open_private(file, &cgroup_pidlist_seq_operations,
- sizeof(*of));
- if (!of)
- return -ENOMEM;
-
- of->type = type;
- of->cgrp = cgrp;
- return 0;
-}
-static int cgroup_tasks_open(struct inode *unused, struct file *file)
-{
- return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
-}
-static int cgroup_procs_open(struct inode *unused, struct file *file)
-{
- return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
-}
-
static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
struct cftype *cft)
{
static struct cftype cgroup_base_files[] = {
{
.name = "cgroup.procs",
- .open = cgroup_procs_open,
+ .seq_start = cgroup_pidlist_start,
+ .seq_next = cgroup_pidlist_next,
+ .seq_stop = cgroup_pidlist_stop,
+ .seq_show = cgroup_pidlist_show,
+ .private = CGROUP_FILE_PROCS,
.write_u64 = cgroup_procs_write,
.mode = S_IRUGO | S_IWUSR,
},
{
.name = "cgroup.sane_behavior",
.flags = CFTYPE_ONLY_ON_ROOT,
- .read_seq_string = cgroup_sane_behavior_show,
+ .seq_show = cgroup_sane_behavior_show,
},
/*
{
.name = "tasks",
.flags = CFTYPE_INSANE, /* use "procs" instead */
- .open = cgroup_tasks_open,
+ .seq_start = cgroup_pidlist_start,
+ .seq_next = cgroup_pidlist_next,
+ .seq_stop = cgroup_pidlist_stop,
+ .seq_show = cgroup_pidlist_show,
+ .private = CGROUP_FILE_TASKS,
.write_u64 = cgroup_tasks_write,
.mode = S_IRUGO | S_IWUSR,
},
{
.name = "release_agent",
.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
- .read_seq_string = cgroup_release_agent_show,
+ .seq_show = cgroup_release_agent_show,
.write_string = cgroup_release_agent_write,
.max_write_len = PATH_MAX,
},
RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
}
+/**
+ * create_css - create a cgroup_subsys_state
+ * @cgrp: the cgroup new css will be associated with
+ * @ss: the subsys of new css
+ *
+ * Create a new css associated with @cgrp - @ss pair. On success, the new
+ * css is online and installed in @cgrp with all interface files created.
+ * Returns 0 on success, -errno on failure.
+ *
+ * The caller must hold both @cgrp's directory i_mutex and cgroup_mutex.
+ * On success a reference is taken on @cgrp's dentry and on the parent
+ * css of the new css.
+ *
+ * NOTE(review): when cgroup_populate_dir() has succeeded and online_css()
+ * then fails, the error path frees the css without removing the files
+ * that were just created; presumably the caller's destruction path
+ * cleans them up - confirm.
+ */
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ struct cgroup *parent = cgrp->parent;
+ struct cgroup_subsys_state *css;
+ int err;
+
+ lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
+ lockdep_assert_held(&cgroup_mutex);
+
+ css = ss->css_alloc(cgroup_css(parent, ss)); /* allocation is handed the parent's css for @ss */
+ if (IS_ERR(css))
+ return PTR_ERR(css);
+
+ err = percpu_ref_init(&css->refcnt, css_release);
+ if (err)
+ goto err_free;
+
+ init_css(css, ss, cgrp);
+
+ err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id); /* mask selects only @ss's files */
+ if (err)
+ goto err_free;
+
+ err = online_css(css);
+ if (err)
+ goto err_free;
+
+ dget(cgrp->dentry); /* the css pins the cgroup's dentry ... */
+ css_get(css->parent); /* ... and its parent css */
+
+ if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
+ parent->parent) {
+ pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
+ current->comm, current->pid, ss->name);
+ if (!strcmp(ss->name, "memory"))
+ pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
+ ss->warned_broken_hierarchy = true; /* emit the warning only once per subsystem */
+ }
+
+ return 0;
+
+err_free:
+ percpu_ref_cancel_init(&css->refcnt);
+ ss->css_free(css);
+ return err;
+}
+
/*
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
umode_t mode)
{
- struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { };
struct cgroup *cgrp;
struct cgroup_name *name;
struct cgroupfs_root *root = parent->root;
- int err = 0;
+ int ssid, err = 0;
struct cgroup_subsys *ss;
struct super_block *sb = root->sb;
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css;
-
- css = ss->css_alloc(cgroup_css(parent, ss));
- if (IS_ERR(css)) {
- err = PTR_ERR(css);
- goto err_free_all;
- }
- css_ar[ss->subsys_id] = css;
-
- err = percpu_ref_init(&css->refcnt, css_release);
- if (err)
- goto err_free_all;
-
- init_css(css, ss, cgrp);
- }
-
/*
* Create directory. cgroup_create_file() returns with the new
* directory locked on success so that it can be populated without
*/
err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
if (err < 0)
- goto err_free_all;
+ goto err_unlock;
lockdep_assert_held(&dentry->d_inode->i_mutex);
cgrp->serial_nr = cgroup_serial_nr_next++;
list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
root->number_of_cgroups++;
- /* each css holds a ref to the cgroup's dentry and the parent css */
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
- dget(dentry);
- css_get(css->parent);
- }
-
/* hold a ref to the parent's dentry */
dget(parent->dentry);
- /* creation succeeded, notify subsystems */
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
- err = online_css(css);
- if (err)
- goto err_destroy;
-
- if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
- parent->parent) {
- pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
- current->comm, current->pid, ss->name);
- if (!strcmp(ss->name, "memory"))
- pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
- ss->warned_broken_hierarchy = true;
- }
- }
-
+ /*
+ * @cgrp is now fully operational. If something fails after this
+ * point, it'll be released via the normal destruction path.
+ */
idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
if (err)
goto err_destroy;
- err = cgroup_populate_dir(cgrp, root->subsys_mask);
- if (err)
- goto err_destroy;
+ /* let's create and online css's */
+ for_each_subsys(ss, ssid) {
+ if (root->subsys_mask & (1 << ssid)) {
+ err = create_css(cgrp, ss);
+ if (err)
+ goto err_destroy;
+ }
+ }
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
return 0;
-err_free_all:
- for_each_root_subsys(root, ss) {
- struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
- if (css) {
- percpu_ref_cancel_init(&css->refcnt);
- ss->css_free(css);
- }
- }
+err_unlock:
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
struct dentry *d = cgrp->dentry;
- struct cgroup_subsys *ss;
+ struct cgroup_subsys_state *css;
struct cgroup *child;
bool empty;
+ int ssid;
lockdep_assert_held(&d->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
* will be invoked to perform the rest of destruction once the
* percpu refs of all css's are confirmed to be killed.
*/
- for_each_root_subsys(cgrp->root, ss)
- kill_css(cgroup_css(cgrp, ss));
+ for_each_css(css, ssid, cgrp)
+ kill_css(css);
/*
* Mark @cgrp dead. This prevents further task migration and child
cgroup_init_cftsets(ss);
/* Create the top cgroup state for this subsystem */
- list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* We don't handle early failures gracefully */
cgroup_init_cftsets(ss);
mutex_lock(&cgroup_mutex);
+ mutex_lock(&cgroup_root_mutex);
cgroup_subsys[ss->subsys_id] = ss;
/*
if (IS_ERR(css)) {
/* failure case - need to deassign the cgroup_subsys[] slot. */
cgroup_subsys[ss->subsys_id] = NULL;
+ mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
return PTR_ERR(css);
}
- list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
/* our new subsystem will be attached to the dummy hierarchy. */
write_unlock(&css_set_lock);
ret = online_css(css);
- if (ret)
+ if (ret) {
+ ss->css_free(css);
goto err_unload;
+ }
/* success! */
+ mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
return 0;
err_unload:
+ mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
/* @ss can't be mounted here as try_module_get() would fail */
cgroup_unload_subsys(ss);
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
struct cgrp_cset_link *link;
+ struct cgroup_subsys_state *css;
BUG_ON(ss->module == NULL);
BUG_ON(ss->root != &cgroup_dummy_root);
mutex_lock(&cgroup_mutex);
+ mutex_lock(&cgroup_root_mutex);
- offline_css(cgroup_css(cgroup_dummy_top, ss));
+ css = cgroup_css(cgroup_dummy_top, ss);
+ if (css)
+ offline_css(css);
/* deassign the subsys_id */
cgroup_subsys[ss->subsys_id] = NULL;
- /* remove subsystem from the dummy root's list of subsystems */
- list_del_init(&ss->sibling);
-
/*
* disentangle the css from all css_sets attached to the dummy
* top. as in loading, we need to pay our respects to the hashtable
* need to free before marking as null because ss->css_free needs
* the cgrp->subsys pointer to find their state.
*/
- ss->css_free(cgroup_css(cgroup_dummy_top, ss));
+ if (css)
+ ss->css_free(css);
RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
+ mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
for_each_active_root(root) {
struct cgroup_subsys *ss;
struct cgroup *cgrp;
- int count = 0;
+ int ssid, count = 0;
seq_printf(m, "%d:", root->hierarchy_id);
- for_each_root_subsys(root, ss)
- seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+ for_each_subsys(ss, ssid)
+ if (root->subsys_mask & (1 << ssid))
+ seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
if (strlen(root->name))
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
* @dentry: directory dentry of interest
* @ss: subsystem of interest
*
- * Must be called under RCU read lock. The caller is responsible for
- * pinning the returned css if it needs to be accessed outside the RCU
- * critical section.
+ * Must be called under cgroup_mutex or RCU read lock. The caller is
+ * responsible for pinning the returned css if it needs to be accessed
+ * outside the critical section.
*/
struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss)
{
struct cgroup *cgrp;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ cgroup_assert_mutex_or_rcu_locked();
/* is @dentry a cgroup dir? */
if (!dentry->d_inode ||
{
struct cgroup *cgrp;
- rcu_lockdep_assert(rcu_read_lock_held() ||
- lockdep_is_held(&cgroup_mutex),
- "css_from_id() needs proper protection");
+ cgroup_assert_mutex_or_rcu_locked();
cgrp = idr_find(&ss->root->cgroup_idr, id);
if (cgrp)
return count;
}
-static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
- struct cftype *cft,
- struct seq_file *seq)
+static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
struct cgrp_cset_link *link;
struct css_set *cset;
}
#define MAX_TASKS_SHOWN_PER_CSS 25
-static int cgroup_css_links_read(struct cgroup_subsys_state *css,
- struct cftype *cft, struct seq_file *seq)
+static int cgroup_css_links_read(struct seq_file *seq, void *v)
{
+ struct cgroup_subsys_state *css = seq_css(seq);
struct cgrp_cset_link *link;
read_lock(&css_set_lock);
{
.name = "current_css_set_cg_links",
- .read_seq_string = current_css_set_cg_links_read,
+ .seq_show = current_css_set_cg_links_read,
},
{
.name = "cgroup_css_links",
- .read_seq_string = cgroup_css_links_read,
+ .seq_show = cgroup_css_links_read,
},
{