cgroup, memcg: move cgroup->event_list[_lock] and event callbacks into memcg
author Tejun Heo <tj@kernel.org>
Fri, 22 Nov 2013 23:20:43 +0000 (18:20 -0500)
committer Tejun Heo <tj@kernel.org>
Fri, 22 Nov 2013 23:20:43 +0000 (18:20 -0500)
cgroup_event is being moved from cgroup core to memcg and the
implementation is already moved by the previous patch.  This patch
moves the data fields and callbacks.

* cgroup->event_list[_lock] are moved to mem_cgroup.

* cftype->[un]register_event() are moved to cgroup_event.  This makes
  it impossible for individual cftype definitions to specify their
  event callbacks.  This is worked around by simply hard-coding the
  filename-to-event-callback mapping in cgroup_write_event_control().
  This is awkward and inflexible, which is actually desirable given
  that we don't want to grow more usages of this feature.

* eventfd_ctx declaration is removed from cgroup.h, which leaves
  vmpressure.h without a declaration of eventfd_ctx.  Include
  eventfd.h from vmpressure.h to provide it.

v2: Use file name from dentry instead of cftype.  This will allow
    removing all cftype handling in the function.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
include/linux/cgroup.h
include/linux/vmpressure.h
kernel/cgroup.c
mm/memcontrol.c

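diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 40c2427..612adc5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h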
@@ -29,7 +29,6 @@ struct cgroup_subsys;
 struct inode;
 struct cgroup;
 struct css_id;
-struct eventfd_ctx;
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
@@ -239,10 +238,6 @@ struct cgroup {
        struct rcu_head rcu_head;
        struct work_struct destroy_work;
 
-       /* List of events which userspace want to receive */
-       struct list_head event_list;
-       spinlock_t event_list_lock;
-
        /* directory xattrs */
        struct simple_xattrs xattrs;
 };
@@ -506,25 +501,6 @@ struct cftype {
        int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 
        int (*release)(struct inode *inode, struct file *file);
-
-       /*
-        * register_event() callback will be used to add new userspace
-        * waiter for changes related to the cftype. Implement it if
-        * you want to provide this functionality. Use eventfd_signal()
-        * on eventfd to send notification to userspace.
-        */
-       int (*register_event)(struct cgroup_subsys_state *css,
-                             struct cftype *cft, struct eventfd_ctx *eventfd,
-                             const char *args);
-       /*
-        * unregister_event() callback will be called when userspace
-        * closes the eventfd or on cgroup removing.
-        * This callback must be implemented, if you want provide
-        * notification functionality.
-        */
-       void (*unregister_event)(struct cgroup_subsys_state *css,
-                                struct cftype *cft,
-                                struct eventfd_ctx *eventfd);
 };
 
 /*
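diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 3f3788d..9dd1914 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h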
@@ -7,6 +7,7 @@
 #include <linux/gfp.h>
 #include <linux/types.h>
 #include <linux/cgroup.h>
+#include <linux/eventfd.h>
 
 struct vmpressure {
        unsigned long scanned;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4bccaa7..feda7c5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1352,8 +1352,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_LIST_HEAD(&cgrp->pidlists);
        mutex_init(&cgrp->pidlist_mutex);
        cgrp->dummy_css.cgroup = cgrp;
-       INIT_LIST_HEAD(&cgrp->event_list);
-       spin_lock_init(&cgrp->event_list_lock);
        simple_xattrs_init(&cgrp->xattrs);
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d003681..2fcacb1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -249,6 +249,22 @@ struct cgroup_event {
         */
        struct list_head list;
        /*
+        * register_event() callback will be used to add new userspace
+        * waiter for changes related to this event.  Use eventfd_signal()
+        * on eventfd to send notification to userspace.
+        */
+       int (*register_event)(struct cgroup_subsys_state *css,
+                             struct cftype *cft, struct eventfd_ctx *eventfd,
+                             const char *args);
+       /*
+        * unregister_event() callback will be called when userspace closes
+        * the eventfd or on cgroup removal.  This callback must be set
+        * if you want to provide notification functionality.
+        */
+       void (*unregister_event)(struct cgroup_subsys_state *css,
+                                struct cftype *cft,
+                                struct eventfd_ctx *eventfd);
+       /*
         * All fields below needed to unregister event when
         * userspace closes eventfd.
         */
@@ -362,6 +378,10 @@ struct mem_cgroup {
        atomic_t        numainfo_updating;
 #endif
 
+       /* List of events which userspace wants to receive */
+       struct list_head event_list;
+       spinlock_t event_list_lock;
+
        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
 };
@@ -5992,7 +6012,7 @@ static void cgroup_event_remove(struct work_struct *work)
 
        remove_wait_queue(event->wqh, &event->wait);
 
-       event->cft->unregister_event(css, event->cft, event->eventfd);
+       event->unregister_event(css, event->cft, event->eventfd);
 
        /* Notify userspace the event is going away. */
        eventfd_signal(event->eventfd, 1);
@@ -6012,7 +6032,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
 {
        struct cgroup_event *event = container_of(wait,
                        struct cgroup_event, wait);
-       struct cgroup *cgrp = event->css->cgroup;
+       struct mem_cgroup *memcg = mem_cgroup_from_css(event->css);
        unsigned long flags = (unsigned long)key;
 
        if (flags & POLLHUP) {
@@ -6025,7 +6045,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
                 * side will require wqh->lock via remove_wait_queue(),
                 * which we hold.
                 */
-               spin_lock(&cgrp->event_list_lock);
+               spin_lock(&memcg->event_list_lock);
                if (!list_empty(&event->list)) {
                        list_del_init(&event->list);
                        /*
@@ -6034,7 +6054,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
                         */
                        schedule_work(&event->remove);
                }
-               spin_unlock(&cgrp->event_list_lock);
+               spin_unlock(&memcg->event_list_lock);
        }
 
        return 0;
@@ -6059,12 +6079,13 @@ static void cgroup_event_ptable_queue_proc(struct file *file,
 static int cgroup_write_event_control(struct cgroup_subsys_state *css,
                                      struct cftype *cft, const char *buffer)
 {
-       struct cgroup *cgrp = css->cgroup;
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        struct cgroup_event *event;
        struct cgroup_subsys_state *cfile_css;
        unsigned int efd, cfd;
        struct fd efile;
        struct fd cfile;
+       const char *name;
        char *endp;
        int ret;
 
@@ -6119,6 +6140,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css,
        }
 
        /*
+        * Determine the event callbacks and set them in @event.  This used
+        * to be done via struct cftype but cgroup core no longer knows
+        * about these events.  The following is crude but the whole thing
+        * is for compatibility anyway.
+        */
+       name = cfile.file->f_dentry->d_name.name;
+
+       if (!strcmp(name, "memory.usage_in_bytes")) {
+               event->register_event = mem_cgroup_usage_register_event;
+               event->unregister_event = mem_cgroup_usage_unregister_event;
+       } else if (!strcmp(name, "memory.oom_control")) {
+               event->register_event = mem_cgroup_oom_register_event;
+               event->unregister_event = mem_cgroup_oom_unregister_event;
+       } else if (!strcmp(name, "memory.pressure_level")) {
+               event->register_event = vmpressure_register_event;
+               event->unregister_event = vmpressure_unregister_event;
+       } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) {
+               event->register_event = mem_cgroup_usage_register_event;
+               event->unregister_event = mem_cgroup_usage_unregister_event;
+       } else {
+               ret = -EINVAL;
+               goto out_put_cfile;
+       }
+
+       /*
         * Verify @cfile should belong to @css.  Also, remaining events are
         * automatically removed on cgroup destruction but the removal is
         * asynchronous, so take an extra ref on @css.
@@ -6135,21 +6181,15 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css,
        if (ret)
                goto out_put_cfile;
 
-       if (!event->cft->register_event || !event->cft->unregister_event) {
-               ret = -EINVAL;
-               goto out_put_css;
-       }
-
-       ret = event->cft->register_event(css, event->cft,
-                       event->eventfd, buffer);
+       ret = event->register_event(css, event->cft, event->eventfd, buffer);
        if (ret)
                goto out_put_css;
 
        efile.file->f_op->poll(efile.file, &event->pt);
 
-       spin_lock(&cgrp->event_list_lock);
-       list_add(&event->list, &cgrp->event_list);
-       spin_unlock(&cgrp->event_list_lock);
+       spin_lock(&memcg->event_list_lock);
+       list_add(&event->list, &memcg->event_list);
+       spin_unlock(&memcg->event_list_lock);
 
        fdput(cfile);
        fdput(efile);
@@ -6175,8 +6215,6 @@ static struct cftype mem_cgroup_files[] = {
                .name = "usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
                .read = mem_cgroup_read,
-               .register_event = mem_cgroup_usage_register_event,
-               .unregister_event = mem_cgroup_usage_unregister_event,
        },
        {
                .name = "max_usage_in_bytes",
@@ -6236,14 +6274,10 @@ static struct cftype mem_cgroup_files[] = {
                .name = "oom_control",
                .read_map = mem_cgroup_oom_control_read,
                .write_u64 = mem_cgroup_oom_control_write,
-               .register_event = mem_cgroup_oom_register_event,
-               .unregister_event = mem_cgroup_oom_unregister_event,
                .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
        },
        {
                .name = "pressure_level",
-               .register_event = vmpressure_register_event,
-               .unregister_event = vmpressure_unregister_event,
        },
 #ifdef CONFIG_NUMA
        {
@@ -6291,8 +6325,6 @@ static struct cftype memsw_cgroup_files[] = {
                .name = "memsw.usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
                .read = mem_cgroup_read,
-               .register_event = mem_cgroup_usage_register_event,
-               .unregister_event = mem_cgroup_usage_unregister_event,
        },
        {
                .name = "memsw.max_usage_in_bytes",
@@ -6483,6 +6515,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        mutex_init(&memcg->thresholds_lock);
        spin_lock_init(&memcg->move_lock);
        vmpressure_init(&memcg->vmpressure);
+       INIT_LIST_HEAD(&memcg->event_list);
+       spin_lock_init(&memcg->event_list_lock);
 
        return &memcg->css;
 
@@ -6555,7 +6589,6 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
 static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       struct cgroup *cgrp = css->cgroup;
        struct cgroup_event *event, *tmp;
 
        /*
@@ -6563,12 +6596,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
         * Notify userspace about cgroup removing only after rmdir of cgroup
         * directory to avoid race between userspace and kernelspace.
         */
-       spin_lock(&cgrp->event_list_lock);
-       list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
+       spin_lock(&memcg->event_list_lock);
+       list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
                list_del_init(&event->list);
                schedule_work(&event->remove);
        }
-       spin_unlock(&cgrp->event_list_lock);
+       spin_unlock(&memcg->event_list_lock);
 
        kmem_cgroup_css_offline(memcg);