memcg: fix possible use-after-free in memcg_write_event_control()
authorTejun Heo <tj@kernel.org>
Thu, 8 Dec 2022 02:53:15 +0000 (16:53 -1000)
committerAndrew Morton <akpm@linux-foundation.org>
Sat, 10 Dec 2022 02:41:17 +0000 (18:41 -0800)
memcg_write_event_control() accesses the dentry->d_name of the specified
control fd to route the write call.  As a cgroup interface file can't be
renamed, it's safe to access d_name as long as the specified file is a
regular cgroup file.  Also, as these cgroup interface files can't be
removed before the directory, it's safe to access the parent too.

Prior to 347c4a874710 ("memcg: remove cgroup_event->cft"), there was a
call to __file_cft() which verified that the specified file is a regular
cgroupfs file before further accesses.  The cftype pointer returned from
__file_cft() was no longer necessary and the commit inadvertently dropped
the file type check with it allowing any file to slip through.  With the
invarients broken, the d_name and parent accesses can now race against
renames and removals of arbitrary files and cause use-after-free's.

Fix the bug by resurrecting the file type check in __file_cft().  Now that
cgroupfs is implemented through kernfs, checking the file operations needs
to go through a layer of indirection.  Instead, let's check the superblock
and dentry type.

Link: https://lkml.kernel.org/r/Y5FRm/cfcKPGzWwl@slm.duckdns.org
Fixes: 347c4a874710 ("memcg: remove cgroup_event->cft")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jann Horn <jannh@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: <stable@vger.kernel.org> [3.14+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/cgroup.h
kernel/cgroup/cgroup-internal.h
mm/memcontrol.c

index 528bd44..2b7d077 100644 (file)
@@ -68,6 +68,7 @@ struct css_task_iter {
        struct list_head                iters_node;     /* css_set->task_iters */
 };
 
+extern struct file_system_type cgroup_fs_type;
 extern struct cgroup_root cgrp_dfl_root;
 extern struct css_set init_css_set;
 
index fd40208..367b0a4 100644 (file)
@@ -167,7 +167,6 @@ struct cgroup_mgctx {
 extern spinlock_t css_set_lock;
 extern struct cgroup_subsys *cgroup_subsys[];
 extern struct list_head cgroup_roots;
-extern struct file_system_type cgroup_fs_type;
 
 /* iterate across the hierarchies */
 #define for_each_root(root)                                            \
index a1a35c1..266a1ab 100644 (file)
@@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
        unsigned int efd, cfd;
        struct fd efile;
        struct fd cfile;
+       struct dentry *cdentry;
        const char *name;
        char *endp;
        int ret;
@@ -4886,6 +4887,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
                goto out_put_cfile;
 
        /*
+        * The control file must be a regular cgroup1 file. As a regular cgroup
+        * file can't be renamed, it's safe to access its name afterwards.
+        */
+       cdentry = cfile.file->f_path.dentry;
+       if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) {
+               ret = -EINVAL;
+               goto out_put_cfile;
+       }
+
+       /*
         * Determine the event callbacks and set them in @event.  This used
         * to be done via struct cftype but cgroup core no longer knows
         * about these events.  The following is crude but the whole thing
@@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
         *
         * DO NOT ADD NEW FILES.
         */
-       name = cfile.file->f_path.dentry->d_name.name;
+       name = cdentry->d_name.name;
 
        if (!strcmp(name, "memory.usage_in_bytes")) {
                event->register_event = mem_cgroup_usage_register_event;
@@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
         * automatically removed on cgroup destruction but the removal is
         * asynchronous, so take an extra ref on @css.
         */
-       cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
+       cfile_css = css_tryget_online_from_dir(cdentry->d_parent,
                                               &memory_cgrp_subsys);
        ret = -EINVAL;
        if (IS_ERR(cfile_css))