switch devtmpfs object creation/removal to separate kernel thread
author Al Viro <viro@zeniv.linux.org.uk>
Mon, 27 Jun 2011 20:25:29 +0000 (16:25 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Wed, 20 Jul 2011 05:44:09 +0000 (01:44 -0400)
... and give it a namespace where devtmpfs would be mounted on root,
thus avoiding abuses of vfs_path_lookup() (it was never intended to
be used with LOOKUP_PARENT).  Games with credentials are also gone.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
drivers/base/devtmpfs.c

index 82bbb59..1a16e1f 100644 (file)
 #include <linux/fs.h>
 #include <linux/shmem_fs.h>
 #include <linux/ramfs.h>
-#include <linux/cred.h>
 #include <linux/sched.h>
-#include <linux/init_task.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
 
-static struct vfsmount *dev_mnt;
+static struct task_struct *thread;
 
 #if defined CONFIG_DEVTMPFS_MOUNT
 static int mount_dev = 1;
@@ -34,7 +33,16 @@ static int mount_dev = 1;
 static int mount_dev;
 #endif
 
-static DEFINE_MUTEX(dirlock);
+static DEFINE_SPINLOCK(req_lock);
+
+static struct req {     /* one node create/remove request handed to devtmpfsd() */
+       struct req *next;       /* next entry in the singly linked pending list */
+       struct completion done; /* completed by devtmpfsd() after handle() returns */
+       int err;        /* result of handle(); returned to the submitter */
+       const char *name;       /* node name obtained from device_get_devnode() */
+       mode_t mode;    /* 0 => delete */
+       struct device *dev;     /* device the node is created/removed for */
+} *requests;    /* head of the pending list; modified under req_lock */
 
 static int __init mount_param(char *str)
 {
@@ -68,14 +76,79 @@ static inline int is_blockdev(struct device *dev)
 static inline int is_blockdev(struct device *dev) { return 0; }
 #endif
 
+/*
+ * Hand @req to the devtmpfs thread and sleep until it has been processed.
+ *
+ * The request is pushed onto the head of the global `requests` list under
+ * req_lock, the thread is woken, and the caller blocks on @req->done.
+ * @req normally lives on the caller's stack; that is safe because this
+ * function does not return before the thread has completed it.
+ *
+ * @tmp is whatever device_get_devnode() handed back through its last
+ * argument (may be NULL); it is kfree()d once the request is done, i.e.
+ * after the thread no longer needs @req->name.
+ *
+ * Returns the error code stored in @req->err by the thread.
+ */
+static int devtmpfs_submit_req(struct req *req, const char *tmp)
+{
+       init_completion(&req->done);
+
+       spin_lock(&req_lock);
+       req->next = requests;
+       requests = req;
+       spin_unlock(&req_lock);
+
+       wake_up_process(thread);
+       wait_for_completion(&req->done);
+
+       kfree(tmp);
+
+       return req->err;
+}
+
+/*
+ * Ask the devtmpfs thread to create the device node for @dev.
+ *
+ * Returns 0 without doing anything when the thread is not set up,
+ * -ENOMEM when device_get_devnode() yields no name, otherwise the result
+ * reported by the thread for this request.
+ */
+int devtmpfs_create_node(struct device *dev)
+{
+       const char *tmp = NULL;
+       struct req req;
+
+       if (!thread)
+               return 0;
+
+       req.mode = 0;
+       req.name = device_get_devnode(dev, &req.mode, &tmp);
+       if (!req.name)
+               return -ENOMEM;
+
+       /* fall back to 0600 when no permission bits were supplied */
+       if (req.mode == 0)
+               req.mode = 0600;
+       if (is_blockdev(dev))
+               req.mode |= S_IFBLK;
+       else
+               req.mode |= S_IFCHR;
+
+       req.dev = dev;
+
+       return devtmpfs_submit_req(&req, tmp);
+}
+
+/*
+ * Ask the devtmpfs thread to remove the device node for @dev.
+ *
+ * Returns 0 without doing anything when the thread is not set up,
+ * -ENOMEM when device_get_devnode() yields no name, otherwise the result
+ * reported by the thread for this request.
+ */
+int devtmpfs_delete_node(struct device *dev)
+{
+       const char *tmp = NULL;
+       struct req req;
+
+       if (!thread)
+               return 0;
+
+       req.name = device_get_devnode(dev, NULL, &tmp);
+       if (!req.name)
+               return -ENOMEM;
+
+       /* mode == 0 is how a removal request is encoded */
+       req.mode = 0;
+       req.dev = dev;
+
+       return devtmpfs_submit_req(&req, tmp);
+}
+
 static int dev_mkdir(const char *name, mode_t mode)
 {
        struct nameidata nd;
        struct dentry *dentry;
        int err;
 
-       err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-                             name, LOOKUP_PARENT, &nd);
+       err = kern_path_parent(name, &nd);
        if (err)
                return err;
 
@@ -84,7 +157,7 @@ static int dev_mkdir(const char *name, mode_t mode)
                err = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
                if (!err)
                        /* mark as kernel-created inode */
-                       dentry->d_inode->i_private = &dev_mnt;
+                       dentry->d_inode->i_private = &thread;
                dput(dentry);
        } else {
                err = PTR_ERR(dentry);
@@ -99,7 +172,6 @@ static int create_path(const char *nodepath)
 {
        int err;
 
-       mutex_lock(&dirlock);
        err = dev_mkdir(nodepath, 0755);
        if (err == -ENOENT) {
                char *path;
@@ -126,45 +198,22 @@ static int create_path(const char *nodepath)
                kfree(path);
        }
 out:
-       mutex_unlock(&dirlock);
        return err;
 }
 
-int devtmpfs_create_node(struct device *dev)
+static int handle_create(const char *nodename, mode_t mode, struct device *dev)
 {
-       const char *tmp = NULL;
-       const char *nodename;
-       const struct cred *curr_cred;
-       mode_t mode = 0;
        struct nameidata nd;
        struct dentry *dentry;
        int err;
 
-       if (!dev_mnt)
-               return 0;
-
-       nodename = device_get_devnode(dev, &mode, &tmp);
-       if (!nodename)
-               return -ENOMEM;
-
-       if (mode == 0)
-               mode = 0600;
-       if (is_blockdev(dev))
-               mode |= S_IFBLK;
-       else
-               mode |= S_IFCHR;
-
-       curr_cred = override_creds(&init_cred);
-
-       err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-                             nodename, LOOKUP_PARENT, &nd);
+       err = kern_path_parent(nodename, &nd);
        if (err == -ENOENT) {
                create_path(nodename);
-               err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-                                     nodename, LOOKUP_PARENT, &nd);
+               err = kern_path_parent(nodename, &nd);
        }
        if (err)
-               goto out;
+               return err;
 
        dentry = lookup_create(&nd, 0);
        if (!IS_ERR(dentry)) {
@@ -181,7 +230,7 @@ int devtmpfs_create_node(struct device *dev)
                        mutex_unlock(&dentry->d_inode->i_mutex);
 
                        /* mark as kernel-created inode */
-                       dentry->d_inode->i_private = &dev_mnt;
+                       dentry->d_inode->i_private = &thread;
                }
                dput(dentry);
        } else {
@@ -190,9 +239,6 @@ int devtmpfs_create_node(struct device *dev)
 
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        path_put(&nd.path);
-out:
-       kfree(tmp);
-       revert_creds(curr_cred);
        return err;
 }
 
@@ -202,8 +248,7 @@ static int dev_rmdir(const char *name)
        struct dentry *dentry;
        int err;
 
-       err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-                             name, LOOKUP_PARENT, &nd);
+       err = kern_path_parent(name, &nd);
        if (err)
                return err;
 
@@ -211,7 +256,7 @@ static int dev_rmdir(const char *name)
        dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
        if (!IS_ERR(dentry)) {
                if (dentry->d_inode) {
-                       if (dentry->d_inode->i_private == &dev_mnt)
+                       if (dentry->d_inode->i_private == &thread)
                                err = vfs_rmdir(nd.path.dentry->d_inode,
                                                dentry);
                        else
@@ -238,7 +283,6 @@ static int delete_path(const char *nodepath)
        if (!path)
                return -ENOMEM;
 
-       mutex_lock(&dirlock);
        for (;;) {
                char *base;
 
@@ -250,7 +294,6 @@ static int delete_path(const char *nodepath)
                if (err)
                        break;
        }
-       mutex_unlock(&dirlock);
 
        kfree(path);
        return err;
@@ -259,7 +302,7 @@ static int delete_path(const char *nodepath)
 static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
 {
        /* did we create it */
-       if (inode->i_private != &dev_mnt)
+       if (inode->i_private != &thread)
                return 0;
 
        /* does the dev_t match */
@@ -277,29 +320,17 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta
        return 1;
 }
 
-int devtmpfs_delete_node(struct device *dev)
+static int handle_remove(const char *nodename, struct device *dev)
 {
-       const char *tmp = NULL;
-       const char *nodename;
-       const struct cred *curr_cred;
        struct nameidata nd;
        struct dentry *dentry;
        struct kstat stat;
        int deleted = 1;
        int err;
 
-       if (!dev_mnt)
-               return 0;
-
-       nodename = device_get_devnode(dev, NULL, &tmp);
-       if (!nodename)
-               return -ENOMEM;
-
-       curr_cred = override_creds(&init_cred);
-       err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-                             nodename, LOOKUP_PARENT, &nd);
+       err = kern_path_parent(nodename, &nd);
        if (err)
-               goto out;
+               return err;
 
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
@@ -337,9 +368,6 @@ int devtmpfs_delete_node(struct device *dev)
        path_put(&nd.path);
        if (deleted && strchr(nodename, '/'))
                delete_path(nodename);
-out:
-       kfree(tmp);
-       revert_creds(curr_cred);
        return err;
 }
 
@@ -354,7 +382,7 @@ int devtmpfs_mount(const char *mntdir)
        if (!mount_dev)
                return 0;
 
-       if (!dev_mnt)
+       if (!thread)
                return 0;
 
        err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL);
@@ -365,31 +393,79 @@ int devtmpfs_mount(const char *mntdir)
        return err;
 }
 
+static __initdata DECLARE_COMPLETION(setup_done);
+
+/*
+ * Dispatch one queued request: a non-zero @mode means "create the node
+ * with that mode", a zero @mode means "remove the node".
+ * Returns whatever the selected handler returns.
+ */
+static int handle(const char *name, mode_t mode, struct device *dev)
+{
+       return mode ? handle_create(name, mode, dev)
+                   : handle_remove(name, dev);
+}
+
+/*
+ * Body of the "kdevtmpfs" kernel thread.
+ *
+ * Setup: unshare the mount namespace, mount devtmpfs on "/", then chdir
+ * and chroot into it.  The setup result is stored through @p (an int *
+ * supplied by the creator) and setup_done is completed so the creator can
+ * continue.  On setup failure the thread exits with that error.
+ *
+ * Afterwards the thread loops forever: it detaches the whole pending
+ * `requests` list under req_lock, runs handle() on every entry, completes
+ * each one, and sleeps until woken again.
+ */
+static int devtmpfsd(void *p)
+{
+       char options[] = "mode=0755";
+       int *err = p;
+       int ret;
+
+       ret = sys_unshare(CLONE_NEWNS);
+       if (!ret)
+               ret = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options);
+       if (!ret) {
+               sys_chdir("/.."); /* will traverse into overmounted root */
+               sys_chroot(".");
+       }
+
+       /*
+        * Publish the result before waking the creator: @p points into the
+        * creator's stack frame and must not be touched once setup_done has
+        * been completed, so keep using the local copy from here on.
+        */
+       *err = ret;
+       complete(&setup_done);
+       if (ret)
+               return ret;
+
+       while (1) {
+               spin_lock(&req_lock);
+               while (requests) {
+                       struct req *req = requests;
+                       requests = NULL;
+                       spin_unlock(&req_lock);
+                       while (req) {
+                               /*
+                                * Requests live on their submitters' stacks;
+                                * once complete() is called the submitter may
+                                * return and the request is gone, so fetch the
+                                * link before signalling completion.
+                                */
+                               struct req *next = req->next;
+
+                               req->err = handle(req->name, req->mode, req->dev);
+                               complete(&req->done);
+                               req = next;
+                       }
+                       spin_lock(&req_lock);
+               }
+               /* mark ourselves sleeping before dropping the lock */
+               set_current_state(TASK_INTERRUPTIBLE);
+               spin_unlock(&req_lock);
+               schedule();
+               __set_current_state(TASK_RUNNING);
+       }
+       return 0;
+}
+
 /*
  * Create devtmpfs instance, driver-core devices will add their device
  * nodes here.
  */
 int __init devtmpfs_init(void)
 {
-       int err;
-       struct vfsmount *mnt;
-       char options[] = "mode=0755";
-
-       err = register_filesystem(&dev_fs_type);
+       int err = register_filesystem(&dev_fs_type);
        if (err) {
                printk(KERN_ERR "devtmpfs: unable to register devtmpfs "
                       "type %i\n", err);
                return err;
        }
 
-       mnt = kern_mount_data(&dev_fs_type, options);
-       if (IS_ERR(mnt)) {
-               err = PTR_ERR(mnt);
+       thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
+       if (!IS_ERR(thread)) {
+               wait_for_completion(&setup_done);
+       } else {
+               err = PTR_ERR(thread);
+               thread = NULL;
+       }
+
+       if (err) {
                printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
                unregister_filesystem(&dev_fs_type);
                return err;
        }
-       dev_mnt = mnt;
 
        printk(KERN_INFO "devtmpfs: initialized\n");
        return 0;