userns: Better restrictions on when proc and sysfs can be mounted

author Eric W. Biederman <ebiederm@xmission.com>

Sun, 31 Mar 2013 02:57:41 +0000 (19:57 -0700)

committer Maciej Wereski <m.wereski@partner.samsung.com>

Tue, 9 Jun 2015 09:31:03 +0000 (11:31 +0200)
author Eric W. Biederman <ebiederm@xmission.com>
Sun, 31 Mar 2013 02:57:41 +0000 (19:57 -0700)
committer Maciej Wereski <m.wereski@partner.samsung.com>
Tue, 9 Jun 2015 09:31:03 +0000 (11:31 +0200)
diff --git a/fs/namespace.c b/fs/namespace.c

index ac573ec..be2270c 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2860,25 +2860,38 @@ bool current_chrooted(void)
         return chrooted;
  }
  
-void update_mnt_policy(struct user_namespace *userns)
+bool fs_fully_visible(struct file_system_type *type)
  {
         struct mnt_namespace *ns = current->nsproxy->mnt_ns;
         struct mount *mnt;
+       bool visible = false;
  
-       down_read(&namespace_sem);
+       if (unlikely(!ns))
+               return false;
+
+       namespace_lock();
         list_for_each_entry(mnt, &ns->list, mnt_list) {
-               switch (mnt->mnt.mnt_sb->s_magic) {
-               case SYSFS_MAGIC:
-                       userns->may_mount_sysfs = true;
-                       break;
-               case PROC_SUPER_MAGIC:
-                       userns->may_mount_proc = true;
-                       break;
+               struct mount *child;
+               if (mnt->mnt.mnt_sb->s_type != type)
+                       continue;
+
+               /* This mount is not fully visible if there are any child mounts
+                * that cover anything except for empty directories.
+                */
+               list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+                       struct inode *inode = child->mnt_mountpoint->d_inode;
+                       if (!S_ISDIR(inode->i_mode))
+                               goto next;
+                       if (inode->i_nlink != 2)
+                               goto next;
                 }
-               if (userns->may_mount_sysfs && userns->may_mount_proc)
-                       break;
+               visible = true;
+               goto found;
+       next:   ;
         }
-       up_read(&namespace_sem);
+found:
+       namespace_unlock();
+       return visible;
  }
  
  static void *mntns_get(struct task_struct *task)
diff --git a/fs/proc/root.c b/fs/proc/root.c

index 04ec276..901119e 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -110,8 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
                 ns = task_active_pid_ns(current);
                 options = data;
  
-               if (!current_user_ns()->may_mount_proc ||
-                   !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+               if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+                       return ERR_PTR(-EPERM);
+
+               /* Does the mounter have privilege over the pid namespace? */
+               if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
                         return ERR_PTR(-EPERM);
         }
  
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c

index fd7ce7a..21719cb 100644 (file)
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -112,7 +112,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
         struct super_block *sb;
         int error;
  
-       if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+       if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
+           !fs_fully_visible(fs_type))
                 return ERR_PTR(-EPERM);
  
         info = kzalloc(sizeof(*info), GFP_KERNEL);
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 1924260..2ddb98a 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1920,6 +1920,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
  extern int freeze_super(struct super_block *super);
  extern int thaw_super(struct super_block *super);
  extern bool our_mnt(struct vfsmount *mnt);
+extern bool fs_fully_visible(struct file_system_type *);
  
  extern int current_umask(void);
  
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h

index 14105c2..4db2985 100644 (file)
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,8 +27,6 @@ struct user_namespace {
         kuid_t                  owner;
         kgid_t                  group;
         unsigned int            proc_inum;
-       bool                    may_mount_sysfs;
-       bool                    may_mount_proc;
  };
  
  extern struct user_namespace init_user_ns;
@@ -85,6 +83,4 @@ static inline void put_user_ns(struct user_namespace *ns)
  
  #endif
  
-void update_mnt_policy(struct user_namespace *userns);
-
  #endif /* _LINUX_USER_H */
diff --git a/kernel/user.c b/kernel/user.c

index 69b4c3d..5bbb919 100644 (file)
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
         .owner = GLOBAL_ROOT_UID,
         .group = GLOBAL_ROOT_GID,
         .proc_inum = PROC_USER_INIT_INO,
-       .may_mount_sysfs = true,
-       .may_mount_proc = true,
  };
  EXPORT_SYMBOL_GPL(init_user_ns);
  
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c

index 9bea1d7..6991139 100644 (file)
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -101,8 +101,6 @@ int create_user_ns(struct cred *new)
  
         set_cred_user_ns(new, ns);
  
-       update_mnt_policy(ns);
-
         return 0;
  }
author	Eric W. Biederman <ebiederm@xmission.com>
	Sun, 31 Mar 2013 02:57:41 +0000 (19:57 -0700)
committer	Maciej Wereski <m.wereski@partner.samsung.com>
	Tue, 9 Jun 2015 09:31:03 +0000 (11:31 +0200)
fs/namespace.c		patch | blob | history
fs/proc/root.c		patch | blob | history
fs/sysfs/mount.c		patch | blob | history
include/linux/fs.h		patch | blob | history
include/linux/user_namespace.h		patch | blob | history
kernel/user.c		patch | blob | history
kernel/user_namespace.c		patch | blob | history