Merge branch 'proc-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 4 Jun 2020 20:54:34 +0000 (13:54 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 4 Jun 2020 20:54:34 +0000 (13:54 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 4 Jun 2020 20:54:34 +0000 (13:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 4 Jun 2020 20:54:34 +0000 (13:54 -0700)
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst

index 430963e..996f3cf 100644 (file)
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -51,6 +51,8 @@ fixes/update part 1.1  Stefani Seibold <stefani@seibold.net>    June 9 2009
    4    Configuring procfs
    4.1  Mount options
  
+  5    Filesystem behavior
+
  Preface
  =======
  
@@ -2143,28 +2145,80 @@ The following mount options are supported:
         =========       ========================================================
         hidepid=        Set /proc/<pid>/ access mode.
         gid=            Set the group authorized to learn processes information.
+       subset=         Show only the specified subset of procfs.
         =========       ========================================================
  
-hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
-(default).
-
-hidepid=1 means users may not access any /proc/<pid>/ directories but their
-own.  Sensitive files like cmdline, sched*, status are now protected against
-other users.  This makes it impossible to learn whether any user runs
-specific program (given the program doesn't reveal itself by its behaviour).
-As an additional bonus, as /proc/<pid>/cmdline is unaccessible for other users,
-poorly written programs passing sensitive information via program arguments are
-now protected against local eavesdroppers.
-
-hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other
-users.  It doesn't mean that it hides a fact whether a process with a specific
-pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"),
-but it hides process' uid and gid, which may be learned by stat()'ing
-/proc/<pid>/ otherwise.  It greatly complicates an intruder's task of gathering
-information about running processes, whether some daemon runs with elevated
-privileges, whether other user runs some sensitive program, whether other users
-run any program at all, etc.
+hidepid=off or hidepid=0 means classic mode - everybody may access all
+/proc/<pid>/ directories (default).
+
+hidepid=noaccess or hidepid=1 means users may not access any /proc/<pid>/
+directories but their own.  Sensitive files like cmdline, sched*, status are now
+protected against other users.  This makes it impossible to learn whether any
+user runs specific program (given the program doesn't reveal itself by its
+behaviour).  As an additional bonus, as /proc/<pid>/cmdline is inaccessible for
+other users, poorly written programs passing sensitive information via program
+arguments are now protected against local eavesdroppers.
+
+hidepid=invisible or hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be
+fully invisible to other users.  It doesn't mean that it hides a fact whether a
+process with a specific pid value exists (it can be learned by other means, e.g.
+by "kill -0 $PID"), but it hides process' uid and gid, which may be learned by
+stat()'ing /proc/<pid>/ otherwise.  It greatly complicates an intruder's task of
+gathering information about running processes, whether some daemon runs with
+elevated privileges, whether other user runs some sensitive program, whether
+other users run any program at all, etc.
+
+hidepid=ptraceable or hidepid=4 means that procfs should only contain
+/proc/<pid>/ directories that the caller can ptrace.
  
  gid= defines a group authorized to learn processes information otherwise
  prohibited by hidepid=.  If you use some daemon like identd which needs to learn
  information about processes information, just add identd to this group.
+
+subset=pid hides all top level files and directories in the procfs that
+are not related to tasks.
+
+5      Filesystem behavior
+----------------------------
+
+Originally, before the advent of pid namespaces, procfs was a global file
+system. It means that there was only one procfs instance in the system.
+
+When pid namespace was added, a separate procfs instance was mounted in
+each pid namespace. So, procfs mount options are global among all
+mountpoints within the same namespace.
+
+::
+
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=2 0 0
+
+# strace -e mount mount -o hidepid=1 -t proc proc /tmp/proc
+mount("proc", "/tmp/proc", "proc", 0, "hidepid=1") = 0
++++ exited with 0 +++
+
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=2 0 0
+proc /tmp/proc proc rw,relatime,hidepid=2 0 0
+
+Only after remounting do the procfs mount options change, and they then
+change at all mountpoints.
+
+# mount -o remount,hidepid=1 -t proc proc /tmp/proc
+
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=1 0 0
+proc /tmp/proc proc rw,relatime,hidepid=1 0 0
+
+This old behavior is different from the behavior of other filesystems.
+
+The new procfs behavior is more like other filesystems. Each procfs mount
+creates a new procfs instance. Mount options affect only that instance.
+It means that it became possible to have several procfs instances
+displaying tasks with different filtering options in one pid namespace.
+
+# mount -o hidepid=invisible -t proc proc /proc
+# mount -o hidepid=noaccess -t proc proc /tmp/proc
+# grep ^proc /proc/mounts
+proc /proc proc rw,relatime,hidepid=invisible 0 0
+proc /tmp/proc proc rw,relatime,hidepid=noaccess 0 0
diff --git a/fs/exec.c b/fs/exec.c

index 2c46511..2f0a745 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1176,7 +1176,6 @@ static int de_thread(struct task_struct *tsk)
                 tsk->start_boottime = leader->start_boottime;
  
                 BUG_ON(!same_thread_group(leader, tsk));
-               BUG_ON(has_group_leader_pid(tsk));
                 /*
                  * An exec() starts a new thread group with the
                  * TGID of the previous thread group. Rehash the
@@ -1186,11 +1185,8 @@ static int de_thread(struct task_struct *tsk)
  
                 /* Become a process group leader with the old leader's pid.
                  * The old leader becomes a thread of the this thread group.
-                * Note: The old leader also uses this pid until release_task
-                *       is called.  Odd but simple and correct.
                  */
-               tsk->pid = leader->pid;
-               change_pid(tsk, PIDTYPE_PID, task_pid(leader));
+               exchange_tids(tsk, leader);
                 transfer_pid(leader, tsk, PIDTYPE_TGID);
                 transfer_pid(leader, tsk, PIDTYPE_PGID);
                 transfer_pid(leader, tsk, PIDTYPE_SID);
diff --git a/fs/locks.c b/fs/locks.c

index 1d4f4d5..6fd1f6e 100644 (file)
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2823,7 +2823,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
  {
         struct inode *inode = NULL;
         unsigned int fl_pid;
-       struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+       struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
  
         fl_pid = locks_translate_pid(fl, proc_pidns);
         /*
@@ -2901,7 +2901,7 @@ static int locks_show(struct seq_file *f, void *v)
  {
         struct locks_iterator *iter = f->private;
         struct file_lock *fl, *bfl;
-       struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+       struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
  
         fl = hlist_entry(v, struct file_lock, fl_link);
  
diff --git a/fs/proc/array.c b/fs/proc/array.c

index 8e16f14..0433110 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -728,7 +728,7 @@ static int children_seq_show(struct seq_file *seq, void *v)
  {
         struct inode *inode = file_inode(seq->file);
  
-       seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
+       seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode->i_sb)));
         return 0;
  }
  
diff --git a/fs/proc/base.c b/fs/proc/base.c

index eb2255e..b1d94d1 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -697,13 +697,21 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
   * May current process learn task's sched/cmdline info (for hide_pid_min=1)
   * or euid/egid (for hide_pid_min=2)?
   */
-static bool has_pid_permissions(struct pid_namespace *pid,
+static bool has_pid_permissions(struct proc_fs_info *fs_info,
                                  struct task_struct *task,
-                                int hide_pid_min)
+                                enum proc_hidepid hide_pid_min)
  {
-       if (pid->hide_pid < hide_pid_min)
+       /*
+        * If 'hidepid' mount option is set, force a ptrace check,
+        * we indicate that we are using a filesystem syscall
+        * by passing PTRACE_MODE_READ_FSCREDS
+        */
+       if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
+               return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+
+       if (fs_info->hide_pid < hide_pid_min)
                 return true;
-       if (in_group_p(pid->pid_gid))
+       if (in_group_p(fs_info->pid_gid))
                 return true;
         return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
  }
@@ -711,18 +719,18 @@ static bool has_pid_permissions(struct pid_namespace *pid,
  
  static int proc_pid_permission(struct inode *inode, int mask)
  {
-       struct pid_namespace *pid = proc_pid_ns(inode);
+       struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
         struct task_struct *task;
         bool has_perms;
  
         task = get_proc_task(inode);
         if (!task)
                 return -ESRCH;
-       has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
+       has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
         put_task_struct(task);
  
         if (!has_perms) {
-               if (pid->hide_pid == HIDEPID_INVISIBLE) {
+               if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
                         /*
                          * Let's make getdents(), stat(), and open()
                          * consistent with each other.  If a process
@@ -746,7 +754,7 @@ static const struct inode_operations proc_def_inode_operations = {
  static int proc_single_show(struct seq_file *m, void *v)
  {
         struct inode *inode = m->private;
-       struct pid_namespace *ns = proc_pid_ns(inode);
+       struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
         struct pid *pid = proc_pid(inode);
         struct task_struct *task;
         int ret;
@@ -1415,7 +1423,7 @@ static const struct file_operations proc_fail_nth_operations = {
  static int sched_show(struct seq_file *m, void *v)
  {
         struct inode *inode = m->private;
-       struct pid_namespace *ns = proc_pid_ns(inode);
+       struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
         struct task_struct *p;
  
         p = get_proc_task(inode);
@@ -1909,7 +1917,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
                 u32 request_mask, unsigned int query_flags)
  {
         struct inode *inode = d_inode(path->dentry);
-       struct pid_namespace *pid = proc_pid_ns(inode);
+       struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
         struct task_struct *task;
  
         generic_fillattr(inode, stat);
@@ -1919,7 +1927,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
         rcu_read_lock();
         task = pid_task(proc_pid(inode), PIDTYPE_PID);
         if (task) {
-               if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
+               if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
                         rcu_read_unlock();
                         /*
                          * This doesn't prevent learning whether PID exists,
@@ -2470,7 +2478,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
                 return -ENOMEM;
  
         tp->pid = proc_pid(inode);
-       tp->ns = proc_pid_ns(inode);
+       tp->ns = proc_pid_ns(inode->i_sb);
         return 0;
  }
  
@@ -3312,6 +3320,7 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
  {
         struct task_struct *task;
         unsigned tgid;
+       struct proc_fs_info *fs_info;
         struct pid_namespace *ns;
         struct dentry *result = ERR_PTR(-ENOENT);
  
@@ -3319,7 +3328,8 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
         if (tgid == ~0U)
                 goto out;
  
-       ns = dentry->d_sb->s_fs_info;
+       fs_info = proc_sb_info(dentry->d_sb);
+       ns = fs_info->pid_ns;
         rcu_read_lock();
         task = find_task_by_pid_ns(tgid, ns);
         if (task)
@@ -3328,7 +3338,14 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
         if (!task)
                 goto out;
  
+       /* Limit procfs to only ptraceable tasks */
+       if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
+               if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
+                       goto out_put_task;
+       }
+
         result = proc_pid_instantiate(dentry, task, NULL);
+out_put_task:
         put_task_struct(task);
  out:
         return result;
@@ -3354,20 +3371,8 @@ retry:
         pid = find_ge_pid(iter.tgid, ns);
         if (pid) {
                 iter.tgid = pid_nr_ns(pid, ns);
-               iter.task = pid_task(pid, PIDTYPE_PID);
-               /* What we to know is if the pid we have find is the
-                * pid of a thread_group_leader.  Testing for task
-                * being a thread_group_leader is the obvious thing
-                * todo but there is a window when it fails, due to
-                * the pid transfer logic in de_thread.
-                *
-                * So we perform the straight forward test of seeing
-                * if the pid we have found is the pid of a thread
-                * group leader, and don't worry if the task we have
-                * found doesn't happen to be a thread group leader.
-                * As we don't care in the case of readdir.
-                */
-               if (!iter.task || !has_group_leader_pid(iter.task)) {
+               iter.task = pid_task(pid, PIDTYPE_TGID);
+               if (!iter.task) {
                         iter.tgid += 1;
                         goto retry;
                 }
@@ -3383,20 +3388,21 @@ retry:
  int proc_pid_readdir(struct file *file, struct dir_context *ctx)
  {
         struct tgid_iter iter;
-       struct pid_namespace *ns = proc_pid_ns(file_inode(file));
+       struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
+       struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
         loff_t pos = ctx->pos;
  
         if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
                 return 0;
  
         if (pos == TGID_OFFSET - 2) {
-               struct inode *inode = d_inode(ns->proc_self);
+               struct inode *inode = d_inode(fs_info->proc_self);
                 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
                         return 0;
                 ctx->pos = pos = pos + 1;
         }
         if (pos == TGID_OFFSET - 1) {
-               struct inode *inode = d_inode(ns->proc_thread_self);
+               struct inode *inode = d_inode(fs_info->proc_thread_self);
                 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
                         return 0;
                 ctx->pos = pos = pos + 1;
@@ -3410,7 +3416,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
                 unsigned int len;
  
                 cond_resched();
-               if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
+               if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
                         continue;
  
                 len = snprintf(name, sizeof(name), "%u", iter.tgid);
@@ -3610,6 +3616,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
         struct task_struct *task;
         struct task_struct *leader = get_proc_task(dir);
         unsigned tid;
+       struct proc_fs_info *fs_info;
         struct pid_namespace *ns;
         struct dentry *result = ERR_PTR(-ENOENT);
  
@@ -3620,7 +3627,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
         if (tid == ~0U)
                 goto out;
  
-       ns = dentry->d_sb->s_fs_info;
+       fs_info = proc_sb_info(dentry->d_sb);
+       ns = fs_info->pid_ns;
         rcu_read_lock();
         task = find_task_by_pid_ns(tid, ns);
         if (task)
@@ -3734,7 +3742,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
         /* f_version caches the tgid value that the last readdir call couldn't
          * return. lseek aka telldir automagically resets f_version to 0.
          */
-       ns = proc_pid_ns(inode);
+       ns = proc_pid_ns(inode->i_sb);
         tid = (int)file->f_version;
         file->f_version = 0;
         for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c

index 4ed6dab..2f9fa17 100644 (file)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -269,6 +269,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
  struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
                 unsigned int flags)
  {
+       struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb);
+
+       if (fs_info->pidonly == PROC_PIDONLY_ON)
+               return ERR_PTR(-ENOENT);
+
         return proc_lookup_de(dir, dentry, PDE(dir));
  }
  
@@ -325,6 +330,10 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
  int proc_readdir(struct file *file, struct dir_context *ctx)
  {
         struct inode *inode = file_inode(file);
+       struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
+
+       if (fs_info->pidonly == PROC_PIDONLY_ON)
+               return 1;
  
         return proc_readdir_de(file, ctx, PDE(inode));
  }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index fb4cace..f40c253 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,6 +24,7 @@
  #include <linux/seq_file.h>
  #include <linux/slab.h>
  #include <linux/mount.h>
+#include <linux/bug.h>
  
  #include <linux/uaccess.h>
  
@@ -165,15 +166,28 @@ void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock
                 deactivate_super(old_sb);
  }
  
+static inline const char *hidepid2str(enum proc_hidepid v)
+{
+       switch (v) {
+               case HIDEPID_OFF: return "off";
+               case HIDEPID_NO_ACCESS: return "noaccess";
+               case HIDEPID_INVISIBLE: return "invisible";
+               case HIDEPID_NOT_PTRACEABLE: return "ptraceable";
+       }
+       WARN_ONCE(1, "bad hide_pid value: %d\n", v);
+       return "unknown";
+}
+
  static int proc_show_options(struct seq_file *seq, struct dentry *root)
  {
-       struct super_block *sb = root->d_sb;
-       struct pid_namespace *pid = sb->s_fs_info;
+       struct proc_fs_info *fs_info = proc_sb_info(root->d_sb);
  
-       if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
-               seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
-       if (pid->hide_pid != HIDEPID_OFF)
-               seq_printf(seq, ",hidepid=%u", pid->hide_pid);
+       if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID))
+               seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid));
+       if (fs_info->hide_pid != HIDEPID_OFF)
+               seq_printf(seq, ",hidepid=%s", hidepid2str(fs_info->hide_pid));
+       if (fs_info->pidonly != PROC_PIDONLY_OFF)
+               seq_printf(seq, ",subset=pid");
  
         return 0;
  }
@@ -464,6 +478,7 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
  
  static int proc_reg_open(struct inode *inode, struct file *file)
  {
+       struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
         struct proc_dir_entry *pde = PDE(inode);
         int rv = 0;
         typeof_member(struct proc_ops, proc_open) open;
@@ -477,6 +492,9 @@ static int proc_reg_open(struct inode *inode, struct file *file)
                 return rv;
         }
  
+       if (fs_info->pidonly == PROC_PIDONLY_ON)
+               return -ENOENT;
+
         /*
          * Ensure that
          * 1) PDE's ->release hook will be called no matter what
diff --git a/fs/proc/root.c b/fs/proc/root.c

index cdbe929..ffebed1 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -32,21 +32,86 @@
  struct proc_fs_context {
         struct pid_namespace    *pid_ns;
         unsigned int            mask;
-       int                     hidepid;
+       enum proc_hidepid       hidepid;
         int                     gid;
+       enum proc_pidonly       pidonly;
  };
  
  enum proc_param {
         Opt_gid,
         Opt_hidepid,
+       Opt_subset,
  };
  
  static const struct fs_parameter_spec proc_fs_parameters[] = {
         fsparam_u32("gid",      Opt_gid),
-       fsparam_u32("hidepid",  Opt_hidepid),
+       fsparam_string("hidepid",       Opt_hidepid),
+       fsparam_string("subset",        Opt_subset),
         {}
  };
  
+static inline int valid_hidepid(unsigned int value)
+{
+       return (value == HIDEPID_OFF ||
+               value == HIDEPID_NO_ACCESS ||
+               value == HIDEPID_INVISIBLE ||
+               value == HIDEPID_NOT_PTRACEABLE);
+}
+
+static int proc_parse_hidepid_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct proc_fs_context *ctx = fc->fs_private;
+       struct fs_parameter_spec hidepid_u32_spec = fsparam_u32("hidepid", Opt_hidepid);
+       struct fs_parse_result result;
+       int base = (unsigned long)hidepid_u32_spec.data;
+
+       if (param->type != fs_value_is_string)
+               return invalf(fc, "proc: unexpected type of hidepid value\n");
+
+       if (!kstrtouint(param->string, base, &result.uint_32)) {
+               if (!valid_hidepid(result.uint_32))
+                       return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string);
+               ctx->hidepid = result.uint_32;
+               return 0;
+       }
+
+       if (!strcmp(param->string, "off"))
+               ctx->hidepid = HIDEPID_OFF;
+       else if (!strcmp(param->string, "noaccess"))
+               ctx->hidepid = HIDEPID_NO_ACCESS;
+       else if (!strcmp(param->string, "invisible"))
+               ctx->hidepid = HIDEPID_INVISIBLE;
+       else if (!strcmp(param->string, "ptraceable"))
+               ctx->hidepid = HIDEPID_NOT_PTRACEABLE;
+       else
+               return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string);
+
+       return 0;
+}
+
+static int proc_parse_subset_param(struct fs_context *fc, char *value)
+{
+       struct proc_fs_context *ctx = fc->fs_private;
+
+       while (value) {
+               char *ptr = strchr(value, ',');
+
+               if (ptr != NULL)
+                       *ptr++ = '\0';
+
+               if (*value != '\0') {
+                       if (!strcmp(value, "pid")) {
+                               ctx->pidonly = PROC_PIDONLY_ON;
+                       } else {
+                               return invalf(fc, "proc: unsupported subset option - %s\n", value);
+                       }
+               }
+               value = ptr;
+       }
+
+       return 0;
+}
+
  static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
  {
         struct proc_fs_context *ctx = fc->fs_private;
@@ -63,10 +128,13 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
                 break;
  
         case Opt_hidepid:
-               ctx->hidepid = result.uint_32;
-               if (ctx->hidepid < HIDEPID_OFF ||
-                   ctx->hidepid > HIDEPID_INVISIBLE)
-                       return invalfc(fc, "hidepid value must be between 0 and 2.\n");
+               if (proc_parse_hidepid_param(fc, param))
+                       return -EINVAL;
+               break;
+
+       case Opt_subset:
+               if (proc_parse_subset_param(fc, param->string) < 0)
+                       return -EINVAL;
                 break;
  
         default:
@@ -77,26 +145,33 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
         return 0;
  }
  
-static void proc_apply_options(struct super_block *s,
+static void proc_apply_options(struct proc_fs_info *fs_info,
                                struct fs_context *fc,
-                              struct pid_namespace *pid_ns,
                                struct user_namespace *user_ns)
  {
         struct proc_fs_context *ctx = fc->fs_private;
  
         if (ctx->mask & (1 << Opt_gid))
-               pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
+               fs_info->pid_gid = make_kgid(user_ns, ctx->gid);
         if (ctx->mask & (1 << Opt_hidepid))
-               pid_ns->hide_pid = ctx->hidepid;
+               fs_info->hide_pid = ctx->hidepid;
+       if (ctx->mask & (1 << Opt_subset))
+               fs_info->pidonly = ctx->pidonly;
  }
  
  static int proc_fill_super(struct super_block *s, struct fs_context *fc)
  {
-       struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
+       struct proc_fs_context *ctx = fc->fs_private;
         struct inode *root_inode;
+       struct proc_fs_info *fs_info;
         int ret;
  
-       proc_apply_options(s, fc, pid_ns, current_user_ns());
+       fs_info = kzalloc(sizeof(*fs_info), GFP_KERNEL);
+       if (!fs_info)
+               return -ENOMEM;
+
+       fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
+       proc_apply_options(fs_info, fc, current_user_ns());
  
         /* User space would break if executables or devices appear on proc */
         s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
@@ -106,6 +181,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
         s->s_magic = PROC_SUPER_MAGIC;
         s->s_op = &proc_sops;
         s->s_time_gran = 1;
+       s->s_fs_info = fs_info;
  
         /*
          * procfs isn't actually a stacking filesystem; however, there is
@@ -113,7 +189,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
          * top of it
          */
         s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
-       
+
         /* procfs dentries and inodes don't require IO to create */
         s->s_shrink.seeks = 0;
  
@@ -140,19 +216,17 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
  static int proc_reconfigure(struct fs_context *fc)
  {
         struct super_block *sb = fc->root->d_sb;
-       struct pid_namespace *pid = sb->s_fs_info;
+       struct proc_fs_info *fs_info = proc_sb_info(sb);
  
         sync_filesystem(sb);
  
-       proc_apply_options(sb, fc, pid, current_user_ns());
+       proc_apply_options(fs_info, fc, current_user_ns());
         return 0;
  }
  
  static int proc_get_tree(struct fs_context *fc)
  {
-       struct proc_fs_context *ctx = fc->fs_private;
-
-       return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns);
+       return get_tree_nodev(fc, proc_fill_super);
  }
  
  static void proc_fs_context_free(struct fs_context *fc)
@@ -188,22 +262,17 @@ static int proc_init_fs_context(struct fs_context *fc)
  
  static void proc_kill_sb(struct super_block *sb)
  {
-       struct pid_namespace *ns;
+       struct proc_fs_info *fs_info = proc_sb_info(sb);
  
-       ns = (struct pid_namespace *)sb->s_fs_info;
-       if (ns->proc_self)
-               dput(ns->proc_self);
-       if (ns->proc_thread_self)
-               dput(ns->proc_thread_self);
-       kill_anon_super(sb);
+       if (fs_info->proc_self)
+               dput(fs_info->proc_self);
  
-       /* Make the pid namespace safe for the next mount of proc */
-       ns->proc_self = NULL;
-       ns->proc_thread_self = NULL;
-       ns->pid_gid = GLOBAL_ROOT_GID;
-       ns->hide_pid = 0;
+       if (fs_info->proc_thread_self)
+               dput(fs_info->proc_thread_self);
  
-       put_pid_ns(ns);
+       kill_anon_super(sb);
+       put_pid_ns(fs_info->pid_ns);
+       kfree(fs_info);
  }
  
  static struct file_system_type proc_fs_type = {
diff --git a/fs/proc/self.c b/fs/proc/self.c

index 57c0a10..ca5158f 100644 (file)
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
                                       struct inode *inode,
                                       struct delayed_call *done)
  {
-       struct pid_namespace *ns = proc_pid_ns(inode);
+       struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
         pid_t tgid = task_tgid_nr_ns(current, ns);
         char *name;
  
@@ -36,10 +36,10 @@ static unsigned self_inum __ro_after_init;
  int proc_setup_self(struct super_block *s)
  {
         struct inode *root_inode = d_inode(s->s_root);
-       struct pid_namespace *ns = proc_pid_ns(root_inode);
+       struct proc_fs_info *fs_info = proc_sb_info(s);
         struct dentry *self;
         int ret = -ENOMEM;
-       
+
         inode_lock(root_inode);
         self = d_alloc_name(s->s_root, "self");
         if (self) {
@@ -62,7 +62,7 @@ int proc_setup_self(struct super_block *s)
         if (ret)
                 pr_err("proc_fill_super: can't allocate /proc/self\n");
         else
-               ns->proc_self = self;
+               fs_info->proc_self = self;
  
         return ret;
  }
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c

index f61ae53..ac284f4 100644 (file)
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
                                              struct inode *inode,
                                              struct delayed_call *done)
  {
-       struct pid_namespace *ns = proc_pid_ns(inode);
+       struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
         pid_t tgid = task_tgid_nr_ns(current, ns);
         pid_t pid = task_pid_nr_ns(current, ns);
         char *name;
@@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init;
  int proc_setup_thread_self(struct super_block *s)
  {
         struct inode *root_inode = d_inode(s->s_root);
-       struct pid_namespace *ns = proc_pid_ns(root_inode);
+       struct proc_fs_info *fs_info = proc_sb_info(s);
         struct dentry *thread_self;
         int ret = -ENOMEM;
  
@@ -60,9 +60,9 @@ int proc_setup_thread_self(struct super_block *s)
         inode_unlock(root_inode);
  
         if (ret)
-               pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
+               pr_err("proc_fill_super: can't allocate /proc/thread-self\n");
         else
-               ns->proc_thread_self = thread_self;
+               fs_info->proc_thread_self = thread_self;
  
         return ret;
  }
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c

index e4d70c0..3059a93 100644 (file)
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -37,23 +37,23 @@ static __poll_t mounts_poll(struct file *file, poll_table *wait)
         return res;
  }
  
-struct proc_fs_info {
+struct proc_fs_opts {
         int flag;
         const char *str;
  };
  
  static int show_sb_opts(struct seq_file *m, struct super_block *sb)
  {
-       static const struct proc_fs_info fs_info[] = {
+       static const struct proc_fs_opts fs_opts[] = {
                 { SB_SYNCHRONOUS, ",sync" },
                 { SB_DIRSYNC, ",dirsync" },
                 { SB_MANDLOCK, ",mand" },
                 { SB_LAZYTIME, ",lazytime" },
                 { 0, NULL }
         };
-       const struct proc_fs_info *fs_infop;
+       const struct proc_fs_opts *fs_infop;
  
-       for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+       for (fs_infop = fs_opts; fs_infop->flag; fs_infop++) {
                 if (sb->s_flags & fs_infop->flag)
                         seq_puts(m, fs_infop->str);
         }
@@ -63,7 +63,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
  
  static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
  {
-       static const struct proc_fs_info mnt_info[] = {
+       static const struct proc_fs_opts mnt_opts[] = {
                 { MNT_NOSUID, ",nosuid" },
                 { MNT_NODEV, ",nodev" },
                 { MNT_NOEXEC, ",noexec" },
@@ -72,9 +72,9 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
                 { MNT_RELATIME, ",relatime" },
                 { 0, NULL }
         };
-       const struct proc_fs_info *fs_infop;
+       const struct proc_fs_opts *fs_infop;
  
-       for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+       for (fs_infop = mnt_opts; fs_infop->flag; fs_infop++) {
                 if (mnt->mnt_flags & fs_infop->flag)
                         seq_puts(m, fs_infop->str);
         }
diff --git a/include/linux/pid.h b/include/linux/pid.h

index 93543cb..176d6cf 100644 (file)
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -102,6 +102,7 @@ extern void attach_pid(struct task_struct *task, enum pid_type);
  extern void detach_pid(struct task_struct *task, enum pid_type);
  extern void change_pid(struct task_struct *task, enum pid_type,
                         struct pid *pid);
+extern void exchange_tids(struct task_struct *task, struct task_struct *old);
  extern void transfer_pid(struct task_struct *old, struct task_struct *new,
                          enum pid_type);
  
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h

index 4956e36..5a5cb45 100644 (file)
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -17,12 +17,6 @@
  
  struct fs_pin;
  
-enum { /* definitions for pid_namespace's hide_pid field */
-       HIDEPID_OFF       = 0,
-       HIDEPID_NO_ACCESS = 1,
-       HIDEPID_INVISIBLE = 2,
-};
-
  struct pid_namespace {
         struct kref kref;
         struct idr idr;
@@ -32,17 +26,11 @@ struct pid_namespace {
         struct kmem_cache *pid_cachep;
         unsigned int level;
         struct pid_namespace *parent;
-#ifdef CONFIG_PROC_FS
-       struct dentry *proc_self;
-       struct dentry *proc_thread_self;
-#endif
  #ifdef CONFIG_BSD_PROCESS_ACCT
         struct fs_pin *bacct;
  #endif
         struct user_namespace *user_ns;
         struct ucounts *ucounts;
-       kgid_t pid_gid;
-       int hide_pid;
         int reboot;     /* group exit code if this pidns was rebooted */
         struct ns_common ns;
  } __randomize_layout;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h

index ad4ff71..d1eed1b 100644 (file)
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -42,6 +42,34 @@ struct proc_ops {
         unsigned long (*proc_get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
  } __randomize_layout;
  
+/*
+ * definitions for hide_pid field of struct proc_fs_info, i.e. the levels
+ * of the hidepid= mount option (see Documentation/filesystems/proc.rst)
+ */
+enum proc_hidepid {
+       HIDEPID_OFF       = 0, /* classic mode: everybody may access all /proc/<pid>/ */
+       HIDEPID_NO_ACCESS = 1, /* users may not access /proc/<pid>/ other than their own */
+       HIDEPID_INVISIBLE = 2, /* as above, and other users' /proc/<pid>/ are invisible */
+       HIDEPID_NOT_PTRACEABLE = 4, /* Limit pids to only ptraceable pids */
+};
+
+/*
+ * definitions for proc mount option pidonly; the value is kept in the
+ * pidonly field of struct proc_fs_info.
+ * NOTE(review): presumably selected through the subset= mount option -
+ * confirm against the option parser in fs/proc/root.c.
+ */
+enum proc_pidonly {
+       PROC_PIDONLY_OFF = 0,
+       PROC_PIDONLY_ON  = 1,
+};
+
+struct proc_fs_info { /* procfs state reached through super_block->s_fs_info, see proc_sb_info() */
+       struct pid_namespace *pid_ns;    /* What proc_pid_ns() returns for this superblock */
+       struct dentry *proc_self;        /* For /proc/self */
+       struct dentry *proc_thread_self; /* For /proc/thread-self */
+       kgid_t pid_gid;                  /* NOTE(review): presumably the gid= mount option - confirm */
+       enum proc_hidepid hide_pid;      /* One of the HIDEPID_* values */
+       enum proc_pidonly pidonly;       /* PROC_PIDONLY_OFF or PROC_PIDONLY_ON */
+};
+
+static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
+{
+       return sb->s_fs_info; /* s_fs_info is read as a struct proc_fs_info;
+                              * NOTE(review): only meaningful when @sb is a
+                              * procfs superblock - callers must ensure that */
+}
+
  #ifdef CONFIG_PROC_FS
  
  typedef int (*proc_write_t)(struct file *, char *, size_t);
@@ -177,9 +205,9 @@ int open_related_ns(struct ns_common *ns,
                    struct ns_common *(*get_ns)(struct ns_common *ns));
  
  /* get the associated pid namespace for a file in procfs */
-static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
+static inline struct pid_namespace *proc_pid_ns(struct super_block *sb)
  {
-       return inode->i_sb->s_fs_info;
+       return proc_sb_info(sb)->pid_ns;
  }
  
  bool proc_ns_file(const struct file *file);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h

index 7375bb3..df587d1 100644 (file)
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -506,6 +506,27 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
         WRITE_ONCE(old->pprev, LIST_POISON2);
  }
  
+/**
+ * hlists_swap_heads_rcu - swap the lists the hlist heads point to
+ * @left:  The hlist head on the left
+ * @right: The hlist head on the right
+ *
+ * The lists start out as [@left  ][node1 ... ] and
+ *                        [@right ][node2 ... ]
+ * The lists end up as    [@left  ][node2 ... ]
+ *                        [@right ][node1 ... ]
+ *
+ * Both lists must be non-empty: the first node of each list is
+ * dereferenced unconditionally to repoint its ->pprev at the other head.
+ * The new ->first pointers are published with rcu_assign_pointer() before
+ * the ->pprev back-pointers are updated with WRITE_ONCE().
+ */
+static inline void hlists_swap_heads_rcu(struct hlist_head *left, struct hlist_head *right)
+{
+       struct hlist_node *node1 = left->first;
+       struct hlist_node *node2 = right->first;
+
+       rcu_assign_pointer(left->first, node2);
+       rcu_assign_pointer(right->first, node1);
+       WRITE_ONCE(node2->pprev, &left->first);
+       WRITE_ONCE(node1->pprev, &right->first);
+}
+
  /*
   * return the first or the next element in an RCU protected hlist
   */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h

index 3e5b090..0ee5e69 100644 (file)
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -654,17 +654,6 @@ static inline bool thread_group_leader(struct task_struct *p)
         return p->exit_signal >= 0;
  }
  
-/* Do to the insanities of de_thread it is possible for a process
- * to have the pid of the thread group leader without actually being
- * the thread group leader.  For iteration through the pids in proc
- * all we care about is that we have a task with the appropriate
- * pid, we don't actually care if we have the right task.
- */
-static inline bool has_group_leader_pid(struct task_struct *p)
-{
-       return task_pid(p) == task_tgid(p);
-}
-
  static inline
  bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
  {
diff --git a/kernel/fork.c b/kernel/fork.c

index be98e94..cefe874 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1759,7 +1759,7 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
         pid_t nr = -1;
  
         if (likely(pid_has_task(pid, PIDTYPE_PID))) {
-               ns = proc_pid_ns(file_inode(m->file));
+               ns = proc_pid_ns(file_inode(m->file)->i_sb);
                 nr = pid_nr_ns(pid, ns);
         }
  
diff --git a/kernel/pid.c b/kernel/pid.c

index c835b84..f1496b7 100644 (file)
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -363,6 +363,25 @@ void change_pid(struct task_struct *task, enum pid_type type,
         attach_pid(task, type);
  }
  
+void exchange_tids(struct task_struct *left, struct task_struct *right)
+{
+       struct pid *pid1 = left->thread_pid;
+       struct pid *pid2 = right->thread_pid;
+       struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
+       struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
+
+       /*
+        * Exchange the thread ids of @left and @right in three steps.
+        *
+        * Swap the single entry tid lists: each pid's PIDTYPE_PID list
+        * head ends up pointing at the other's first node.
+        * hlists_swap_heads_rcu() dereferences both first nodes, so
+        * neither list may be empty here.
+        */
+       hlists_swap_heads_rcu(head1, head2);
+
+       /* Swap the per task_struct pid */
+       rcu_assign_pointer(left->thread_pid, pid2);
+       rcu_assign_pointer(right->thread_pid, pid1);
+
+       /* Swap the cached value: ->pid becomes pid_nr() of the task's new struct pid */
+       WRITE_ONCE(left->pid, pid_nr(pid2));
+       WRITE_ONCE(right->pid, pid_nr(pid1));
+}
+
  /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
  void transfer_pid(struct task_struct *old, struct task_struct *new,
                            enum pid_type type)
@@ -476,8 +495,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
         rcu_read_lock();
         if (!ns)
                 ns = task_active_pid_ns(current);
-       if (likely(pid_alive(task)))
-               nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
         rcu_read_unlock();
  
         return nr;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c

index 2fd3b3f..1651179 100644 (file)
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -47,85 +47,65 @@ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
  /*
   * Functions for validating access to tasks.
   */
-static struct task_struct *lookup_task(const pid_t pid, bool thread,
-                                      bool gettime)
+static struct pid *pid_for_clock(const clockid_t clock, bool gettime)
  {
-       struct task_struct *p;
+       const bool thread = !!CPUCLOCK_PERTHREAD(clock);
+       const pid_t upid = CPUCLOCK_PID(clock);
+       struct pid *pid;
+
+       if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
+               return NULL;
  
         /*
          * If the encoded PID is 0, then the timer is targeted at current
          * or the process to which current belongs.
          */
-       if (!pid)
-               return thread ? current : current->group_leader;
+       if (upid == 0)
+               return thread ? task_pid(current) : task_tgid(current);
  
-       p = find_task_by_vpid(pid);
-       if (!p)
-               return p;
-
-       if (thread)
-               return same_thread_group(p, current) ? p : NULL;
+       pid = find_vpid(upid);
+       if (!pid)
+               return NULL;
  
-       if (gettime) {
-               /*
-                * For clock_gettime(PROCESS) the task does not need to be
-                * the actual group leader. tsk->sighand gives
-                * access to the group's clock.
-                *
-                * Timers need the group leader because they take a
-                * reference on it and store the task pointer until the
-                * timer is destroyed.
-                */
-               return (p == current || thread_group_leader(p)) ? p : NULL;
+       if (thread) {
+               struct task_struct *tsk = pid_task(pid, PIDTYPE_PID);
+               return (tsk && same_thread_group(tsk, current)) ? pid : NULL;
         }
  
         /*
-        * For processes require that p is group leader.
+        * For clock_gettime(PROCESS) allow finding the process by
+        * the pid of the current task.  The code needs the tgid
+        * of the process so that pid_task(pid, PIDTYPE_TGID) can be
+        * used to find the process.
          */
-       return has_group_leader_pid(p) ? p : NULL;
+       if (gettime && (pid == task_pid(current)))
+               return task_tgid(current);
+
+       /*
+        * For processes require that pid identifies a process.
+        */
+       return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL;
  }
  
-static struct task_struct *__get_task_for_clock(const clockid_t clock,
-                                               bool getref, bool gettime)
+static inline int validate_clock_permissions(const clockid_t clock)
  {
-       const bool thread = !!CPUCLOCK_PERTHREAD(clock);
-       const pid_t pid = CPUCLOCK_PID(clock);
-       struct task_struct *p;
-
-       if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
-               return NULL;
+       int ret;
  
         rcu_read_lock();
-       p = lookup_task(pid, thread, gettime);
-       if (p && getref)
-               get_task_struct(p);
+       ret = pid_for_clock(clock, false) ? 0 : -EINVAL;
         rcu_read_unlock();
-       return p;
-}
-
-static inline struct task_struct *get_task_for_clock(const clockid_t clock)
-{
-       return __get_task_for_clock(clock, true, false);
-}
  
-static inline struct task_struct *get_task_for_clock_get(const clockid_t clock)
-{
-       return __get_task_for_clock(clock, true, true);
-}
-
-static inline int validate_clock_permissions(const clockid_t clock)
-{
-       return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL;
+       return ret;
  }
  
-static inline enum pid_type cpu_timer_pid_type(struct k_itimer *timer)
+static inline enum pid_type clock_pid_type(const clockid_t clock)
  {
-       return CPUCLOCK_PERTHREAD(timer->it_clock) ? PIDTYPE_PID : PIDTYPE_TGID;
+       return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID;
  }
  
  static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer)
  {
-       return pid_task(timer->it.cpu.pid, cpu_timer_pid_type(timer));
+       return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock));
  }
  
  /*
@@ -373,15 +353,18 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
         struct task_struct *tsk;
         u64 t;
  
-       tsk = get_task_for_clock_get(clock);
-       if (!tsk)
+       rcu_read_lock();
+       tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock));
+       if (!tsk) {
+               rcu_read_unlock();
                 return -EINVAL;
+       }
  
         if (CPUCLOCK_PERTHREAD(clock))
                 t = cpu_clock_sample(clkid, tsk);
         else
                 t = cpu_clock_sample_group(clkid, tsk, false);
-       put_task_struct(tsk);
+       rcu_read_unlock();
  
         *tp = ns_to_timespec64(t);
         return 0;
@@ -394,19 +377,19 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
   */
  static int posix_cpu_timer_create(struct k_itimer *new_timer)
  {
-       struct task_struct *p = get_task_for_clock(new_timer->it_clock);
+       struct pid *pid;
  
-       if (!p)
+       rcu_read_lock();
+       pid = pid_for_clock(new_timer->it_clock, false);
+       if (!pid) {
+               rcu_read_unlock();
                 return -EINVAL;
+       }
  
         new_timer->kclock = &clock_posix_cpu;
         timerqueue_init(&new_timer->it.cpu.node);
-       new_timer->it.cpu.pid = get_task_pid(p, cpu_timer_pid_type(new_timer));
-       /*
-        * get_task_for_clock() took a reference on @p. Drop it as the timer
-        * holds a reference on the pid of @p.
-        */
-       put_task_struct(p);
+       new_timer->it.cpu.pid = get_pid(pid);
+       rcu_read_unlock();
         return 0;
  }
  
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c

index d64b83e..ce4fbba 100644 (file)
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -779,7 +779,7 @@ static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
  {
         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
  
-       state->pid_ns = proc_pid_ns(file_inode(seq->file));
+       state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);
  
         rcu_read_lock_bh();
         return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c

index bf38fc1..df47989 100644 (file)
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -7,6 +7,7 @@
  
  #include "common.h"
  #include <linux/magic.h>
+#include <linux/proc_fs.h>
  
  /**
   * tomoyo_encode2 - Encode binary string to ascii string.
@@ -161,9 +162,10 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
         if (sb->s_magic == PROC_SUPER_MAGIC && *pos == '/') {
                 char *ep;
                 const pid_t pid = (pid_t) simple_strtoul(pos + 1, &ep, 10);
+               struct pid_namespace *proc_pidns = proc_pid_ns(sb);
  
                 if (*ep == '/' && pid && pid ==
-                   task_tgid_nr_ns(current, sb->s_fs_info)) {
+                   task_tgid_nr_ns(current, proc_pidns)) {
                         pos = ep - 5;
                         if (pos < buffer)
                                 goto out;
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore

index 4bca5a9..bed4b53 100644 (file)
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -2,7 +2,9 @@
  /fd-001-lookup
  /fd-002-posix-eq
  /fd-003-kthread
+/proc-fsconfig-hidepid
  /proc-loadavg-001
+/proc-multiple-procfs
  /proc-pid-vm
  /proc-self-map-files-001
  /proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile

index a8ed0f6..8be8a03 100644 (file)
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -19,5 +19,7 @@ TEST_GEN_PROGS += self
  TEST_GEN_PROGS += setns-dcache
  TEST_GEN_PROGS += setns-sysvipc
  TEST_GEN_PROGS += thread-self
+TEST_GEN_PROGS += proc-multiple-procfs
+TEST_GEN_PROGS += proc-fsconfig-hidepid
  
  include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-fsconfig-hidepid.c b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c

new file mode 100644 (file)

index 0000000..b9af8f5
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <linux/mount.h>
+#include <linux/unistd.h>
+
+static inline int fsopen(const char *fsname, unsigned int flags)
+{
+       return syscall(__NR_fsopen, fsname, flags); /* raw fsopen(2) call; arguments and result passed through unchanged */
+}
+
+static inline int fsconfig(int fd, unsigned int cmd, const char *key, const void *val, int aux)
+{
+       return syscall(__NR_fsconfig, fd, cmd, key, val, aux); /* raw fsconfig(2) call; arguments and result passed through unchanged */
+}
+
+int main(void)
+{
+       int fsfd, ret;
+       int hidepid = 2; /* handed to fsconfig() as a binary blob below */
+
+       assert((fsfd = fsopen("proc", 0)) != -1); /* NOTE(review): the call sits inside assert() and vanishes under NDEBUG */
+
+       ret = fsconfig(fsfd, FSCONFIG_SET_BINARY, "hidepid", &hidepid, 0); /* binary value for hidepid must be refused */
+       assert(ret == -1);
+       assert(errno == EINVAL);
+
+       assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "2", 0)); /* numeric string form is accepted */
+       assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "invisible", 0)); /* symbolic name is accepted too */
+
+       assert(!close(fsfd));
+
+       return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-multiple-procfs.c b/tools/testing/selftests/proc/proc-multiple-procfs.c

new file mode 100644 (file)

index 0000000..ab912ad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-multiple-procfs.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(void)
+{
+       struct stat proc_st1, proc_st2;
+       char procbuff[] = "/tmp/proc.XXXXXX/meminfo"; /* sized by its initializer to hold "<procdir>/meminfo" */
+       char procdir1[] = "/tmp/proc.XXXXXX"; /* mkdtemp() templates, filled in place */
+       char procdir2[] = "/tmp/proc.XXXXXX";
+
+       assert(mkdtemp(procdir1) != NULL); /* NOTE(review): calls inside assert() vanish under NDEBUG */
+       assert(mkdtemp(procdir2) != NULL);
+
+       assert(!mount("proc", procdir1, "proc", 0, "hidepid=1")); /* two procfs mounts with different hidepid= values */
+       assert(!mount("proc", procdir2, "proc", 0, "hidepid=2"));
+
+       snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir1);
+       assert(!stat(procbuff, &proc_st1));
+
+       snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir2);
+       assert(!stat(procbuff, &proc_st2));
+
+       umount(procdir1); /* NOTE(review): umount() results are ignored and the temporary directories are never removed */
+       umount(procdir2);
+
+       assert(proc_st1.st_dev != proc_st2.st_dev); /* the two mounts must report different device numbers */
+
+       return 0;
+}
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 4 Jun 2020 20:54:34 +0000 (13:54 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 4 Jun 2020 20:54:34 +0000 (13:54 -0700)
Documentation/filesystems/proc.rst		patch \| blob \| history
fs/exec.c		patch \| blob \| history
fs/locks.c		patch \| blob \| history
fs/proc/array.c		patch \| blob \| history
fs/proc/base.c		patch \| blob \| history
fs/proc/generic.c		patch \| blob \| history
fs/proc/inode.c		patch \| blob \| history
fs/proc/root.c		patch \| blob \| history
fs/proc/self.c		patch \| blob \| history
fs/proc/thread_self.c		patch \| blob \| history
fs/proc_namespace.c		patch \| blob \| history
include/linux/pid.h		patch \| blob \| history
include/linux/pid_namespace.h		patch \| blob \| history
include/linux/proc_fs.h		patch \| blob \| history
include/linux/rculist.h		patch \| blob \| history
include/linux/sched/signal.h		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/pid.c		patch \| blob \| history
kernel/time/posix-cpu-timers.c		patch \| blob \| history
net/ipv6/ip6_flowlabel.c		patch \| blob \| history
security/tomoyo/realpath.c		patch \| blob \| history
tools/testing/selftests/proc/.gitignore		patch \| blob \| history
tools/testing/selftests/proc/Makefile		patch \| blob \| history
tools/testing/selftests/proc/proc-fsconfig-hidepid.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/proc/proc-multiple-procfs.c	[new file with mode: 0644]	patch \| blob