Merge branch 'exec-update-lock-for-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 16 Dec 2020 03:36:48 +0000 (19:36 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 16 Dec 2020 03:36:48 +0000 (19:36 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 16 Dec 2020 03:36:48 +0000 (19:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 16 Dec 2020 03:36:48 +0000 (19:36 -0800)
diff --combined fs/exec.c

index 81b85f7,ca89e0e..c238c25
--- 1/fs/exec.c
--- 2/fs/exec.c
+++ b/fs/exec.c
@@@ -64,7 -64,6 +64,7 @@@
   #include <linux/compat.h>
   #include <linux/vmalloc.h>
   #include <linux/io_uring.h>
+ +#include <linux/syscall_user_dispatch.h>
   
   #include <linux/uaccess.h>
   #include <asm/mmu_context.h>
@@@ -966,8 -965,8 +966,8 @@@ EXPORT_SYMBOL(read_code)
   
   /*
    * Maps the mm_struct mm into the current task struct.
-  * On success, this function returns with the mutex
-  * exec_update_mutex locked.
+  * On success, this function returns with exec_update_lock
+  * held for writing.
    */
   static int exec_mmap(struct mm_struct *mm)
   {
@@@ -982,7 -981,7 +982,7 @@@
         if (old_mm)
                 sync_mm_rss(old_mm);
   
-       ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
+       ret = down_write_killable(&tsk->signal->exec_update_lock);
         if (ret)
                 return ret;
   
@@@ -996,7 -995,7 +996,7 @@@
                 mmap_read_lock(old_mm);
                 if (unlikely(old_mm->core_state)) {
                         mmap_read_unlock(old_mm);
-                       mutex_unlock(&tsk->signal->exec_update_mutex);
+                       up_write(&tsk->signal->exec_update_lock);
                         return -EINTR;
                 }
         }
@@@ -1259,16 -1258,6 +1259,16 @@@ int begin_new_exec(struct linux_binprm 
                 goto out;
   
         /*
+ +       * Cancel any io_uring activity across execve
+ +       */
+ +      io_uring_task_cancel();
+ +
+ +      /* Ensure the files table is not shared. */
+ +      retval = unshare_files();
+ +      if (retval)
+ +              goto out;
+ +
+ +      /*
          * Must be called _before_ exec_mmap() as bprm->mm is
          * not visibile until then. This also enables the update
          * to be lockless.
@@@ -1313,8 -1302,6 +1313,8 @@@
         flush_thread();
         me->personality &= ~bprm->per_clear;
   
+ +      clear_syscall_work_syscall_user_dispatch(me);
+ +
         /*
          * We have to apply CLOEXEC before we change whether the process is
          * dumpable (in setup_new_exec) to avoid a race with a process in userspace
@@@ -1395,7 -1382,7 +1395,7 @@@
         return 0;
   
   out_unlock:
-       mutex_unlock(&me->signal->exec_update_mutex);
+       up_write(&me->signal->exec_update_lock);
   out:
         return retval;
   }
@@@ -1436,7 -1423,7 +1436,7 @@@ void setup_new_exec(struct linux_binpr
          * some architectures like powerpc
          */
         me->mm->task_size = TASK_SIZE;
-       mutex_unlock(&me->signal->exec_update_mutex);
+       up_write(&me->signal->exec_update_lock);
         mutex_unlock(&me->signal->cred_guard_mutex);
   }
   EXPORT_SYMBOL(setup_new_exec);
@@@ -1789,11 -1776,21 +1789,11 @@@ static int bprm_execve(struct linux_bin
                        int fd, struct filename *filename, int flags)
   {
         struct file *file;
- -      struct files_struct *displaced;
         int retval;
   
- -      /*
- -       * Cancel any io_uring activity across execve
- -       */
- -      io_uring_task_cancel();
- -
- -      retval = unshare_files(&displaced);
- -      if (retval)
- -              return retval;
- -
         retval = prepare_bprm_creds(bprm);
         if (retval)
- -              goto out_files;
+ +              return retval;
   
         check_unsafe_exec(bprm);
         current->in_execve = 1;
@@@ -1808,14 -1805,11 +1808,14 @@@
         bprm->file = file;
         /*
          * Record that a name derived from an O_CLOEXEC fd will be
- -       * inaccessible after exec. Relies on having exclusive access to
- -       * current->files (due to unshare_files above).
+ +       * inaccessible after exec.  This allows the code in exec to
+ +       * choose to fail when the executable is not mmaped into the
+ +       * interpreter and an open file descriptor is not passed to
+ +       * the interpreter.  This makes for a better user experience
+ +       * than having the interpreter start and then immediately fail
+ +       * when it finds the executable is inaccessible.
          */
- -      if (bprm->fdpath &&
- -          close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+ +      if (bprm->fdpath && get_close_on_exec(fd))
                 bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
   
         /* Set the unchanging part of bprm->cred */
@@@ -1833,6 -1827,8 +1833,6 @@@
         rseq_execve(current);
         acct_update_integrals(current);
         task_numa_free(current, false);
- -      if (displaced)
- -              put_files_struct(displaced);
         return retval;
   
   out:
@@@ -1849,6 -1845,10 +1849,6 @@@ out_unmark
         current->fs->in_exec = 0;
         current->in_execve = 0;
   
- -out_files:
- -      if (displaced)
- -              reset_files_struct(displaced);
- -
         return retval;
   }
   
diff --combined include/linux/sched/signal.h

index bd5afa0,4b6a823..4e116cd
--- 1/include/linux/sched/signal.h
--- 2/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@@ -228,12 -228,13 +228,13 @@@ struct signal_struct 
                                          * credential calculations
                                          * (notably. ptrace)
                                          * Deprecated do not use in new code.
-                                        * Use exec_update_mutex instead.
-                                        */
-       struct mutex exec_update_mutex; /* Held while task_struct is being
-                                        * updated during exec, and may have
-                                        * inconsistent permissions.
+                                        * Use exec_update_lock instead.
                                          */
+       struct rw_semaphore exec_update_lock;   /* Held while task_struct is
+                                                * being updated during exec,
+                                                * and may have inconsistent
+                                                * permissions.
+                                                */
   } __randomize_layout;
   
   /*
@@@ -353,25 -354,11 +354,25 @@@ static inline int restart_syscall(void
         return -ERESTARTNOINTR;
   }
   
- -static inline int signal_pending(struct task_struct *p)
+ +static inline int task_sigpending(struct task_struct *p)
   {
         return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
   }
   
+ +static inline int signal_pending(struct task_struct *p)
+ +{
+ +#if defined(TIF_NOTIFY_SIGNAL)
+ +      /*
+ +       * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same
+ +       * behavior in terms of ensuring that we break out of wait loops
+ +       * so that notify signal callbacks can be processed.
+ +       */
+ +      if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL)))
+ +              return 1;
+ +#endif
+ +      return task_sigpending(p);
+ +}
+ +
   static inline int __fatal_signal_pending(struct task_struct *p)
   {
         return unlikely(sigismember(&p->pending.signal, SIGKILL));
@@@ -379,7 -366,7 +380,7 @@@
   
   static inline int fatal_signal_pending(struct task_struct *p)
   {
- -      return signal_pending(p) && __fatal_signal_pending(p);
+ +      return task_sigpending(p) && __fatal_signal_pending(p);
   }
   
   static inline int signal_pending_state(long state, struct task_struct *p)
@@@ -516,7 -503,7 +517,7 @@@ extern int set_user_sigmask(const sigse
   static inline void restore_saved_sigmask_unless(bool interrupted)
   {
         if (interrupted)
- -              WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+ +              WARN_ON(!signal_pending(current));
         else
                 restore_saved_sigmask();
   }
diff --combined kernel/events/core.c

index 19ae6c9,55b2330..55d1879
--- 1/kernel/events/core.c
--- 2/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -51,8 -51,6 +51,8 @@@
   #include <linux/proc_ns.h>
   #include <linux/mount.h>
   #include <linux/min_heap.h>
+ +#include <linux/highmem.h>
+ +#include <linux/pgtable.h>
   
   #include "internal.h"
   
@@@ -1327,7 -1325,7 +1327,7 @@@ static void put_ctx(struct perf_event_c
    * function.
    *
    * Lock order:
-  *    exec_update_mutex
+  *    exec_update_lock
    *    task_struct::perf_event_mutex
    *      perf_event_context::mutex
    *        perf_event::child_mutex;
@@@ -1897,12 -1895,6 +1897,12 @@@ static void __perf_event_header_size(st
         if (sample_type & PERF_SAMPLE_CGROUP)
                 size += sizeof(data->cgroup);
   
+ +      if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ +              size += sizeof(data->data_page_size);
+ +
+ +      if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ +              size += sizeof(data->code_page_size);
+ +
         event->header_size = size;
   }
   
@@@ -6939,12 -6931,6 +6939,12 @@@ void perf_output_sample(struct perf_out
         if (sample_type & PERF_SAMPLE_CGROUP)
                 perf_output_put(handle, data->cgroup);
   
+ +      if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ +              perf_output_put(handle, data->data_page_size);
+ +
+ +      if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ +              perf_output_put(handle, data->code_page_size);
+ +
         if (sample_type & PERF_SAMPLE_AUX) {
                 perf_output_put(handle, data->aux_size);
   
@@@ -7002,93 -6988,6 +7002,93 @@@ static u64 perf_virt_to_phys(u64 virt
         return phys_addr;
   }
   
+ +/*
+ + * Return the pagetable size of a given virtual address.
+ + */
+ +static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
+ +{
+ +      u64 size = 0;
+ +
+ +#ifdef CONFIG_HAVE_FAST_GUP
+ +      pgd_t *pgdp, pgd;
+ +      p4d_t *p4dp, p4d;
+ +      pud_t *pudp, pud;
+ +      pmd_t *pmdp, pmd;
+ +      pte_t *ptep, pte;
+ +
+ +      pgdp = pgd_offset(mm, addr);
+ +      pgd = READ_ONCE(*pgdp);
+ +      if (pgd_none(pgd))
+ +              return 0;
+ +
+ +      if (pgd_leaf(pgd))
+ +              return pgd_leaf_size(pgd);
+ +
+ +      p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+ +      p4d = READ_ONCE(*p4dp);
+ +      if (!p4d_present(p4d))
+ +              return 0;
+ +
+ +      if (p4d_leaf(p4d))
+ +              return p4d_leaf_size(p4d);
+ +
+ +      pudp = pud_offset_lockless(p4dp, p4d, addr);
+ +      pud = READ_ONCE(*pudp);
+ +      if (!pud_present(pud))
+ +              return 0;
+ +
+ +      if (pud_leaf(pud))
+ +              return pud_leaf_size(pud);
+ +
+ +      pmdp = pmd_offset_lockless(pudp, pud, addr);
+ +      pmd = READ_ONCE(*pmdp);
+ +      if (!pmd_present(pmd))
+ +              return 0;
+ +
+ +      if (pmd_leaf(pmd))
+ +              return pmd_leaf_size(pmd);
+ +
+ +      ptep = pte_offset_map(&pmd, addr);
+ +      pte = ptep_get_lockless(ptep);
+ +      if (pte_present(pte))
+ +              size = pte_leaf_size(pte);
+ +      pte_unmap(ptep);
+ +#endif /* CONFIG_HAVE_FAST_GUP */
+ +
+ +      return size;
+ +}
+ +
+ +static u64 perf_get_page_size(unsigned long addr)
+ +{
+ +      struct mm_struct *mm;
+ +      unsigned long flags;
+ +      u64 size;
+ +
+ +      if (!addr)
+ +              return 0;
+ +
+ +      /*
+ +       * Software page-table walkers must disable IRQs,
+ +       * which prevents any tear down of the page tables.
+ +       */
+ +      local_irq_save(flags);
+ +
+ +      mm = current->mm;
+ +      if (!mm) {
+ +              /*
+ +               * For kernel threads and the like, use init_mm so that
+ +               * we can find kernel memory.
+ +               */
+ +              mm = &init_mm;
+ +      }
+ +
+ +      size = perf_get_pgtable_size(mm, addr);
+ +
+ +      local_irq_restore(flags);
+ +
+ +      return size;
+ +}
+ +
   static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
   
   struct perf_callchain_entry *
@@@ -7124,7 -7023,7 +7124,7 @@@ void perf_prepare_sample(struct perf_ev
   
         __perf_event_header__init_id(header, data, event);
   
- -      if (sample_type & PERF_SAMPLE_IP)
+ +      if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
                 data->ip = perf_instruction_pointer(regs);
   
         if (sample_type & PERF_SAMPLE_CALLCHAIN) {
@@@ -7243,17 -7142,6 +7243,17 @@@
         }
   #endif
   
+ +      /*
+ +       * PERF_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user doesn't
+ +       * require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr,
+ +       * but the value will not dump to the userspace.
+ +       */
+ +      if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ +              data->data_page_size = perf_get_page_size(data->addr);
+ +
+ +      if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ +              data->code_page_size = perf_get_page_size(data->ip);
+ +
         if (sample_type & PERF_SAMPLE_AUX) {
                 u64 size;
   
@@@ -11832,6 -11720,24 +11832,6 @@@ SYSCALL_DEFINE5(perf_event_open
                 goto err_task;
         }
   
- -      if (task) {
- -              err = down_read_interruptible(&task->signal->exec_update_lock);
- -              if (err)
- -                      goto err_task;
- -
- -              /*
- -               * Preserve ptrace permission check for backwards compatibility.
- -               *
- -               * We must hold exec_update_lock across this and any potential
- -               * perf_install_in_context() call for this new event to
- -               * serialize against exec() altering our credentials (and the
- -               * perf_event_exit_task() that could imply).
- -               */
- -              err = -EACCES;
- -              if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
- -                      goto err_cred;
- -      }
- -
         if (flags & PERF_FLAG_PID_CGROUP)
                 cgroup_fd = pid;
   
@@@ -11839,7 -11745,7 +11839,7 @@@
                                  NULL, NULL, cgroup_fd);
         if (IS_ERR(event)) {
                 err = PTR_ERR(event);
- -              goto err_cred;
+ +              goto err_task;
         }
   
         if (is_sampling_event(event)) {
@@@ -11958,24 -11864,6 +11958,24 @@@
                 goto err_context;
         }
   
-               err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
+ +      if (task) {
-                * We must hold exec_update_mutex across this and any potential
++              err = down_read_interruptible(&task->signal->exec_update_lock);
+ +              if (err)
+ +                      goto err_file;
+ +
+ +              /*
+ +               * Preserve ptrace permission check for backwards compatibility.
+ +               *
++               * We must hold exec_update_lock across this and any potential
+ +               * perf_install_in_context() call for this new event to
+ +               * serialize against exec() altering our credentials (and the
+ +               * perf_event_exit_task() that could imply).
+ +               */
+ +              err = -EACCES;
+ +              if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+ +                      goto err_cred;
+ +      }
+ +
         if (move_group) {
                 gctx = __perf_event_ctx_lock_double(group_leader, ctx);
   
@@@ -12129,7 -12017,7 +12129,7 @@@
         mutex_unlock(&ctx->mutex);
   
         if (task) {
-               mutex_unlock(&task->signal->exec_update_mutex);
+               up_read(&task->signal->exec_update_lock);
                 put_task_struct(task);
         }
   
@@@ -12151,10 -12039,7 +12151,10 @@@ err_locked
         if (move_group)
                 perf_event_ctx_unlock(group_leader, gctx);
         mutex_unlock(&ctx->mutex);
- -/* err_file: */
+ +err_cred:
+ +      if (task)
-               mutex_unlock(&task->signal->exec_update_mutex);
++              up_read(&task->signal->exec_update_lock);
+ +err_file:
         fput(event_file);
   err_context:
         perf_unpin_context(ctx);
@@@ -12166,6 -12051,9 +12166,6 @@@ err_alloc
          */
         if (!event_file)
                 free_event(event);
- -err_cred:
- -      if (task)
- -              up_read(&task->signal->exec_update_lock);
   err_task:
         if (task)
                 put_task_struct(task);
@@@ -12470,7 -12358,7 +12470,7 @@@ static void perf_event_exit_task_contex
   /*
    * When a child task exits, feed back event values to parent events.
    *
-  * Can be called with exec_update_mutex held when called from
+  * Can be called with exec_update_lock held when called from
    * setup_new_exec().
    */
   void perf_event_exit_task(struct task_struct *child)
diff --combined kernel/fork.c

index 4f44d87,e8cb80b..41906a5
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -385,7 -385,7 +385,7 @@@ static void account_kernel_stack(struc
                 mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
                                       account * (THREAD_SIZE / 1024));
         else
- -              mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
+ +              mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
                                       account * (THREAD_SIZE / 1024));
   }
   
@@@ -404,10 -404,9 +404,10 @@@ static int memcg_charge_kernel_stack(st
   
                 for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
                         /*
- -                       * If memcg_kmem_charge_page() fails, page->mem_cgroup
- -                       * pointer is NULL, and memcg_kmem_uncharge_page() in
- -                       * free_thread_stack() will ignore this page.
+ +                       * If memcg_kmem_charge_page() fails, page's
+ +                       * memory cgroup pointer is NULL, and
+ +                       * memcg_kmem_uncharge_page() in free_thread_stack()
+ +                       * will ignore this page.
                          */
                         ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
                                                      0);
@@@ -907,7 -906,6 +907,7 @@@ static struct task_struct *dup_task_str
         clear_user_return_notifier(tsk);
         clear_tsk_need_resched(tsk);
         set_task_stack_end_magic(tsk);
+ +      clear_syscall_work_syscall_user_dispatch(tsk);
   
   #ifdef CONFIG_STACKPROTECTOR
         tsk->stack_canary = get_random_canary();
@@@ -932,7 -930,6 +932,7 @@@
         account_kernel_stack(tsk, 1);
   
         kcov_task_init(tsk);
+ +      kmap_local_fork(tsk);
   
   #ifdef CONFIG_FAULT_INJECTION
         tsk->fail_nth = 0;
@@@ -1010,7 -1007,6 +1010,7 @@@ static struct mm_struct *mm_init(struc
         mm->vmacache_seqnum = 0;
         atomic_set(&mm->mm_users, 1);
         atomic_set(&mm->mm_count, 1);
+ +      seqcount_init(&mm->write_protect_seq);
         mmap_init_lock(mm);
         INIT_LIST_HEAD(&mm->mmlist);
         mm->core_state = NULL;
@@@ -1225,7 -1221,7 +1225,7 @@@ struct mm_struct *mm_access(struct task
         struct mm_struct *mm;
         int err;
   
-       err =  mutex_lock_killable(&task->signal->exec_update_mutex);
+       err =  down_read_killable(&task->signal->exec_update_lock);
         if (err)
                 return ERR_PTR(err);
   
@@@ -1235,7 -1231,7 +1235,7 @@@
                 mmput(mm);
                 mm = ERR_PTR(-EACCES);
         }
-       mutex_unlock(&task->signal->exec_update_mutex);
+       up_read(&task->signal->exec_update_lock);
   
         return mm;
   }
@@@ -1595,7 -1591,7 +1595,7 @@@ static int copy_signal(unsigned long cl
         sig->oom_score_adj_min = current->signal->oom_score_adj_min;
   
         mutex_init(&sig->cred_guard_mutex);
-       mutex_init(&sig->exec_update_mutex);
+       init_rwsem(&sig->exec_update_lock);
   
         return 0;
   }
@@@ -1629,7 -1625,7 +1629,7 @@@ static void copy_seccomp(struct task_st
          * to manually enable the seccomp thread flag here.
          */
         if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
- -              set_tsk_thread_flag(p, TIF_SECCOMP);
+ +              set_task_syscall_work(p, SECCOMP);
   #endif
   }
   
@@@ -2162,9 -2158,9 +2162,9 @@@ static __latent_entropy struct task_str
          * child regardless of CLONE_PTRACE.
          */
         user_disable_single_step(p);
- -      clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
- -#ifdef TIF_SYSCALL_EMU
- -      clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+ +      clear_task_syscall_work(p, SYSCALL_TRACE);
+ +#if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU)
+ +      clear_task_syscall_work(p, SYSCALL_EMU);
   #endif
         clear_tsk_latency_tracing(p);
   
@@@ -2186,10 -2182,6 +2186,10 @@@
         INIT_LIST_HEAD(&p->thread_group);
         p->task_works = NULL;
   
+ +#ifdef CONFIG_KRETPROBES
+ +      p->kretprobe_instances.first = NULL;
+ +#endif
+ +
         /*
          * Ensure that the cgroup subsystem policies allow the new process to be
          * forked. It should be noted that the new process's css_set can be changed
@@@ -3031,21 -3023,21 +3031,21 @@@ SYSCALL_DEFINE1(unshare, unsigned long
    *    the exec layer of the kernel.
    */
   
- -int unshare_files(struct files_struct **displaced)
+ +int unshare_files(void)
   {
         struct task_struct *task = current;
- -      struct files_struct *copy = NULL;
+ +      struct files_struct *old, *copy = NULL;
         int error;
   
         error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
- -      if (error || !copy) {
- -              *displaced = NULL;
+ +      if (error || !copy)
                 return error;
- -      }
- -      *displaced = task->files;
+ +
+ +      old = task->files;
         task_lock(task);
         task->files = copy;
         task_unlock(task);
+ +      put_files_struct(old);
         return 0;
   }
   
diff --combined kernel/kcmp.c

index 36e58eb,c0d2ad9..5353edf
--- 1/kernel/kcmp.c
--- 2/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@@ -61,34 -61,39 +61,34 @@@ static int kcmp_ptr(void *v1, void *v2
   static struct file *
   get_file_raw_ptr(struct task_struct *task, unsigned int idx)
   {
- -      struct file *file = NULL;
+ +      struct file *file;
   
- -      task_lock(task);
         rcu_read_lock();
- -
- -      if (task->files)
- -              file = fcheck_files(task->files, idx);
- -
+ +      file = task_lookup_fd_rcu(task, idx);
         rcu_read_unlock();
- -      task_unlock(task);
   
         return file;
   }
   
- static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+ static void kcmp_unlock(struct rw_semaphore *l1, struct rw_semaphore *l2)
   {
-       if (likely(m2 != m1))
-               mutex_unlock(m2);
-       mutex_unlock(m1);
+       if (likely(l2 != l1))
+               up_read(l2);
+       up_read(l1);
   }
   
- static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+ static int kcmp_lock(struct rw_semaphore *l1, struct rw_semaphore *l2)
   {
         int err;
   
-       if (m2 > m1)
-               swap(m1, m2);
+       if (l2 > l1)
+               swap(l1, l2);
   
-       err = mutex_lock_killable(m1);
-       if (!err && likely(m1 != m2)) {
-               err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+       err = down_read_killable(l1);
+       if (!err && likely(l1 != l2)) {
+               err = down_read_killable_nested(l2, SINGLE_DEPTH_NESTING);
                 if (err)
-                       mutex_unlock(m1);
+                       up_read(l1);
         }
   
         return err;
@@@ -102,6 -107,7 +102,6 @@@ static int kcmp_epoll_target(struct tas
   {
         struct file *filp, *filp_epoll, *filp_tgt;
         struct kcmp_epoll_slot slot;
- -      struct files_struct *files;
   
         if (copy_from_user(&slot, uslot, sizeof(slot)))
                 return -EFAULT;
@@@ -110,12 -116,23 +110,12 @@@
         if (!filp)
                 return -EBADF;
   
- -      files = get_files_struct(task2);
- -      if (!files)
+ +      filp_epoll = fget_task(task2, slot.efd);
+ +      if (!filp_epoll)
                 return -EBADF;
   
- -      spin_lock(&files->file_lock);
- -      filp_epoll = fcheck_files(files, slot.efd);
- -      if (filp_epoll)
- -              get_file(filp_epoll);
- -      else
- -              filp_tgt = ERR_PTR(-EBADF);
- -      spin_unlock(&files->file_lock);
- -      put_files_struct(files);
- -
- -      if (filp_epoll) {
- -              filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
- -              fput(filp_epoll);
- -      }
+ +      filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+ +      fput(filp_epoll);
   
         if (IS_ERR(filp_tgt))
                 return PTR_ERR(filp_tgt);
@@@ -156,8 -173,8 +156,8 @@@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_
         /*
          * One should have enough rights to inspect task details.
          */
-       ret = kcmp_lock(&task1->signal->exec_update_mutex,
-                       &task2->signal->exec_update_mutex);
+       ret = kcmp_lock(&task1->signal->exec_update_lock,
+                       &task2->signal->exec_update_lock);
         if (ret)
                 goto err;
         if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
@@@ -212,8 -229,8 +212,8 @@@
         }
   
   err_unlock:
-       kcmp_unlock(&task1->signal->exec_update_mutex,
-                   &task2->signal->exec_update_mutex);
+       kcmp_unlock(&task1->signal->exec_update_lock,
+                   &task2->signal->exec_update_lock);
   err:
         put_task_struct(task1);
         put_task_struct(task2);
diff --combined kernel/pid.c

index 47466d0,4856818..ebdf9c6
--- 1/kernel/pid.c
--- 2/kernel/pid.c
+++ b/kernel/pid.c
@@@ -73,7 -73,7 +73,7 @@@ int pid_max_max = PID_MAX_LIMIT
    * the scheme scales to up to 4 million PIDs, runtime.
    */
   struct pid_namespace init_pid_ns = {
- -      .kref = KREF_INIT(2),
+ +      .ns.count = REFCOUNT_INIT(2),
         .idr = IDR_INIT(init_pid_ns.idr),
         .pid_allocated = PIDNS_ADDING,
         .level = 0,
@@@ -628,7 -628,7 +628,7 @@@ static struct file *__pidfd_fget(struc
         struct file *file;
         int ret;
   
-       ret = mutex_lock_killable(&task->signal->exec_update_mutex);
+       ret = down_read_killable(&task->signal->exec_update_lock);
         if (ret)
                 return ERR_PTR(ret);
   
@@@ -637,7 -637,7 +637,7 @@@
         else
                 file = ERR_PTR(-EPERM);
   
-       mutex_unlock(&task->signal->exec_update_mutex);
+       up_read(&task->signal->exec_update_lock);
   
         return file ?: ERR_PTR(-EBADF);
   }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 16 Dec 2020 03:36:48 +0000 (19:36 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 16 Dec 2020 03:36:48 +0000 (19:36 -0800)
		1	2
fs/exec.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched/signal.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/events/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/kcmp.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/pid.c	patch \|	diff1 \|	diff2 \|	blob \| history