userfaultfd: fix SIGBUS resulting from false rwsem wakeups

author Andrea Arcangeli <aarcange@redhat.com>

Tue, 24 Jan 2017 23:17:59 +0000 (15:17 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 25 Jan 2017 00:26:14 +0000 (16:26 -0800)
author Andrea Arcangeli <aarcange@redhat.com>
Tue, 24 Jan 2017 23:17:59 +0000 (15:17 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 Jan 2017 00:26:14 +0000 (16:26 -0800)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c

index d96e2f3..43953e0 100644 (file)
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
         struct uffd_msg msg;
         wait_queue_t wq;
         struct userfaultfd_ctx *ctx;
+       bool waken;
  };
  
  struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
         if (len && (start > uwq->msg.arg.pagefault.address ||
                     start + len <= uwq->msg.arg.pagefault.address))
                 goto out;
+       WRITE_ONCE(uwq->waken, true);
+       /*
+        * The implicit smp_mb__before_spinlock in try_to_wake_up()
+        * renders uwq->waken visible to other CPUs before the task is
+        * woken.
+        */
         ret = wake_up_state(wq->private, mode);
         if (ret)
                 /*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
         struct userfaultfd_wait_queue uwq;
         int ret;
         bool must_wait, return_to_userland;
+       long blocking_state;
  
         BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
  
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
         uwq.wq.private = current;
         uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
         uwq.ctx = ctx;
+       uwq.waken = false;
  
         return_to_userland =
                 (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
                 (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+       blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+                        TASK_KILLABLE;
  
         spin_lock(&ctx->fault_pending_wqh.lock);
         /*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
          * following the spin_unlock to happen before the list_add in
          * __add_wait_queue.
          */
-       set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
-                         TASK_KILLABLE);
+       set_current_state(blocking_state);
         spin_unlock(&ctx->fault_pending_wqh.lock);
  
         must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
                 wake_up_poll(&ctx->fd_wqh, POLLIN);
                 schedule();
                 ret |= VM_FAULT_MAJOR;
+
+               /*
+                * False wakeups can originate even from rwsem before
+                * up_read(); however, userfaults will wait either for a
+                * targeted wakeup on the specific uwq waitqueue from
+                * wake_userfault() or for signals or for uffd
+                * release.
+                */
+               while (!READ_ONCE(uwq.waken)) {
+                       /*
+                        * This needs the full smp_store_mb()
+                        * guarantee as the state write must be
+                        * visible to other CPUs before reading
+                        * uwq.waken from other CPUs.
+                        */
+                       set_current_state(blocking_state);
+                       if (READ_ONCE(uwq.waken) ||
+                           READ_ONCE(ctx->released) ||
+                           (return_to_userland ? signal_pending(current) :
+                            fatal_signal_pending(current)))
+                               break;
+                       schedule();
+               }
         }
  
         __set_current_state(TASK_RUNNING);
author	Andrea Arcangeli <aarcange@redhat.com>
	Tue, 24 Jan 2017 23:17:59 +0000 (15:17 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 25 Jan 2017 00:26:14 +0000 (16:26 -0800)