mm: lock_vma_under_rcu() must check vma->anon_vma under vma lock
authorJann Horn <jannh@google.com>
Wed, 26 Jul 2023 21:41:03 +0000 (23:41 +0200)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 27 Jul 2023 18:13:22 +0000 (11:13 -0700)
lock_vma_under_rcu() tries to guarantee that __anon_vma_prepare() can't
be called in the VMA-locked page fault path by ensuring that
vma->anon_vma is set.

However, this check happens before the VMA is locked, which means a
concurrent move_vma() can concurrently call unlink_anon_vmas(), which
disassociates the VMA's anon_vma.

This means we can get UAF in the following scenario:

  THREAD 1                   THREAD 2
  ========                   ========
  <page fault>
    lock_vma_under_rcu()
      rcu_read_lock()
      mas_walk()
      check vma->anon_vma

                             mremap() syscall
                               move_vma()
                                vma_start_write()
                                 unlink_anon_vmas()
                             <syscall end>

    handle_mm_fault()
      __handle_mm_fault()
        handle_pte_fault()
          do_pte_missing()
            do_anonymous_page()
              anon_vma_prepare()
                __anon_vma_prepare()
                  find_mergeable_anon_vma()
                    mas_walk() [looks up VMA X]

                             munmap() syscall (deletes VMA X)

                    reusable_anon_vma() [called on freed VMA X]

This is a security bug if you can hit it, although an attacker would
have to win two races at once where the first race window is only a few
instructions wide.

This patch is based on some previous discussion with Linus Torvalds on
the security list.

Cc: stable@vger.kernel.org
Fixes: 5e31275cc997 ("mm: add per-VMA lock and helper functions to control it")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/memory.c

index 01f39e8..603b2f4 100644 (file)
@@ -5393,27 +5393,28 @@ retry:
        if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
                goto inval;
 
-       /* find_mergeable_anon_vma uses adjacent vmas which are not locked */
-       if (!vma->anon_vma && !vma_is_tcp(vma))
-               goto inval;
-
        if (!vma_start_read(vma))
                goto inval;
 
        /*
+        * find_mergeable_anon_vma uses adjacent vmas which are not locked.
+        * This check must happen after vma_start_read(); otherwise, a
+        * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
+        * from its anon_vma.
+        */
+       if (unlikely(!vma->anon_vma && !vma_is_tcp(vma)))
+               goto inval_end_read;
+
+       /*
         * Due to the possibility of userfault handler dropping mmap_lock, avoid
         * it for now and fall back to page fault handling under mmap_lock.
         */
-       if (userfaultfd_armed(vma)) {
-               vma_end_read(vma);
-               goto inval;
-       }
+       if (userfaultfd_armed(vma))
+               goto inval_end_read;
 
        /* Check since vm_start/vm_end might change before we lock the VMA */
-       if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
-               vma_end_read(vma);
-               goto inval;
-       }
+       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+               goto inval_end_read;
 
        /* Check if the VMA got isolated after we found it */
        if (vma->detached) {
@@ -5425,6 +5426,9 @@ retry:
 
        rcu_read_unlock();
        return vma;
+
+inval_end_read:
+       vma_end_read(vma);
 inval:
        rcu_read_unlock();
        count_vm_vma_lock_event(VMA_LOCK_ABORT);