mm: gup: make fault_in_safe_writeable() use fixup_user_fault()
author: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 8 Mar 2022 19:55:48 +0000 (11:55 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 10 Mar 2022 18:48:53 +0000 (10:48 -0800)
Instead of using GUP, make fault_in_safe_writeable() actually force a
'handle_mm_fault()' using the same fixup_user_fault() machinery that
futexes already use.

Using the GUP machinery meant that fault_in_safe_writeable() did not do
everything that a real fault would do, ranging from not auto-expanding
the stack segment, to not updating accessed or dirty flags in the page
tables (GUP sets those flags on the pages themselves).

The latter causes problems on architectures (like s390) that do accessed
bit handling in software, which meant that fault_in_safe_writeable()
didn't actually do all the fault handling it needed to, and trying to
access the user address afterwards would still cause faults.

Reported-and-tested-by: Andreas Gruenbacher <agruenba@redhat.com>
Fixes: cdd591fc86e3 ("iov_iter: Introduce fault_in_iov_iter_writeable")
Link: https://lore.kernel.org/all/CAHc6FU5nP+nziNGG0JAF1FUx-GV7kKFvM7aZuU_XD2_1v4vnvg@mail.gmail.com/
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/gup.c

index a9d4d72..7bc1ba9 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1729,11 +1729,11 @@ EXPORT_SYMBOL(fault_in_writeable);
  * @uaddr: start of address range
  * @size: length of address range
  *
- * Faults in an address range using get_user_pages, i.e., without triggering
- * hardware page faults.  This is primarily useful when we already know that
- * some or all of the pages in the address range aren't in memory.
+ * Faults in an address range for writing.  This is primarily useful when we
+ * already know that some or all of the pages in the address range aren't in
+ * memory.
  *
- * Other than fault_in_writeable(), this function is non-destructive.
+ * Unlike fault_in_writeable(), this function is non-destructive.
  *
  * Note that we don't pin or otherwise hold the pages referenced that we fault
  * in.  There's no guarantee that they'll stay in memory for any duration of
@@ -1744,46 +1744,27 @@ EXPORT_SYMBOL(fault_in_writeable);
  */
 size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 {
-       unsigned long start = (unsigned long)untagged_addr(uaddr);
-       unsigned long end, nstart, nend;
+       unsigned long start = (unsigned long)uaddr, end;
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma = NULL;
-       int locked = 0;
+       bool unlocked = false;
 
-       nstart = start & PAGE_MASK;
+       if (unlikely(size == 0))
+               return 0;
        end = PAGE_ALIGN(start + size);
-       if (end < nstart)
+       if (end < start)
                end = 0;
-       for (; nstart != end; nstart = nend) {
-               unsigned long nr_pages;
-               long ret;
 
-               if (!locked) {
-                       locked = 1;
-                       mmap_read_lock(mm);
-                       vma = find_vma(mm, nstart);
-               } else if (nstart >= vma->vm_end)
-                       vma = vma->vm_next;
-               if (!vma || vma->vm_start >= end)
-                       break;
-               nend = end ? min(end, vma->vm_end) : vma->vm_end;
-               if (vma->vm_flags & (VM_IO | VM_PFNMAP))
-                       continue;
-               if (nstart < vma->vm_start)
-                       nstart = vma->vm_start;
-               nr_pages = (nend - nstart) / PAGE_SIZE;
-               ret = __get_user_pages_locked(mm, nstart, nr_pages,
-                                             NULL, NULL, &locked,
-                                             FOLL_TOUCH | FOLL_WRITE);
-               if (ret <= 0)
+       mmap_read_lock(mm);
+       do {
+               if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
                        break;
-               nend = nstart + ret * PAGE_SIZE;
-       }
-       if (locked)
-               mmap_read_unlock(mm);
-       if (nstart == end)
-               return 0;
-       return size - min_t(size_t, nstart - start, size);
+               start = (start + PAGE_SIZE) & PAGE_MASK;
+       } while (start != end);
+       mmap_read_unlock(mm);
+
+       if (size > (unsigned long)uaddr - start)
+               return size - ((unsigned long)uaddr - start);
+       return 0;
 }
 EXPORT_SYMBOL(fault_in_safe_writeable);