mm/gup: simplify the external interface functions and consolidate invariants
authorJason Gunthorpe <jgg@nvidia.com>
Tue, 24 Jan 2023 20:34:26 +0000 (16:34 -0400)
committerAndrew Morton <akpm@linux-foundation.org>
Fri, 10 Feb 2023 00:51:44 +0000 (16:51 -0800)
The GUP family of functions have a complex, but fairly well defined, set
of invariants for their arguments.  Currently these are sprinkled about,
sometimes in duplicate through many functions.

Internally we don't follow all the invariants that the external interface
has to follow, so place these checks directly at the exported interface.
This ensures the internal functions never reach a violated invariant.

Remove the duplicated invariant checks.

The end result is to make these functions fully internal:
 __get_user_pages_locked()
 internal_get_user_pages_fast()
 __gup_longterm_locked()

And all the other functions call directly into one of these.

Link: https://lkml.kernel.org/r/5-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Suggested-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/gup.c
mm/huge_memory.c

index 9bd4d77..029de13 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -215,7 +215,6 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
 {
        struct folio *folio = page_folio(page);
 
-       WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
        if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
                return -ENOMEM;
 
@@ -818,7 +817,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        if (vma_is_secretmem(vma))
                return NULL;
 
-       if (foll_flags & FOLL_PIN)
+       if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
                return NULL;
 
        page = follow_page_mask(vma, address, foll_flags, &ctx);
@@ -975,9 +974,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
        if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
                return -EOPNOTSUPP;
 
-       if ((gup_flags & FOLL_LONGTERM) && (gup_flags & FOLL_PCI_P2PDMA))
-               return -EOPNOTSUPP;
-
        if (vma_is_secretmem(vma))
                return -EFAULT;
 
@@ -1354,11 +1350,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
        long ret, pages_done;
        bool must_unlock = false;
 
-       if (locked) {
-               /* if VM_FAULT_RETRY can be returned, vmas become invalid */
-               BUG_ON(vmas);
-       }
-
        /*
         * The internal caller expects GUP to manage the lock internally and the
         * lock must be released when this returns.
@@ -2087,16 +2078,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
                return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
                                               locked, gup_flags);
 
-       /*
-        * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
-        * implies FOLL_PIN (although the reverse is not true). Therefore it is
-        * correct to unconditionally call check_and_migrate_movable_pages()
-        * which assumes pages have been pinned via FOLL_PIN.
-        *
-        * Enforce the above reasoning by asserting that FOLL_PIN is set.
-        */
-       if (WARN_ON(!(gup_flags & FOLL_PIN)))
-               return -EINVAL;
        flags = memalloc_pin_save();
        do {
                nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
@@ -2106,28 +2087,66 @@ static long __gup_longterm_locked(struct mm_struct *mm,
                        rc = nr_pinned_pages;
                        break;
                }
+
+               /* FOLL_LONGTERM implies FOLL_PIN */
                rc = check_and_migrate_movable_pages(nr_pinned_pages, pages);
        } while (rc == -EAGAIN);
        memalloc_pin_restore(flags);
        return rc ? rc : nr_pinned_pages;
 }
 
-static bool is_valid_gup_flags(unsigned int gup_flags)
+/*
+ * Check that the given flags are valid for the exported gup/pup interface, and
+ * update them with the required flags that the caller must have set.
+ */
+static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
+                             int *locked, unsigned int *gup_flags_p,
+                             unsigned int to_set)
 {
+       unsigned int gup_flags = *gup_flags_p;
+
        /*
-        * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
-        * never directly by the caller, so enforce that with an assertion:
+        * These flags not allowed to be specified externally to the gup
+        * interfaces:
+        * - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY are internal only
+        * - FOLL_REMOTE is internal only and used on follow_page()
         */
-       if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+       if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED |
+                                     FOLL_REMOTE | FOLL_FAST_ONLY)))
+               return false;
+
+       gup_flags |= to_set;
+
+       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
+       if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
+                        (FOLL_PIN | FOLL_GET)))
+               return false;
+
+       /* LONGTERM can only be specified when pinning */
+       if (WARN_ON_ONCE(!(gup_flags & FOLL_PIN) && (gup_flags & FOLL_LONGTERM)))
+               return false;
+
+       /* Pages input must be given if using GET/PIN */
+       if (WARN_ON_ONCE((gup_flags & (FOLL_GET | FOLL_PIN)) && !pages))
                return false;
+
+       /* At the external interface locked must be set */
+       if (WARN_ON_ONCE(locked && *locked != 1))
+               return false;
+
+       /* We want to allow the pgmap to be hot-unplugged at all times */
+       if (WARN_ON_ONCE((gup_flags & FOLL_LONGTERM) &&
+                        (gup_flags & FOLL_PCI_P2PDMA)))
+               return false;
+
        /*
-        * FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying
-        * that is, FOLL_LONGTERM is a specific case, more restrictive case of
-        * FOLL_PIN.
+        * Can't use VMAs with locked, as locked allows GUP to unlock
+        * which invalidates the vmas array
         */
-       if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
+       if (WARN_ON_ONCE(vmas && locked))
                return false;
 
+       *gup_flags_p = gup_flags;
        return true;
 }
 
@@ -2197,11 +2216,12 @@ long get_user_pages_remote(struct mm_struct *mm,
                unsigned int gup_flags, struct page **pages,
                struct vm_area_struct **vmas, int *locked)
 {
-       if (!is_valid_gup_flags(gup_flags))
+       if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
+                              FOLL_TOUCH | FOLL_REMOTE))
                return -EINVAL;
 
        return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked,
-                                      gup_flags | FOLL_TOUCH | FOLL_REMOTE);
+                                      gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -2235,11 +2255,11 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
                unsigned int gup_flags, struct page **pages,
                struct vm_area_struct **vmas)
 {
-       if (!is_valid_gup_flags(gup_flags))
+       if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_TOUCH))
                return -EINVAL;
 
        return __get_user_pages_locked(current->mm, start, nr_pages, pages,
-                                      vmas, NULL, gup_flags | FOLL_TOUCH);
+                                      vmas, NULL, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -2263,8 +2283,11 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 {
        int locked = 0;
 
+       if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_TOUCH))
+               return -EINVAL;
+
        return __get_user_pages_locked(current->mm, start, nr_pages, pages,
-                                      NULL, &locked, gup_flags | FOLL_TOUCH);
+                                      NULL, &locked, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -2992,7 +3015,9 @@ int get_user_pages_fast_only(unsigned long start, int nr_pages,
         * FOLL_FAST_ONLY is required in order to match the API description of
         * this routine: no fall back to regular ("slow") GUP.
         */
-       gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
+       if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+                              FOLL_GET | FOLL_FAST_ONLY))
+               return -EINVAL;
 
        nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
                                                 pages);
@@ -3029,16 +3054,14 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
 int get_user_pages_fast(unsigned long start, int nr_pages,
                        unsigned int gup_flags, struct page **pages)
 {
-       if (!is_valid_gup_flags(gup_flags))
-               return -EINVAL;
-
        /*
         * The caller may or may not have explicitly set FOLL_GET; either way is
         * OK. However, internally (within mm/gup.c), gup fast variants must set
         * FOLL_GET, because gup fast is always a "pin with a +1 page refcount"
         * request.
         */
-       gup_flags |= FOLL_GET;
+       if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_GET))
+               return -EINVAL;
        return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
@@ -3062,14 +3085,8 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
 int pin_user_pages_fast(unsigned long start, int nr_pages,
                        unsigned int gup_flags, struct page **pages)
 {
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
+       if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_PIN))
                return -EINVAL;
-
-       if (WARN_ON_ONCE(!pages))
-               return -EINVAL;
-
-       gup_flags |= FOLL_PIN;
        return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
 }
 EXPORT_SYMBOL_GPL(pin_user_pages_fast);
@@ -3086,19 +3103,13 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
        int nr_pinned;
 
        /*
-        * FOLL_GET and FOLL_PIN are mutually exclusive. Note that the API
-        * rules require returning 0, rather than -errno:
-        */
-       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-               return 0;
-
-       if (WARN_ON_ONCE(!pages))
-               return 0;
-       /*
         * FOLL_FAST_ONLY is required in order to match the API description of
         * this routine: no fall back to regular ("slow") GUP.
         */
-       gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
+       if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+                              FOLL_PIN | FOLL_FAST_ONLY))
+               return 0;
+
        nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
                                                 pages);
        /*
@@ -3140,16 +3151,11 @@ long pin_user_pages_remote(struct mm_struct *mm,
                           unsigned int gup_flags, struct page **pages,
                           struct vm_area_struct **vmas, int *locked)
 {
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-               return -EINVAL;
-
-       if (WARN_ON_ONCE(!pages))
-               return -EINVAL;
-
+       if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
+                              FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE))
+               return 0;
        return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
-                                    gup_flags | FOLL_PIN | FOLL_TOUCH |
-                                            FOLL_REMOTE);
+                                    gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages_remote);
 
@@ -3174,14 +3180,8 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
                    unsigned int gup_flags, struct page **pages,
                    struct vm_area_struct **vmas)
 {
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-               return -EINVAL;
-
-       if (WARN_ON_ONCE(!pages))
-               return -EINVAL;
-
-       gup_flags |= FOLL_PIN;
+       if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_PIN))
+               return 0;
        return __gup_longterm_locked(current->mm, start, nr_pages,
                                     pages, vmas, NULL, gup_flags);
 }
@@ -3195,15 +3195,12 @@ EXPORT_SYMBOL(pin_user_pages);
 long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
                             struct page **pages, unsigned int gup_flags)
 {
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-               return -EINVAL;
        int locked = 0;
 
-       if (WARN_ON_ONCE(!pages))
-               return -EINVAL;
+       if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+                              FOLL_PIN | FOLL_TOUCH))
+               return 0;
 
-       gup_flags |= FOLL_PIN | FOLL_TOUCH;
        return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
                                     &locked, gup_flags);
 }
index 1d6977d..1343a7d 100644 (file)
@@ -1042,11 +1042,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
        assert_spin_locked(pmd_lockptr(mm, pmd));
 
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-                        (FOLL_PIN | FOLL_GET)))
-               return NULL;
-
        if (flags & FOLL_WRITE && !pmd_write(*pmd))
                return NULL;
 
@@ -1205,11 +1200,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
        if (flags & FOLL_WRITE && !pud_write(*pud))
                return NULL;
 
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-                        (FOLL_PIN | FOLL_GET)))
-               return NULL;
-
        if (pud_present(*pud) && pud_devmap(*pud))
                /* pass */;
        else