diff --git a/mm/gup.c b/mm/gup.c
index 76d222c..2f8a2d8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
        pte = ptep_get(ptep);
        if (!pte_present(pte))
                goto no_page;
-       if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+       if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
                goto no_page;
 
        page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
        if (likely(!pmd_trans_huge(pmdval)))
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
-       if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+       if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
                return no_page_table(vma, flags);
 
        ptl = pmd_lock(mm, pmd);
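
The vma argument is what lets the helper decide per-VMA instead of per-call. A plausible shape of the vma-aware helper, as a hedged sketch (the real definition lives in include/linux/mm.h and may differ in wording): honoring NUMA hinting faults is opt-in via FOLL_HONOR_NUMA_FAULT (accepted by the fast path in the last hunk below), and even then protnone entries in inaccessible VMAs are still followed so FOLL_FORCE can make progress.

	/* Sketch only; assumes the existing vma_is_accessible() helper. */
	static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
						   unsigned int flags)
	{
		/*
		 * Callers that don't want to honor NUMA hinting faults can
		 * always follow protnone entries.
		 */
		if (!(flags & FOLL_HONOR_NUMA_FAULT))
			return true;

		/*
		 * NUMA hinting faults don't apply in inaccessible (PROT_NONE)
		 * VMAs; requiring a fault there would leave FOLL_FORCE unable
		 * to make progress, because handle_mm_fault() refuses NUMA
		 * hinting faults in inaccessible VMAs.
		 */
		return !vma_is_accessible(vma);
	}
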
@@ -811,7 +811,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
                              struct follow_page_context *ctx)
 {
        pgd_t *pgd;
-       struct page *page;
        struct mm_struct *mm = vma->vm_mm;
 
        ctx->page_mask = 0;
@@ -820,16 +819,10 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
         * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
         * special hugetlb page table walking code.  This eliminates the
         * need to check for hugetlb entries in the general walking code.
-        *
-        * hugetlb_follow_page_mask is only for follow_page() handling here.
-        * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
         */
-       if (is_vm_hugetlb_page(vma)) {
-               page = hugetlb_follow_page_mask(vma, address, flags);
-               if (!page)
-                       page = no_page_table(vma, flags);
-               return page;
-       }
+       if (is_vm_hugetlb_page(vma))
+               return hugetlb_follow_page_mask(vma, address, flags,
+                                               &ctx->page_mask);
 
        pgd = pgd_offset(mm, address);
 
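
hugetlb_follow_page_mask() now also reports the mapping size back through ctx->page_mask, so the generic loop further down can batch subpages for hugetlb exactly as it does for THPs. A hedged sketch of the extended interface (the real routine is in mm/hugetlb.c and additionally deals with locking, FOLL_PIN/FOLL_GET references and FOLL_DUMP):

	struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
					      unsigned long address,
					      unsigned int flags,
					      unsigned int *page_mask)
	{
		struct hstate *h = hstate_vma(vma);
		struct page *page = NULL;

		/* ...walk the hugetlb page table and grab a reference... */

		if (page)
			/* Tell the caller how many base pages the mapping spans. */
			*page_mask = (1U << huge_page_order(h)) - 1;
		return page;
	}
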
@@ -851,6 +844,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
                return NULL;
 
+       /*
+        * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+        * follow_page() to fail on PROT_NONE-mapped pages.
+        */
        page = follow_page_mask(vma, address, foll_flags, &ctx);
        if (ctx.pgmap)
                put_dev_pagemap(ctx.pgmap);
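
Honoring NUMA hinting faults is therefore strictly opt-in. A hypothetical caller (not part of this patch) that wants GUP to behave like a CPU access with respect to NUMA hinting would pass the flag explicitly:

	/* Hypothetical caller; pin_user_pages_fast() is the existing API. */
	static int pin_one_page_like_cpu_access(unsigned long addr,
						struct page **page)
	{
		/*
		 * FOLL_HONOR_NUMA_FAULT makes GUP trigger (and thus honor)
		 * NUMA hinting faults on protnone entries instead of simply
		 * following them.
		 */
		return pin_user_pages_fast(addr, 1,
					   FOLL_WRITE | FOLL_HONOR_NUMA_FAULT,
					   page);
	}
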
@@ -1054,7 +1051,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                    !writable_file_mapping_allowed(vma, gup_flags))
                        return -EFAULT;
 
-               if (!(vm_flags & VM_WRITE)) {
+               if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
                        if (!(gup_flags & FOLL_FORCE))
                                return -EFAULT;
                        /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
@@ -1211,7 +1208,7 @@ static long __get_user_pages(struct mm_struct *mm,
                        if (!vma && in_gate_area(mm, start)) {
                                ret = get_gate_page(mm, start & PAGE_MASK,
                                                gup_flags, &vma,
-                                               pages ? &pages[i] : NULL);
+                                               pages ? &page : NULL);
                                if (ret)
                                        goto out;
                                ctx.page_mask = 0;
@@ -1225,22 +1222,6 @@ static long __get_user_pages(struct mm_struct *mm,
                        ret = check_vma_flags(vma, gup_flags);
                        if (ret)
                                goto out;
-
-                       if (is_vm_hugetlb_page(vma)) {
-                               i = follow_hugetlb_page(mm, vma, pages,
-                                                       &start, &nr_pages, i,
-                                                       gup_flags, locked);
-                               if (!*locked) {
-                                       /*
-                                        * We've got a VM_FAULT_RETRY
-                                        * and we've lost mmap_lock.
-                                        * We must stop here.
-                                        */
-                                       BUG_ON(gup_flags & FOLL_NOWAIT);
-                                       goto out;
-                               }
-                               continue;
-                       }
                }
 retry:
                /*
@@ -1281,22 +1262,58 @@ retry:
                                ret = PTR_ERR(page);
                                goto out;
                        }
-
-                       goto next_page;
                } else if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
                        goto out;
                }
-               if (pages) {
-                       pages[i] = page;
-                       flush_anon_page(vma, page, start);
-                       flush_dcache_page(page);
-                       ctx.page_mask = 0;
-               }
 next_page:
                page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
                if (page_increm > nr_pages)
                        page_increm = nr_pages;
+
+               if (pages) {
+                       struct page *subpage;
+                       unsigned int j;
+
+                       /*
+                        * This must be a large folio (and doesn't need to
+                        * be the whole folio; it can be part of it), do
+                        * the refcount work for all the subpages too.
+                        *
+                        * NOTE: here the page may not be the head page
+                        * e.g. when start addr is not thp-size aligned.
+                        * try_grab_folio() should have taken care of tail
+                        * pages.
+                        */
+                       if (page_increm > 1) {
+                               struct folio *folio;
+
+                               /*
+                                * Since we already hold a refcount on the
+                                * large folio, this should never fail.
+                                */
+                               folio = try_grab_folio(page, page_increm - 1,
+                                                      foll_flags);
+                               if (WARN_ON_ONCE(!folio)) {
+                                       /*
+                                        * Release the 1st page ref if the
+                                        * folio is problematic, fail hard.
+                                        */
+                                       gup_put_folio(page_folio(page), 1,
+                                                     foll_flags);
+                                       ret = -EFAULT;
+                                       goto out;
+                               }
+                       }
+
+                       for (j = 0; j < page_increm; j++) {
+                               subpage = nth_page(page, j);
+                               pages[i + j] = subpage;
+                               flush_anon_page(vma, subpage, start + j * PAGE_SIZE);
+                               flush_dcache_page(subpage);
+                       }
+               }
+
                i += page_increm;
                start += page_increm * PAGE_SIZE;
                nr_pages -= page_increm;
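
The page_increm arithmetic above is compact enough to deserve a worked example: ctx.page_mask is the mapping size in base pages minus one, so 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask) counts the subpages from start up to the end of the large mapping. Values below are assumed for illustration (4K base pages, 2M PMD-mapped THP):

	/*
	 * Worked example (assumed: PAGE_SHIFT == 12, 2M PMD-mapped THP, so
	 * the mapping spans 512 base pages and ctx.page_mask == 511 == 0x1ff):
	 *
	 *   start = 0x7f0000200000 (first subpage of the THP)
	 *     (start >> 12) & 0x1ff        = 0
	 *     1 + (~(start >> 12) & 0x1ff) = 1 + 511 = 512
	 *
	 *   start = 0x7f00003ff000 (last subpage of the THP)
	 *     (start >> 12) & 0x1ff        = 511
	 *     1 + (~(start >> 12) & 0x1ff) = 1 + 0 = 1
	 *
	 * So a single follow_page_mask() call can fill up to 512 pages[]
	 * slots, clamped to the remaining nr_pages just above.
	 */
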
@@ -2551,7 +2568,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                struct page *page;
                struct folio *folio;
 
-               if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+               /*
+                * Always fall back to ordinary GUP on PROT_NONE-mapped pages:
+                * pte_access_permitted() had better reject these pages
+                * either way; otherwise, GUP-fast might succeed in
+                * cases where ordinary GUP would fail due to VMA access
+                * permissions.
+                */
+               if (pte_protnone(pte))
                        goto pte_unmap;
 
                if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2576,7 +2600,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                if (!folio)
                        goto pte_unmap;
 
-               if (unlikely(page_is_secretmem(page))) {
+               if (unlikely(folio_is_secretmem(folio))) {
                        gup_put_folio(folio, 1, flags);
                        goto pte_unmap;
                }
@@ -2970,8 +2994,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 
                if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
                             pmd_devmap(pmd))) {
-                       if (pmd_protnone(pmd) &&
-                           !gup_can_follow_protnone(flags))
+                       /* See gup_pte_range() */
+                       if (pmd_protnone(pmd))
                                return 0;
 
                        if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3175,7 @@ static int internal_get_user_pages_fast(unsigned long start,
        if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
                                       FOLL_FORCE | FOLL_PIN | FOLL_GET |
                                       FOLL_FAST_ONLY | FOLL_NOFAULT |
-                                      FOLL_PCI_P2PDMA)))
+                                      FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
                return -EINVAL;
 
        if (gup_flags & FOLL_PIN)
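
Allowing FOLL_HONOR_NUMA_FAULT through this sanity check matters even though the lockless walk itself never honors it: whatever the fast walk cannot pin is retried via ordinary GUP with the same gup_flags, and it is that slow path which acts on the flag. Rough control flow, hedged, with fallback details elided:

	/*
	 * Rough shape of internal_get_user_pages_fast() (sketch):
	 *
	 *   nr_pinned = lockless page-table walk
	 *                 - protnone entries always bail out to the slow path
	 *                   (see gup_pte_range()/gup_pmd_range() above)
	 *   if (nr_pinned < nr_pages && !(gup_flags & FOLL_FAST_ONLY))
	 *           pin the remainder via ordinary GUP, passing the same
	 *           gup_flags - this is where FOLL_HONOR_NUMA_FAULT takes
	 *           effect
	 */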