diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f154019..bce28cc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1489,7 +1489,6 @@ static void __destroy_compound_gigantic_folio(struct folio *folio,
                        set_page_refcounted(p);
        }
 
-       folio_set_order(folio, 0);
        __folio_clear_head(folio);
 }
 
@@ -1951,9 +1950,6 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
        struct page *p;
 
        __folio_clear_reserved(folio);
-       __folio_set_head(folio);
-       /* we rely on prep_new_hugetlb_folio to set the destructor */
-       folio_set_order(folio, order);
        for (i = 0; i < nr_pages; i++) {
                p = folio_page(folio, i);
 
@@ -1999,6 +1995,9 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
                if (i != 0)
                        set_compound_head(p, &folio->page);
        }
+       __folio_set_head(folio);
+       /* we rely on prep_new_hugetlb_folio to set the destructor */
+       folio_set_order(folio, order);
        atomic_set(&folio->_entire_mapcount, -1);
        atomic_set(&folio->_nr_pages_mapped, 0);
        atomic_set(&folio->_pincount, 0);
@@ -2017,8 +2016,6 @@ out_error:
                p = folio_page(folio, j);
                __ClearPageReserved(p);
        }
-       folio_set_order(folio, 0);
-       __folio_clear_head(folio);
        return false;
 }
 
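Note: the hunks above reorder __prep_compound_gigantic_folio(): __folio_set_head() and folio_set_order() now run only after every tail page has been checked, so the out_error path no longer has any head/order state to undo; the first hunk similarly drops the folio_set_order(folio, 0) call from __destroy_compound_gigantic_folio(). A minimal sketch of the resulting flow (hypothetical name; the demote variant, reserved-flag handling, and the full error unwinding of already-frozen tails are omitted):

static bool prep_gigantic_sketch(struct folio *folio, unsigned int order)
{
        long i, nr_pages = 1UL << order;
        struct page *p;

        for (i = 0; i < nr_pages; i++) {
                p = folio_page(folio, i);
                if (i != 0) {
                        /* tail checks; real code also unwinds earlier tails */
                        if (!page_ref_freeze(p, 1))
                                return false;   /* head/order were never set */
                        set_compound_head(p, &folio->page);
                }
        }
        /* Only now does the folio publicly become a head of this order. */
        __folio_set_head(folio);
        folio_set_order(folio, order);
        return true;
}
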
@@ -5016,7 +5013,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *src_vma)
 {
        pte_t *src_pte, *dst_pte, entry;
-       struct page *ptepage;
+       struct folio *pte_folio;
        unsigned long addr;
        bool cow = is_cow_mapping(src_vma->vm_flags);
        struct hstate *h = hstate_vma(src_vma);
@@ -5115,8 +5112,8 @@ again:
                                set_huge_pte_at(dst, addr, dst_pte, entry);
                } else {
                        entry = huge_ptep_get(src_pte);
-                       ptepage = pte_page(entry);
-                       get_page(ptepage);
+                       pte_folio = page_folio(pte_page(entry));
+                       folio_get(pte_folio);
 
                        /*
                         * Failing to duplicate the anon rmap is a rare case
@@ -5128,10 +5125,10 @@ again:
                         * need to be without the pgtable locks since we could
                         * sleep during the process.
                         */
-                       if (!PageAnon(ptepage)) {
-                               page_dup_file_rmap(ptepage, true);
-                       } else if (page_try_dup_anon_rmap(ptepage, true,
-                                                         src_vma)) {
+                       if (!folio_test_anon(pte_folio)) {
+                               page_dup_file_rmap(&pte_folio->page, true);
+                       } else if (page_try_dup_anon_rmap(&pte_folio->page,
+                                                         true, src_vma)) {
                                pte_t src_pte_old = entry;
                                struct folio *new_folio;
 
@@ -5140,14 +5137,14 @@ again:
                                /* Do not use reserve as it's private owned */
                                new_folio = alloc_hugetlb_folio(dst_vma, addr, 1);
                                if (IS_ERR(new_folio)) {
-                                       put_page(ptepage);
+                                       folio_put(pte_folio);
                                        ret = PTR_ERR(new_folio);
                                        break;
                                }
                                ret = copy_user_large_folio(new_folio,
-                                                     page_folio(ptepage),
-                                                     addr, dst_vma);
-                               put_page(ptepage);
+                                                           pte_folio,
+                                                           addr, dst_vma);
+                               folio_put(pte_folio);
                                if (ret) {
                                        folio_put(new_folio);
                                        break;
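Note: the copy_hugetlb_page_range() hunks are a straightforward folio conversion: resolve the folio once from the source PTE, then take and drop references on the folio instead of on the (possibly tail) page. The pattern in isolation, with a hypothetical helper name and the surrounding rmap/fault handling omitted:

/* Kernel context assumed; the middle comment stands in for the real work. */
static void with_pte_folio(pte_t entry)
{
        struct folio *pte_folio = page_folio(pte_page(entry));

        folio_get(pte_folio);           /* was: get_page(pte_page(entry)) */
        /* ... e.g. copy_user_large_folio(dst, pte_folio, addr, vma) ... */
        folio_put(pte_folio);           /* was: put_page(ptepage) */
}
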
@@ -5540,7 +5537,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
        const bool unshare = flags & FAULT_FLAG_UNSHARE;
        pte_t pte = huge_ptep_get(ptep);
        struct hstate *h = hstate_vma(vma);
-       struct page *old_page;
+       struct folio *old_folio;
        struct folio *new_folio;
        int outside_reserve = 0;
        vm_fault_t ret = 0;
@@ -5571,7 +5568,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
                return 0;
        }
 
-       old_page = pte_page(pte);
+       old_folio = page_folio(pte_page(pte));
 
        delayacct_wpcopy_start();
 
@@ -5580,17 +5577,17 @@ retry_avoidcopy:
         * If no-one else is actually using this page, we're the exclusive
         * owner and can reuse this page.
         */
-       if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
-               if (!PageAnonExclusive(old_page))
-                       page_move_anon_rmap(old_page, vma);
+       if (folio_mapcount(old_folio) == 1 && folio_test_anon(old_folio)) {
+               if (!PageAnonExclusive(&old_folio->page))
+                       page_move_anon_rmap(&old_folio->page, vma);
                if (likely(!unshare))
                        set_huge_ptep_writable(vma, haddr, ptep);
 
                delayacct_wpcopy_end();
                return 0;
        }
-       VM_BUG_ON_PAGE(PageAnon(old_page) && PageAnonExclusive(old_page),
-                      old_page);
+       VM_BUG_ON_PAGE(folio_test_anon(old_folio) &&
+                      PageAnonExclusive(&old_folio->page), &old_folio->page);
 
        /*
         * If the process that created a MAP_PRIVATE mapping is about to
@@ -5602,10 +5599,10 @@ retry_avoidcopy:
         * of the full address range.
         */
        if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
-                       page_folio(old_page) != pagecache_folio)
+                       old_folio != pagecache_folio)
                outside_reserve = 1;
 
-       get_page(old_page);
+       folio_get(old_folio);
 
        /*
         * Drop page table lock as buddy allocator may be called. It will
@@ -5627,7 +5624,7 @@ retry_avoidcopy:
                        pgoff_t idx;
                        u32 hash;
 
-                       put_page(old_page);
+                       folio_put(old_folio);
                        /*
                         * Drop hugetlb_fault_mutex and vma_lock before
                         * unmapping.  unmapping needs to hold vma_lock
@@ -5642,7 +5639,7 @@ retry_avoidcopy:
                        hugetlb_vma_unlock_read(vma);
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
-                       unmap_ref_private(mm, vma, old_page, haddr);
+                       unmap_ref_private(mm, vma, &old_folio->page, haddr);
 
                        mutex_lock(&hugetlb_fault_mutex_table[hash]);
                        hugetlb_vma_lock_read(vma);
@@ -5672,7 +5669,7 @@ retry_avoidcopy:
                goto out_release_all;
        }
 
-       if (copy_user_large_folio(new_folio, page_folio(old_page), address, vma)) {
+       if (copy_user_large_folio(new_folio, old_folio, address, vma)) {
                ret = VM_FAULT_HWPOISON_LARGE;
                goto out_release_all;
        }
@@ -5694,14 +5691,14 @@ retry_avoidcopy:
                /* Break COW or unshare */
                huge_ptep_clear_flush(vma, haddr, ptep);
                mmu_notifier_invalidate_range(mm, range.start, range.end);
-               page_remove_rmap(old_page, vma, true);
+               page_remove_rmap(&old_folio->page, vma, true);
                hugepage_add_new_anon_rmap(new_folio, vma, haddr);
                if (huge_pte_uffd_wp(pte))
                        newpte = huge_pte_mkuffd_wp(newpte);
                set_huge_pte_at(mm, haddr, ptep, newpte);
                folio_set_hugetlb_migratable(new_folio);
                /* Make the old page be freed below */
-               new_folio = page_folio(old_page);
+               new_folio = old_folio;
        }
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(&range);
@@ -5710,11 +5707,11 @@ out_release_all:
         * No restore in case of successful pagetable update (Break COW or
         * unshare)
         */
-       if (new_folio != page_folio(old_page))
+       if (new_folio != old_folio)
                restore_reserve_on_error(h, vma, haddr, new_folio);
        folio_put(new_folio);
 out_release_old:
-       put_page(old_page);
+       folio_put(old_folio);
 
        spin_lock(ptl); /* Caller expects lock to be held */
 
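Note: hugetlb_wp() now carries old_folio throughout, and the exclusive-owner fast path becomes a folio-level test. That reuse check on its own, under the same assumptions as above (page table lock held; hypothetical function name):

static bool can_reuse_anon_folio(struct folio *old_folio,
                                 struct vm_area_struct *vma)
{
        if (folio_mapcount(old_folio) != 1 || !folio_test_anon(old_folio))
                return false;
        /* Sole mapper: make sure the anon rmap points at this vma. */
        if (!PageAnonExclusive(&old_folio->page))
                page_move_anon_rmap(&old_folio->page, vma);
        return true;
}
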
@@ -5731,13 +5728,13 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
 {
        struct address_space *mapping = vma->vm_file->f_mapping;
        pgoff_t idx = vma_hugecache_offset(h, vma, address);
-       bool present;
-
-       rcu_read_lock();
-       present = page_cache_next_miss(mapping, idx, 1) != idx;
-       rcu_read_unlock();
+       struct folio *folio;
 
-       return present;
+       folio = filemap_get_folio(mapping, idx);
+       if (IS_ERR(folio))
+               return false;
+       folio_put(folio);
+       return true;
 }
 
 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
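Note: hugetlbfs_pagecache_present() goes back to an explicit lookup: filemap_get_folio() returns ERR_PTR(-ENOENT) when nothing is cached at that index, so presence is just a successful lookup whose reference is dropped again. A variant of the same pattern for a caller that wants to keep the folio (hypothetical name):

static struct folio *lookup_cached_folio(struct address_space *mapping,
                                         pgoff_t idx)
{
        struct folio *folio = filemap_get_folio(mapping, idx);

        if (IS_ERR(folio))
                return NULL;    /* not present in the page cache */
        return folio;           /* caller is responsible for folio_put() */
}
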
@@ -6062,7 +6059,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        vm_fault_t ret;
        u32 hash;
        pgoff_t idx;
-       struct page *page = NULL;
+       struct folio *folio = NULL;
        struct folio *pagecache_folio = NULL;
        struct hstate *h = hstate_vma(vma);
        struct address_space *mapping;
@@ -6179,16 +6176,16 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        /*
         * hugetlb_wp() requires page locks of pte_page(entry) and
         * pagecache_folio, so here we need take the former one
-        * when page != pagecache_folio or !pagecache_folio.
+        * when folio != pagecache_folio or !pagecache_folio.
         */
-       page = pte_page(entry);
-       if (page_folio(page) != pagecache_folio)
-               if (!trylock_page(page)) {
+       folio = page_folio(pte_page(entry));
+       if (folio != pagecache_folio)
+               if (!folio_trylock(folio)) {
                        need_wait_lock = 1;
                        goto out_ptl;
                }
 
-       get_page(page);
+       folio_get(folio);
 
        if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
                if (!huge_pte_write(entry)) {
@@ -6204,9 +6201,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                                flags & FAULT_FLAG_WRITE))
                update_mmu_cache(vma, haddr, ptep);
 out_put_page:
-       if (page_folio(page) != pagecache_folio)
-               unlock_page(page);
-       put_page(page);
+       if (folio != pagecache_folio)
+               folio_unlock(folio);
+       folio_put(folio);
 out_ptl:
        spin_unlock(ptl);
 
@@ -6225,7 +6222,7 @@ out_mutex:
         * here without taking refcount.
         */
        if (need_wait_lock)
-               wait_on_page_locked(page);
+               folio_wait_locked(folio);
        return ret;
 }
 
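Note: hugetlb_fault() keeps its trylock-or-defer scheme, now spelled with folio lock helpers: try the folio lock while the page table lock is held; if that fails, set need_wait_lock, unwind, and sleep in folio_wait_locked() once every lock is dropped. The pattern in isolation (hypothetical helper name):

static bool lock_folio_or_defer(struct folio *folio, bool *need_wait_lock)
{
        if (folio_trylock(folio))
                return true;            /* pair with folio_unlock() */
        *need_wait_lock = true;         /* fault is retried after waiting */
        return false;
}

/*
 * Later, after all locks have been dropped:
 *
 *      if (need_wait_lock)
 *              folio_wait_locked(folio);
 */
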
@@ -6425,17 +6422,14 @@ out_release_nounlock:
 }
 #endif /* CONFIG_USERFAULTFD */
 
-static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
-                                int refs, struct page **pages,
-                                struct vm_area_struct **vmas)
+static void record_subpages(struct page *page, struct vm_area_struct *vma,
+                           int refs, struct page **pages)
 {
        int nr;
 
        for (nr = 0; nr < refs; nr++) {
                if (likely(pages))
                        pages[nr] = nth_page(page, nr);
-               if (vmas)
-                       vmas[nr] = vma;
        }
 }
 
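Note: with the vmas array gone from the GUP path, record_subpages() only has to fan the huge page out into the caller's pages[] array (the vma argument is still accepted but unused here). Reduced to its essentials, with a hypothetical name:

static void fill_subpages(struct page *huge_page, unsigned long pfn_offset,
                          int refs, struct page **pages)
{
        int nr;

        for (nr = 0; nr < refs; nr++)
                /* nth_page() stays correct on non-contiguous memory models */
                pages[nr] = nth_page(huge_page, pfn_offset + nr);
}
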
@@ -6508,9 +6502,9 @@ out_unlock:
 }
 
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-                        struct page **pages, struct vm_area_struct **vmas,
-                        unsigned long *position, unsigned long *nr_pages,
-                        long i, unsigned int flags, int *locked)
+                        struct page **pages, unsigned long *position,
+                        unsigned long *nr_pages, long i, unsigned int flags,
+                        int *locked)
 {
        unsigned long pfn_offset;
        unsigned long vaddr = *position;
@@ -6638,7 +6632,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 * If subpage information not requested, update counters
                 * and skip the same_page loop below.
                 */
-               if (!pages && !vmas && !pfn_offset &&
+               if (!pages && !pfn_offset &&
                    (vaddr + huge_page_size(h) < vma->vm_end) &&
                    (remainder >= pages_per_huge_page(h))) {
                        vaddr += huge_page_size(h);
@@ -6653,11 +6647,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                refs = min3(pages_per_huge_page(h) - pfn_offset, remainder,
                    (vma->vm_end - ALIGN_DOWN(vaddr, PAGE_SIZE)) >> PAGE_SHIFT);
 
-               if (pages || vmas)
-                       record_subpages_vmas(nth_page(page, pfn_offset),
-                                            vma, refs,
-                                            likely(pages) ? pages + i : NULL,
-                                            vmas ? vmas + i : NULL);
+               if (pages)
+                       record_subpages(nth_page(page, pfn_offset),
+                                       vma, refs,
+                                       likely(pages) ? pages + i : NULL);
 
                if (pages) {
                        /*
@@ -7137,7 +7130,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long saddr;
        pte_t *spte = NULL;
        pte_t *pte;
-       spinlock_t *ptl;
 
        i_mmap_lock_read(mapping);
        vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
@@ -7158,7 +7150,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!spte)
                goto out;
 
-       ptl = huge_pte_lock(hstate_vma(vma), mm, spte);
+       spin_lock(&mm->page_table_lock);
        if (pud_none(*pud)) {
                pud_populate(mm, pud,
                                (pmd_t *)((unsigned long)spte & PAGE_MASK));
@@ -7166,7 +7158,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
        } else {
                put_page(virt_to_page(spte));
        }
-       spin_unlock(ptl);
+       spin_unlock(&mm->page_table_lock);
 out:
        pte = (pte_t *)pmd_alloc(mm, pud, addr);
        i_mmap_unlock_read(mapping);
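Note: the huge_pmd_share() hunks take mm->page_table_lock around the PUD update instead of the pte-derived split lock from huge_pte_lock(); PUD-level changes in an mm are serialized by that lock, and the install-or-drop race handling is unchanged. Sketched as a hypothetical wrapper (spte is assumed to already carry the extra reference taken during the sharing lookup; accounting elided):

static void install_shared_pmd(struct mm_struct *mm, pud_t *pud, pte_t *spte)
{
        spin_lock(&mm->page_table_lock);
        if (pud_none(*pud))
                pud_populate(mm, pud,
                             (pmd_t *)((unsigned long)spte & PAGE_MASK));
        else
                put_page(virt_to_page(spte));   /* lost the race */
        spin_unlock(&mm->page_table_lock);
}
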
@@ -7254,7 +7246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
                                pte = (pte_t *)pmd_alloc(mm, pud, addr);
                }
        }
-       BUG_ON(pte && pte_present(*pte) && !pte_huge(*pte));
+       BUG_ON(pte && pte_present(ptep_get(pte)) && !pte_huge(ptep_get(pte)));
 
        return pte;
 }
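
Note: the last hunk is part of the wider ptep_get() conversion: PTE values are read through the accessor rather than by dereferencing the pointer directly, so architectures that need more than a plain load can hook the read (the generic version is a READ_ONCE()). The same sanity check with a single read, as a hypothetical helper:

static void assert_huge_pte(pte_t *pte)
{
        if (pte) {
                pte_t entry = ptep_get(pte);    /* one coherent read of the PTE */

                BUG_ON(pte_present(entry) && !pte_huge(entry));
        }
}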