mm: Convert page_vma_mapped_walk to work on PFNs
author: Matthew Wilcox (Oracle) <willy@infradead.org>
Thu, 3 Feb 2022 16:40:17 +0000 (11:40 -0500)
committer: Matthew Wilcox (Oracle) <willy@infradead.org>
Mon, 21 Mar 2022 16:59:02 +0000 (12:59 -0400)
page_mapped_in_vma() really just wants to walk one page, but as the
code stands, if passed the head page of a compound page, it will
walk every page in the compound page.  Extract pfn/nr_pages/pgoff
from the struct page early, so they can be overridden by
page_mapped_in_vma().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
include/linux/hugetlb.h
include/linux/rmap.h
mm/internal.h
mm/migrate.c
mm/page_vma_mapped.c
mm/rmap.c

index d1897a6..6ba2f8e 100644 (file)
@@ -970,6 +970,11 @@ static inline struct hstate *page_hstate(struct page *page)
        return NULL;
 }
 
+static inline struct hstate *size_to_hstate(unsigned long size)
+{
+       return NULL;
+}
+
 static inline unsigned long huge_page_size(struct hstate *h)
 {
        return PAGE_SIZE;
index 0d894a2..0c838ba 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 #include <linux/highmem.h>
+#include <linux/pagemap.h>
 
 /*
  * The anon_vma heads a list of private "related" vmas, to scan if
@@ -201,11 +202,13 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
 
 /* Avoid racy checks */
 #define PVMW_SYNC              (1 << 0)
-/* Look for migarion entries rather than present PTEs */
+/* Look for migration entries rather than present PTEs */
 #define PVMW_MIGRATION         (1 << 1)
 
 struct page_vma_mapped_walk {
-       struct page *page;
+       unsigned long pfn;
+       unsigned long nr_pages;
+       pgoff_t pgoff;
        struct vm_area_struct *vma;
        unsigned long address;
        pmd_t *pmd;
@@ -216,7 +219,9 @@ struct page_vma_mapped_walk {
 
 #define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags)      \
        struct page_vma_mapped_walk name = {                            \
-               .page = _page,                                          \
+               .pfn = page_to_pfn(_page),                              \
+               .nr_pages = compound_nr(page),                          \
+               .pgoff = page_to_pgoff(page),                           \
                .vma = _vma,                                            \
                .address = _address,                                    \
                .flags = _flags,                                        \
@@ -224,7 +229,9 @@ struct page_vma_mapped_walk {
 
 #define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)    \
        struct page_vma_mapped_walk name = {                            \
-               .page = &_folio->page,                                  \
+               .pfn = folio_pfn(_folio),                               \
+               .nr_pages = folio_nr_pages(_folio),                     \
+               .pgoff = folio_pgoff(_folio),                           \
                .vma = _vma,                                            \
                .address = _address,                                    \
                .flags = _flags,                                        \
@@ -233,7 +240,7 @@ struct page_vma_mapped_walk {
 static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 {
        /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
-       if (pvmw->pte && !PageHuge(pvmw->page))
+       if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma))
                pte_unmap(pvmw->pte);
        if (pvmw->ptl)
                spin_unlock(pvmw->ptl);
index 6047268..3b65244 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/rmap.h>
 #include <linux/tracepoint-defs.h>
 
 struct folio_batch;
@@ -475,18 +476,20 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 }
 
 /*
- * Then at what user virtual address will none of the page be found in vma?
+ * Then at what user virtual address will none of the range be found in vma?
  * Assumes that vma_address() already returned a good starting address.
- * If page is a compound head, the entire compound page is considered.
  */
-static inline unsigned long
-vma_address_end(struct page *page, struct vm_area_struct *vma)
+static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw)
 {
+       struct vm_area_struct *vma = pvmw->vma;
        pgoff_t pgoff;
        unsigned long address;
 
-       VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
-       pgoff = page_to_pgoff(page) + compound_nr(page);
+       /* Common case, plus ->pgoff is invalid for KSM */
+       if (pvmw->nr_pages == 1)
+               return pvmw->address + PAGE_SIZE;
+
+       pgoff = pvmw->pgoff + pvmw->nr_pages;
        address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
        /* Check for address beyond vma (or wrapped through 0?) */
        if (address < vma->vm_start || address > vma->vm_end)
index 71f92e8..358bc31 100644 (file)
@@ -174,7 +174,8 @@ void putback_movable_pages(struct list_head *l)
 static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
                                 unsigned long addr, void *old)
 {
-       DEFINE_PAGE_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+       DEFINE_PAGE_VMA_WALK(pvmw, (struct page *)old, vma, addr,
+                               PVMW_SYNC | PVMW_MIGRATION);
        struct page *new;
        pte_t pte;
        swp_entry_t entry;
@@ -184,7 +185,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
                if (PageKsm(page))
                        new = page;
                else
-                       new = page - pvmw.page->index +
+                       new = page - pvmw.pgoff +
                                linear_page_index(vma, pvmw.address);
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
index f7b3310..1187f9c 100644 (file)
@@ -53,18 +53,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
        return true;
 }
 
-static inline bool pfn_is_match(struct page *page, unsigned long pfn)
-{
-       unsigned long page_pfn = page_to_pfn(page);
-
-       /* normal page and hugetlbfs page */
-       if (!PageTransCompound(page) || PageHuge(page))
-               return page_pfn == pfn;
-
-       /* THP can be referenced by any subpage */
-       return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page);
-}
-
 /**
  * check_pte - check if @pvmw->page is mapped at the @pvmw->pte
  * @pvmw: page_vma_mapped_walk struct, includes a pair pte and page for checking
@@ -116,7 +104,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
                pfn = pte_pfn(*pvmw->pte);
        }
 
-       return pfn_is_match(pvmw->page, pfn);
+       return (pfn - pvmw->pfn) < pvmw->nr_pages;
+}
+
+/* Returns true if the two ranges overlap.  Careful to not overflow. */
+static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
+{
+       if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
+               return false;
+       if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
+               return false;
+       return true;
 }
 
 static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
@@ -127,7 +125,7 @@ static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
 }
 
 /**
- * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
  * @pvmw->address
  * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
  * must be set. pmd, pte and ptl must be NULL.
@@ -152,8 +150,8 @@ static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
  */
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 {
-       struct mm_struct *mm = pvmw->vma->vm_mm;
-       struct page *page = pvmw->page;
+       struct vm_area_struct *vma = pvmw->vma;
+       struct mm_struct *mm = vma->vm_mm;
        unsigned long end;
        pgd_t *pgd;
        p4d_t *p4d;
@@ -164,32 +162,26 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
        if (pvmw->pmd && !pvmw->pte)
                return not_found(pvmw);
 
-       if (unlikely(PageHuge(page))) {
+       if (unlikely(is_vm_hugetlb_page(vma))) {
+               unsigned long size = pvmw->nr_pages * PAGE_SIZE;
                /* The only possible mapping was handled on last iteration */
                if (pvmw->pte)
                        return not_found(pvmw);
 
                /* when pud is not present, pte will be NULL */
-               pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
+               pvmw->pte = huge_pte_offset(mm, pvmw->address, size);
                if (!pvmw->pte)
                        return false;
 
-               pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
+               pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm,
+                                               pvmw->pte);
                spin_lock(pvmw->ptl);
                if (!check_pte(pvmw))
                        return not_found(pvmw);
                return true;
        }
 
-       /*
-        * Seek to next pte only makes sense for THP.
-        * But more important than that optimization, is to filter out
-        * any PageKsm page: whose page->index misleads vma_address()
-        * and vma_address_end() to disaster.
-        */
-       end = PageTransCompound(page) ?
-               vma_address_end(page, pvmw->vma) :
-               pvmw->address + PAGE_SIZE;
+       end = vma_address_end(pvmw);
        if (pvmw->pte)
                goto next_pte;
 restart:
@@ -224,7 +216,7 @@ restart:
                        if (likely(pmd_trans_huge(pmde))) {
                                if (pvmw->flags & PVMW_MIGRATION)
                                        return not_found(pvmw);
-                               if (pmd_page(pmde) != page)
+                               if (!check_pmd(pmd_pfn(pmde), pvmw))
                                        return not_found(pvmw);
                                return true;
                        }
@@ -236,7 +228,7 @@ restart:
                                        return not_found(pvmw);
                                entry = pmd_to_swp_entry(pmde);
                                if (!is_migration_entry(entry) ||
-                                   pfn_swap_entry_to_page(entry) != page)
+                                   !check_pmd(swp_offset(entry), pvmw))
                                        return not_found(pvmw);
                                return true;
                        }
@@ -250,7 +242,8 @@ restart:
                         * cleared *pmd but not decremented compound_mapcount().
                         */
                        if ((pvmw->flags & PVMW_SYNC) &&
-                           PageTransCompound(page)) {
+                           transparent_hugepage_active(vma) &&
+                           (pvmw->nr_pages >= HPAGE_PMD_NR)) {
                                spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
 
                                spin_unlock(ptl);
@@ -307,7 +300,8 @@ next_pte:
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 {
        struct page_vma_mapped_walk pvmw = {
-               .page = page,
+               .pfn = page_to_pfn(page),
+               .nr_pages = 1,
                .vma = vma,
                .flags = PVMW_SYNC,
        };
index a7f06b7..e27ba41 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -940,7 +940,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
                                0, vma, vma->vm_mm, address,
-                               vma_address_end(page, vma));
+                               vma_address_end(&pvmw));
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
@@ -1437,8 +1437,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * Note that the page can not be free in this function as call of
         * try_to_unmap() must hold a reference on the page.
         */
-       range.end = PageKsm(page) ?
-                       address + PAGE_SIZE : vma_address_end(page, vma);
+       range.end = vma_address_end(&pvmw);
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
                                address, range.end);
        if (PageHuge(page)) {
@@ -1732,8 +1731,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
         * Note that the page can not be free in this function as call of
         * try_to_unmap() must hold a reference on the page.
         */
-       range.end = PageKsm(page) ?
-                       address + PAGE_SIZE : vma_address_end(page, vma);
+       range.end = vma_address_end(&pvmw);
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
                                address, range.end);
        if (PageHuge(page)) {