WIP: update tizen_qemu_defconfig
[platform/kernel/linux-starfive.git] / mm / khugepaged.c
index 3703a56..ef72d3d 100644 (file)
@@ -561,6 +561,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                        result = SCAN_PTE_NON_PRESENT;
                        goto out;
                }
+               if (pte_uffd_wp(pteval)) {
+                       result = SCAN_PTE_UFFD_WP;
+                       goto out;
+               }
                page = vm_normal_page(vma, address, pteval);
                if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
                        result = SCAN_PAGE_NULL;
@@ -847,6 +851,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
        return SCAN_SUCCEED;
 }
 
+/*
+ * See pmd_trans_unstable() for how the result may change out from
+ * underneath us, even if we hold mmap_lock in read mode.
+ */
 static int find_pmd_or_thp_or_none(struct mm_struct *mm,
                                   unsigned long address,
                                   pmd_t **pmd)
@@ -865,8 +873,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
 #endif
        if (pmd_none(pmde))
                return SCAN_PMD_NONE;
+       if (!pmd_present(pmde))
+               return SCAN_PMD_NULL;
        if (pmd_trans_huge(pmde))
                return SCAN_PMD_MAPPED;
+       if (pmd_devmap(pmde))
+               return SCAN_PMD_NULL;
        if (pmd_bad(pmde))
                return SCAN_PMD_NULL;
        return SCAN_SUCCEED;
@@ -1467,14 +1479,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
        if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false))
                return SCAN_VMA_CHECK;
 
-       /*
-        * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
-        * that got written to. Without this, we'd have to also lock the
-        * anon_vma if one exists.
-        */
-       if (vma->anon_vma)
-               return SCAN_VMA_CHECK;
-
        /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
        if (userfaultfd_wp(vma))
                return SCAN_PTE_UFFD_WP;
@@ -1574,8 +1578,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
        }
 
        /* step 4: remove pte entries */
+       /* we make no change to anon, but protect against concurrent anon page lookup */
+       if (vma->anon_vma)
+               anon_vma_lock_write(vma->anon_vma);
+
        collapse_and_free_pmd(mm, vma, haddr, pmd);
 
+       if (vma->anon_vma)
+               anon_vma_unlock_write(vma->anon_vma);
        i_mmap_unlock_write(vma->vm_file->f_mapping);
 
 maybe_install_pmd:
@@ -1651,7 +1661,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
                 * has higher cost too. It would also probably require locking
                 * the anon_vma.
                 */
-               if (vma->anon_vma) {
+               if (READ_ONCE(vma->anon_vma)) {
                        result = SCAN_PAGE_ANON;
                        goto next;
                }
@@ -1680,6 +1690,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
                if ((cc->is_khugepaged || is_target) &&
                    mmap_write_trylock(mm)) {
                        /*
+                        * Re-check whether we have an ->anon_vma, because
+                        * collapse_and_free_pmd() requires that either no
+                        * ->anon_vma exists or the anon_vma is locked.
+                        * We already checked ->anon_vma above, but that check
+                        * is racy because ->anon_vma can be populated under the
+                        * mmap lock in read mode.
+                        */
+                       if (vma->anon_vma) {
+                               result = SCAN_PAGE_ANON;
+                               goto unlock_next;
+                       }
+                       /*
                         * When a vma is registered with uffd-wp, we can't
                         * recycle the pmd pgtable because there can be pte
                         * markers installed.  Skip it only, so the rest mm/vma
@@ -2590,6 +2612,7 @@ static int madvise_collapse_errno(enum scan_result r)
        case SCAN_CGROUP_CHARGE_FAIL:
                return -EBUSY;
        /* Resource temporary unavailable - trying again might succeed */
+       case SCAN_PAGE_COUNT:
        case SCAN_PAGE_LOCK:
        case SCAN_PAGE_LRU:
        case SCAN_DEL_PAGE_LRU:
@@ -2646,7 +2669,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
                                goto out_nolock;
                        }
 
-                       hend = vma->vm_end & HPAGE_PMD_MASK;
+                       hend = min(hend, vma->vm_end & HPAGE_PMD_MASK);
                }
                mmap_assert_locked(mm);
                memset(cc->node_load, 0, sizeof(cc->node_load));