mm/huge_memory: try avoiding write faults when changing PMD protection
author: David Hildenbrand <david@redhat.com>
Tue, 8 Nov 2022 17:46:48 +0000 (18:46 +0100)
committer: Andrew Morton <akpm@linux-foundation.org>
Wed, 30 Nov 2022 23:58:49 +0000 (15:58 -0800)
Let's replicate what we have for PTEs in can_change_pte_writable() also
for PMDs.

While this might look like a pure performance improvement, we'll use this to
get rid of savedwrite handling in do_huge_pmd_numa_page() next. Place
can_change_pmd_writable() strategically for that purpose.

Note that MM_CP_TRY_CHANGE_WRITABLE is currently only set when we come
via mprotect_fixup().

Link: https://lkml.kernel.org/r/20221108174652.198904-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/huge_memory.c

index aba3406..fac917b 100644 (file)
@@ -1390,6 +1390,36 @@ fallback:
        return VM_FAULT_FALLBACK;
 }
 
+static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
+                                          unsigned long addr, pmd_t pmd)
+{
+       struct page *page;
+
+       if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE)))
+               return false;
+
+       /* Don't touch entries that are not even readable (NUMA hinting). */
+       if (pmd_protnone(pmd))
+               return false;
+
+       /* Do we need write faults for softdirty tracking? */
+       if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+               return false;
+
+       /* Do we need write faults for uffd-wp tracking? */
+       if (userfaultfd_huge_pmd_wp(vma, pmd))
+               return false;
+
+       if (!(vma->vm_flags & VM_SHARED)) {
+               /* See can_change_pte_writable(). */
+               page = vm_normal_page_pmd(vma, addr, pmd);
+               return page && PageAnon(page) && PageAnonExclusive(page);
+       }
+
+       /* See can_change_pte_writable(). */
+       return pmd_dirty(pmd);
+}
+
 /* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
 static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
                                        struct vm_area_struct *vma,
@@ -1893,13 +1923,17 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 */
                entry = pmd_clear_uffd_wp(entry);
        }
+
+       /* See change_pte_range(). */
+       if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
+           can_change_pmd_writable(vma, addr, entry))
+               entry = pmd_mkwrite(entry);
+
        ret = HPAGE_PMD_NR;
        set_pmd_at(mm, addr, pmd, entry);
 
        if (huge_pmd_needs_flush(oldpmd, entry))
                tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);
-
-       BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
 unlock:
        spin_unlock(ptl);
        return ret;