From a5be621ee2925b6ee2db455c45c2af2d8a195b0c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 8 Jun 2023 18:32:47 -0700 Subject: [PATCH] mm/mremap: retry if either pte_offset_map_*lock() fails MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit move_ptes() return -EAGAIN if pte_offset_map_lock() of old fails, or if pte_offset_map_nolock() of new fails: move_page_tables() retry if so. But that does need a pmd_none() check inside, to stop endless loop when huge shmem is truncated (thank you to syzbot); and move_huge_pmd() must tolerate that a page table might have been allocated there just before (of course it would be more satisfying to remove the empty page table, but this is not a path worth optimizing). Link: https://lkml.kernel.org/r/65e5e84a-f04-947-23f2-b97d3462e1e@google.com Signed-off-by: Hugh Dickins Cc: Alistair Popple Cc: Anshuman Khandual Cc: Axel Rasmussen Cc: Christophe Leroy Cc: Christoph Hellwig Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Kirill A. Shutemov Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Mel Gorman Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Pavel Tatashin Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ralph Campbell Cc: Ryan Roberts Cc: SeongJae Park Cc: Song Liu Cc: Steven Price Cc: Suren Baghdasaryan Cc: Thomas Hellström Cc: Will Deacon Cc: Yang Shi Cc: Yu Zhao Cc: Zack Rusin Signed-off-by: Andrew Morton --- mm/huge_memory.c | 5 +++-- mm/mremap.c | 28 ++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9ccdb3f..e21b3e3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1760,9 +1760,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, /* * The destination pmd shouldn't be established, free_pgtables() - * should have release it. + * should have released it; but move_page_tables() might have already + * inserted a page table, if racing against shmem/file collapse. */ - if (WARN_ON(!pmd_none(*new_pmd))) { + if (!pmd_none(*new_pmd)) { VM_BUG_ON(pmd_trans_huge(*new_pmd)); return false; } diff --git a/mm/mremap.c b/mm/mremap.c index da107f2..bfc3d19 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -133,7 +133,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) return pte; } -static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, +static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, unsigned long new_addr, bool need_rmap_locks) @@ -143,6 +143,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, spinlock_t *old_ptl, *new_ptl; bool force_flush = false; unsigned long len = old_end - old_addr; + int err = 0; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -170,8 +171,16 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, * pte locks because exclusive mmap_lock prevents deadlock. */ old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl); - new_pte = pte_offset_map(new_pmd, new_addr); - new_ptl = pte_lockptr(mm, new_pmd); + if (!old_pte) { + err = -EAGAIN; + goto out; + } + new_pte = pte_offset_map_nolock(mm, new_pmd, new_addr, &new_ptl); + if (!new_pte) { + pte_unmap_unlock(old_pte, old_ptl); + err = -EAGAIN; + goto out; + } if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); flush_tlb_batched_pending(vma->vm_mm); @@ -208,8 +217,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, spin_unlock(new_ptl); pte_unmap(new_pte - 1); pte_unmap_unlock(old_pte - 1, old_ptl); +out: if (need_rmap_locks) drop_rmap_locks(vma); + return err; } #ifndef arch_supports_page_table_move @@ -537,6 +548,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr); if (!new_pmd) break; +again: if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) { if (extent == HPAGE_PMD_SIZE && @@ -544,8 +556,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, old_pmd, new_pmd, need_rmap_locks)) continue; split_huge_pmd(vma, old_pmd, old_addr); - if (pmd_trans_unstable(old_pmd)) - continue; } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && extent == PMD_SIZE) { /* @@ -556,11 +566,13 @@ unsigned long move_page_tables(struct vm_area_struct *vma, old_pmd, new_pmd, true)) continue; } - + if (pmd_none(*old_pmd)) + continue; if (pte_alloc(new_vma->vm_mm, new_pmd)) break; - move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, - new_pmd, new_addr, need_rmap_locks); + if (move_ptes(vma, old_pmd, old_addr, old_addr + extent, + new_vma, new_pmd, new_addr, need_rmap_locks) < 0) + goto again; } mmu_notifier_invalidate_range_end(&range); -- 2.7.4