X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fkhugepaged.c;h=3afcb1466ec51967a819885812fe6a6ebff2a86e;hb=refs%2Fheads%2Fsandbox%2Fm.szyprowski%2Fvirgl;hp=dd069afd9cb9c4ae9876365d9ec189f9e2c242b6;hpb=46c22e7b094f85858f5581bba0e3c59400f53dfa;p=platform%2Fkernel%2Flinux-rpi.git diff --git a/mm/khugepaged.c b/mm/khugepaged.c index dd069af..3afcb14 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1156,6 +1156,7 @@ static void collapse_huge_page(struct mm_struct *mm, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); isolated = __collapse_huge_page_isolate(vma, address, pte, @@ -1442,6 +1443,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) spinlock_t *ptl; int count = 0; int i; + struct mmu_notifier_range range; if (!vma || !vma->vm_file || !range_in_vma(vma, haddr, haddr + HPAGE_PMD_SIZE)) @@ -1468,6 +1470,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!pmd) goto drop_hpage; + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); /* step 1: check all mapped PTEs are to the right huge page */ @@ -1514,12 +1529,23 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) } /* step 4: collapse pmd */ - ptl = pmd_lock(vma->vm_mm, pmd); + /* we make no change to anon, but protect concurrent anon page lookup */ + if (vma->anon_vma) + anon_vma_lock_write(vma->anon_vma); + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr, + haddr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); _pmd = pmdp_collapse_flush(vma, haddr, pmd); - spin_unlock(ptl); mm_dec_nr_ptes(mm); + tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); pte_free(mm, pmd_pgtable(_pmd)); + if (vma->anon_vma) + anon_vma_unlock_write(vma->anon_vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + drop_hpage: unlock_page(hpage); put_page(hpage); @@ -1527,6 +1553,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1575,7 +1602,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) continue; @@ -1597,12 +1625,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (mmap_write_trylock(mm)) { if (!khugepaged_test_exit(mm)) { - spinlock_t *ptl = pmd_lock(mm, pmd); + struct mmu_notifier_range range; + + mmu_notifier_range_init(&range, + MMU_NOTIFY_CLEAR, 0, + NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); - spin_unlock(ptl); mm_dec_nr_ptes(mm); + tlb_remove_table_sync_one(); pte_free(mm, pmd_pgtable(_pmd)); + mmu_notifier_invalidate_range_end(&range); } mmap_write_unlock(mm); } else {