diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index dd069af..3afcb14 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1156,6 +1156,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        _pmd = pmdp_collapse_flush(vma, address, pmd);
        spin_unlock(pmd_ptl);
        mmu_notifier_invalidate_range_end(&range);
+       tlb_remove_table_sync_one();
 
        spin_lock(pte_ptl);
        isolated = __collapse_huge_page_isolate(vma, address, pte,
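The hunk above closes a race in collapse_huge_page(): on kernels built with CONFIG_MMU_GATHER_RCU_TABLE_FREE, pmdp_collapse_flush() is not guaranteed to broadcast IPIs, so a concurrent GUP-fast walker may still be traversing the just-detached page table when the PTEs are re-read under pte_ptl. The added tlb_remove_table_sync_one() forces an IPI round-trip so any such walker has finished first. For reference, the helper introduced by the same series is essentially an IPI broadcast; a sketch of its mm/mmu_gather.c implementation:

	static void tlb_remove_table_smp_sync(void *arg)
	{
		/* Simply deliver the interrupt. */
	}

	void tlb_remove_table_sync_one(void)
	{
		/*
		 * Not an RCU grace period, so the page tables cannot be
		 * assumed to be RCU-freed here; it is sufficient for
		 * software page walkers that rely on IRQ disabling.
		 */
		smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	}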
@@ -1442,6 +1443,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
        spinlock_t *ptl;
        int count = 0;
        int i;
+       struct mmu_notifier_range range;
 
        if (!vma || !vma->vm_file ||
            !range_in_vma(vma, haddr, haddr + HPAGE_PMD_SIZE))
@@ -1468,6 +1470,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
        if (!pmd)
                goto drop_hpage;
 
+       /*
+        * We need to lock the mapping so that from here on, only GUP-fast and
+        * hardware page walks can access the parts of the page tables that
+        * we're operating on.
+        */
+       i_mmap_lock_write(vma->vm_file->f_mapping);
+
+       /*
+        * This spinlock should be unnecessary: Nobody else should be accessing
+        * the page tables under spinlock protection here, only
+        * lockless_pages_from_mm() and the hardware page walker can access page
+        * tables while all the high-level locks are held in write mode.
+        */
        start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
 
        /* step 1: check all mapped PTEs are to the right huge page */
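With the i_mmap rwsem held in write mode, rmap-based walkers and concurrent retraction of the same page table are excluded for the rest of the function; only GUP-fast and the hardware walker remain, and those are fenced by the IPI sync added below. Combined with the unlock sites further down, the retraction in collapse_pte_mapped_thp() is now bracketed roughly like this (a condensed sketch; the intermediate PTE-scan steps are elided):

	i_mmap_lock_write(vma->vm_file->f_mapping);	/* block rmap walkers */
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);	/* block anon page lookup */

	/* ... detach the page table, fence GUP-fast, free it ... */

	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	i_mmap_unlock_write(vma->vm_file->f_mapping);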
@@ -1514,12 +1529,23 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
        }
 
        /* step 4: collapse pmd */
-       ptl = pmd_lock(vma->vm_mm, pmd);
+       /* we make no change to anon, but protect concurrent anon page lookup */
+       if (vma->anon_vma)
+               anon_vma_lock_write(vma->anon_vma);
+
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr,
+                               haddr + HPAGE_PMD_SIZE);
+       mmu_notifier_invalidate_range_start(&range);
        _pmd = pmdp_collapse_flush(vma, haddr, pmd);
-       spin_unlock(ptl);
        mm_dec_nr_ptes(mm);
+       tlb_remove_table_sync_one();
+       mmu_notifier_invalidate_range_end(&range);
        pte_free(mm, pmd_pgtable(_pmd));
 
+       if (vma->anon_vma)
+               anon_vma_unlock_write(vma->anon_vma);
+       i_mmap_unlock_write(vma->vm_file->f_mapping);
+
 drop_hpage:
        unlock_page(hpage);
        put_page(hpage);
@@ -1527,6 +1553,7 @@ drop_hpage:
 
 abort:
        pte_unmap_unlock(start_pte, ptl);
+       i_mmap_unlock_write(vma->vm_file->f_mapping);
        goto drop_hpage;
 }
 
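In step 4, the bare pmd_lock()/spin_unlock() pair around pmdp_collapse_flush() is replaced by an MMU-notifier invalidation bracket plus the IPI sync, so secondary MMUs (KVM and other mmu_notifier users) learn that the mapping is going away before the page table is freed; the new unlock in the abort path keeps the i_mmap rwsem balanced on the error exit. The invalidation pattern used at both changed call sites, abstracted (a sketch, with start standing for the PMD-aligned address):

	struct mmu_notifier_range range;

	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
				start, start + HPAGE_PMD_SIZE);
	mmu_notifier_invalidate_range_start(&range);
	/* ... pmdp_collapse_flush(), tlb_remove_table_sync_one() ... */
	mmu_notifier_invalidate_range_end(&range);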
@@ -1575,7 +1602,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                 * An alternative would be drop the check, but check that page
                 * table is clear before calling pmdp_collapse_flush() under
                 * ptl. It has higher chance to recover THP for the VMA, but
-                * has higher cost too.
+                * has higher cost too. It would also probably require locking
+                * the anon_vma.
                 */
                if (vma->anon_vma)
                        continue;
@@ -1597,12 +1625,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                 */
                if (mmap_write_trylock(mm)) {
                        if (!khugepaged_test_exit(mm)) {
-                               spinlock_t *ptl = pmd_lock(mm, pmd);
+                               struct mmu_notifier_range range;
+
+                               mmu_notifier_range_init(&range,
+                                                       MMU_NOTIFY_CLEAR, 0,
+                                                       NULL, mm, addr,
+                                                       addr + HPAGE_PMD_SIZE);
+                               mmu_notifier_invalidate_range_start(&range);
                                /* assume page table is clear */
                                _pmd = pmdp_collapse_flush(vma, addr, pmd);
-                               spin_unlock(ptl);
                                mm_dec_nr_ptes(mm);
+                               tlb_remove_table_sync_one();
                                pte_free(mm, pmd_pgtable(_pmd));
+                               mmu_notifier_invalidate_range_end(&range);
                        }
                        mmap_write_unlock(mm);
                } else {
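retract_page_tables() gets the same treatment: because this hunk runs with the mmap lock held in write mode (via mmap_write_trylock()), the pmd spinlock adds nothing, and the notifier bracket plus tlb_remove_table_sync_one() is what actually fences secondary MMUs and GUP-fast. An IPI suffices because GUP-fast walks page tables with interrupts disabled, so the broadcast cannot complete while any walk is in flight; schematically, the walker side looks like this (condensed from lockless_pages_from_mm() in the v5.10-era mm/gup.c):

	static unsigned long lockless_pages_from_mm(unsigned long start,
						    unsigned long end,
						    unsigned int gup_flags,
						    struct page **pages)
	{
		unsigned long flags;
		int nr_pinned = 0;

		/*
		 * With interrupts disabled, the IPI sent by
		 * tlb_remove_table_sync_one() cannot be delivered, so a
		 * page table observed here cannot be freed and reused
		 * until the walk is done.
		 */
		local_irq_save(flags);
		gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
		local_irq_restore(flags);
		return nr_pinned;
	}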