mm: numa: avoid unnecessary work on the failure path
diff --git a/mm/migrate.c b/mm/migrate.c
index c046927..a987525 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
 #include <linux/balloon_compaction.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 
@@ -130,7 +131,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
                ptep = huge_pte_offset(mm, addr);
                if (!ptep)
                        goto out;
-               ptl = &mm->page_table_lock;
+               ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
        } else {
                pmd = mm_find_pmd(mm, addr);
                if (!pmd)
@@ -249,9 +250,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
        __migration_entry_wait(mm, ptep, ptl);
 }
 
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+               struct mm_struct *mm, pte_t *pte)
 {
-       spinlock_t *ptl = &(mm)->page_table_lock;
+       spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
        __migration_entry_wait(mm, pte, ptl);
 }
 
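For context, huge_pte_lockptr() is the helper introduced by the hugetlb split page table lock series that the two hunks above now rely on. A minimal sketch of its behaviour (paraphrased from include/linux/hugetlb.h, shown for reference only, not part of this diff):

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	/* PMD-sized huge pages can use the per-pmd split lock... */
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	/* ...while larger (gigantic) pages keep the mm-wide lock. */
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

In other words, the lock taken while waiting on a hugetlb migration entry now matches the lock taken by the hugetlb fault path, instead of unconditionally being mm->page_table_lock.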
@@ -441,10 +443,60 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 }
 
 /*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+                               int nr_pages)
+{
+       int i;
+       struct page *dst_base = dst;
+       struct page *src_base = src;
+
+       for (i = 0; i < nr_pages; ) {
+               cond_resched();
+               copy_highpage(dst, src);
+
+               i++;
+               dst = mem_map_next(dst, dst_base, i);
+               src = mem_map_next(src, src_base, i);
+       }
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+       int i;
+       int nr_pages;
+
+       if (PageHuge(src)) {
+               /* hugetlbfs page */
+               struct hstate *h = page_hstate(src);
+               nr_pages = pages_per_huge_page(h);
+
+               if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+                       __copy_gigantic_page(dst, src, nr_pages);
+                       return;
+               }
+       } else {
+               /* thp page */
+               BUG_ON(!PageTransHuge(src));
+               nr_pages = hpage_nr_pages(src);
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               cond_resched();
+               copy_highpage(dst + i, src + i);
+       }
+}
+
+/*
  * Copy the page to its new location
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
+       int cpupid;
+
        if (PageHuge(page) || PageTransHuge(page))
                copy_huge_page(newpage, page);
        else
@@ -481,6 +533,13 @@ void migrate_page_copy(struct page *newpage, struct page *page)
                        __set_page_dirty_nobuffers(newpage);
        }
 
+       /*
+        * Copy NUMA information to the new page, to prevent over-eager
+        * future migrations of this same page.
+        */
+       cpupid = page_cpupid_xchg_last(page, -1);
+       page_cpupid_xchg_last(newpage, cpupid);
+
        mlock_migrate_page(newpage, page);
        ksm_migrate_page(newpage, page);
        /*
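The discontiguity that __copy_gigantic_page() has to cope with is hidden behind mem_map_next(). Roughly (paraphrased from mm/internal.h, for reference only), the iterator revalidates the pfn whenever it crosses a MAX_ORDER boundary instead of trusting plain page++ arithmetic:

static inline struct page *mem_map_next(struct page *iter,
					struct page *base, int offset)
{
	/* struct pages may not be contiguous across a MAX_ORDER boundary. */
	if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
		unsigned long pfn = page_to_pfn(base) + offset;

		if (!pfn_valid(pfn))
			return NULL;
		return pfn_to_page(pfn);
	}
	return iter + 1;
}

This is why copy_huge_page() only takes the simple dst + i / src + i loop for pages no larger than MAX_ORDER_NR_PAGES.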
@@ -1500,7 +1559,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
                                          __GFP_NOWARN) &
                                         ~GFP_IOFS, 0);
        if (newpage)
-               page_nid_xchg_last(newpage, page_nid_last(page));
+               page_cpupid_xchg_last(newpage, page_cpupid_last(page));
 
        return newpage;
 }
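page_cpupid_xchg_last()/page_cpupid_last() replace the older page_nid_*() interface: the page now remembers the last CPU and PID that faulted on it, packed into a single value, so NUMA balancing can tell private from shared accesses. A sketch of the packing helpers (paraphrased from include/linux/mm.h; the LAST__* masks come from the cpupid series, not this diff):

static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
	return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
}

static inline int cpupid_to_cpu(int cpupid)
{
	return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
}

static inline int cpupid_to_pid(int cpupid)
{
	return cpupid & LAST__PID_MASK;
}

Carrying the cpupid over to the destination page preserves that history across the migration, so the new page is not treated as if it had never been accessed.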
@@ -1601,7 +1660,8 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
  * node. Caller is expected to have an elevated reference count on
  * the page that will be dropped by this function before returning.
  */
-int migrate_misplaced_page(struct page *page, int node)
+int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+                          int node)
 {
        pg_data_t *pgdat = NODE_DATA(node);
        int isolated;
@@ -1609,10 +1669,11 @@ int migrate_misplaced_page(struct page *page, int node)
        LIST_HEAD(migratepages);
 
        /*
-        * Don't migrate pages that are mapped in multiple processes.
-        * TODO: Handle false sharing detection instead of this hammer
+        * Don't migrate file pages that are mapped in multiple processes
+        * with execute permissions as they are probably shared libraries.
         */
-       if (page_mapcount(page) != 1)
+       if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
+           (vma->vm_flags & VM_EXEC))
                goto out;
 
        /*
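Since migrate_misplaced_page() now takes the faulting vma, its callers need updating as well; the do_numa_page() call site in mm/memory.c ends up looking roughly like the following (paraphrased for illustration, not part of this diff):

	/* Migrate to the node that raised the NUMA hinting fault. */
	migrated = migrate_misplaced_page(page, vma, target_nid);
	if (migrated)
		page_nid = target_nid;

The vma is what lets the VM_EXEC heuristic above distinguish shared-library text, which is left alone, from genuinely misplaced private data.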
@@ -1655,19 +1716,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                                unsigned long address,
                                struct page *page, int node)
 {
-       unsigned long haddr = address & HPAGE_PMD_MASK;
+       spinlock_t *ptl;
        pg_data_t *pgdat = NODE_DATA(node);
        int isolated = 0;
        struct page *new_page = NULL;
        struct mem_cgroup *memcg = NULL;
        int page_lru = page_is_file_cache(page);
-
-       /*
-        * Don't migrate pages that are mapped in multiple processes.
-        * TODO: Handle false sharing detection instead of this hammer
-        */
-       if (page_mapcount(page) != 1)
-               goto out_dropref;
+       unsigned long mmun_start = address & HPAGE_PMD_MASK;
+       unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+       pmd_t orig_entry;
 
        /*
         * Rate-limit the amount of data that is being migrated to a node.
@@ -1682,7 +1739,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        if (!new_page)
                goto out_fail;
 
-       page_nid_xchg_last(new_page, page_nid_last(page));
+       page_cpupid_xchg_last(new_page, page_cpupid_last(page));
 
        isolated = numamigrate_isolate_page(pgdat, page);
        if (!isolated) {
@@ -1701,9 +1758,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        WARN_ON(PageLRU(new_page));
 
        /* Recheck the target PMD */
-       spin_lock(&mm->page_table_lock);
-       if (unlikely(!pmd_same(*pmd, entry))) {
-               spin_unlock(&mm->page_table_lock);
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+       ptl = pmd_lock(mm, pmd);
+       if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
+               spin_unlock(ptl);
+               mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
                /* Reverse changes made by migrate_page_copy() */
                if (TestClearPageActive(new_page))
@@ -1720,7 +1780,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                putback_lru_page(page);
                mod_zone_page_state(page_zone(page),
                         NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
-               goto out_fail;
+
+               goto out_unlock;
        }
 
        /*
@@ -1732,23 +1793,43 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         */
        mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+       orig_entry = *pmd;
        entry = mk_pmd(new_page, vma->vm_page_prot);
-       entry = pmd_mknonnuma(entry);
-       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
        entry = pmd_mkhuge(entry);
+       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-       pmdp_clear_flush(vma, haddr, pmd);
-       set_pmd_at(mm, haddr, pmd, entry);
-       page_add_new_anon_rmap(new_page, vma, haddr);
+       /*
+        * Clear the old entry under pagetable lock and establish the new PTE.
+        * Any parallel GUP will either observe the old page blocking on the
+        * page lock, block on the page table lock or observe the new page.
+        * The SetPageUptodate on the new page and page_add_new_anon_rmap
+        * guarantee the copy is visible before the pagetable update.
+        */
+       flush_cache_range(vma, mmun_start, mmun_end);
+       page_add_new_anon_rmap(new_page, vma, mmun_start);
+       pmdp_clear_flush(vma, mmun_start, pmd);
+       set_pmd_at(mm, mmun_start, pmd, entry);
+       flush_tlb_range(vma, mmun_start, mmun_end);
        update_mmu_cache_pmd(vma, address, &entry);
+
+       if (page_count(page) != 2) {
+               set_pmd_at(mm, mmun_start, pmd, orig_entry);
+               flush_tlb_range(vma, mmun_start, mmun_end);
+               update_mmu_cache_pmd(vma, address, &entry);
+               page_remove_rmap(new_page);
+               goto fail_putback;
+       }
+
        page_remove_rmap(page);
+
        /*
         * Finish the charge transaction under the page table lock to
         * prevent split_huge_page() from dividing up the charge
         * before it's fully transferred to the new page.
         */
        mem_cgroup_end_migration(memcg, page, new_page, true);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
        unlock_page(new_page);
        unlock_page(page);
@@ -1766,10 +1847,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
        count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
-       entry = pmd_mknonnuma(entry);
-       set_pmd_at(mm, haddr, pmd, entry);
-       update_mmu_cache_pmd(vma, address, &entry);
+       ptl = pmd_lock(mm, pmd);
+       if (pmd_same(*pmd, entry)) {
+               entry = pmd_mknonnuma(entry);
+               set_pmd_at(mm, mmun_start, pmd, entry);
+               update_mmu_cache_pmd(vma, address, &entry);
+       }
+       spin_unlock(ptl);
 
+out_unlock:
        unlock_page(page);
        put_page(page);
        return 0;