mm: migration: fix migration of huge PMD shared pages

author Mike Kravetz <mike.kravetz@oracle.com>

Fri, 5 Oct 2018 22:51:29 +0000 (15:51 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 21 Nov 2018 08:26:03 +0000 (09:26 +0100)
author Mike Kravetz <mike.kravetz@oracle.com>
Fri, 5 Oct 2018 22:51:29 +0000 (15:51 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2018 08:26:03 +0000 (09:26 +0100)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 48c76d612d40ff4d24acd82b0b9cb8083e99af29..b699d59d0f4f94b3a034e8f4e594ad2c89f4eb29 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -109,6 +109,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                         unsigned long addr, unsigned long sz);
  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
  int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end);
  struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                               int write);
  struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
@@ -131,6 +133,18 @@ static inline unsigned long hugetlb_total_pages(void)
         return 0;
  }
  
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+                                               pte_t *ptep)
+{
+       return 0;       /* stub: always reports that no pmd was unshared */
+}
+
+static inline void adjust_range_if_pmd_sharing_possible(
+                               struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{      /* stub: *start and *end are deliberately left unmodified */
+}
+
  #define follow_hugetlb_page(m,v,p,vs,a,b,i,w)  ({ BUG(); 0; })
  #define follow_huge_addr(mm, addr, write)      ERR_PTR(-EINVAL)
  #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 493d07931ea5e45f2f17d8993b10c4c861b486e3..11a5a46ce72be6693c70171b3182591322df2607 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2187,6 +2187,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
         return vma;
  }
  
+static inline bool range_in_vma(struct vm_area_struct *vma,
+                               unsigned long start, unsigned long end)
+{      /* A NULL vma contains nothing; start/end may touch the vma's bounds. */
+       return vma != NULL && start >= vma->vm_start && vma->vm_end >= end;
+}
+
  #ifdef CONFIG_MMU
  pgprot_t vm_get_page_prot(unsigned long vm_flags);
  void vma_set_page_prot(struct vm_area_struct *vma);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 7bd390797092c3a4cf0765e3541765e7ef05a264..5e3a4db36310ca58e068df4ea2be2763b02032be 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4333,12 +4333,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
         /*
          * check on proper vm_flags and page table alignment
          */
-       if (vma->vm_flags & VM_MAYSHARE &&
-           vma->vm_start <= base && end <= vma->vm_end)
+       if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
                 return true;
         return false;
  }
  
+/*
+ * If [*start, *end) within vma could be mapped by shared pmds, widen it.
+ * Only the PUD_SIZE aligned ranges holding the first and the last byte
+ * can move a boundary, and only when they lie entirely inside the vma.
+ * An empty range or a vma without VM_MAYSHARE is left untouched.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+       unsigned long a_start, a_end;
+
+       if (!(vma->vm_flags & VM_MAYSHARE) || *start >= *end)
+               return;
+
+       /*
+        * Derive the tail from the last byte (*end - 1), not by stepping
+        * PUD_SIZE from an unaligned *start: that stride can jump past
+        * *end without ever visiting the PUD_SIZE range *end falls in.
+        */
+       a_start = *start & PUD_MASK;
+       a_end = ((*end - 1) & PUD_MASK) + PUD_SIZE;
+
+       /* A bound moves only if its PUD_SIZE range is fully in the vma. */
+       if (range_in_vma(vma, a_start, a_start + PUD_SIZE))
+               *start = a_start;
+       if (range_in_vma(vma, a_end - PUD_SIZE, a_end))
+               *end = a_end;
+}
+
  /*
   * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
   * and returns the corresponding pte. While this is not necessary for the
@@ -4435,6 +4463,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  {
         return 0;
  }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{      /* no-op: this variant never widens *start or *end */
+}
  #define want_pmd_share()       (0)
  #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
  
diff --git a/mm/rmap.c b/mm/rmap.c

index 94488b0362f891173defb85dd849e14a5a9db4b1..a7276d8c96f33f95ef7743e6e7247ecb83c07861 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1476,6 +1476,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         pte_t pteval;
         spinlock_t *ptl;
         int ret = SWAP_AGAIN;
+       unsigned long sh_address;
+       bool pmd_sharing_possible = false;
+       unsigned long spmd_start, spmd_end;
         struct rmap_private *rp = arg;
         enum ttu_flags flags = rp->flags;
  
@@ -1491,6 +1494,32 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         goto out;
         }
  
+       /*
+        * Only use the range_start/end mmu notifiers if huge pmd sharing
+        * is possible.  In the normal case, mmu_notifier_invalidate_page
+        * is sufficient as we only unmap a page.  However, if we unshare
+        * a pmd, we will unmap a PUD_SIZE range.
+        */
+       if (PageHuge(page)) {
+               /* huge_pmd_unshare() gets &sh_address for every huge page */
+               sh_address = address;
+               spmd_start = address;
+               spmd_end = spmd_start + vma_mmu_pagesize(vma);
+
+               /*
+                * Check if pmd sharing is possible.  If possible, we could
+                * unmap a PUD_SIZE range.  spmd_start/spmd_end will be
+                * modified if sharing is possible.
+                */
+               adjust_range_if_pmd_sharing_possible(vma, &spmd_start,
+                                                               &spmd_end);
+               if (spmd_end - spmd_start != vma_mmu_pagesize(vma)) {
+                       pmd_sharing_possible = true;
+                       mmu_notifier_invalidate_range_start(vma->vm_mm,
+                                                       spmd_start, spmd_end);
+               }
+       }
+
         pte = page_check_address(page, mm, address, &ptl,
                                  PageTransCompound(page));
         if (!pte)
@@ -1524,6 +1553,30 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 }
         }
  
+       /*
+        * Call huge_pmd_unshare to potentially unshare a huge pmd.  Pass
+        * sh_address as it will be modified if unsharing is successful.
+        */
+       if (PageHuge(page) && huge_pmd_unshare(mm, &sh_address, pte)) {
+               /*
+                * huge_pmd_unshare unmapped an entire PMD page.  There is
+                * no way of knowing exactly which PMDs may be cached for
+                * this mm, so flush them all.  spmd_start/spmd_end cover
+                * this PUD_SIZE range.
+                */
+               flush_cache_range(vma, spmd_start, spmd_end);
+               flush_tlb_range(vma, spmd_start, spmd_end);
+
+               /*
+                * The ref count of the PMD page was dropped which is part
+                * of the way map counting is done for shared PMDs.  When
+                * there is no other sharing, huge_pmd_unshare returns false
+                * and we will unmap the actual page and drop map count
+                * to zero.
+                */
+               goto out_unmap;
+       }
+
         /* Nuke the page table entry. */
         flush_cache_page(vma, address, page_to_pfn(page));
         if (should_defer_flush(mm, flags)) {
@@ -1621,6 +1674,9 @@ out_unmap:
         if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
                 mmu_notifier_invalidate_page(mm, address);
  out:
+       if (pmd_sharing_possible)
+               mmu_notifier_invalidate_range_end(vma->vm_mm,
+                                                       spmd_start, spmd_end);
         return ret;
  }
author	Mike Kravetz <mike.kravetz@oracle.com>
	Fri, 5 Oct 2018 22:51:29 +0000 (15:51 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 21 Nov 2018 08:26:03 +0000 (09:26 +0100)
include/linux/hugetlb.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history