mm: incorporate zero pages into transparent huge pages
authorEbru Akagunduz <ebru.akagunduz@gmail.com>
Tue, 14 Apr 2015 22:45:24 +0000 (15:45 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 14 Apr 2015 23:49:01 +0000 (16:49 -0700)
This patch improves THP collapse rates, by allowing zero pages.

Currently THP can collapse 4kB pages into a THP when there are up to
khugepaged_max_ptes_none pte_none ptes in a 2MB range.  This patch counts
pte none and mapped zero pages with the same variable.

The patch was tested with a program that allocates 800MB of
memory, and performs interleaved reads and writes, in a pattern
that causes some 2MB areas to first see read accesses, resulting
in the zero pfn being mapped there.

To simulate memory fragmentation at allocation time, I modified
do_huge_pmd_anonymous_page to return VM_FAULT_FALLBACK for read faults.

Without the patch, only %50 of the program was collapsed into THP and the
percentage did not increase over time.

With this patch after 10 minutes of waiting khugepaged had collapsed %99
of the program's memory.

[aarcange@redhat.com: fix bogus BUG()]
Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/huge_memory.c

index 10a4b6c..6352c1d 100644 (file)
@@ -2109,7 +2109,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte)
 {
        while (--_pte >= pte) {
                pte_t pteval = *_pte;
-               if (!pte_none(pteval))
+               if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval)))
                        release_pte_page(pte_page(pteval));
        }
 }
@@ -2120,13 +2120,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 {
        struct page *page;
        pte_t *_pte;
-       int none = 0;
+       int none_or_zero = 0;
        bool referenced = false, writable = false;
        for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
             _pte++, address += PAGE_SIZE) {
                pte_t pteval = *_pte;
-               if (pte_none(pteval)) {
-                       if (++none <= khugepaged_max_ptes_none)
+               if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+                       if (++none_or_zero <= khugepaged_max_ptes_none)
                                continue;
                        else
                                goto out;
@@ -2207,9 +2207,21 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                pte_t pteval = *_pte;
                struct page *src_page;
 
-               if (pte_none(pteval)) {
+               if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
                        clear_user_highpage(page, address);
                        add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
+                       if (is_zero_pfn(pte_pfn(pteval))) {
+                               /*
+                                * ptl mostly unnecessary.
+                                */
+                               spin_lock(ptl);
+                               /*
+                                * paravirt calls inside pte_clear here are
+                                * superfluous.
+                                */
+                               pte_clear(vma->vm_mm, address, _pte);
+                               spin_unlock(ptl);
+                       }
                } else {
                        src_page = pte_page(pteval);
                        copy_user_highpage(page, src_page, address, vma);
@@ -2543,7 +2555,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 {
        pmd_t *pmd;
        pte_t *pte, *_pte;
-       int ret = 0, none = 0;
+       int ret = 0, none_or_zero = 0;
        struct page *page;
        unsigned long _address;
        spinlock_t *ptl;
@@ -2561,8 +2573,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
        for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
             _pte++, _address += PAGE_SIZE) {
                pte_t pteval = *_pte;
-               if (pte_none(pteval)) {
-                       if (++none <= khugepaged_max_ptes_none)
+               if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+                       if (++none_or_zero <= khugepaged_max_ptes_none)
                                continue;
                        else
                                goto out_unmap;