mm/rmap: always do TTU_IGNORE_ACCESS
authorShakeel Butt <shakeelb@google.com>
Tue, 15 Dec 2020 03:06:39 +0000 (19:06 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Dec 2020 10:53:55 +0000 (11:53 +0100)
[ Upstream commit 013339df116c2ee0d796dd8bfb8f293a2030c063 ]

Since commit 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic
v2"), the code to check the secondary MMU's page table access bit is
broken for !(TTU_IGNORE_ACCESS) because the page is unmapped from the
secondary MMU's page table before the check.  More specifically for those
secondary MMUs which unmap the memory in
mmu_notifier_invalidate_range_start() like kvm.

However memory reclaim is the only user of !(TTU_IGNORE_ACCESS) or the
absence of TTU_IGNORE_ACCESS and it explicitly performs the page table
access check before trying to unmap the page.  So, at worst the reclaim
will miss accesses in a very short window if we remove page table access
check in unmapping code.

There is an unintented consequence of !(TTU_IGNORE_ACCESS) for the memcg
reclaim.  From memcg reclaim the page_referenced() only account the
accesses from the processes which are in the same memcg of the target page
but the unmapping code is considering accesses from all the processes, so,
decreasing the effectiveness of memcg reclaim.

The simplest solution is to always assume TTU_IGNORE_ACCESS in unmapping
code.

Link: https://lkml.kernel.org/r/20201104231928.1494083-1-shakeelb@google.com
Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2")
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
include/linux/rmap.h
mm/huge_memory.c
mm/memory-failure.c
mm/memory_hotplug.c
mm/migrate.c
mm/rmap.c
mm/vmscan.c

index 3a6adfa70fb0ef077e474d1d7af2a30aa20a59cc..70085ca1a3fc9dc69c514741cb0ec8293acb1317 100644 (file)
@@ -91,7 +91,6 @@ enum ttu_flags {
 
        TTU_SPLIT_HUGE_PMD      = 0x4,  /* split huge PMD if any */
        TTU_IGNORE_MLOCK        = 0x8,  /* ignore mlock */
-       TTU_IGNORE_ACCESS       = 0x10, /* don't age */
        TTU_IGNORE_HWPOISON     = 0x20, /* corrupted page is recoverable */
        TTU_BATCH_FLUSH         = 0x40, /* Batch TLB flushes where possible
                                         * and caller guarantees they will
index ec2bb93f7431438aa4d3f9d44bd5b02c3279e6b9..85eda66eb625dbc5af87b823de235878c9156527 100644 (file)
@@ -2321,7 +2321,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
                TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
        bool unmap_success;
 
index 5d880d4eb9a2678a557d7ad2c1d08358e65f0088..71295bb984af6f1ca4a82fa277d2b355a9568a29 100644 (file)
@@ -989,7 +989,7 @@ static int get_hwpoison_page(struct page *page)
 static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
                                  int flags, struct page **hpagep)
 {
-       enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       enum ttu_flags ttu = TTU_IGNORE_MLOCK;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        bool unmap_success = true;
index 63b2e46b65552bb711afb92aa06b19c900122117..0f855deea4b2d4367885ac196f27732049514657 100644 (file)
@@ -1304,7 +1304,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                        if (WARN_ON(PageLRU(page)))
                                isolate_lru_page(page);
                        if (page_mapped(page))
-                               try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
+                               try_to_unmap(page, TTU_IGNORE_MLOCK);
                        continue;
                }
 
index 5795cb82e27c35ceb0df155165f91ef467ebcfbf..8ea0c65f10756efeb3d2e1cc9e02945dd95b37aa 100644 (file)
@@ -1122,8 +1122,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                /* Establish migration ptes */
                VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
                                page);
-               try_to_unmap(page,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+               try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
                page_was_mapped = 1;
        }
 
@@ -1329,8 +1328,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
        if (page_mapped(hpage)) {
                bool mapping_locked = false;
-               enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK|
-                                       TTU_IGNORE_ACCESS;
+               enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
 
                if (!PageAnon(hpage)) {
                        /*
@@ -2688,7 +2686,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
  */
 static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
-       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
        const unsigned long npages = migrate->npages;
        const unsigned long start = migrate->start;
        unsigned long addr, i, restore = 0;
index 31b29321adfe1e3bd1b804ea2f0c910d47f52693..6657000b18d41bf5db76845401233d22ec8fd3aa 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1533,15 +1533,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        goto discard;
                }
 
-               if (!(flags & TTU_IGNORE_ACCESS)) {
-                       if (ptep_clear_flush_young_notify(vma, address,
-                                               pvmw.pte)) {
-                               ret = false;
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
-                       }
-               }
-
                /* Nuke the page table entry. */
                flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
                if (should_defer_flush(mm, flags)) {
index 7b4e31eac2cff1bfdb80fe9468df0db23e29336d..0ec6321e98878ca6f62c73234d27f2bdd7166331 100644 (file)
@@ -1072,7 +1072,6 @@ static void page_check_dirty_writeback(struct page *page,
 static unsigned int shrink_page_list(struct list_head *page_list,
                                     struct pglist_data *pgdat,
                                     struct scan_control *sc,
-                                    enum ttu_flags ttu_flags,
                                     struct reclaim_stat *stat,
                                     bool ignore_references)
 {
@@ -1297,7 +1296,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
                 * processes. Try to unmap it here.
                 */
                if (page_mapped(page)) {
-                       enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;
+                       enum ttu_flags flags = TTU_BATCH_FLUSH;
                        bool was_swapbacked = PageSwapBacked(page);
 
                        if (unlikely(PageTransHuge(page)))
@@ -1514,7 +1513,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
        }
 
        nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
-                       TTU_IGNORE_ACCESS, &stat, true);
+                                       &stat, true);
        list_splice(&clean_pages, page_list);
        mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
                            -(long)nr_reclaimed);
@@ -1958,8 +1957,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        if (nr_taken == 0)
                return 0;
 
-       nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, 0,
-                               &stat, false);
+       nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, &stat, false);
 
        spin_lock_irq(&pgdat->lru_lock);
 
@@ -2131,8 +2129,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
 
                nr_reclaimed += shrink_page_list(&node_page_list,
                                                NODE_DATA(nid),
-                                               &sc, 0,
-                                               &dummy_stat, false);
+                                               &sc, &dummy_stat, false);
                while (!list_empty(&node_page_list)) {
                        page = lru_to_page(&node_page_list);
                        list_del(&page->lru);
@@ -2145,8 +2142,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
        if (!list_empty(&node_page_list)) {
                nr_reclaimed += shrink_page_list(&node_page_list,
                                                NODE_DATA(nid),
-                                               &sc, 0,
-                                               &dummy_stat, false);
+                                               &sc, &dummy_stat, false);
                while (!list_empty(&node_page_list)) {
                        page = lru_to_page(&node_page_list);
                        list_del(&page->lru);