BACKPORT: mm: multi-gen LRU: exploit locality in rmap
[platform/kernel/linux-rpi.git] mm/vmscan.c
index b10eda9..bac5931 100644
@@ -1124,6 +1124,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                if (!sc->may_unmap && page_mapped(page))
                        goto keep_locked;
 
+               /* page_update_gen() tried to promote this page? */
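+               /* (it sets PG_referenced when the page is already off its generation list) */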
+               if (lru_gen_enabled() && !ignore_references &&
+                   page_mapped(page) && PageReferenced(page))
+                       goto keep_locked;
+
                may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
                        (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
@@ -2699,6 +2704,29 @@ static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
  *                          the aging
  ******************************************************************************/
 
+/* promote pages accessed through page tables */
+static int page_update_gen(struct page *page, int gen)
+{
+       unsigned long new_flags, old_flags = READ_ONCE(page->flags);
+
+       VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+       VM_WARN_ON_ONCE(!rcu_read_lock_held());
+
+       do {
+               /* lru_gen_del_page() has isolated this page? */
+               if (!(old_flags & LRU_GEN_MASK)) {
+                       /* for shrink_page_list() */
+                       new_flags = old_flags | BIT(PG_referenced);
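+                       /* "continue" still reaches the cmpxchg in the loop condition below */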
+                       continue;
+               }
+
+               new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
+               new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
+       } while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
+
+       return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
 /* protect pages accessed multiple times through file descriptors */
 static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
 {
@@ -2710,6 +2738,11 @@ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
        VM_WARN_ON_ONCE_PAGE(!(old_flags & LRU_GEN_MASK), page);
 
        do {
+               new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+               /* page_update_gen() has promoted this page? */
+               if (new_gen >= 0 && new_gen != old_gen)
+                       return new_gen;
+
                new_gen = (old_gen + 1) % MAX_NR_GENS;
 
                new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
@@ -2724,6 +2757,43 @@ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
        return new_gen;
 }
 
+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
+{
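+       /* return -1 for PTEs that do not map an ordinary, present page */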
+       unsigned long pfn = pte_pfn(pte);
+
+       VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
+
+       if (!pte_present(pte) || is_zero_pfn(pfn))
+               return -1;
+
+       if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
+               return -1;
+
+       if (WARN_ON_ONCE(!pfn_valid(pfn)))
+               return -1;
+
+       return pfn;
+}
+
+static struct page *get_pfn_page(unsigned long pfn, struct mem_cgroup *memcg,
+                                struct pglist_data *pgdat)
+{
+       struct page *page;
+
+       /* try to avoid unnecessary memory loads */
+       if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+               return NULL;
+
+       page = compound_head(pfn_to_page(pfn));
+       if (page_to_nid(page) != pgdat->node_id)
+               return NULL;
+
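+       /* skip pages charged to a different memcg; they live on a different lruvec */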
+       if (page_memcg_rcu(page) != memcg)
+               return NULL;
+
+       return page;
+}
+
 static void inc_min_seq(struct lruvec *lruvec, int type)
 {
        struct lru_gen_struct *lrugen = &lruvec->lrugen;
@@ -2923,6 +2993,114 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
        } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
 }
 
+/*
+ * This function exploits spatial locality when shrink_page_list() walks the
+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
+ */
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+{
+       int i;
+       pte_t *pte;
+       unsigned long start;
+       unsigned long end;
+       unsigned long addr;
+       unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
+       struct page *page = pvmw->page;
+       struct mem_cgroup *memcg = page_memcg(page);
+       struct pglist_data *pgdat = page_pgdat(page);
+       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+       DEFINE_MAX_SEQ(lruvec);
+       int old_gen, new_gen = lru_gen_from_seq(max_seq);
+
+       lockdep_assert_held(pvmw->ptl);
+       VM_WARN_ON_ONCE_PAGE(PageLRU(page), page);
+
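+       /* give up if others are spinning on the PTL rather than prolonging the hold time */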
+       if (spin_is_contended(pvmw->ptl))
+               return;
+
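+       /* limit the scan to the PMD, the VMA and MIN_LRU_BATCH PTEs around pvmw->address */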
+       start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
+       end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
+
+       if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
+               if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
+                       end = start + MIN_LRU_BATCH * PAGE_SIZE;
+               else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
+                       start = end - MIN_LRU_BATCH * PAGE_SIZE;
+               else {
+                       start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
+                       end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
+               }
+       }
+
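+       /* rewind the PTE pointer so that pte[0] maps "start" */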
+       pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
+
+       rcu_read_lock();
+       arch_enter_lazy_mmu_mode();
+
+       for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
+               unsigned long pfn;
+
+               pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
+               if (pfn == -1)
+                       continue;
+
+               if (!pte_young(pte[i]))
+                       continue;
+
+               page = get_pfn_page(pfn, memcg, pgdat);
+               if (!page)
+                       continue;
+
+               if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
+                       VM_WARN_ON_ONCE(true);
+
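+               /* carry the PTE dirty bit over, except to anon pages not in the swap cache */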
+               if (pte_dirty(pte[i]) && !PageDirty(page) &&
+                   !(PageAnon(page) && PageSwapBacked(page) &&
+                     !PageSwapCache(page)))
+                       set_page_dirty(page);
+
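+               /* pages off the LRU get PG_referenced; pages in an older generation are batched below */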
+               old_gen = page_lru_gen(page);
+               if (old_gen < 0)
+                       SetPageReferenced(page);
+               else if (old_gen != new_gen)
+                       __set_bit(i, bitmap);
+       }
+
+       arch_leave_lazy_mmu_mode();
+       rcu_read_unlock();
+
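+       /* promote a small batch via activate_page() instead of taking the LRU lock below */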
+       if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
+               for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+                       page = pte_page(pte[i]);
+                       activate_page(page);
+               }
+               return;
+       }
+
+       /* page_update_gen() requires stable page_memcg() */
+       if (!mem_cgroup_trylock_pages(memcg))
+               return;
+
+       spin_lock_irq(&lruvec_pgdat(lruvec)->lru_lock);
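+       /* max_seq may have advanced since it was sampled above; use the current value */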
+       new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
+
+       for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+               page = compound_head(pte_page(pte[i]));
+               if (page_memcg_rcu(page) != memcg)
+                       continue;
+
+               old_gen = page_update_gen(page, new_gen);
+               if (old_gen < 0 || old_gen == new_gen)
+                       continue;
+
+               lru_gen_update_size(lruvec, page, old_gen, new_gen);
+       }
+
+       spin_unlock_irq(&lruvec_pgdat(lruvec)->lru_lock);
+
+       mem_cgroup_unlock_pages();
+}
+
 /******************************************************************************
  *                          the eviction
  ******************************************************************************/
@@ -2961,6 +3139,12 @@ static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
                return true;
        }
 
+       /* promoted */
+       if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
+               list_move(&page->lru, &lrugen->lists[gen][type][zone]);
+               return true;
+       }
+
        /* protected */
        if (tier > tier_idx) {
                int hist = lru_hist_from_seq(lrugen->min_seq[type]);