X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=0dfb9a75dfa6d00dafbd2f9d1253f7fe57a2aa8d;hb=24d86b009af62566a22cc58ffb3a099bca4b1ce7;hp=6f13394b112eaea798ca50ff97fe5efa52747a3e;hpb=7cf4bea77ab60742c128c2ceb4b1b8078887b823;p=platform%2Fkernel%2Flinux-rpi.git diff --git a/mm/vmscan.c b/mm/vmscan.c index 6f13394..0dfb9a7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4656,6 +4656,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) int young = 0; pte_t *pte = pvmw->pte; unsigned long addr = pvmw->address; + struct vm_area_struct *vma = pvmw->vma; struct folio *folio = pfn_folio(pvmw->pfn); bool can_swap = !folio_is_file_lru(folio); struct mem_cgroup *memcg = folio_memcg(folio); @@ -4670,11 +4671,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) if (spin_is_contended(pvmw->ptl)) return; + /* exclude special VMAs containing anon pages from COW */ + if (vma->vm_flags & VM_SPECIAL) + return; + /* avoid taking the LRU lock under the PTL when possible */ walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; - start = max(addr & PMD_MASK, pvmw->vma->vm_start); - end = min(addr | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1; + start = max(addr & PMD_MASK, vma->vm_start); + end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1; if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2) @@ -4699,7 +4704,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) unsigned long pfn; pte_t ptent = ptep_get(pte + i); - pfn = get_pte_pfn(ptent, pvmw->vma, addr); + pfn = get_pte_pfn(ptent, vma, addr); if (pfn == -1) continue; @@ -4710,7 +4715,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) if (!folio) continue; - if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i)) + if (!ptep_test_and_clear_young(vma, addr, pte + i)) VM_WARN_ON_ONCE(true); young++; @@ -4790,6 +4795,9 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) else VM_WARN_ON_ONCE(true); + WRITE_ONCE(lruvec->lrugen.seg, seg); + WRITE_ONCE(lruvec->lrugen.gen, new); + hlist_nulls_del_rcu(&lruvec->lrugen.list); if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) @@ -4800,9 +4808,6 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) pgdat->memcg_lru.nr_memcgs[old]--; pgdat->memcg_lru.nr_memcgs[new]++; - lruvec->lrugen.gen = new; - WRITE_ONCE(lruvec->lrugen.seg, seg); - if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); @@ -4825,11 +4830,11 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg) gen = get_memcg_gen(pgdat->memcg_lru.seq); + lruvec->lrugen.gen = gen; + hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); pgdat->memcg_lru.nr_memcgs[gen]++; - lruvec->lrugen.gen = gen; - spin_unlock_irq(&pgdat->memcg_lru.lock); } } @@ -4933,7 +4938,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* protected */ - if (tier > tier_idx) { + if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) { int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); @@ -5291,7 +5296,12 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, } /* try to scrape all its memory if this memcg was deleted */ - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; + if (!mem_cgroup_online(memcg)) { + *nr_to_scan = total; + return false; + } + + *nr_to_scan = total >> sc->priority; /* * The aging tries to be lazy to reduce the overhead, while the eviction @@ -5328,7 +5338,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool DEFINE_MAX_SEQ(lruvec); if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) - return 0; + return -1; if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) return nr_to_scan; @@ -5341,20 +5351,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0; } -static unsigned long get_nr_to_reclaim(struct scan_control *sc) +static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc) { + int i; + enum zone_watermarks mark; + /* don't abort memcg reclaim to ensure fairness */ if (!root_reclaim(sc)) - return -1; + return false; + + if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order))) + return true; + + /* check the order to exclude compaction-induced reclaim */ + if (!current_is_kswapd() || sc->order) + return false; + + mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ? + WMARK_PROMO : WMARK_HIGH; - return max(sc->nr_to_reclaim, compact_gap(sc->order)); + for (i = 0; i <= sc->reclaim_idx; i++) { + struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i; + unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH; + + if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0)) + return false; + } + + /* kswapd should abort if all eligible zones are safe */ + return true; } static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { long nr_to_scan; unsigned long scanned = 0; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); int swappiness = get_swappiness(lruvec, sc); /* clean file folios are more likely to exist */ @@ -5376,13 +5407,13 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) if (scanned >= nr_to_scan) break; - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; cond_resched(); } - /* whether try_to_inc_max_seq() was successful */ + /* whether this lruvec should be rotated */ return nr_to_scan < 0; } @@ -5391,14 +5422,9 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) bool success; unsigned long scanned = sc->nr_scanned; unsigned long reclaimed = sc->nr_reclaimed; - int seg = lru_gen_memcg_seg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - /* see the comment on MEMCG_NR_GENS */ - if (!lruvec_is_sizable(lruvec, sc)) - return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; - mem_cgroup_calculate_protection(NULL, memcg); if (mem_cgroup_below_min(NULL, memcg)) @@ -5406,7 +5432,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) if (mem_cgroup_below_low(NULL, memcg)) { /* see the comment on MEMCG_NR_GENS */ - if (seg != MEMCG_LRU_TAIL) + if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL) return MEMCG_LRU_TAIL; memcg_memory_event(memcg, MEMCG_LOW); @@ -5422,7 +5448,15 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) flush_reclaim_state(sc); - return success ? MEMCG_LRU_YOUNG : 0; + if (success && mem_cgroup_online(memcg)) + return MEMCG_LRU_YOUNG; + + if (!success && lruvec_is_sizable(lruvec, sc)) + return 0; + + /* one retry if offlined or too small */ + return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ? + MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; } #ifdef CONFIG_MEMCG @@ -5436,14 +5470,13 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) struct lruvec *lruvec; struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; - const struct hlist_nulls_node *pos; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); + struct hlist_nulls_node *pos; + gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: op = 0; memcg = NULL; - gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); rcu_read_lock(); @@ -5454,6 +5487,10 @@ restart: } mem_cgroup_put(memcg); + memcg = NULL; + + if (gen != READ_ONCE(lrugen->gen)) + continue; lruvec = container_of(lrugen, struct lruvec, lrugen); memcg = lruvec_memcg(lruvec); @@ -5470,7 +5507,7 @@ restart: rcu_read_lock(); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; } @@ -5481,7 +5518,7 @@ restart: mem_cgroup_put(memcg); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (!is_a_nulls(pos)) return; /* restart if raced with lru_gen_rotate_memcg() */ @@ -5538,16 +5575,14 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) return; /* - * Determine the initial priority based on ((total / MEMCG_NR_GENS) >> - * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the - * estimated reclaimed_to_scanned_ratio = inactive / total. + * Determine the initial priority based on + * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, + * where reclaimed_to_scanned_ratio = inactive / total. */ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_swappiness(lruvec, sc)) reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); - reclaimable /= MEMCG_NR_GENS; - /* round down reclaimable and round up sc->nr_to_reclaim */ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);