X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=463990941a78bdfcaabea5682e2e4336aa3ab39d;hb=a394cb8ee632ec5edce20309901ec66767497a43;hp=196709f5ee5862753f5f3731bdeab58c2c45c323;hpb=45acc86b2e47e255c4a6194dd69248a1bce71e48;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/mm/vmscan.c b/mm/vmscan.c index 196709f..4639909 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1638,6 +1638,13 @@ static int vmscan_swappiness(struct scan_control *sc) return mem_cgroup_swappiness(sc->target_mem_cgroup); } +enum scan_balance { + SCAN_EQUAL, + SCAN_FRACT, + SCAN_ANON, + SCAN_FILE, +}; + /* * Determine how aggressively the anon and file LRU lists should be * scanned. The relative value of each set of LRU lists is determined @@ -1650,15 +1657,16 @@ static int vmscan_swappiness(struct scan_control *sc) static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long *nr) { - unsigned long anon, file, free; + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; + u64 fraction[2]; + u64 denominator = 0; /* gcc */ + struct zone *zone = lruvec_zone(lruvec); unsigned long anon_prio, file_prio; + enum scan_balance scan_balance; + unsigned long anon, file, free; + bool force_scan = false; unsigned long ap, fp; - struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - u64 fraction[2], denominator; enum lru_list lru; - int noswap = 0; - bool force_scan = false; - struct zone *zone = lruvec_zone(lruvec); /* * If the zone or memcg is small, nr[l] can be 0. This @@ -1677,10 +1685,29 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, /* If we have no swap space, do not bother scanning anon pages. */ if (!sc->may_swap || (nr_swap_pages <= 0)) { - noswap = 1; - fraction[0] = 0; - fraction[1] = 1; - denominator = 1; + scan_balance = SCAN_FILE; + goto out; + } + + /* + * Global reclaim will swap to prevent OOM even with no + * swappiness, but memcg users want to use this knob to + * disable swapping for individual groups completely when + * using the memory controller's swap limit feature would be + * too expensive. + */ + if (!global_reclaim(sc) && !vmscan_swappiness(sc)) { + scan_balance = SCAN_FILE; + goto out; + } + + /* + * Do not apply any pressure balancing cleverness when the + * system is close to OOM, scan both anon and file equally + * (unless the swappiness setting disagrees with swapping). + */ + if (!sc->priority && vmscan_swappiness(sc)) { + scan_balance = SCAN_EQUAL; goto out; } @@ -1689,30 +1716,32 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + get_lru_size(lruvec, LRU_INACTIVE_FILE); + /* + * If it's foreseeable that reclaiming the file cache won't be + * enough to get the zone back into a desirable shape, we have + * to swap. Better start now and leave the - probably heavily + * thrashing - remaining file pages alone. + */ if (global_reclaim(sc)) { - free = zone_page_state(zone, NR_FREE_PAGES); + free = zone_page_state(zone, NR_FREE_PAGES); if (unlikely(file + free <= high_wmark_pages(zone))) { - /* - * If we have very few page cache pages, force-scan - * anon pages. - */ - fraction[0] = 1; - fraction[1] = 0; - denominator = 1; - goto out; - } else if (!inactive_file_is_low_global(zone)) { - /* - * There is enough inactive page cache, do not - * reclaim anything from the working set right now. 
- */ - fraction[0] = 0; - fraction[1] = 1; - denominator = 1; + scan_balance = SCAN_ANON; goto out; } } /* + * There is enough inactive page cache, do not reclaim + * anything from the anonymous working set right now. + */ + if (!inactive_file_is_low(lruvec)) { + scan_balance = SCAN_FILE; + goto out; + } + + scan_balance = SCAN_FRACT; + + /* * With swappiness at 100, anonymous and file have the same priority. * This scanning priority is essentially the inverse of IO cost. */ @@ -1759,19 +1788,92 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, out: for_each_evictable_lru(lru) { int file = is_file_lru(lru); + unsigned long size; unsigned long scan; - scan = get_lru_size(lruvec, lru); - if (sc->priority || noswap || !vmscan_swappiness(sc)) { - scan >>= sc->priority; - if (!scan && force_scan) - scan = SWAP_CLUSTER_MAX; + size = get_lru_size(lruvec, lru); + scan = size >> sc->priority; + + if (!scan && force_scan) + scan = min(size, SWAP_CLUSTER_MAX); + + switch (scan_balance) { + case SCAN_EQUAL: + /* Scan lists relative to size */ + break; + case SCAN_FRACT: + /* + * Scan types proportional to swappiness and + * their relative recent reclaim efficiency. + */ scan = div64_u64(scan * fraction[file], denominator); + break; + case SCAN_FILE: + case SCAN_ANON: + /* Scan one type exclusively */ + if ((scan_balance == SCAN_FILE) != file) + scan = 0; + break; + default: + /* Look ma, no brain */ + BUG(); } nr[lru] = scan; } } +/* + * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. + */ +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + unsigned long nr[NR_LRU_LISTS]; + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + unsigned long nr_to_reclaim = sc->nr_to_reclaim; + struct blk_plug plug; + + get_scan_count(lruvec, sc, nr); + + blk_start_plug(&plug); + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || + nr[LRU_INACTIVE_FILE]) { + for_each_evictable_lru(lru) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + + nr_reclaimed += shrink_list(lru, nr_to_scan, + lruvec, sc); + } + } + /* + * On large memory systems, scan >> priority can become + * really large. This is fine for the starting priority; + * we want to put equal scanning pressure on each zone. + * However, if the VM has a harder time of freeing pages, + * with multiple processes reclaiming pages, the total + * freeing target can get unreasonably large. + */ + if (nr_reclaimed >= nr_to_reclaim && + sc->priority < DEF_PRIORITY) + break; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + + /* + * Even if we did not try to evict anon pages at all, we want to + * rebalance the anon lru active/inactive ratio. + */ + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); + + throttle_vm_writeout(sc->gfp_mask); +} + /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { @@ -1790,7 +1892,7 @@ static bool in_reclaim_compaction(struct scan_control *sc) * calls try_to_compact_zone() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. 
*/ -static inline bool should_continue_reclaim(struct lruvec *lruvec, +static inline bool should_continue_reclaim(struct zone *zone, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) @@ -1830,15 +1932,15 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); + inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); + inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) { + switch (compaction_suitable(zone, sc->order)) { case COMPACT_PARTIAL: case COMPACT_CONTINUE: return false; @@ -1847,98 +1949,48 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec, } } -/* - * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. - */ -static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { - unsigned long nr[NR_LRU_LISTS]; - unsigned long nr_to_scan; - enum lru_list lru; unsigned long nr_reclaimed, nr_scanned; - unsigned long nr_to_reclaim = sc->nr_to_reclaim; - struct blk_plug plug; - -restart: - nr_reclaimed = 0; - nr_scanned = sc->nr_scanned; - get_scan_count(lruvec, sc, nr); - - blk_start_plug(&plug); - while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || - nr[LRU_INACTIVE_FILE]) { - for_each_evictable_lru(lru) { - if (nr[lru]) { - nr_to_scan = min_t(unsigned long, - nr[lru], SWAP_CLUSTER_MAX); - nr[lru] -= nr_to_scan; - - nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); - } - } - /* - * On large memory systems, scan >> priority can become - * really large. This is fine for the starting priority; - * we want to put equal scanning pressure on each zone. - * However, if the VM has a harder time of freeing pages, - * with multiple processes reclaiming pages, the total - * freeing target can get unreasonably large. - */ - if (nr_reclaimed >= nr_to_reclaim && - sc->priority < DEF_PRIORITY) - break; - } - blk_finish_plug(&plug); - sc->nr_reclaimed += nr_reclaimed; - /* - * Even if we did not try to evict anon pages at all, we want to - * rebalance the anon lru active/inactive ratio. 
- */ - if (inactive_anon_is_low(lruvec)) - shrink_active_list(SWAP_CLUSTER_MAX, lruvec, - sc, LRU_ACTIVE_ANON); - - /* reclaim/compaction might need reclaim to continue */ - if (should_continue_reclaim(lruvec, nr_reclaimed, - sc->nr_scanned - nr_scanned, sc)) - goto restart; + do { + struct mem_cgroup *root = sc->target_mem_cgroup; + struct mem_cgroup_reclaim_cookie reclaim = { + .zone = zone, + .priority = sc->priority, + }; + struct mem_cgroup *memcg; - throttle_vm_writeout(sc->gfp_mask); -} + nr_reclaimed = sc->nr_reclaimed; + nr_scanned = sc->nr_scanned; -static void shrink_zone(struct zone *zone, struct scan_control *sc) -{ - struct mem_cgroup *root = sc->target_mem_cgroup; - struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, - .priority = sc->priority, - }; - struct mem_cgroup *memcg; + memcg = mem_cgroup_iter(root, NULL, &reclaim); + do { + struct lruvec *lruvec; - memcg = mem_cgroup_iter(root, NULL, &reclaim); - do { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + lruvec = mem_cgroup_zone_lruvec(zone, memcg); - shrink_lruvec(lruvec, sc); + shrink_lruvec(lruvec, sc); - /* - * Limit reclaim has historically picked one memcg and - * scanned it with decreasing priority levels until - * nr_to_reclaim had been reclaimed. This priority - * cycle is thus over after a single memcg. - * - * Direct reclaim and kswapd, on the other hand, have - * to scan all memory cgroups to fulfill the overall - * scan target for the zone. - */ - if (!global_reclaim(sc)) { - mem_cgroup_iter_break(root, memcg); - break; - } - memcg = mem_cgroup_iter(root, memcg, &reclaim); - } while (memcg); + /* + * Direct reclaim and kswapd have to scan all memory + * cgroups to fulfill the overall scan target for the + * zone. + * + * Limit reclaim, on the other hand, only cares about + * nr_to_reclaim pages to be reclaimed and it will + * retry with decreasing priority if one round over the + * whole hierarchy is not sufficient. + */ + if (!global_reclaim(sc) && + sc->nr_reclaimed >= sc->nr_to_reclaim) { + mem_cgroup_iter_break(root, memcg); + break; + } + memcg = mem_cgroup_iter(root, memcg, &reclaim); + } while (memcg); + } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, + sc->nr_scanned - nr_scanned, sc)); } /* Returns true if compaction should go ahead for a high-order request */ @@ -3280,8 +3332,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), .may_swap = 1, - .nr_to_reclaim = max_t(unsigned long, nr_pages, - SWAP_CLUSTER_MAX), + .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .order = order, .priority = ZONE_RECLAIM_PRIORITY,
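
Note (not part of the patch above): the reworked get_scan_count() replaces the old fraction[]/noswap special-casing with the scan_balance switch. The standalone C sketch below mirrors only that final arithmetic: how a raw LRU size, the reclaim priority and, in the SCAN_FRACT case, a swappiness-derived fraction become a per-list scan target. The helper name, the hard-coded fraction values and the sizes in main() are illustrative assumptions; in the kernel the weights come from vmscan_swappiness() and the lruvec's recent_scanned/recent_rotated statistics, and the division is done with div64_u64().

#include <stdint.h>
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL

enum scan_balance { SCAN_EQUAL, SCAN_FRACT, SCAN_ANON, SCAN_FILE };

/* file = 1 for the file LRUs, 0 for the anon LRUs (cf. is_file_lru()) */
static unsigned long scan_target(unsigned long size, int file, int priority,
				 enum scan_balance balance,
				 const uint64_t fraction[2], uint64_t denominator,
				 int force_scan)
{
	unsigned long scan = size >> priority;

	/* same fallback as the patch: don't let small lruvecs round to zero */
	if (!scan && force_scan)
		scan = size < SWAP_CLUSTER_MAX ? size : SWAP_CLUSTER_MAX;

	switch (balance) {
	case SCAN_EQUAL:
		/* scan both types relative to their size */
		break;
	case SCAN_FRACT:
		/* proportional to swappiness and recent reclaim efficiency */
		scan = (unsigned long)((uint64_t)scan * fraction[file] / denominator);
		break;
	case SCAN_FILE:
	case SCAN_ANON:
		/* scan one type exclusively */
		if ((balance == SCAN_FILE) != file)
			scan = 0;
		break;
	}
	return scan;
}

int main(void)
{
	/* hypothetical numbers: anon/file weights 60/140, priority 12 */
	uint64_t fraction[2] = { 60, 140 };
	uint64_t denominator = fraction[0] + fraction[1];

	printf("anon scan target: %lu\n",
	       scan_target(1UL << 20, 0, 12, SCAN_FRACT, fraction, denominator, 0));
	printf("file scan target: %lu\n",
	       scan_target(1UL << 22, 1, 12, SCAN_FRACT, fraction, denominator, 0));
	return 0;
}

With these example numbers the anon lists get 76 of their 256 eligible pages scanned and the file lists 716 of their 1024, i.e. both types are scaled by the same 60:140 weighting, which is the proportional behaviour the SCAN_FRACT comment in the patch describes.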