The global kswapd scans per-zone LRU and reclaims pages regardless of the
cgroup. It breaks memory isolation since one cgroup can end up reclaiming
pages from another cgroup. Instead we should rely on memcg-aware target
reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under
memory pressure.
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored. This patch is the first
step to skip shrink_zone() if soft_limit reclaim does enough work.
This is part of the effort which tries to reduce reclaiming pages in global
LRU in memcg. The per-memcg background reclaim patchset further enhances the
per-cgroup targetting reclaim, which I should have V4 posted shortly.
Try running multiple memory intensive workloads within seperate memcgs. Watch
the counters of soft_steal in memory.stat.
$ cat /dev/cgroup/A/memory.stat | grep 'soft'
soft_steal 240000
soft_scan 240000
total_soft_steal 240000
total_soft_scan 240000
This patch:
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored.
We would like to skip shrink_zone() if soft_limit reclaim does enough
work. Also, we need to make the memory pressure balanced across per-memcg
zones, like the logic vm-core. This patch is the first step where we
start with counting the nr_scanned and nr_reclaimed from soft_limit
reclaim into the global scan_control.
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
}
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
}
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned);
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness,
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness,
+ struct zone *zone,
+ unsigned long *nr_scanned);
extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
struct zone *zone,
gfp_t gfp_mask,
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
struct zone *zone,
gfp_t gfp_mask,
- unsigned long reclaim_options)
+ unsigned long reclaim_options,
+ unsigned long *total_scanned)
{
struct mem_cgroup *victim;
int ret, total = 0;
{
struct mem_cgroup *victim;
int ret, total = 0;
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
unsigned long excess;
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
unsigned long excess;
+ unsigned long nr_scanned;
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
continue;
}
/* we use swappiness of local cgroup */
continue;
}
/* we use swappiness of local cgroup */
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
- noswap, get_swappiness(victim), zone);
- else
+ noswap, get_swappiness(victim), zone,
+ &nr_scanned);
+ *total_scanned += nr_scanned;
+ } else
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
noswap, get_swappiness(victim));
css_put(&victim->css);
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
noswap, get_swappiness(victim));
css_put(&victim->css);
return CHARGE_WOULDBLOCK;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
return CHARGE_WOULDBLOCK;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
+ gfp_mask, flags, NULL);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
return CHARGE_RETRY;
/*
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
return CHARGE_RETRY;
/*
break;
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
break;
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
- MEM_CGROUP_RECLAIM_SHRINK);
+ MEM_CGROUP_RECLAIM_SHRINK,
+ NULL);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
MEM_CGROUP_RECLAIM_NOSWAP |
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
MEM_CGROUP_RECLAIM_NOSWAP |
- MEM_CGROUP_RECLAIM_SHRINK);
+ MEM_CGROUP_RECLAIM_SHRINK,
+ NULL);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
}
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
}
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
{
unsigned long nr_reclaimed = 0;
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
{
unsigned long nr_reclaimed = 0;
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
int loop = 0;
struct mem_cgroup_tree_per_zone *mctz;
unsigned long long excess;
int loop = 0;
struct mem_cgroup_tree_per_zone *mctz;
unsigned long long excess;
+ unsigned long nr_scanned;
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
gfp_mask,
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
gfp_mask,
- MEM_CGROUP_RECLAIM_SOFT);
+ MEM_CGROUP_RECLAIM_SOFT,
+ &nr_scanned);
nr_reclaimed += reclaimed;
nr_reclaimed += reclaimed;
+ *total_scanned += nr_scanned;
spin_lock(&mctz->lock);
/*
spin_lock(&mctz->lock);
/*
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness,
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
unsigned int swappiness,
+ struct zone *zone,
+ unsigned long *nr_scanned)
{
struct scan_control sc = {
{
struct scan_control sc = {
.nr_to_reclaim = SWAP_CLUSTER_MAX,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.order = 0,
.mem_cgroup = mem,
};
.order = 0,
.mem_cgroup = mem,
};
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+ *nr_scanned = sc.nr_scanned;
return sc.nr_reclaimed;
}
return sc.nr_reclaimed;
}
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long total_scanned;
struct reclaim_state *reclaim_state = current->reclaim_state;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long total_scanned;
struct reclaim_state *reclaim_state = current->reclaim_state;
+ unsigned long nr_soft_reclaimed;
+ unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_unmap = 1,
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_unmap = 1,
/*
* Call soft limit reclaim before calling shrink_zone.
/*
* Call soft limit reclaim before calling shrink_zone.
- * For now we ignore the return value
- mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+ nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+ order, sc.gfp_mask,
+ &nr_soft_scanned);
+ sc.nr_reclaimed += nr_soft_reclaimed;
+ total_scanned += nr_soft_scanned;
/*
* We put equal pressure on every zone, unless
/*
* We put equal pressure on every zone, unless