enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_ANON,
- MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */
MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */
MEM_CGROUP_CHARGE_TYPE_DROP, /* a page was unused swap cache */
NR_CHARGE_TYPE,
/*
* Return the memory (and swap, if configured) limit for a memcg.
*/
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
+static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
u64 limit;
u64 memsw;
return min(limit, memsw);
}
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ int order)
+{
+ struct mem_cgroup *iter;
+ unsigned long chosen_points = 0; /* best score seen so far */
+ unsigned long totalpages;
+ unsigned int points = 0;
+ struct task_struct *chosen = NULL; /* candidate; holds a task reference */
+
+ /*
+ * Memcg OOM handling: walk the tasks of every memcg visited by
+ * for_each_mem_cgroup_tree() starting at @memcg, remember the task
+ * with the highest oom_badness() score (or one the scan force-selects)
+ * and pass it to oom_kill_process().
+ *
+ * If current has a pending SIGKILL, then automatically select it. The
+ * goal is to allow it to allocate so that it may quickly exit and free
+ * its memory.
+ */
+ if (fatal_signal_pending(current)) {
+ set_thread_flag(TIF_MEMDIE);
+ return;
+ }
+
+ check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
+ totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1; /* never 0: divisor below */
+ for_each_mem_cgroup_tree(iter, memcg) {
+ struct cgroup *cgroup = iter->css.cgroup;
+ struct cgroup_iter it;
+ struct task_struct *task;
+
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ switch (oom_scan_process_thread(task, totalpages, NULL,
+ false)) {
+ case OOM_SCAN_SELECT: /* take this task regardless of its score */
+ if (chosen)
+ put_task_struct(chosen);
+ chosen = task;
+ chosen_points = ULONG_MAX; /* no badness score can exceed this */
+ get_task_struct(chosen);
+ /* fall through */
+ case OOM_SCAN_CONTINUE: /* skip scoring, move on to the next task */
+ continue;
+ case OOM_SCAN_ABORT: /* end both iterations and return without killing */
+ cgroup_iter_end(cgroup, &it);
+ mem_cgroup_iter_break(memcg, iter);
+ if (chosen)
+ put_task_struct(chosen); /* drop the candidate's reference */
+ return;
+ case OOM_SCAN_OK: /* score the task below */
+ break;
+ };
+ points = oom_badness(task, memcg, NULL, totalpages);
+ if (points > chosen_points) { /* new best: swap the held reference */
+ if (chosen)
+ put_task_struct(chosen);
+ chosen = task;
+ chosen_points = points;
+ get_task_struct(chosen);
+ }
+ }
+ cgroup_iter_end(cgroup, &it);
+ }
+
+ if (!chosen)
+ return; /* no task was selected */
+ points = chosen_points * 1000 / totalpages; /* NOTE(review): wraps when chosen_points is ULONG_MAX */
+ oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg,
+ NULL, "Memory cgroup out of memory");
+}
+
static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
gfp_t gfp_mask,
unsigned long flags)
* We always charge the cgroup the mm_struct belongs to.
* The mm_struct's mem_cgroup changes on task migration if the
* thread group leader migrates. It's possible that mm is not
- * set, if so charge the init_mm (happens for pagecache usage).
+ * set, if so charge the root memcg (happens for pagecache usage).
*/
if (!*ptr && !mm)
*ptr = root_mem_cgroup;
MEM_CGROUP_CHARGE_TYPE_ANON);
}
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
- enum charge_type ctype);
-
-int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
-{
- struct mem_cgroup *memcg = NULL;
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
- int ret;
-
- if (mem_cgroup_disabled())
- return 0;
- if (PageCompound(page))
- return 0;
-
- if (unlikely(!mm))
- mm = &init_mm;
- if (!page_is_file_cache(page))
- type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
- if (!PageSwapCache(page))
- ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
- else { /* page is swapcache/shmem */
- ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
- if (!ret)
- __mem_cgroup_commit_charge_swapin(page, memcg, type);
- }
- return ret;
-}
-
/*
* While swap-in, try_charge -> commit or cancel, the page is locked.
* And when try_charge() successfully returns, one refcnt to memcg without
* struct page_cgroup is acquired. This refcnt will be consumed by
* "commit()" or removed by "cancel()"
*/
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page,
- gfp_t mask, struct mem_cgroup **memcgp)
+static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+ struct page *page,
+ gfp_t mask,
+ struct mem_cgroup **memcgp)
{
struct mem_cgroup *memcg;
int ret;
- *memcgp = NULL;
-
- if (mem_cgroup_disabled())
- return 0;
-
if (!do_swap_account)
goto charge_cur_mm;
/*
ret = 0;
return ret;
charge_cur_mm:
- if (unlikely(!mm))
- mm = &init_mm;
ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
if (ret == -EINTR)
ret = 0;
return ret;
}
+int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+ *memcgp = NULL; /* stays NULL unless the helper below stores a memcg */
+ if (mem_cgroup_disabled())
+ return 0; /* memcg disabled: succeed without charging */
+ return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); /* does the actual work */
+}
+
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+{
+ if (mem_cgroup_disabled())
+ return;
+ if (!memcg)
+ return; /* no memcg given, nothing to cancel */
+ __mem_cgroup_cancel_charge(memcg, 1); /* the 1 is presumably a page count - confirm */
+}
+
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
enum charge_type ctype)
MEM_CGROUP_CHARGE_TYPE_ANON);
}
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask)
{
+ struct mem_cgroup *memcg = NULL;
+ enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ int ret;
+
if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
- __mem_cgroup_cancel_charge(memcg, 1);
+ return 0;
+ if (PageCompound(page))
+ return 0; /* compound pages are not charged here; report success */
+
+ if (!PageSwapCache(page))
+ ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
+ else { /* page is swapcache/shmem */
+ ret = __mem_cgroup_try_charge_swapin(mm, page,
+ gfp_mask, &memcg); /* __ helper: disabled check already done above */
+ if (!ret) /* commit only when the try step succeeded */
+ __mem_cgroup_commit_charge_swapin(page, memcg, type);
+ }
+ return ret;
}
static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
* uncharge if !page_mapped(page)
*/
static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
+ bool end_migration)
{
struct mem_cgroup *memcg = NULL;
unsigned int nr_pages = 1;
if (mem_cgroup_disabled())
return NULL;
- if (PageSwapCache(page))
- return NULL;
+ VM_BUG_ON(PageSwapCache(page));
if (PageTransHuge(page)) {
nr_pages <<= compound_order(page);
/* fallthrough */
case MEM_CGROUP_CHARGE_TYPE_DROP:
/* See mem_cgroup_prepare_migration() */
- if (page_mapped(page) || PageCgroupMigration(pc))
+ if (page_mapped(page))
+ goto unlock_out;
+ /*
+ * Pages under migration may not be uncharged. But
+ * end_migration() /must/ be the one uncharging the
+ * unused post-migration page and so it has to call
+ * here with the migration bit still set. See the
+ * res_counter handling below.
+ */
+ if (!end_migration && PageCgroupMigration(pc))
goto unlock_out;
break;
case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
mem_cgroup_swap_statistics(memcg, true);
mem_cgroup_get(memcg);
}
- if (!mem_cgroup_is_root(memcg))
+ /*
+ * Migration does not charge the res_counter for the
+ * replacement page, so leave it alone when phasing out the
+ * page that is unused after the migration.
+ */
+ if (!end_migration && !mem_cgroup_is_root(memcg))
mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
return memcg;
if (page_mapped(page))
return;
VM_BUG_ON(page->mapping && !PageAnon(page));
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
+ if (PageSwapCache(page))
+ return;
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
}
void mem_cgroup_uncharge_cache_page(struct page *page)
{
VM_BUG_ON(page_mapped(page)); /* page must no longer be mapped */
VM_BUG_ON(page->mapping); /* page->mapping must already be NULL */
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); /* false: not the end_migration uncharge */
}
/*
if (!swapout) /* this was a swap cache but the swap is unused ! */
ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
- memcg = __mem_cgroup_uncharge_common(page, ctype);
+ memcg = __mem_cgroup_uncharge_common(page, ctype, false);
/*
* record memcg information, if swapout && memcg != NULL,
* Before starting migration, account PAGE_SIZE to mem_cgroup that the old
* page belongs to.
*/
-int mem_cgroup_prepare_migration(struct page *page,
- struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+ struct mem_cgroup **memcgp)
{
struct mem_cgroup *memcg = NULL;
struct page_cgroup *pc;
enum charge_type ctype;
- int ret = 0;
*memcgp = NULL;
VM_BUG_ON(PageTransHuge(page));
if (mem_cgroup_disabled())
- return 0;
+ return;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
* we return here.
*/
if (!memcg)
- return 0;
+ return;
*memcgp = memcg;
- ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
- css_put(&memcg->css);/* drop extra refcnt */
- if (ret) {
- if (PageAnon(page)) {
- lock_page_cgroup(pc);
- ClearPageCgroupMigration(pc);
- unlock_page_cgroup(pc);
- /*
- * The old page may be fully unmapped while we kept it.
- */
- mem_cgroup_uncharge_page(page);
- }
- /* we'll need to revisit this error code (we have -EINTR) */
- return -ENOMEM;
- }
/*
* We charge new page before it's used/mapped. So, even if unlock_page()
* is called before end_migration, we can catch all events on this new
*/
if (PageAnon(page))
ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
- else if (page_is_file_cache(page))
- ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
else
- ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+ ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ /*
+ * The page is committed to the memcg, but it's not actually
+ * charged to the res_counter since we plan on replacing the
+ * old one and only one page is going to be left afterwards.
+ */
__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
- return ret;
}
/* remove redundant charge if migration failed*/
used = newpage;
unused = oldpage;
}
+ anon = PageAnon(used);
+ __mem_cgroup_uncharge_common(unused,
+ anon ? MEM_CGROUP_CHARGE_TYPE_ANON
+ : MEM_CGROUP_CHARGE_TYPE_CACHE,
+ true);
+ css_put(&memcg->css);
/*
* We disallowed uncharge of pages under migration because mapcount
* of the page goes down to zero, temporarly.
lock_page_cgroup(pc);
ClearPageCgroupMigration(pc);
unlock_page_cgroup(pc);
- anon = PageAnon(used);
- __mem_cgroup_uncharge_common(unused,
- anon ? MEM_CGROUP_CHARGE_TYPE_ANON
- : MEM_CGROUP_CHARGE_TYPE_CACHE);
/*
* If a page is a file cache, radix-tree replacement is very atomic
*/
if (!memcg)
return;
-
- if (PageSwapBacked(oldpage))
- type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
/*
* Even if newpage->mapping was NULL before starting replacement,
* the newpage may be on LRU(or pagevec for LRU) already. We lock