X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fmemcontrol.c;h=43a9ade724c7a7157c6014cbbda5240ae6eae791;hb=1f2b71f41ee81735c25ef326da9a0610d640abc2;hp=228d6461c12ade8941b37416ca867d3df3277acd;hpb=115e4bfd5bc68f870b3c889ab8f2d2733bcda452;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 228d646..43a9ade 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -135,7 +135,7 @@ struct mem_cgroup_reclaim_iter {
  */
 struct mem_cgroup_per_zone {
 	struct lruvec		lruvec;
-	unsigned long		count[NR_LRU_LISTS];
+	unsigned long		lru_size[NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
@@ -144,11 +144,9 @@ struct mem_cgroup_per_zone {
 	unsigned long long	usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
 	bool			on_tree;
-	struct mem_cgroup	*mem;		/* Back pointer, we cannot */
+	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
 						/* use container_of	   */
 };
-/* Macro for accessing counter */
-#define MEM_CGROUP_ZSTAT(mz, idx)	((mz)->count[(idx)])
 
 struct mem_cgroup_per_node {
 	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
@@ -230,10 +228,30 @@ struct mem_cgroup {
 	 * the counter to account for memory usage
 	 */
 	struct res_counter res;
-	/*
-	 * the counter to account for mem+swap usage.
-	 */
-	struct res_counter memsw;
+
+	union {
+		/*
+		 * the counter to account for mem+swap usage.
+		 */
+		struct res_counter memsw;
+
+		/*
+		 * rcu_freeing is used only when freeing struct mem_cgroup,
+		 * so put it into a union to avoid wasting more memory.
+		 * It must be disjoint from the css field.  It could be
+		 * in a union with the res field, but res plays a much
+		 * larger part in mem_cgroup life than memsw, and might
+		 * be of interest, even at time of free, when debugging.
+		 * So share rcu_head with the less interesting memsw.
+		 */
+		struct rcu_head rcu_freeing;
+		/*
+		 * But when using vfree(), that cannot be done at
+		 * interrupt time, so we must then queue the work.
+		 */
+		struct work_struct work_freeing;
+	};
+
 	/*
 	 * Per cgroup active and inactive list, similar to the
 	 * per zone LRU lists.
@@ -592,9 +610,9 @@ retry:
 	 * we will to add it back at the end of reclaim to its correct
 	 * position in the tree.
 	 */
-	__mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
-	if (!res_counter_soft_limit_excess(&mz->mem->res) ||
-		!css_tryget(&mz->mem->css))
+	__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+	if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
+		!css_tryget(&mz->memcg->css))
 		goto retry;
 done:
 	return mz;
@@ -701,14 +719,14 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
 			unsigned int lru_mask)
 {
 	struct mem_cgroup_per_zone *mz;
-	enum lru_list l;
+	enum lru_list lru;
 	unsigned long ret = 0;
 
 	mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
-	for_each_lru(l) {
-		if (BIT(l) & lru_mask)
-			ret += MEM_CGROUP_ZSTAT(mz, l);
+	for_each_lru(lru) {
+		if (BIT(lru) & lru_mask)
+			ret += mz->lru_size[lru];
 	}
 	return ret;
 }
@@ -1042,9 +1060,22 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 
 	pc = lookup_page_cgroup(page);
 	memcg = pc->mem_cgroup;
+
+	/*
+	 * Surreptitiously switch any uncharged page to root:
+	 * an uncharged page off lru does nothing to secure
+	 * its former mem_cgroup from sudden removal.
+	 *
+	 * Our caller holds lru_lock, and PageCgroupUsed is updated
+	 * under page_cgroup lock: between them, they make all uses
+	 * of pc->mem_cgroup safe.
+	 */
+	if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup)
+		pc->mem_cgroup = memcg = root_mem_cgroup;
+
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* compound_order() is stabilized through lru_lock */
-	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
+	mz->lru_size[lru] += 1 << compound_order(page);
 	return &mz->lruvec;
 }
 
@@ -1072,8 +1103,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 	VM_BUG_ON(!memcg);
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* huge page split is done under lru_lock. so, we have no races. */
-	VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
+	VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page)));
+	mz->lru_size[lru] -= 1 << compound_order(page);
 }
 
 void mem_cgroup_lru_del(struct page *page)
@@ -1360,7 +1391,6 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 	if (!memcg || !p)
 		return;
 
-
 	rcu_read_lock();
 
 	mem_cgrp = memcg->css.cgroup;
@@ -1739,22 +1769,22 @@ static DEFINE_SPINLOCK(memcg_oom_lock);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *memcg;
 	wait_queue_t	wait;
 };
 
 static int memcg_oom_wake_function(wait_queue_t *wait,
 	unsigned mode, int sync, void *arg)
 {
-	struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg,
-			  *oom_wait_memcg;
+	struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
+	struct mem_cgroup *oom_wait_memcg;
 	struct oom_wait_info *oom_wait_info;
 
 	oom_wait_info = container_of(wait, struct oom_wait_info, wait);
-	oom_wait_memcg = oom_wait_info->mem;
+	oom_wait_memcg = oom_wait_info->memcg;
 
 	/*
-	 * Both of oom_wait_info->mem and wake_mem are stable under us.
+	 * Both of oom_wait_info->memcg and wake_memcg are stable under us.
 	 * Then we can use css_is_ancestor without taking care of RCU.
 	 */
 	if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
@@ -1778,12 +1808,12 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 /*
  * try to call OOM killer. returns false if we should exit memory-reclaim loop.
  */
-bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
+bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
 	struct oom_wait_info owait;
 	bool locked, need_to_kill;
 
-	owait.mem = memcg;
+	owait.memcg = memcg;
 	owait.wait.flags = 0;
 	owait.wait.func = memcg_oom_wake_function;
 	owait.wait.private = current;
@@ -1808,7 +1838,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
 
 	if (need_to_kill) {
 		finish_wait(&memcg_oom_waitq, &owait.wait);
-		mem_cgroup_out_of_memory(memcg, mask);
+		mem_cgroup_out_of_memory(memcg, mask, order);
 	} else {
 		schedule();
 		finish_wait(&memcg_oom_waitq, &owait.wait);
@@ -1895,7 +1925,6 @@ out:
 	if (unlikely(need_unlock))
 		move_unlock_page_cgroup(pc, &flags);
 	rcu_read_unlock();
-	return;
 }
 EXPORT_SYMBOL(mem_cgroup_update_page_stat);
 
@@ -2179,7 +2208,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!oom_check)
 		return CHARGE_NOMEM;
 	/* check OOM */
-	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask))
+	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
 		return CHARGE_OOM_DIE;
 
 	return CHARGE_RETRY;
@@ -2408,8 +2437,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 				       struct page *page,
 				       unsigned int nr_pages,
 				       struct page_cgroup *pc,
-				       enum charge_type ctype)
+				       enum charge_type ctype,
+				       bool lrucare)
 {
+	struct zone *uninitialized_var(zone);
+	bool was_on_lru = false;
+
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
@@ -2420,6 +2453,21 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	 * we don't need page_cgroup_lock about tail pages, becase they are not
 	 * accessed by any other context at this point.
 	 */
+
+	/*
+	 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
+	 * may already be on some other mem_cgroup's LRU. Take care of it.
+	 */
+	if (lrucare) {
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+		if (PageLRU(page)) {
+			ClearPageLRU(page);
+			del_page_from_lru_list(zone, page, page_lru(page));
+			was_on_lru = true;
+		}
+	}
+
 	pc->mem_cgroup = memcg;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2443,9 +2491,18 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 		break;
 	}
 
+	if (lrucare) {
+		if (was_on_lru) {
+			VM_BUG_ON(PageLRU(page));
+			SetPageLRU(page);
+			add_page_to_lru_list(zone, page, page_lru(page));
+		}
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
 	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
-	WARN_ON_ONCE(PageLRU(page));
+
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2643,7 +2700,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
 	if (ret == -ENOMEM)
 		return ret;
-	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
+	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype, false);
 	return 0;
 }
 
@@ -2663,35 +2720,6 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 					enum charge_type ctype);
 
-static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
-					enum charge_type ctype)
-{
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-	struct zone *zone = page_zone(page);
-	unsigned long flags;
-	bool removed = false;
-
-	/*
-	 * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
-	 * is already on LRU. It means the page may on some other page_cgroup's
-	 * LRU. Take care of it.
-	 */
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	if (PageLRU(page)) {
-		del_page_from_lru_list(zone, page, page_lru(page));
-		ClearPageLRU(page);
-		removed = true;
-	}
-	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-	if (removed) {
-		add_page_to_lru_list(zone, page, page_lru(page));
-		SetPageLRU(page);
-	}
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-	return;
-}
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
@@ -2769,13 +2797,16 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
 {
+	struct page_cgroup *pc;
+
 	if (mem_cgroup_disabled())
 		return;
 	if (!memcg)
 		return;
 
 	cgroup_exclude_rmdir(&memcg->css);
-	__mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
+	pc = lookup_page_cgroup(page);
+	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype, true);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.
@@ -2879,7 +2910,6 @@ direct_uncharge:
 		res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
 	if (unlikely(batch->memcg != memcg))
 		memcg_oom_recover(memcg);
-	return;
 }
 
 /*
@@ -3027,23 +3057,6 @@ void mem_cgroup_uncharge_end(void)
 	batch->memcg = NULL;
 }
 
-/*
- * A function for resetting pc->mem_cgroup for newly allocated pages.
- * This function should be called if the newpage will be added to LRU
- * before start accounting.
- */
-void mem_cgroup_reset_owner(struct page *newpage)
-{
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(newpage);
-	VM_BUG_ON(PageCgroupUsed(pc));
-	pc->mem_cgroup = root_mem_cgroup;
-}
-
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
@@ -3248,7 +3261,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	else
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype, false);
 	return ret;
 }
 
@@ -3332,7 +3345,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
 	 * LRU while we overwrite pc->mem_cgroup.
 	 */
-	__mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type, true);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -3531,7 +3544,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
 						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
@@ -3558,13 +3571,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 				next_mz =
 				__mem_cgroup_largest_soft_limit_node(mctz);
 				if (next_mz == mz)
-					css_put(&next_mz->mem->css);
+					css_put(&next_mz->memcg->css);
 				else /* next_mz == NULL or other memcg */
 					break;
 			} while (1);
 		}
-		__mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
-		excess = res_counter_soft_limit_excess(&mz->mem->res);
+		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->memcg->res);
 		/*
 		 * One school of thought says that we should not add
 		 * back the node to the tree if reclaim returns 0.
@@ -3574,9 +3587,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 		 * term TODO.
 		 */
 		/* If excess == 0, no tree ops */
-		__mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess);
+		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
 		spin_unlock(&mctz->lock);
-		css_put(&mz->mem->css);
+		css_put(&mz->memcg->css);
 		loop++;
 		/*
 		 * Could not reclaim anything and there are no more
@@ -3589,7 +3602,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 	} while (!nr_reclaimed);
 	if (next_mz)
-		css_put(&next_mz->mem->css);
+		css_put(&next_mz->memcg->css);
 	return nr_reclaimed;
 }
 
@@ -3611,7 +3624,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 	mz = mem_cgroup_zoneinfo(memcg, node, zid);
 	list = &mz->lruvec.lists[lru];
 
-	loop = MEM_CGROUP_ZSTAT(mz, lru);
+	loop = mz->lru_size[lru];
 	/* give some margin against EBUSY etc...*/
 	loop += 256;
 	busy = NULL;
@@ -3685,10 +3698,10 @@ move_account:
 		mem_cgroup_start_move(memcg);
 		for_each_node_state(node, N_HIGH_MEMORY) {
 			for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
-				enum lru_list l;
-				for_each_lru(l) {
+				enum lru_list lru;
+				for_each_lru(lru) {
 					ret = mem_cgroup_force_empty_list(memcg,
-							node, zid, l);
+							node, zid, lru);
 					if (ret)
 						break;
 				}
@@ -3921,7 +3934,6 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
 out:
 	*mem_limit = min_limit;
 	*memsw_limit = min_memsw_limit;
-	return;
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -4080,38 +4092,38 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
 	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
 	unsigned long node_nr;
 	struct cgroup *cont = m->private;
-	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-	total_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL);
+	total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
 	seq_printf(m, "total=%lu", total_nr);
 	for_each_node_state(nid, N_HIGH_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid, LRU_ALL);
+		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
 		seq_printf(m, " N%d=%lu", nid, node_nr);
 	}
 	seq_putc(m, '\n');
 
-	file_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL_FILE);
+	file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
 	seq_printf(m, "file=%lu", file_nr);
 	for_each_node_state(nid, N_HIGH_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid,
+		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
 				LRU_ALL_FILE);
 		seq_printf(m, " N%d=%lu", nid, node_nr);
 	}
 	seq_putc(m, '\n');
 
-	anon_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL_ANON);
+	anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
 	seq_printf(m, "anon=%lu", anon_nr);
 	for_each_node_state(nid, N_HIGH_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid,
+		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
 				LRU_ALL_ANON);
 		seq_printf(m, " N%d=%lu", nid, node_nr);
 	}
 	seq_putc(m, '\n');
 
-	unevictable_nr = mem_cgroup_nr_lru_pages(mem_cont, BIT(LRU_UNEVICTABLE));
+	unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
 	seq_printf(m, "unevictable=%lu", unevictable_nr);
 	for_each_node_state(nid, N_HIGH_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid,
+		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
 				BIT(LRU_UNEVICTABLE));
 		seq_printf(m, " N%d=%lu", nid, node_nr);
 	}
@@ -4123,12 +4135,12 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 				 struct cgroup_map_cb *cb)
 {
-	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 	struct mcs_total_stat mystat;
 	int i;
 
 	memset(&mystat, 0, sizeof(mystat));
-	mem_cgroup_get_local_stat(mem_cont, &mystat);
+	mem_cgroup_get_local_stat(memcg, &mystat);
 
 
 	for (i = 0; i < NR_MCS_STAT; i++) {
@@ -4140,14 +4152,14 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	/* Hierarchical information */
 	{
 		unsigned long long limit, memsw_limit;
-		memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit);
+		memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
 		cb->fill(cb, "hierarchical_memory_limit", limit);
 		if (do_swap_account)
 			cb->fill(cb, "hierarchical_memsw_limit", memsw_limit);
 	}
 
 	memset(&mystat, 0, sizeof(mystat));
-	mem_cgroup_get_total_stat(mem_cont, &mystat);
+	mem_cgroup_get_total_stat(memcg, &mystat);
 	for (i = 0; i < NR_MCS_STAT; i++) {
 		if (i == MCS_SWAP && !do_swap_account)
 			continue;
@@ -4163,7 +4175,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 
 		for_each_online_node(nid)
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-				mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
+				mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
 				recent_rotated[0] +=
 					mz->reclaim_stat.recent_rotated[0];
@@ -4584,10 +4596,9 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 	return mem_cgroup_sockets_init(cont, ss);
 };
 
-static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
-				struct cgroup *cont)
+static void kmem_cgroup_destroy(struct cgroup *cont)
 {
-	mem_cgroup_sockets_destroy(cont, ss);
+	mem_cgroup_sockets_destroy(cont);
 }
 #else
 static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
@@ -4595,8 +4606,7 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 	return 0;
 }
 
-static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
-				struct cgroup *cont)
+static void kmem_cgroup_destroy(struct cgroup *cont)
 {
 }
 #endif
@@ -4720,7 +4730,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup_per_zone *mz;
-	enum lru_list l;
+	enum lru_list lru;
 	int zone, tmp = node;
 	/*
 	 * This routine is called against possible nodes.
@@ -4738,11 +4748,11 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		for_each_lru(l)
-			INIT_LIST_HEAD(&mz->lruvec.lists[l]);
+		for_each_lru(lru)
+			INIT_LIST_HEAD(&mz->lruvec.lists[lru]);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
-		mz->mem = memcg;
+		mz->memcg = memcg;
 	}
 	memcg->info.nodeinfo[node] = pn;
 	return 0;
@@ -4755,33 +4765,54 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *memcg;
 	int size = sizeof(struct mem_cgroup);
 
 	/* Can be very big if MAX_NUMNODES is very big */
 	if (size < PAGE_SIZE)
-		mem = kzalloc(size, GFP_KERNEL);
+		memcg = kzalloc(size, GFP_KERNEL);
 	else
-		mem = vzalloc(size);
+		memcg = vzalloc(size);
 
-	if (!mem)
+	if (!memcg)
 		return NULL;
 
-	mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
-	if (!mem->stat)
+	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+	if (!memcg->stat)
 		goto out_free;
-	spin_lock_init(&mem->pcp_counter_lock);
-	return mem;
+	spin_lock_init(&memcg->pcp_counter_lock);
+	return memcg;
 
 out_free:
 	if (size < PAGE_SIZE)
-		kfree(mem);
+		kfree(memcg);
 	else
-		vfree(mem);
+		vfree(memcg);
 	return NULL;
 }
 
 /*
+ * Helpers for freeing a vzalloc()ed mem_cgroup by RCU,
+ * but in process context. The work_freeing structure is overlaid
+ * on the rcu_freeing structure, which itself is overlaid on memsw.
+ */
+static void vfree_work(struct work_struct *work)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(work, struct mem_cgroup, work_freeing);
+	vfree(memcg);
+}
+static void vfree_rcu(struct rcu_head *rcu_head)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
+	INIT_WORK(&memcg->work_freeing, vfree_work);
+	schedule_work(&memcg->work_freeing);
+}
+
+/*
  * At destroying mem_cgroup, references from swap_cgroup can remain.
  * (scanning all at force_empty is too costly...)
  *
@@ -4804,9 +4835,9 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	free_percpu(memcg->stat);
 	if (sizeof(struct mem_cgroup) < PAGE_SIZE)
-		kfree(memcg);
+		kfree_rcu(memcg, rcu_freeing);
 	else
-		vfree(memcg);
+		call_rcu(&memcg->rcu_freeing, vfree_rcu);
 }
 
 static void mem_cgroup_get(struct mem_cgroup *memcg)
@@ -4888,7 +4919,7 @@ err_cleanup:
 }
 
 static struct cgroup_subsys_state * __ref
-mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+mem_cgroup_create(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg, *parent;
 	long error = -ENOMEM;
@@ -4950,20 +4981,18 @@ free_out:
 	return ERR_PTR(error);
 }
 
-static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
-					struct cgroup *cont)
+static int mem_cgroup_pre_destroy(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
 	return mem_cgroup_force_empty(memcg, false);
 }
 
-static void mem_cgroup_destroy(struct cgroup_subsys *ss,
-				struct cgroup *cont)
+static void mem_cgroup_destroy(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-	kmem_cgroup_destroy(ss, cont);
+	kmem_cgroup_destroy(cont);
 
 	mem_cgroup_put(memcg);
 }
@@ -5195,6 +5224,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	spinlock_t *ptl;
 
 	split_huge_page_pmd(walk->mm, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
@@ -5300,9 +5331,8 @@ static void mem_cgroup_clear_mc(void)
 	mem_cgroup_end_move(from);
 }
 
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
-				struct cgroup *cgroup,
-				struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup *cgroup,
+				 struct cgroup_taskset *tset)
 {
 	struct task_struct *p = cgroup_taskset_first(tset);
 	int ret = 0;
@@ -5340,9 +5370,8 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 	return ret;
 }
 
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
-				struct cgroup *cgroup,
-				struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+				     struct cgroup_taskset *tset)
 {
 	mem_cgroup_clear_mc();
 }
@@ -5357,6 +5386,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	spinlock_t *ptl;
 
 	split_huge_page_pmd(walk->mm, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 retry:
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; addr += PAGE_SIZE) {
@@ -5457,9 +5488,8 @@ retry:
 	up_read(&mm->mmap_sem);
 }
 
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
-				struct cgroup *cont,
-				struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup *cont,
+				 struct cgroup_taskset *tset)
 {
 	struct task_struct *p = cgroup_taskset_first(tset);
 	struct mm_struct *mm = get_task_mm(p);
@@ -5474,20 +5504,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
-				struct cgroup *cgroup,
-				struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup *cgroup,
+				 struct cgroup_taskset *tset)
 {
 	return 0;
 }
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
-				struct cgroup *cgroup,
-				struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+				     struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
-				struct cgroup *cont,
-				struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup *cont,
+				 struct cgroup_taskset *tset)
 {
 }
 #endif