memcg: remove redundant returns
[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / memcontrol.c
index 228d646..43a9ade 100644 (file)
@@ -135,7 +135,7 @@ struct mem_cgroup_reclaim_iter {
  */
 struct mem_cgroup_per_zone {
        struct lruvec           lruvec;
-       unsigned long           count[NR_LRU_LISTS];
+       unsigned long           lru_size[NR_LRU_LISTS];
 
        struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
@@ -144,11 +144,9 @@ struct mem_cgroup_per_zone {
        unsigned long long      usage_in_excess;/* Set to the value by which */
                                                /* the soft limit is exceeded*/
        bool                    on_tree;
-       struct mem_cgroup       *mem;           /* Back pointer, we cannot */
+       struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
                                                /* use container_of        */
 };
-/* Macro for accessing counter */
-#define MEM_CGROUP_ZSTAT(mz, idx)      ((mz)->count[(idx)])
 
 struct mem_cgroup_per_node {
        struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
@@ -230,10 +228,30 @@ struct mem_cgroup {
         * the counter to account for memory usage
         */
        struct res_counter res;
-       /*
-        * the counter to account for mem+swap usage.
-        */
-       struct res_counter memsw;
+
+       union {
+               /*
+                * the counter to account for mem+swap usage.
+                */
+               struct res_counter memsw;
+
+               /*
+                * rcu_freeing is used only when freeing struct mem_cgroup,
+                * so put it into a union to avoid wasting more memory.
+                * It must be disjoint from the css field.  It could be
+                * in a union with the res field, but res plays a much
+                * larger part in mem_cgroup life than memsw, and might
+                * be of interest, even at time of free, when debugging.
+                * So share rcu_head with the less interesting memsw.
+                */
+               struct rcu_head rcu_freeing;
+               /*
+                * But when using vfree(), that cannot be done at
+                * interrupt time, so we must then queue the work.
+                */
+               struct work_struct work_freeing;
+       };
+
        /*
         * Per cgroup active and inactive list, similar to the
         * per zone LRU lists.
@@ -592,9 +610,9 @@ retry:
         * we will to add it back at the end of reclaim to its correct
         * position in the tree.
         */
-       __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
-       if (!res_counter_soft_limit_excess(&mz->mem->res) ||
-               !css_tryget(&mz->mem->css))
+       __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+       if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
+               !css_tryget(&mz->memcg->css))
                goto retry;
 done:
        return mz;
@@ -701,14 +719,14 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
                        unsigned int lru_mask)
 {
        struct mem_cgroup_per_zone *mz;
-       enum lru_list l;
+       enum lru_list lru;
        unsigned long ret = 0;
 
        mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
-       for_each_lru(l) {
-               if (BIT(l) & lru_mask)
-                       ret += MEM_CGROUP_ZSTAT(mz, l);
+       for_each_lru(lru) {
+               if (BIT(lru) & lru_mask)
+                       ret += mz->lru_size[lru];
        }
        return ret;
 }
@@ -1042,9 +1060,22 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 
        pc = lookup_page_cgroup(page);
        memcg = pc->mem_cgroup;
+
+       /*
+        * Surreptitiously switch any uncharged page to root:
+        * an uncharged page off lru does nothing to secure
+        * its former mem_cgroup from sudden removal.
+        *
+        * Our caller holds lru_lock, and PageCgroupUsed is updated
+        * under page_cgroup lock: between them, they make all uses
+        * of pc->mem_cgroup safe.
+        */
+       if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup)
+               pc->mem_cgroup = memcg = root_mem_cgroup;
+
        mz = page_cgroup_zoneinfo(memcg, page);
        /* compound_order() is stabilized through lru_lock */
-       MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
+       mz->lru_size[lru] += 1 << compound_order(page);
        return &mz->lruvec;
 }
 
@@ -1072,8 +1103,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
        VM_BUG_ON(!memcg);
        mz = page_cgroup_zoneinfo(memcg, page);
        /* huge page split is done under lru_lock. so, we have no races. */
-       VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
-       MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
+       VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page)));
+       mz->lru_size[lru] -= 1 << compound_order(page);
 }
 
 void mem_cgroup_lru_del(struct page *page)
@@ -1360,7 +1391,6 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
        if (!memcg || !p)
                return;
 
-
        rcu_read_lock();
 
        mem_cgrp = memcg->css.cgroup;
@@ -1739,22 +1769,22 @@ static DEFINE_SPINLOCK(memcg_oom_lock);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
        wait_queue_t    wait;
 };
 
 static int memcg_oom_wake_function(wait_queue_t *wait,
        unsigned mode, int sync, void *arg)
 {
-       struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg,
-                         *oom_wait_memcg;
+       struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
+       struct mem_cgroup *oom_wait_memcg;
        struct oom_wait_info *oom_wait_info;
 
        oom_wait_info = container_of(wait, struct oom_wait_info, wait);
-       oom_wait_memcg = oom_wait_info->mem;
+       oom_wait_memcg = oom_wait_info->memcg;
 
        /*
-        * Both of oom_wait_info->mem and wake_mem are stable under us.
+        * Both of oom_wait_info->memcg and wake_memcg are stable under us.
         * Then we can use css_is_ancestor without taking care of RCU.
         */
        if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
@@ -1778,12 +1808,12 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 /*
  * try to call OOM killer. returns false if we should exit memory-reclaim loop.
  */
-bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
+bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
        struct oom_wait_info owait;
        bool locked, need_to_kill;
 
-       owait.mem = memcg;
+       owait.memcg = memcg;
        owait.wait.flags = 0;
        owait.wait.func = memcg_oom_wake_function;
        owait.wait.private = current;
@@ -1808,7 +1838,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
 
        if (need_to_kill) {
                finish_wait(&memcg_oom_waitq, &owait.wait);
-               mem_cgroup_out_of_memory(memcg, mask);
+               mem_cgroup_out_of_memory(memcg, mask, order);
        } else {
                schedule();
                finish_wait(&memcg_oom_waitq, &owait.wait);
@@ -1895,7 +1925,6 @@ out:
        if (unlikely(need_unlock))
                move_unlock_page_cgroup(pc, &flags);
        rcu_read_unlock();
-       return;
 }
 EXPORT_SYMBOL(mem_cgroup_update_page_stat);
 
@@ -2179,7 +2208,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
        if (!oom_check)
                return CHARGE_NOMEM;
        /* check OOM */
-       if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask))
+       if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
                return CHARGE_OOM_DIE;
 
        return CHARGE_RETRY;
@@ -2408,8 +2437,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                                       struct page *page,
                                       unsigned int nr_pages,
                                       struct page_cgroup *pc,
-                                      enum charge_type ctype)
+                                      enum charge_type ctype,
+                                      bool lrucare)
 {
+       struct zone *uninitialized_var(zone);
+       bool was_on_lru = false;
+
        lock_page_cgroup(pc);
        if (unlikely(PageCgroupUsed(pc))) {
                unlock_page_cgroup(pc);
@@ -2420,6 +2453,21 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
         * we don't need page_cgroup_lock about tail pages, becase they are not
         * accessed by any other context at this point.
         */
+
+       /*
+        * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
+        * may already be on some other mem_cgroup's LRU.  Take care of it.
+        */
+       if (lrucare) {
+               zone = page_zone(page);
+               spin_lock_irq(&zone->lru_lock);
+               if (PageLRU(page)) {
+                       ClearPageLRU(page);
+                       del_page_from_lru_list(zone, page, page_lru(page));
+                       was_on_lru = true;
+               }
+       }
+
        pc->mem_cgroup = memcg;
        /*
         * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2443,9 +2491,18 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                break;
        }
 
+       if (lrucare) {
+               if (was_on_lru) {
+                       VM_BUG_ON(PageLRU(page));
+                       SetPageLRU(page);
+                       add_page_to_lru_list(zone, page, page_lru(page));
+               }
+               spin_unlock_irq(&zone->lru_lock);
+       }
+
        mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
        unlock_page_cgroup(pc);
-       WARN_ON_ONCE(PageLRU(page));
+
        /*
         * "charge_statistics" updated event counter. Then, check it.
         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2643,7 +2700,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
        ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
        if (ret == -ENOMEM)
                return ret;
-       __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype, false);
        return 0;
 }
 
@@ -2663,35 +2720,6 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
                                        enum charge_type ctype);
 
-static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
-                                       enum charge_type ctype)
-{
-       struct page_cgroup *pc = lookup_page_cgroup(page);
-       struct zone *zone = page_zone(page);
-       unsigned long flags;
-       bool removed = false;
-
-       /*
-        * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
-        * is already on LRU. It means the page may on some other page_cgroup's
-        * LRU. Take care of it.
-        */
-       spin_lock_irqsave(&zone->lru_lock, flags);
-       if (PageLRU(page)) {
-               del_page_from_lru_list(zone, page, page_lru(page));
-               ClearPageLRU(page);
-               removed = true;
-       }
-       __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-       if (removed) {
-               add_page_to_lru_list(zone, page, page_lru(page));
-               SetPageLRU(page);
-       }
-       spin_unlock_irqrestore(&zone->lru_lock, flags);
-       return;
-}
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask)
 {
@@ -2769,13 +2797,16 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
                                        enum charge_type ctype)
 {
+       struct page_cgroup *pc;
+
        if (mem_cgroup_disabled())
                return;
        if (!memcg)
                return;
        cgroup_exclude_rmdir(&memcg->css);
 
-       __mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
+       pc = lookup_page_cgroup(page);
+       __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype, true);
        /*
         * Now swap is on-memory. This means this page may be
         * counted both as mem and swap....double count.
@@ -2879,7 +2910,6 @@ direct_uncharge:
                res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
        if (unlikely(batch->memcg != memcg))
                memcg_oom_recover(memcg);
-       return;
 }
 
 /*
@@ -3027,23 +3057,6 @@ void mem_cgroup_uncharge_end(void)
        batch->memcg = NULL;
 }
 
-/*
- * A function for resetting pc->mem_cgroup for newly allocated pages.
- * This function should be called if the newpage will be added to LRU
- * before start accounting.
- */
-void mem_cgroup_reset_owner(struct page *newpage)
-{
-       struct page_cgroup *pc;
-
-       if (mem_cgroup_disabled())
-               return;
-
-       pc = lookup_page_cgroup(newpage);
-       VM_BUG_ON(PageCgroupUsed(pc));
-       pc->mem_cgroup = root_mem_cgroup;
-}
-
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
@@ -3248,7 +3261,7 @@ int mem_cgroup_prepare_migration(struct page *page,
                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
        else
                ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype, false);
        return ret;
 }
 
@@ -3332,7 +3345,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
         * the newpage may be on LRU(or pagevec for LRU) already. We lock
         * LRU while we overwrite pc->mem_cgroup.
         */
-       __mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
+       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type, true);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -3531,7 +3544,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                        break;
 
                nr_scanned = 0;
-               reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
                                                    gfp_mask, &nr_scanned);
                nr_reclaimed += reclaimed;
                *total_scanned += nr_scanned;
@@ -3558,13 +3571,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                next_mz =
                                __mem_cgroup_largest_soft_limit_node(mctz);
                                if (next_mz == mz)
-                                       css_put(&next_mz->mem->css);
+                                       css_put(&next_mz->memcg->css);
                                else /* next_mz == NULL or other memcg */
                                        break;
                        } while (1);
                }
-               __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
-               excess = res_counter_soft_limit_excess(&mz->mem->res);
+               __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+               excess = res_counter_soft_limit_excess(&mz->memcg->res);
                /*
                 * One school of thought says that we should not add
                 * back the node to the tree if reclaim returns 0.
@@ -3574,9 +3587,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                 * term TODO.
                 */
                /* If excess == 0, no tree ops */
-               __mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess);
+               __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
                spin_unlock(&mctz->lock);
-               css_put(&mz->mem->css);
+               css_put(&mz->memcg->css);
                loop++;
                /*
                 * Could not reclaim anything and there are no more
@@ -3589,7 +3602,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                        break;
        } while (!nr_reclaimed);
        if (next_mz)
-               css_put(&next_mz->mem->css);
+               css_put(&next_mz->memcg->css);
        return nr_reclaimed;
 }
 
@@ -3611,7 +3624,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
        mz = mem_cgroup_zoneinfo(memcg, node, zid);
        list = &mz->lruvec.lists[lru];
 
-       loop = MEM_CGROUP_ZSTAT(mz, lru);
+       loop = mz->lru_size[lru];
        /* give some margin against EBUSY etc...*/
        loop += 256;
        busy = NULL;
@@ -3685,10 +3698,10 @@ move_account:
                mem_cgroup_start_move(memcg);
                for_each_node_state(node, N_HIGH_MEMORY) {
                        for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
-                               enum lru_list l;
-                               for_each_lru(l) {
+                               enum lru_list lru;
+                               for_each_lru(lru) {
                                        ret = mem_cgroup_force_empty_list(memcg,
-                                                       node, zid, l);
+                                                       node, zid, lru);
                                        if (ret)
                                                break;
                                }
@@ -3921,7 +3934,6 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
 out:
        *mem_limit = min_limit;
        *memsw_limit = min_memsw_limit;
-       return;
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -4080,38 +4092,38 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
        unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
        unsigned long node_nr;
        struct cgroup *cont = m->private;
-       struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-       total_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL);
+       total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
        seq_printf(m, "total=%lu", total_nr);
        for_each_node_state(nid, N_HIGH_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid, LRU_ALL);
+               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
                seq_printf(m, " N%d=%lu", nid, node_nr);
        }
        seq_putc(m, '\n');
 
-       file_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL_FILE);
+       file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
        seq_printf(m, "file=%lu", file_nr);
        for_each_node_state(nid, N_HIGH_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid,
+               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
                                LRU_ALL_FILE);
                seq_printf(m, " N%d=%lu", nid, node_nr);
        }
        seq_putc(m, '\n');
 
-       anon_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL_ANON);
+       anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
        seq_printf(m, "anon=%lu", anon_nr);
        for_each_node_state(nid, N_HIGH_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid,
+               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
                                LRU_ALL_ANON);
                seq_printf(m, " N%d=%lu", nid, node_nr);
        }
        seq_putc(m, '\n');
 
-       unevictable_nr = mem_cgroup_nr_lru_pages(mem_cont, BIT(LRU_UNEVICTABLE));
+       unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
        seq_printf(m, "unevictable=%lu", unevictable_nr);
        for_each_node_state(nid, N_HIGH_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid,
+               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
                                BIT(LRU_UNEVICTABLE));
                seq_printf(m, " N%d=%lu", nid, node_nr);
        }
@@ -4123,12 +4135,12 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
                                 struct cgroup_map_cb *cb)
 {
-       struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
        struct mcs_total_stat mystat;
        int i;
 
        memset(&mystat, 0, sizeof(mystat));
-       mem_cgroup_get_local_stat(mem_cont, &mystat);
+       mem_cgroup_get_local_stat(memcg, &mystat);
 
 
        for (i = 0; i < NR_MCS_STAT; i++) {
@@ -4140,14 +4152,14 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        /* Hierarchical information */
        {
                unsigned long long limit, memsw_limit;
-               memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit);
+               memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
                cb->fill(cb, "hierarchical_memory_limit", limit);
                if (do_swap_account)
                        cb->fill(cb, "hierarchical_memsw_limit", memsw_limit);
        }
 
        memset(&mystat, 0, sizeof(mystat));
-       mem_cgroup_get_total_stat(mem_cont, &mystat);
+       mem_cgroup_get_total_stat(memcg, &mystat);
        for (i = 0; i < NR_MCS_STAT; i++) {
                if (i == MCS_SWAP && !do_swap_account)
                        continue;
@@ -4163,7 +4175,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 
                for_each_online_node(nid)
                        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                               mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
+                               mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
                                recent_rotated[0] +=
                                        mz->reclaim_stat.recent_rotated[0];
@@ -4584,10 +4596,9 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
        return mem_cgroup_sockets_init(cont, ss);
 };
 
-static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
-                               struct cgroup *cont)
+static void kmem_cgroup_destroy(struct cgroup *cont)
 {
-       mem_cgroup_sockets_destroy(cont, ss);
+       mem_cgroup_sockets_destroy(cont);
 }
 #else
 static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
@@ -4595,8 +4606,7 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
        return 0;
 }
 
-static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
-                               struct cgroup *cont)
+static void kmem_cgroup_destroy(struct cgroup *cont)
 {
 }
 #endif
@@ -4720,7 +4730,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 {
        struct mem_cgroup_per_node *pn;
        struct mem_cgroup_per_zone *mz;
-       enum lru_list l;
+       enum lru_list lru;
        int zone, tmp = node;
        /*
         * This routine is called against possible nodes.
@@ -4738,11 +4748,11 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
-               for_each_lru(l)
-                       INIT_LIST_HEAD(&mz->lruvec.lists[l]);
+               for_each_lru(lru)
+                       INIT_LIST_HEAD(&mz->lruvec.lists[lru]);
                mz->usage_in_excess = 0;
                mz->on_tree = false;
-               mz->mem = memcg;
+               mz->memcg = memcg;
        }
        memcg->info.nodeinfo[node] = pn;
        return 0;
@@ -4755,33 +4765,54 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
        int size = sizeof(struct mem_cgroup);
 
        /* Can be very big if MAX_NUMNODES is very big */
        if (size < PAGE_SIZE)
-               mem = kzalloc(size, GFP_KERNEL);
+               memcg = kzalloc(size, GFP_KERNEL);
        else
-               mem = vzalloc(size);
+               memcg = vzalloc(size);
 
-       if (!mem)
+       if (!memcg)
                return NULL;
 
-       mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
-       if (!mem->stat)
+       memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+       if (!memcg->stat)
                goto out_free;
-       spin_lock_init(&mem->pcp_counter_lock);
-       return mem;
+       spin_lock_init(&memcg->pcp_counter_lock);
+       return memcg;
 
 out_free:
        if (size < PAGE_SIZE)
-               kfree(mem);
+               kfree(memcg);
        else
-               vfree(mem);
+               vfree(memcg);
        return NULL;
 }
 
 /*
+ * Helpers for freeing a vzalloc()ed mem_cgroup by RCU,
+ * but in process context.  The work_freeing structure is overlaid
+ * on the rcu_freeing structure, which itself is overlaid on memsw.
+ */
+static void vfree_work(struct work_struct *work)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = container_of(work, struct mem_cgroup, work_freeing);
+       vfree(memcg);
+}
+static void vfree_rcu(struct rcu_head *rcu_head)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
+       INIT_WORK(&memcg->work_freeing, vfree_work);
+       schedule_work(&memcg->work_freeing);
+}
+
+/*
  * At destroying mem_cgroup, references from swap_cgroup can remain.
  * (scanning all at force_empty is too costly...)
  *
@@ -4804,9 +4835,9 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
        free_percpu(memcg->stat);
        if (sizeof(struct mem_cgroup) < PAGE_SIZE)
-               kfree(memcg);
+               kfree_rcu(memcg, rcu_freeing);
        else
-               vfree(memcg);
+               call_rcu(&memcg->rcu_freeing, vfree_rcu);
 }
 
 static void mem_cgroup_get(struct mem_cgroup *memcg)
@@ -4888,7 +4919,7 @@ err_cleanup:
 }
 
 static struct cgroup_subsys_state * __ref
-mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+mem_cgroup_create(struct cgroup *cont)
 {
        struct mem_cgroup *memcg, *parent;
        long error = -ENOMEM;
@@ -4950,20 +4981,18 @@ free_out:
        return ERR_PTR(error);
 }
 
-static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
-                                       struct cgroup *cont)
+static int mem_cgroup_pre_destroy(struct cgroup *cont)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
        return mem_cgroup_force_empty(memcg, false);
 }
 
-static void mem_cgroup_destroy(struct cgroup_subsys *ss,
-                               struct cgroup *cont)
+static void mem_cgroup_destroy(struct cgroup *cont)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-       kmem_cgroup_destroy(ss, cont);
+       kmem_cgroup_destroy(cont);
 
        mem_cgroup_put(memcg);
 }
@@ -5195,6 +5224,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        spinlock_t *ptl;
 
        split_huge_page_pmd(walk->mm, pmd);
+       if (pmd_trans_unstable(pmd))
+               return 0;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE)
@@ -5300,9 +5331,8 @@ static void mem_cgroup_clear_mc(void)
        mem_cgroup_end_move(from);
 }
 
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
-                               struct cgroup *cgroup,
-                               struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup *cgroup,
+                                struct cgroup_taskset *tset)
 {
        struct task_struct *p = cgroup_taskset_first(tset);
        int ret = 0;
@@ -5340,9 +5370,8 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
        return ret;
 }
 
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
-                               struct cgroup *cgroup,
-                               struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+                                    struct cgroup_taskset *tset)
 {
        mem_cgroup_clear_mc();
 }
@@ -5357,6 +5386,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        spinlock_t *ptl;
 
        split_huge_page_pmd(walk->mm, pmd);
+       if (pmd_trans_unstable(pmd))
+               return 0;
 retry:
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; addr += PAGE_SIZE) {
@@ -5457,9 +5488,8 @@ retry:
        up_read(&mm->mmap_sem);
 }
 
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
-                               struct cgroup *cont,
-                               struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup *cont,
+                                struct cgroup_taskset *tset)
 {
        struct task_struct *p = cgroup_taskset_first(tset);
        struct mm_struct *mm = get_task_mm(p);
@@ -5474,20 +5504,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                mem_cgroup_clear_mc();
 }
 #else  /* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
-                               struct cgroup *cgroup,
-                               struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup *cgroup,
+                                struct cgroup_taskset *tset)
 {
        return 0;
 }
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
-                               struct cgroup *cgroup,
-                               struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+                                    struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
-                               struct cgroup *cont,
-                               struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup *cont,
+                                struct cgroup_taskset *tset)
 {
 }
 #endif