memcg: correctly order reading PCG_USED and pc->mem_cgroup
[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / memcontrol.c
index 7a94ef6..db76ef7 100644 (file)
@@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
                return;
 
        pc = lookup_page_cgroup(page);
-       /*
-        * Used bit is set without atomic ops but after smp_wmb().
-        * For making pc->mem_cgroup visible, insert smp_rmb() here.
-        */
-       smp_rmb();
        /* unused or root page is not rotated. */
-       if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+       if (!PageCgroupUsed(pc))
+               return;
+       /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+       smp_rmb();
+       if (mem_cgroup_is_root(pc->mem_cgroup))
                return;
        mz = page_cgroup_zoneinfo(pc);
        list_move(&pc->lru, &mz->lists[lru]);
@@ -857,14 +856,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
                return;
        pc = lookup_page_cgroup(page);
        VM_BUG_ON(PageCgroupAcctLRU(pc));
-       /*
-        * Used bit is set without atomic ops but after smp_wmb().
-        * For making pc->mem_cgroup visible, insert smp_rmb() here.
-        */
-       smp_rmb();
        if (!PageCgroupUsed(pc))
                return;
-
+       /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+       smp_rmb();
        mz = page_cgroup_zoneinfo(pc);
        /* huge page split is done under lru_lock. so, we have no races. */
        MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -1031,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
                return NULL;
 
        pc = lookup_page_cgroup(page);
-       /*
-        * Used bit is set without atomic ops but after smp_wmb().
-        * For making pc->mem_cgroup visible, insert smp_rmb() here.
-        */
-       smp_rmb();
        if (!PageCgroupUsed(pc))
                return NULL;
-
+       /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+       smp_rmb();
        mz = page_cgroup_zoneinfo(pc);
        if (!mz)
                return NULL;
@@ -2197,8 +2188,11 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-       struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+       struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+       int charge_size)
 {
+       int nr_pages = charge_size >> PAGE_SHIFT;
+
        VM_BUG_ON(from == to);
        VM_BUG_ON(PageLRU(pc->page));
        VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2212,14 +2206,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
                __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
                preempt_enable();
        }
-       mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -1);
+       mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
        if (uncharge)
                /* This is not "cancel", but cancel_charge does all we need. */
-               mem_cgroup_cancel_charge(from, PAGE_SIZE);
+               mem_cgroup_cancel_charge(from, charge_size);
 
        /* caller should have done css_get */
        pc->mem_cgroup = to;
-       mem_cgroup_charge_statistics(to, PageCgroupCache(pc), 1);
+       mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
        /*
         * We charges against "to" which may not have any tasks. Then, "to"
         * can be under rmdir(). But in current implementation, caller of
@@ -2234,15 +2228,19 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
  */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-               struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+               struct mem_cgroup *from, struct mem_cgroup *to,
+               bool uncharge, int charge_size)
 {
        int ret = -EINVAL;
        unsigned long flags;
 
+       if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+               return -EBUSY;
+
        lock_page_cgroup(pc);
        if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
                move_lock_page_cgroup(pc, &flags);
-               __mem_cgroup_move_account(pc, from, to, uncharge);
+               __mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
                move_unlock_page_cgroup(pc, &flags);
                ret = 0;
        }
@@ -2267,6 +2265,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
        struct cgroup *cg = child->css.cgroup;
        struct cgroup *pcg = cg->parent;
        struct mem_cgroup *parent;
+       int charge = PAGE_SIZE;
+       unsigned long flags;
        int ret;
 
        /* Is ROOT ? */
@@ -2278,17 +2278,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
                goto out;
        if (isolate_lru_page(page))
                goto put;
+       /* The page is isolated from LRU and we have no race with splitting */
+       charge = PAGE_SIZE << compound_order(page);
 
        parent = mem_cgroup_from_cont(pcg);
-       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-                                     PAGE_SIZE);
+       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge);
        if (ret || !parent)
                goto put_back;
 
-       ret = mem_cgroup_move_account(pc, child, parent, true);
+       if (charge > PAGE_SIZE)
+               flags = compound_lock_irqsave(page);
+
+       ret = mem_cgroup_move_account(pc, child, parent, true, charge);
        if (ret)
-               mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+               mem_cgroup_cancel_charge(parent, charge);
+       if (charge > PAGE_SIZE)
+               compound_unlock_irqrestore(page, flags);
 put_back:
        putback_lru_page(page);
 put:
        put_page(page);
@@ -4868,7 +4874,7 @@ retry:
                                goto put;
                        pc = lookup_page_cgroup(page);
                        if (!mem_cgroup_move_account(pc,
-                                               mc.from, mc.to, false)) {
+                                       mc.from, mc.to, false, PAGE_SIZE)) {
                                mc.precharge--;
                                /* we uncharge from mc.from later. */
                                mc.moved_charge++;