mm: memcontrol: switch to native NR_ANON_MAPPED counter
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6ecd104..2caf249 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -594,7 +594,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-       if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
+       if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg)) {
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                count_vm_event(THP_FAULT_FALLBACK_CHARGE);
@@ -630,7 +630,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
                        vm_fault_t ret2;
 
                        spin_unlock(vmf->ptl);
-                       mem_cgroup_cancel_charge(page, memcg, true);
+                       mem_cgroup_cancel_charge(page, memcg);
                        put_page(page);
                        pte_free(vma->vm_mm, pgtable);
                        ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
@@ -640,8 +640,8 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 
                entry = mk_huge_pmd(page, vma->vm_page_prot);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+               mem_cgroup_commit_charge(page, memcg, false);
                page_add_new_anon_rmap(page, vma, haddr, true);
-               mem_cgroup_commit_charge(page, memcg, false, true);
                lru_cache_add_active_or_unevictable(page, vma);
                pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
                set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
@@ -658,7 +658,7 @@ unlock_release:
 release:
        if (pgtable)
                pte_free(vma->vm_mm, pgtable);
-       mem_cgroup_cancel_charge(page, memcg, true);
+       mem_cgroup_cancel_charge(page, memcg);
        put_page(page);
        return ret;
 
@@ -1255,273 +1255,72 @@ unlock:
        spin_unlock(vmf->ptl);
 }
 
-static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf,
-                       pmd_t orig_pmd, struct page *page)
-{
-       struct vm_area_struct *vma = vmf->vma;
-       unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
-       struct mem_cgroup *memcg;
-       pgtable_t pgtable;
-       pmd_t _pmd;
-       int i;
-       vm_fault_t ret = 0;
-       struct page **pages;
-       struct mmu_notifier_range range;
-
-       pages = kmalloc_array(HPAGE_PMD_NR, sizeof(struct page *),
-                             GFP_KERNEL);
-       if (unlikely(!pages)) {
-               ret |= VM_FAULT_OOM;
-               goto out;
-       }
-
-       for (i = 0; i < HPAGE_PMD_NR; i++) {
-               pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
-                                              vmf->address, page_to_nid(page));
-               if (unlikely(!pages[i] ||
-                            mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
-                                    GFP_KERNEL, &memcg, false))) {
-                       if (pages[i])
-                               put_page(pages[i]);
-                       while (--i >= 0) {
-                               memcg = (void *)page_private(pages[i]);
-                               set_page_private(pages[i], 0);
-                               mem_cgroup_cancel_charge(pages[i], memcg,
-                                               false);
-                               put_page(pages[i]);
-                       }
-                       kfree(pages);
-                       ret |= VM_FAULT_OOM;
-                       goto out;
-               }
-               set_page_private(pages[i], (unsigned long)memcg);
-       }
-
-       for (i = 0; i < HPAGE_PMD_NR; i++) {
-               copy_user_highpage(pages[i], page + i,
-                                  haddr + PAGE_SIZE * i, vma);
-               __SetPageUptodate(pages[i]);
-               cond_resched();
-       }
-
-       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               haddr, haddr + HPAGE_PMD_SIZE);
-       mmu_notifier_invalidate_range_start(&range);
-
-       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
-               goto out_free_pages;
-       VM_BUG_ON_PAGE(!PageHead(page), page);
-
-       /*
-        * Leave pmd empty until pte is filled note we must notify here as
-        * concurrent CPU thread might write to new page before the call to
-        * mmu_notifier_invalidate_range_end() happens which can lead to a
-        * device seeing memory write in different order than CPU.
-        *
-        * See Documentation/vm/mmu_notifier.rst
-        */
-       pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
-
-       pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd);
-       pmd_populate(vma->vm_mm, &_pmd, pgtable);
-
-       for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
-               pte_t entry;
-               entry = mk_pte(pages[i], vma->vm_page_prot);
-               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-               memcg = (void *)page_private(pages[i]);
-               set_page_private(pages[i], 0);
-               page_add_new_anon_rmap(pages[i], vmf->vma, haddr, false);
-               mem_cgroup_commit_charge(pages[i], memcg, false, false);
-               lru_cache_add_active_or_unevictable(pages[i], vma);
-               vmf->pte = pte_offset_map(&_pmd, haddr);
-               VM_BUG_ON(!pte_none(*vmf->pte));
-               set_pte_at(vma->vm_mm, haddr, vmf->pte, entry);
-               pte_unmap(vmf->pte);
-       }
-       kfree(pages);
-
-       smp_wmb(); /* make pte visible before pmd */
-       pmd_populate(vma->vm_mm, vmf->pmd, pgtable);
-       page_remove_rmap(page, true);
-       spin_unlock(vmf->ptl);
-
-       /*
-        * No need to double call mmu_notifier->invalidate_range() callback as
-        * the above pmdp_huge_clear_flush_notify() did already call it.
-        */
-       mmu_notifier_invalidate_range_only_end(&range);
-
-       ret |= VM_FAULT_WRITE;
-       put_page(page);
-
-out:
-       return ret;
-
-out_free_pages:
-       spin_unlock(vmf->ptl);
-       mmu_notifier_invalidate_range_end(&range);
-       for (i = 0; i < HPAGE_PMD_NR; i++) {
-               memcg = (void *)page_private(pages[i]);
-               set_page_private(pages[i], 0);
-               mem_cgroup_cancel_charge(pages[i], memcg, false);
-               put_page(pages[i]);
-       }
-       kfree(pages);
-       goto out;
-}
-
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
        struct vm_area_struct *vma = vmf->vma;
-       struct page *page = NULL, *new_page;
-       struct mem_cgroup *memcg;
+       struct page *page;
        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
-       struct mmu_notifier_range range;
-       gfp_t huge_gfp;                 /* for allocation and charge */
-       vm_fault_t ret = 0;
 
        vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
        VM_BUG_ON_VMA(!vma->anon_vma, vma);
+
        if (is_huge_zero_pmd(orig_pmd))
-               goto alloc;
+               goto fallback;
+
        spin_lock(vmf->ptl);
-       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
-               goto out_unlock;
+
+       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
+               spin_unlock(vmf->ptl);
+               return 0;
+       }
 
        page = pmd_page(orig_pmd);
        VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
-       /*
-        * We can only reuse the page if nobody else maps the huge page or it's
-        * part.
-        */
+
+       /* Lock page for reuse_swap_page() */
        if (!trylock_page(page)) {
                get_page(page);
                spin_unlock(vmf->ptl);
                lock_page(page);
                spin_lock(vmf->ptl);
                if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
+                       spin_unlock(vmf->ptl);
                        unlock_page(page);
                        put_page(page);
-                       goto out_unlock;
+                       return 0;
                }
                put_page(page);
        }
+
+       /*
+        * We can only reuse the page if nobody else maps the huge page or
+        * part of it.
+        */
        if (reuse_swap_page(page, NULL)) {
                pmd_t entry;
                entry = pmd_mkyoung(orig_pmd);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-               if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry,  1))
+               if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
                        update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
-               ret |= VM_FAULT_WRITE;
                unlock_page(page);
-               goto out_unlock;
-       }
-       unlock_page(page);
-       get_page(page);
-       spin_unlock(vmf->ptl);
-alloc:
-       if (__transparent_hugepage_enabled(vma) &&
-           !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
-       } else
-               new_page = NULL;
-
-       if (likely(new_page)) {
-               prep_transhuge_page(new_page);
-       } else {
-               if (!page) {
-                       split_huge_pmd(vma, vmf->pmd, vmf->address);
-                       ret |= VM_FAULT_FALLBACK;
-               } else {
-                       ret = do_huge_pmd_wp_page_fallback(vmf, orig_pmd, page);
-                       if (ret & VM_FAULT_OOM) {
-                               split_huge_pmd(vma, vmf->pmd, vmf->address);
-                               ret |= VM_FAULT_FALLBACK;
-                       }
-                       put_page(page);
-               }
-               count_vm_event(THP_FAULT_FALLBACK);
-               goto out;
-       }
-
-       if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
-                                       huge_gfp, &memcg, true))) {
-               put_page(new_page);
-               split_huge_pmd(vma, vmf->pmd, vmf->address);
-               if (page)
-                       put_page(page);
-               ret |= VM_FAULT_FALLBACK;
-               count_vm_event(THP_FAULT_FALLBACK);
-               count_vm_event(THP_FAULT_FALLBACK_CHARGE);
-               goto out;
-       }
-
-       count_vm_event(THP_FAULT_ALLOC);
-       count_memcg_events(memcg, THP_FAULT_ALLOC, 1);
-
-       if (!page)
-               clear_huge_page(new_page, vmf->address, HPAGE_PMD_NR);
-       else
-               copy_user_huge_page(new_page, page, vmf->address,
-                                   vma, HPAGE_PMD_NR);
-       __SetPageUptodate(new_page);
-
-       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               haddr, haddr + HPAGE_PMD_SIZE);
-       mmu_notifier_invalidate_range_start(&range);
-
-       spin_lock(vmf->ptl);
-       if (page)
-               put_page(page);
-       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
                spin_unlock(vmf->ptl);
-               mem_cgroup_cancel_charge(new_page, memcg, true);
-               put_page(new_page);
-               goto out_mn;
-       } else {
-               pmd_t entry;
-               entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-               pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
-               page_add_new_anon_rmap(new_page, vma, haddr, true);
-               mem_cgroup_commit_charge(new_page, memcg, false, true);
-               lru_cache_add_active_or_unevictable(new_page, vma);
-               set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
-               update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
-               if (!page) {
-                       add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-               } else {
-                       VM_BUG_ON_PAGE(!PageHead(page), page);
-                       page_remove_rmap(page, true);
-                       put_page(page);
-               }
-               ret |= VM_FAULT_WRITE;
+               return VM_FAULT_WRITE;
        }
+
+       unlock_page(page);
        spin_unlock(vmf->ptl);
-out_mn:
-       /*
-        * No need to double call mmu_notifier->invalidate_range() callback as
-        * the above pmdp_huge_clear_flush_notify() did already call it.
-        */
-       mmu_notifier_invalidate_range_only_end(&range);
-out:
-       return ret;
-out_unlock:
-       spin_unlock(vmf->ptl);
-       return ret;
+fallback:
+       __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL);
+       return VM_FAULT_FALLBACK;
 }
 
 /*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE or a forced COW break can write even to unwritable pmd's,
+ * but only after we've gone through a COW cycle and they are dirty.
  */
 static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
 {
-       return pmd_write(pmd) ||
-              ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+       return pmd_write(pmd) || ((flags & FOLL_COW) && pmd_dirty(pmd));
 }
 
 struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,