X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fhuge_memory.c;h=de8d5cfc2bf2f9dffd873a4aef44fa69d4ba76a8;hb=f123d74abf91574837d14e5ea58f6a779a387bf5;hp=d66010e0049d1b982c702881769a18b73fcff10e;hpb=26935fb06ee88f1188789807687c03041f3c70d9;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d66010e..de8d5cf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -695,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma) +static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) { pmd_t entry; - entry = mk_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = mk_pmd(page, prot); entry = pmd_mkhuge(entry); return entry; } @@ -732,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pte_free(mm, pgtable); } else { pmd_t entry; - entry = mk_huge_pmd(page, vma); + entry = mk_huge_pmd(page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); @@ -788,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page; unsigned long haddr = address & HPAGE_PMD_MASK; - pte_t *pte; - if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) { - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; - if (unlikely(khugepaged_enter(vma))) + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return VM_FAULT_FALLBACK; + if (unlikely(anon_vma_prepare(vma))) + return VM_FAULT_OOM; + if (unlikely(khugepaged_enter(vma))) + return VM_FAULT_OOM; + if (!(flags & FAULT_FLAG_WRITE) && + transparent_hugepage_use_zero_page()) { + pgtable_t pgtable; + struct page *zero_page; + bool set; + pgtable = pte_alloc_one(mm, haddr); + if (unlikely(!pgtable)) return VM_FAULT_OOM; - if (!(flags & FAULT_FLAG_WRITE) && - transparent_hugepage_use_zero_page()) { - pgtable_t pgtable; - struct page *zero_page; - bool set; - pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) - return VM_FAULT_OOM; - zero_page = get_huge_zero_page(); - if (unlikely(!zero_page)) { - pte_free(mm, pgtable); - count_vm_event(THP_FAULT_FALLBACK); - goto out; - } - spin_lock(&mm->page_table_lock); - set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, - zero_page); - spin_unlock(&mm->page_table_lock); - if (!set) { - pte_free(mm, pgtable); - put_huge_zero_page(); - } - return 0; - } - page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) { + zero_page = get_huge_zero_page(); + if (unlikely(!zero_page)) { + pte_free(mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); - goto out; - } - count_vm_event(THP_FAULT_ALLOC); - if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { - put_page(page); - goto out; + return VM_FAULT_FALLBACK; } - if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, - page))) { - mem_cgroup_uncharge_page(page); - put_page(page); - goto out; + spin_lock(&mm->page_table_lock); + set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, + zero_page); + spin_unlock(&mm->page_table_lock); + if (!set) { + pte_free(mm, pgtable); + put_huge_zero_page(); } - return 0; } -out: - /* - * Use __pte_alloc instead of pte_alloc_map, because we can't - * run pte_offset_map on the pmd, if an huge pmd could - * materialize from under us from a different thread. - */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) - return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) - return 0; - /* - * A regular pmd is established and it can't morph into a huge pmd - * from under us anymore at this point because we hold the mmap_sem - * read mode and khugepaged takes it in write mode. So now it's - * safe to run pte_offset_map(). - */ - pte = pte_offset_map(pmd, address); - return handle_pte_fault(mm, vma, address, pte, pmd, flags); + page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), + vma, haddr, numa_node_id(), 0); + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { + put_page(page); + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { + mem_cgroup_uncharge_page(page); + put_page(page); + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + + count_vm_event(THP_FAULT_ALLOC); + return 0; } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -1170,7 +1150,6 @@ alloc: new_page = NULL; if (unlikely(!new_page)) { - count_vm_event(THP_FAULT_FALLBACK); if (is_huge_zero_pmd(orig_pmd)) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, address, pmd, orig_pmd, haddr); @@ -1181,9 +1160,9 @@ alloc: split_huge_page(page); put_page(page); } + count_vm_event(THP_FAULT_FALLBACK); goto out; } - count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); @@ -1191,10 +1170,13 @@ alloc: split_huge_page(page); put_page(page); } + count_vm_event(THP_FAULT_FALLBACK); ret |= VM_FAULT_OOM; goto out; } + count_vm_event(THP_FAULT_ALLOC); + if (is_huge_zero_pmd(orig_pmd)) clear_huge_page(new_page, haddr, HPAGE_PMD_NR); else @@ -1215,7 +1197,8 @@ alloc: goto out_mn; } else { pmd_t entry; - entry = mk_huge_pmd(new_page, vma); + entry = mk_huge_pmd(new_page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_clear_flush(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); @@ -1295,64 +1278,90 @@ out: int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp) { + struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; + int page_nid = -1, this_nid = numa_node_id(); int target_nid; - int current_nid = -1; - bool migrated; + bool page_locked; + bool migrated = false; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; page = pmd_page(pmd); - get_page(page); - current_nid = page_to_nid(page); + page_nid = page_to_nid(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == this_nid) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + /* + * Acquire the page lock to serialise THP migrations but avoid dropping + * page_table_lock if at all possible + */ + page_locked = trylock_page(page); target_nid = mpol_misplaced(page, vma, haddr); if (target_nid == -1) { - put_page(page); - goto clear_pmdnuma; + /* If the page was locked, there are no parallel migrations */ + if (page_locked) + goto clear_pmdnuma; + + /* + * Otherwise wait for potential migrations and retry. We do + * relock and check_same as the page may no longer be mapped. + * As the fault is being retried, do not account for it. + */ + spin_unlock(&mm->page_table_lock); + wait_on_page_locked(page); + page_nid = -1; + goto out; } - /* Acquire the page lock to serialise THP migrations */ + /* Page is misplaced, serialise migrations and parallel THP splits */ + get_page(page); spin_unlock(&mm->page_table_lock); - lock_page(page); + if (!page_locked) + lock_page(page); + anon_vma = page_lock_anon_vma_read(page); - /* Confirm the PTE did not while locked */ + /* Confirm the PMD did not change while page_table_lock was released */ spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) { unlock_page(page); put_page(page); + page_nid = -1; goto out_unlock; } - spin_unlock(&mm->page_table_lock); - /* Migrate the THP to the requested node */ + /* + * Migrate the THP to the requested node, returns with page unlocked + * and pmd_numa cleared. + */ + spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); - if (!migrated) - goto check_same; - - task_numa_fault(target_nid, HPAGE_PMD_NR, true); - return 0; + if (migrated) + page_nid = target_nid; -check_same: - spin_lock(&mm->page_table_lock); - if (unlikely(!pmd_same(pmd, *pmdp))) - goto out_unlock; + goto out; clear_pmdnuma: + BUG_ON(!PageLocked(page)); pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); VM_BUG_ON(pmd_numa(*pmdp)); update_mmu_cache_pmd(vma, addr, pmdp); + unlock_page(page); out_unlock: spin_unlock(&mm->page_table_lock); - if (current_nid != -1) - task_numa_fault(current_nid, HPAGE_PMD_NR, false); + +out: + if (anon_vma) + page_unlock_anon_vma_read(anon_vma); + + if (page_nid != -1) + task_numa_fault(page_nid, HPAGE_PMD_NR, migrated); + return 0; } @@ -1449,6 +1458,12 @@ out: return ret; } +/* + * Returns + * - 0 if PMD could not be locked + * - 1 if PMD was locked but protections unchange and TLB flush unnecessary + * - HPAGE_PMD_NR is protections changed and TLB flush necessary + */ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa) { @@ -1457,9 +1472,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (__pmd_trans_huge_lock(pmd, vma) == 1) { pmd_t entry; - entry = pmdp_get_and_clear(mm, addr, pmd); + ret = 1; if (!prot_numa) { + entry = pmdp_get_and_clear(mm, addr, pmd); entry = pmd_modify(entry, newprot); + ret = HPAGE_PMD_NR; BUG_ON(pmd_write(entry)); } else { struct page *page = pmd_page(*pmd); @@ -1467,12 +1484,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, /* only check non-shared pages */ if (page_mapcount(page) == 1 && !pmd_numa(*pmd)) { + entry = pmdp_get_and_clear(mm, addr, pmd); entry = pmd_mknuma(entry); + ret = HPAGE_PMD_NR; } } - set_pmd_at(mm, addr, pmd, entry); + + /* Set PMD if cleared earlier */ + if (ret == HPAGE_PMD_NR) + set_pmd_at(mm, addr, pmd, entry); + spin_unlock(&vma->vm_mm->page_table_lock); - ret = 1; } return ret; @@ -1666,7 +1688,6 @@ static void __split_huge_page_refcount(struct page *page, BUG_ON(atomic_read(&page->_count) <= 0); __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); - __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); ClearPageCompound(page); compound_unlock(page); @@ -2364,7 +2385,8 @@ static void collapse_huge_page(struct mm_struct *mm, __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); - _pmd = mk_huge_pmd(new_page, vma); + _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); + _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); /* * spin_lock() below is not the equivalent of smp_wmb(), so