page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
+ spin_lock_irq(&hugetlb_lock);
if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
SetHPageRestoreReserve(page);
h->resv_huge_pages--;
}
- spin_lock_irq(&hugetlb_lock);
list_add(&page->lru, &h->hugepage_activelist);
/* Fall through */
}
unsigned long haddr,
unsigned long reason)
{
- vm_fault_t ret;
u32 hash;
struct vm_fault vmf = {
.vma = vma,
};
/*
- * hugetlb_fault_mutex and i_mmap_rwsem must be
- * dropped before handling userfault. Reacquire
- * after handling fault to make calling code simpler.
+ * vma_lock and hugetlb_fault_mutex must be dropped before handling
+ * userfault. Also mmap_lock will be dropped during handling
+ * userfault, any vma operation should be careful from here.
*/
hash = hugetlb_fault_mutex_hash(mapping, idx);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping);
- ret = handle_userfault(&vmf, reason);
- i_mmap_lock_read(mapping);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
-
- return ret;
+ return handle_userfault(&vmf, reason);
}
static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
bool new_page, new_pagecache_page = false;
+ u32 hash = hugetlb_fault_mutex_hash(mapping, idx);
/*
* Currently, we are forced to kill the process in the event the
if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n",
current->pid);
- return ret;
+ goto out;
}
/*
page = find_lock_page(mapping, idx);
if (!page) {
/* Check for page in userfault range */
- if (userfaultfd_missing(vma)) {
- ret = hugetlb_handle_userfault(vma, mapping, idx,
+ if (userfaultfd_missing(vma))
+ return hugetlb_handle_userfault(vma, mapping, idx,
flags, haddr,
VM_UFFD_MISSING);
- goto out;
- }
page = alloc_huge_page(vma, haddr, 0);
if (IS_ERR(page)) {
if (userfaultfd_minor(vma)) {
unlock_page(page);
put_page(page);
- ret = hugetlb_handle_userfault(vma, mapping, idx,
+ return hugetlb_handle_userfault(vma, mapping, idx,
flags, haddr,
VM_UFFD_MINOR);
- goto out;
}
}
unlock_page(page);
out:
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
return ret;
backout:
mutex_lock(&hugetlb_fault_mutex_table[hash]);
entry = huge_ptep_get(ptep);
- if (huge_pte_none(entry)) {
- ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
- goto out_mutex;
- }
+ if (huge_pte_none(entry))
+ /*
+ * hugetlb_no_page will drop vma lock and hugetlb fault
+ * mutex internally, which make us return immediately.
+ */
+ return hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
ret = 0;
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
spin_lock(ptl);
+ ret = -EIO;
+ if (PageHWPoison(page))
+ goto out_release_unlock;
+
/*
* Recheck the i_size after holding PT lock to make sure not
* to leave any page mapped (as page_mapped()) beyond the end
if (!huge_pte_none(huge_ptep_get(dst_pte)))
goto out_release_unlock;
- if (vm_shared) {
+ if (page_in_pagecache) {
page_dup_rmap(page, true);
} else {
ClearHPageRestoreReserve(page);
}
struct page * __weak
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int flags)
+follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
{
+ struct hstate *h = hstate_vma(vma);
+ struct mm_struct *mm = vma->vm_mm;
struct page *page = NULL;
spinlock_t *ptl;
- pte_t pte;
+ pte_t *ptep, pte;
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
return NULL;
retry:
- ptl = pmd_lockptr(mm, pmd);
- spin_lock(ptl);
- /*
- * make sure that the address range covered by this pmd is not
- * unmapped from other threads.
- */
- if (!pmd_huge(*pmd))
- goto out;
- pte = huge_ptep_get((pte_t *)pmd);
+ ptep = huge_pte_offset(mm, address, huge_page_size(h));
+ if (!ptep)
+ return NULL;
+
+ ptl = huge_pte_lock(h, mm, ptep);
+ pte = huge_ptep_get(ptep);
if (pte_present(pte)) {
- page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
+ page = pte_page(pte) +
+ ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
/*
* try_grab_page() should always succeed here, because: a) we
* hold the pmd (ptl) lock, and b) we've just checked that the
} else {
if (is_hugetlb_entry_migration(pte)) {
spin_unlock(ptl);
- __migration_entry_wait(mm, (pte_t *)pmd, ptl);
+ __migration_entry_wait(mm, ptep, ptl);
goto retry;
}
/*