/*
 * maybe_pmd_mkwrite - conditionally pass a PMD entry through pmd_mkwrite().
 * @pmd: the PMD value to adjust
 * @vma: the VMA whose vm_flags decide whether the entry is adjusted
 *
 * When @vma has VM_WRITE set, @pmd is replaced by the result of
 * pmd_mkwrite(); otherwise it is returned unchanged.
 *
 * Change shown in this hunk: pmd_mkwrite() now also receives @vma.
 */
pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, vma);
return pmd;
}
}
#endif
/*
 * folio_prep_large_rmappable - prepare a large folio (replaces
 * prep_transhuge_page()).
 * @folio: the folio to prepare
 *
 * VM_BUG_ON_FOLIO() fires if the folio order is below 2. The folio's
 * _deferred_list head is then initialised and the folio is marked with
 * folio_set_large_rmappable().
 *
 * Changes shown in this hunk:
 *  - the function takes a struct folio directly, so the cast from
 *    struct page is dropped;
 *  - setting the compound destructor to TRANSHUGE_PAGE_DTOR is replaced by
 *    folio_set_large_rmappable().
 */
-void prep_transhuge_page(struct page *page)
+void folio_prep_large_rmappable(struct folio *folio)
{
- struct folio *folio = (struct folio *)page;
-
VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
INIT_LIST_HEAD(&folio->_deferred_list);
- folio_set_compound_dtor(folio, TRANSHUGE_PAGE_DTOR);
+ folio_set_large_rmappable(folio);
}
/*
 * is_transparent_hugepage - test whether a folio counts as a THP here.
 * @folio: the folio to test (previously a struct page was passed)
 *
 * Returns false when folio_test_large() is false. Otherwise returns true if
 * the folio's head page satisfies is_huge_zero_page() or the folio satisfies
 * folio_test_large_rmappable().
 *
 * Changes shown in this hunk:
 *  - the PageCompound() check and page_folio() lookup are gone because the
 *    caller now supplies the folio;
 *  - the comparison of _folio_dtor against TRANSHUGE_PAGE_DTOR is replaced
 *    by folio_test_large_rmappable().
 */
-static inline bool is_transparent_hugepage(struct page *page)
+static inline bool is_transparent_hugepage(struct folio *folio)
{
- struct folio *folio;
-
- if (!PageCompound(page))
+ if (!folio_test_large(folio))
return false;
- folio = page_folio(page);
return is_huge_zero_page(&folio->page) ||
- folio->_folio_dtor == TRANSHUGE_PAGE_DTOR;
+ folio_test_large_rmappable(folio);
}
static unsigned long __thp_get_unmapped_area(struct file *filp,
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
pmd = pmd_mkyoung(pmd);
if (writable)
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, vma);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
*/
orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd,
tlb->fullmm);
+ arch_check_zapped_pmd(vma, orig_pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (vma_is_special_huge(vma)) {
if (arch_needs_pgtable_deposit())
/* See change_pte_range(). */
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
can_change_pmd_writable(vma, addr, entry))
- entry = pmd_mkwrite(entry);
+ entry = pmd_mkwrite(entry, vma);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
if (!ptl)
return 0;
- pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm);
+ pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
tlb_remove_pud_tlb_entry(tlb, pud, addr);
if (vma_is_special_huge(vma)) {
spin_unlock(ptl);
count_vm_event(THP_SPLIT_PUD);
- pudp_huge_clear_flush_notify(vma, haddr, pud);
+ pudp_huge_clear_flush(vma, haddr, pud);
}
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
out:
spin_unlock(ptl);
- /*
- * No need to double call mmu_notifier->invalidate_range() callback as
- * the above pudp_huge_clear_flush_notify() did already call it.
- */
- mmu_notifier_invalidate_range_only_end(&range);
+ mmu_notifier_invalidate_range_end(&range);
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
count_vm_event(THP_SPLIT_PMD);
if (!vma_is_anonymous(vma)) {
- old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
- * mmu_notifier_invalidate_range() see comments below inside
- * __split_huge_pmd() ?
+ * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
+ * inside __split_huge_pmd() ?
*
* We are going from a zero huge page write protected to zero
* small page also write protected so it does not seem useful
} else {
entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
if (write)
- entry = pte_mkwrite(entry);
+ entry = pte_mkwrite(entry, vma);
if (anon_exclusive)
SetPageAnonExclusive(page + i);
if (!young)
entry = pte_mksoft_dirty(entry);
if (uffd_wp)
entry = pte_mkuffd_wp(entry);
- page_add_anon_rmap(page + i, vma, addr, false);
+ page_add_anon_rmap(page + i, vma, addr, RMAP_NONE);
}
VM_BUG_ON(!pte_none(ptep_get(pte)));
set_pte_at(mm, addr, pte, entry);
out:
spin_unlock(ptl);
- /*
- * No need to double call mmu_notifier->invalidate_range() callback.
- * They are 3 cases to consider inside __split_huge_pmd_locked():
- * 1) pmdp_huge_clear_flush_notify() call invalidate_range() obvious
- * 2) __split_huge_zero_page_pmd() read only zero page and any write
- * fault will trigger a flush_notify before pointing to a new page
- * (it is fine if the secondary mmu keeps pointing to the old zero
- * page in the meantime)
- * 3) Split a huge pmd into pte pointing to the same page. No need
- * to invalidate secondary tlb entry they are all still valid.
- * any further changes to individual pte will notify. So no need
- * to call mmu_notifier->invalidate_range()
- */
- mmu_notifier_invalidate_range_only_end(&range);
+ mmu_notifier_invalidate_range_end(&range);
}
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
}
}
-static void __split_huge_page_tail(struct page *head, int tail,
+static void __split_huge_page_tail(struct folio *folio, int tail,
struct lruvec *lruvec, struct list_head *list)
{
+ struct page *head = &folio->page;
struct page *page_tail = head + tail;
+ /*
+ * Careful: new_folio is not a "real" folio until we have cleared PageTail.
+ * Don't pass it around before clear_compound_head().
+ */
+ struct folio *new_folio = (struct folio *)page_tail;
VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
page_tail->index = head->index + tail;
/*
- * page->private should not be set in tail pages with the exception
- * of swap cache pages that store the swp_entry_t in tail pages.
- * Fix up and warn once if private is unexpectedly set.
- *
- * What of 32-bit systems, on which folio->_pincount overlays
- * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and
- * pincount must be 0 for folio_ref_freeze() to have succeeded.
+ * page->private should not be set in tail pages. Fix up and warn once
+ * if private is unexpectedly set.
*/
- if (!folio_test_swapcache(page_folio(head))) {
- VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
+ if (unlikely(page_tail->private)) {
+ VM_WARN_ON_ONCE_PAGE(true, page_tail);
page_tail->private = 0;
}
+ if (folio_test_swapcache(folio))
+ new_folio->swap.val = folio->swap.val + tail;
/* Page flags must be visible before we make the page non-compound. */
smp_wmb();
struct address_space *swap_cache = NULL;
unsigned long offset = 0;
unsigned int nr = thp_nr_pages(head);
- int i;
+ int i, nr_dropped = 0;
/* complete memcg works before add pages to LRU */
split_page_memcg(head, nr);
- if (PageAnon(head) && PageSwapCache(head)) {
- swp_entry_t entry = { .val = page_private(head) };
-
- offset = swp_offset(entry);
- swap_cache = swap_address_space(entry);
+ if (folio_test_anon(folio) && folio_test_swapcache(folio)) {
+ offset = swp_offset(folio->swap);
+ swap_cache = swap_address_space(folio->swap);
xa_lock(&swap_cache->i_pages);
}
ClearPageHasHWPoisoned(head);
for (i = nr - 1; i >= 1; i--) {
- __split_huge_page_tail(head, i, lruvec, list);
+ __split_huge_page_tail(folio, i, lruvec, list);
/* Some pages can be beyond EOF: drop them from page cache */
if (head[i].index >= end) {
struct folio *tail = page_folio(head + i);
if (shmem_mapping(head->mapping))
- shmem_uncharge(head->mapping->host, 1);
+ nr_dropped++;
else if (folio_test_clear_dirty(tail))
folio_account_cleaned(tail,
inode_to_wb(folio->mapping->host));
}
local_irq_enable();
+ if (nr_dropped)
+ shmem_uncharge(head->mapping->host, nr_dropped);
remap_page(folio, nr);
- if (PageSwapCache(head)) {
- swp_entry_t entry = { .val = page_private(head) };
-
- split_swap_cluster(entry);
- }
+ if (folio_test_swapcache(folio))
+ split_swap_cluster(folio->swap);
for (i = 0; i < nr; i++) {
struct page *subpage = head + i;
gfp = current_gfp_context(mapping_gfp_mask(mapping) &
GFP_RECLAIM_MASK);
- if (folio_test_private(folio) &&
- !filemap_release_folio(folio, gfp)) {
+ if (!filemap_release_folio(folio, gfp)) {
ret = -EBUSY;
goto out;
}
return ret;
}
/*
 * folio_undo_large_rmappable - take a folio off its deferred-split queue
 * (replaces free_transhuge_page()).
 * @folio: the folio to remove
 *
 * New flow shown in this hunk:
 *  1. If _deferred_list is empty (checked without the lock, wrapped in
 *     data_race()), return immediately.
 *  2. Otherwise look up the queue with get_deferred_split_queue(), take
 *     split_queue_lock with interrupts saved/disabled, re-check the list
 *     under the lock, and if still queued decrement split_queue_len and
 *     list_del() the folio.
 *
 * Differences from the removed version:
 *  - the queue lookup is deferred until after the unlocked emptiness check
 *    instead of being done at declaration;
 *  - the function no longer ends by calling free_compound_page().
 *
 * NOTE(review): the in-body comment below appears to straddle a hunk
 * boundary, so its opening sentence is not visible in this excerpt.
 */
-void free_transhuge_page(struct page *page)
+void folio_undo_large_rmappable(struct folio *folio)
{
- struct folio *folio = (struct folio *)page;
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
+ struct deferred_split *ds_queue;
unsigned long flags;
/*
* deferred_list. If folio is not in deferred_list, it's safe
* to check without acquiring the split_queue_lock.
*/
- if (data_race(!list_empty(&folio->_deferred_list))) {
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
- if (!list_empty(&folio->_deferred_list)) {
- ds_queue->split_queue_len--;
- list_del(&folio->_deferred_list);
- }
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ if (data_race(list_empty(&folio->_deferred_list)))
+ return;
+
+ ds_queue = get_deferred_split_queue(folio);
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ if (!list_empty(&folio->_deferred_list)) {
+ ds_queue->split_queue_len--;
+ list_del(&folio->_deferred_list);
}
- free_compound_page(page);
+ spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
}
void deferred_split_folio(struct folio *folio)
for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
struct vm_area_struct *vma = vma_lookup(mm, addr);
struct page *page;
+ struct folio *folio;
if (!vma)
break;
if (IS_ERR_OR_NULL(page))
continue;
- if (!is_transparent_hugepage(page))
+ folio = page_folio(page);
+ if (!is_transparent_hugepage(folio))
goto next;
total++;
- if (!can_split_folio(page_folio(page), NULL))
+ if (!can_split_folio(folio, NULL))
goto next;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto next;
- if (!split_huge_page(page))
+ if (!split_folio(folio))
split++;
- unlock_page(page);
+ folio_unlock(folio);
next:
- put_page(page);
+ folio_put(folio);
cond_resched();
}
mmap_read_unlock(mm);
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
if (is_writable_migration_entry(entry))
- pmde = pmd_mkwrite(pmde);
+ pmde = pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_mkuffd_wp(pmde);
if (!is_migration_entry_young(entry))