* on behalf of the thread group. Return task_struct of the (first found)
* dedicated thread if found, and return NULL otherwise.
*
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
*/
static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
{
/*
* Collect processes when the error hit an anonymous page.
*/
-static void collect_procs_anon(struct page *page, struct list_head *to_kill,
- int force_early)
+static void collect_procs_anon(struct folio *folio, struct page *page,
+ struct list_head *to_kill, int force_early)
{
- struct folio *folio = page_folio(page);
struct vm_area_struct *vma;
struct task_struct *tsk;
struct anon_vma *av;
return;
pgoff = page_to_pgoff(page);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
/*
* Collect processes when the error hit a file mapped page.
*/
-static void collect_procs_file(struct page *page, struct list_head *to_kill,
- int force_early)
+static void collect_procs_file(struct folio *folio, struct page *page,
+ struct list_head *to_kill, int force_early)
{
struct vm_area_struct *vma;
struct task_struct *tsk;
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
pgoff_t pgoff;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
pgoff = page_to_pgoff(page);
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, force_early);
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
struct task_struct *tsk;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, true);
add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */
/*
* Collect the processes who have the corrupted page mapped to kill.
*/
-static void collect_procs(struct page *page, struct list_head *tokill,
- int force_early)
+static void collect_procs(struct folio *folio, struct page *page,
+ struct list_head *tokill, int force_early)
{
- if (!page->mapping)
+ if (!folio->mapping)
return;
if (unlikely(PageKsm(page)))
collect_procs_ksm(page, tokill, force_early);
else if (PageAnon(page))
- collect_procs_anon(page, tokill, force_early);
+ collect_procs_anon(folio, page, tokill, force_early);
else
- collect_procs_file(page, tokill, force_early);
+ collect_procs_file(folio, page, tokill, force_early);
}
-struct hwp_walk {
+struct hwpoison_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
return 0;
}
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
int ret = 0;
pte_t *ptep, *mapped_pte;
spinlock_t *ptl;
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);
#define hwpoison_hugetlb_range NULL
#endif
-static const struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwpoison_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
.walk_lock = PGWALK_RDLOCK,
int flags)
{
int ret;
- struct hwp_walk priv = {
+ struct hwpoison_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;
return -EFAULT;
mmap_read_lock(p->mm);
- ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+ ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
* Here we are interested only in user-mapped pages, so skip any
* other types of pages.
*/
- if (PageReserved(p) || PageSlab(p) || PageTable(p))
+ if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
return true;
if (!(PageLRU(hpage) || PageHuge(p)))
return true;
* This check implies we don't kill processes if their pages
* are in the swap cache early. Those are always late kills.
*/
- if (!page_mapped(hpage))
+ if (!page_mapped(p))
return true;
if (PageSwapCache(p)) {
* mapped in dirty form. This has to be done before try_to_unmap,
* because ttu takes the rmap data structures down.
*/
- collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
+ collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
if (PageHuge(hpage) && !PageAnon(hpage)) {
/*
try_to_unmap(folio, ttu);
}
- unmap_success = !page_mapped(hpage);
+ unmap_success = !page_mapped(p);
if (!unmap_success)
pr_err("%#lx: failed to unmap page (mapcount=%d)\n",
- pfn, page_mapcount(hpage));
+ pfn, page_mapcount(p));
/*
* try_to_unmap() might put mlocked page in lru cache, so call
* mapping being torn down is communicated in siginfo, see
* kill_proc()
*/
- loff_t start = (index << PAGE_SHIFT) & ~(size - 1);
+ loff_t start = ((loff_t)index << PAGE_SHIFT) & ~(size - 1);
unmap_mapping_range(mapping, start, size, 0);
}
kill_procs(to_kill, flags & MF_MUST_KILL, false, pfn, flags);
}
+/*
+ * Only dev_pagemap pages get here, such as fsdax when the filesystem
+ * either do not claim or fails to claim a hwpoison event, or devdax.
+ * The fsdax pages are initialized per base page, and the devdax pages
+ * could be initialized either as base pages, or as compound pages with
+ * vmemmap optimization enabled. Devdax is simplistic in its dealing with
+ * hwpoison, such that, if a subpage of a compound page is poisoned,
+ * simply mark the compound head page is by far sufficient.
+ */
static int mf_generic_kill_procs(unsigned long long pfn, int flags,
struct dev_pagemap *pgmap)
{
- struct page *page = pfn_to_page(pfn);
+ struct folio *folio = pfn_folio(pfn);
LIST_HEAD(to_kill);
dax_entry_t cookie;
int rc = 0;
/*
- * Pages instantiated by device-dax (not filesystem-dax)
- * may be compound pages.
- */
- page = compound_head(page);
-
- /*
* Prevent the inode from being freed while we are interrogating
* the address_space, typically this would be handled by
* lock_page(), but dax pages do not use the page lock. This
* also prevents changes to the mapping of this pfn until
* poison signaling is complete.
*/
- cookie = dax_lock_page(page);
+ cookie = dax_lock_folio(folio);
if (!cookie)
return -EBUSY;
- if (hwpoison_filter(page)) {
+ if (hwpoison_filter(&folio->page)) {
rc = -EOPNOTSUPP;
goto unlock;
}
* Use this flag as an indication that the dax page has been
* remapped UC to prevent speculative consumption of poison.
*/
- SetPageHWPoison(page);
+ SetPageHWPoison(&folio->page);
/*
* Unlike System-RAM there is no possibility to swap in a
* SIGBUS (i.e. MF_MUST_KILL)
*/
flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
- collect_procs(page, &to_kill, true);
+ collect_procs(folio, &folio->page, &to_kill, true);
- unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags);
+ unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags);
unlock:
- dax_unlock_page(page, cookie);
+ dax_unlock_folio(folio, cookie);
return rc;
}
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ if (folio_test_slab(folio) || PageTable(&folio->page) ||
+ folio_test_reserved(folio) || PageOffline(&folio->page))
goto unlock_mutex;
/*