return 0;
}
-static int memory_failure_hugetlb(unsigned long pfn, int flags)
+/*
+ * Called from hugetlb code with hugetlb_lock held.
+ *
+ * Return values:
+ * 0 - free hugepage
+ * 1 - in-use hugepage
+ * 2 - not a hugepage
+ * -EBUSY - the hugepage is busy (try to retry)
+ * -EHWPOISON - the hugepage is already hwpoisoned
+ */
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+ struct page *page = pfn_to_page(pfn);
+ struct page *head = compound_head(page);
+ int ret = 2; /* fallback to normal page handling */
+ bool count_increased = false;
+
+ if (!PageHeadHuge(head))
+ goto out;
+
+ if (flags & MF_COUNT_INCREASED) {
+ ret = 1;
+ count_increased = true;
+ } else if (HPageFreed(head) || HPageMigratable(head)) {
+ ret = get_page_unless_zero(head);
+ if (ret)
+ count_increased = true;
+ } else {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (TestSetPageHWPoison(head)) {
+ ret = -EHWPOISON;
+ goto out;
+ }
+
+ return ret;
+out:
+ if (count_increased)
+ put_page(head);
+ return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Taking refcount of hugetlb pages needs extra care about race conditions
+ * with basic operations like hugepage allocation/free/demotion.
+ * So some of prechecks for hwpoison (pinning, and testing/setting
+ * PageHWPoison) should be done in single hugetlb_lock range.
+ */
+static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
{
- struct page *p = pfn_to_page(pfn);
- struct page *head = compound_head(p);
int res;
+ struct page *p = pfn_to_page(pfn);
+ struct page *head;
unsigned long page_flags;
+ bool retry = true;
- if (TestSetPageHWPoison(head)) {
- pr_err("Memory failure: %#lx: already hardware poisoned\n",
- pfn);
- res = -EHWPOISON;
- if (flags & MF_ACTION_REQUIRED)
+ *hugetlb = 1;
+retry:
+ res = get_huge_page_for_hwpoison(pfn, flags);
+ if (res == 2) { /* fallback to normal page handling */
+ *hugetlb = 0;
+ return 0;
+ } else if (res == -EHWPOISON) {
+ pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+ if (flags & MF_ACTION_REQUIRED) {
+ head = compound_head(p);
res = kill_accessing_process(current, page_to_pfn(head), flags);
+ }
return res;
+ } else if (res == -EBUSY) {
+ if (retry) {
+ retry = false;
+ goto retry;
+ }
+ action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+ return res;
+ }
+
+ head = compound_head(p);
+ lock_page(head);
+
+ if (hwpoison_filter(p)) {
+ ClearPageHWPoison(head);
+ res = -EOPNOTSUPP;
+ goto out;
}
num_poisoned_pages_inc();
- if (!(flags & MF_COUNT_INCREASED)) {
- res = get_hwpoison_page(p, flags);
- if (!res) {
- lock_page(head);
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
- unlock_page(head);
- return -EOPNOTSUPP;
- }
- unlock_page(head);
- res = MF_FAILED;
- if (__page_handle_poison(p)) {
- page_ref_inc(p);
- res = MF_RECOVERED;
- }
- action_result(pfn, MF_MSG_FREE_HUGE, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
- } else if (res < 0) {
- action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
- return -EBUSY;
+ /*
+ * Handling free hugepage. The possible race with hugepage allocation
+ * or demotion can be prevented by PageHWPoison flag.
+ */
+ if (res == 0) {
+ unlock_page(head);
+ res = MF_FAILED;
+ if (__page_handle_poison(p)) {
+ page_ref_inc(p);
+ res = MF_RECOVERED;
}
+ action_result(pfn, MF_MSG_FREE_HUGE, res);
+ return res == MF_RECOVERED ? 0 : -EBUSY;
}
- lock_page(head);
-
/*
* The page could have changed compound pages due to race window.
* If this happens just bail out.
page_flags = head->flags;
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
- put_page(p);
- res = -EOPNOTSUPP;
- goto out;
- }
-
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
unlock_page(head);
return res;
}
+#else
+static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
+{
+ return 0;
+}
+#endif
static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
struct dev_pagemap *pgmap)
int res = 0;
unsigned long page_flags;
bool retry = true;
+ int hugetlb = 0;
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
}
try_again:
- if (PageHuge(p)) {
- res = memory_failure_hugetlb(pfn, flags);
+ res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
+ if (hugetlb)
goto unlock_mutex;
- }
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",