mm/memory-failure.c: fix race with changing page compound again
author Miaohe Lin <linmiaohe@huawei.com>
Tue, 22 Mar 2022 21:44:44 +0000 (14:44 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 Jul 2022 14:35:05 +0000 (16:35 +0200)
[ Upstream commit 888af2701db79b9b27c7e37f9ede528a5ca53b76 ]

Patch series "A few fixup patches for memory failure", v2.

This series contains a few patches that fix the race with a changing
compound page, make non-LRU movable pages unhandlable, and so on.  More
details can be found in the respective changelogs.

There is a race window: after we have got the compound_head, the hugetlb
page could be freed to buddy, or even changed to another compound page,
just before we try to get the hwpoison page.  Consider the race window below:

  CPU 1                                   CPU 2
  memory_failure_hugetlb
  struct page *head = compound_head(p);
                                          hugetlb page might be freed to
                                          buddy, or even changed to another
                                          compound page.

  get_hwpoison_page -- page is not what we want now...

If this race happens, just bail out.  Also MF_MSG_DIFFERENT_PAGE_SIZE is
introduced to record this event.

[akpm@linux-foundation.org: s@/**@/*@, per Naoya Horiguchi]

Link: https://lkml.kernel.org/r/20220312074613.4798-1-linmiaohe@huawei.com
Link: https://lkml.kernel.org/r/20220312074613.4798-2-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
include/linux/mm.h
include/ras/ras_event.h
mm/memory-failure.c

index 85205ad..7a80a08 100644 (file)
@@ -3167,6 +3167,7 @@ enum mf_action_page_type {
        MF_MSG_BUDDY_2ND,
        MF_MSG_DAX,
        MF_MSG_UNSPLIT_THP,
+       MF_MSG_DIFFERENT_PAGE_SIZE,
        MF_MSG_UNKNOWN,
 };
 
index 0bdbc0d..cac13ff 100644 (file)
@@ -376,6 +376,7 @@ TRACE_EVENT(aer_event,
        EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" )            \
        EM ( MF_MSG_DAX, "dax page" )                                   \
        EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )                        \
+       EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )        \
        EMe ( MF_MSG_UNKNOWN, "unknown page" )
 
 /*
index 5664baf..a4d70c2 100644 (file)
@@ -741,6 +741,7 @@ static const char * const action_page_types[] = {
        [MF_MSG_BUDDY_2ND]              = "free buddy page (2nd try)",
        [MF_MSG_DAX]                    = "dax page",
        [MF_MSG_UNSPLIT_THP]            = "unsplit thp",
+       [MF_MSG_DIFFERENT_PAGE_SIZE]    = "different page size",
        [MF_MSG_UNKNOWN]                = "unknown page",
 };
 
@@ -1461,6 +1462,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
        }
 
        lock_page(head);
+
+       /*
+        * The page could have changed compound pages due to race window.
+        * If this happens just bail out.
+        */
+       if (!PageHuge(p) || compound_head(p) != head) {
+               action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
+               res = -EBUSY;
+               goto out;
+       }
+
        page_flags = head->flags;
 
        /*