mm: hugetlb_vmemmap: fix a race between vmemmap pmd split
author Muchun Song <songmuchun@bytedance.com>
Fri, 7 Jul 2023 03:38:59 +0000 (11:38 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 18 Aug 2023 17:12:14 +0000 (10:12 -0700)
The local variable @page in __split_vmemmap_huge_pmd() is read from
pmd_page(*pmd) without holding page_table_lock, so if another CPU splits
the same huge pmd in the meantime, @page may end up pointing to the newly
installed PTE page table page instead of the huge pmd page.

The consequence shows up in set_pte_at(): we may pass an invalid page
struct to it, and if set_pte_at() wants to access that page struct (e.g.
when CONFIG_PAGE_TABLE_CHECK is enabled), it may crash the kernel.
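
With the pre-fix code (the removed lines in the diff below), one
plausible interleaving of two concurrent callers looks like this:

  CPU0:                            CPU1:
  split_vmemmap_huge_pmd()
    pmd_leaf(*pmd)  /* true, then drop the lock */
                                   split_vmemmap_huge_pmd()
                                     __split_vmemmap_huge_pmd()
                                       /* splits the pmd under the lock */
                                       pmd_populate_kernel(&init_mm, pmd, pgtable)
  __split_vmemmap_huge_pmd()
    page = pmd_page(*pmd)
    /* @page is now CPU1's PTE page table page, not the huge pmd page */
    set_pte_at()
    /* may dereference the bogus page struct */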

Fix this by fetching the pmd page while still holding page_table_lock,
right where pmd_leaf() is checked.  Also inline __split_vmemmap_huge_pmd()
into split_vmemmap_huge_pmd() since it only has one user.

Link: https://lkml.kernel.org/r/20230707033859.16148-1-songmuchun@bytedance.com
Fixes: d8d55f5616cf ("mm: sparsemem: use page table lock to protect kernel pmd operations")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/hugetlb_vmemmap.c

index c2007ef..4b97347 100644
@@ -36,14 +36,22 @@ struct vmemmap_remap_walk {
        struct list_head        *vmemmap_pages;
 };
 
-static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
+static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
 {
        pmd_t __pmd;
        int i;
        unsigned long addr = start;
-       struct page *page = pmd_page(*pmd);
-       pte_t *pgtable = pte_alloc_one_kernel(&init_mm);
+       struct page *head;
+       pte_t *pgtable;
+
+       spin_lock(&init_mm.page_table_lock);
+       head = pmd_leaf(*pmd) ? pmd_page(*pmd) : NULL;
+       spin_unlock(&init_mm.page_table_lock);
 
+       if (!head)
+               return 0;
+
+       pgtable = pte_alloc_one_kernel(&init_mm);
        if (!pgtable)
                return -ENOMEM;
 
@@ -53,7 +61,7 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
                pte_t entry, *pte;
                pgprot_t pgprot = PAGE_KERNEL;
 
-               entry = mk_pte(page + i, pgprot);
+               entry = mk_pte(head + i, pgprot);
                pte = pte_offset_kernel(&__pmd, addr);
                set_pte_at(&init_mm, addr, pte, entry);
        }
@@ -65,8 +73,8 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
                 * be treated as independent small pages (as they can be freed
                 * individually).
                 */
-               if (!PageReserved(page))
-                       split_page(page, get_order(PMD_SIZE));
+               if (!PageReserved(head))
+                       split_page(head, get_order(PMD_SIZE));
 
                /* Make pte visible before pmd. See comment in pmd_install(). */
                smp_wmb();
@@ -80,20 +88,6 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
        return 0;
 }
 
-static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
-{
-       int leaf;
-
-       spin_lock(&init_mm.page_table_lock);
-       leaf = pmd_leaf(*pmd);
-       spin_unlock(&init_mm.page_table_lock);
-
-       if (!leaf)
-               return 0;
-
-       return __split_vmemmap_huge_pmd(pmd, start);
-}
-
 static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
                              unsigned long end,
                              struct vmemmap_remap_walk *walk)