page_migration
page_frags
page_owner
+ page_table_check
remap_file_pages
slub
split_page_table_lock
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _page_table_check:
+
+================
+Page Table Check
+================
+
+Introduction
+============
+
+Page table check allows to hardern the kernel by ensuring that some types of
+the memory corruptions are prevented.
+
+Page table check performs extra verifications at the time when new pages become
+accessible from the userspace by getting their page table entries (PTEs PMDs
+etc.) added into the table.
+
+In case of detected corruption, the kernel is crashed. There is a small
+performance and memory overhead associated with the page table check. Therefore,
+it is disabled by default, but can be optionally enabled on systems where the
+extra hardening outweighs the performance costs. Also, because page table check
+is synchronous, it can help with debugging double map memory corruption issues,
+by crashing kernel at the time wrong mapping occurs instead of later which is
+often the case with memory corruptions bugs.
+
+Double mapping detection logic
+==============================
+
++-------------------+-------------------+-------------------+------------------+
+| Current Mapping | New mapping | Permissions | Rule |
++===================+===================+===================+==================+
+| Anonymous | Anonymous | Read | Allow |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous | Anonymous | Read / Write | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Anonymous | Named | Any | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Named | Anonymous | Any | Prohibit |
++-------------------+-------------------+-------------------+------------------+
+| Named | Named | Any | Allow |
++-------------------+-------------------+-------------------+------------------+
+
+Enabling Page Table Check
+=========================
+
+Build kernel with:
+
+- PAGE_TABLE_CHECK=y
+ Note, it can only be enabled on platforms where ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ is available.
+
+- Boot with 'page_table_check=on' kernel parameter.
+
+Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page
+table support without extra kernel parameter.
F: include/trace/events/page_pool.h
F: net/core/page_pool.c
+PAGE TABLE CHECK
+M: Pasha Tatashin <pasha.tatashin@soleen.com>
+M: Andrew Morton <akpm@linux-foundation.org>
+L: linux-mm@kvack.org
+S: Maintained
+F: Documentation/vm/page_table_check.rst
+F: include/linux/page_table_check.h
+F: mm/page_table_check.c
+
PANASONIC LAPTOP ACPI EXTRAS DRIVER
M: Kenneth Chan <kenneth.t.chan@gmail.com>
L: platform-driver-x86@vger.kernel.org
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
bool
+config ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ bool
+
config ARCH_SPLIT_ARG64
bool
help
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2021, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#ifndef __LINUX_PAGE_TABLE_CHECK_H
+#define __LINUX_PAGE_TABLE_CHECK_H
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+#include <linux/jump_label.h>
+
+extern struct static_key_true page_table_check_disabled;
+extern struct page_ext_operations page_table_check_ops;
+
+void __page_table_check_zero(struct page *page, unsigned int order);
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t pte);
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd);
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+ pud_t pud);
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud);
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t pte)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_clear(mm, addr, pte);
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pmd_clear(mm, addr, pmd);
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t pud)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pud_clear(mm, addr, pud);
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_set(mm, addr, ptep, pte);
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pmd_set(mm, addr, pmdp, pmd);
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp,
+ pud_t pud)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pud_set(mm, addr, pudp, pud);
+}
+
+#else
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t pud)
+{
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp,
+ pud_t pud)
+{
+}
+
+#endif /* CONFIG_PAGE_TABLE_CHECK */
+#endif /* __LINUX_PAGE_TABLE_CHECK_H */
If unsure, say N.
+config PAGE_TABLE_CHECK
+ bool "Check for invalid mappings in user page tables"
+ depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select PAGE_EXTENSION
+ help
+ Check that anonymous page is not being mapped twice with read write
+ permissions. Check that anonymous and file pages are not being
+ erroneously shared. Since the checking is performed at the time
+ entries are added and removed to user page tables, leaking, corruption
+ and double mapping problems are detected synchronously.
+
+ If unsure say "n".
+
+config PAGE_TABLE_CHECK_ENFORCED
+ bool "Enforce the page table checking by default"
+ depends on PAGE_TABLE_CHECK
+ help
+ Always enable page table checking. By default the page table checking
+ is disabled, and can be optionally enabled via page_table_check=on
+ kernel parameter. This config enforces that page table check is always
+ enabled.
+
+ If unsure say "n".
+
config PAGE_POISONING
bool "Poison pages after freeing"
help
obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
+obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
obj-$(CONFIG_SECRETMEM) += secretmem.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
+#include <linux/page_table_check.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
reset_page_owner(page, order);
+ page_table_check_free(page, order);
return false;
}
page_cpupid_reset_last(page);
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
reset_page_owner(page, order);
+ page_table_check_free(page, order);
if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page),
}
set_page_owner(page, order, gfp_flags);
+ page_table_check_alloc(page, order);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
+#include <linux/page_table_check.h>
/*
* struct page extension
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
&page_idle_ops,
#endif
+#ifdef CONFIG_PAGE_TABLE_CHECK
+ &page_table_check_ops,
+#endif
};
unsigned long page_ext_size = sizeof(struct page_ext);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2021, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#include <linux/mm.h>
+#include <linux/page_table_check.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "page_table_check: " fmt
+
+struct page_table_check {
+ atomic_t anon_map_count;
+ atomic_t file_map_count;
+};
+
+static bool __page_table_check_enabled __initdata =
+ IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);
+
+DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
+EXPORT_SYMBOL(page_table_check_disabled);
+
+static int __init early_page_table_check_param(char *buf)
+{
+ if (!buf)
+ return -EINVAL;
+
+ if (strcmp(buf, "on") == 0)
+ __page_table_check_enabled = true;
+ else if (strcmp(buf, "off") == 0)
+ __page_table_check_enabled = false;
+
+ return 0;
+}
+
+early_param("page_table_check", early_page_table_check_param);
+
+static bool __init need_page_table_check(void)
+{
+ return __page_table_check_enabled;
+}
+
+static void __init init_page_table_check(void)
+{
+ if (!__page_table_check_enabled)
+ return;
+ static_branch_disable(&page_table_check_disabled);
+}
+
+struct page_ext_operations page_table_check_ops = {
+ .size = sizeof(struct page_table_check),
+ .need = need_page_table_check,
+ .init = init_page_table_check,
+};
+
+static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
+{
+ BUG_ON(!page_ext);
+ return (void *)(page_ext) + page_table_check_ops.offset;
+}
+
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) &&
+ (pmd_val(pmd) & _PAGE_USER);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) &&
+ (pud_val(pud) & _PAGE_USER);
+}
+
+/*
+ * An enty is removed from the page table, decrement the counters for that page
+ * verify that it is of correct type and counters do not become negative.
+ */
+static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
+ unsigned long pfn, unsigned long pgcnt)
+{
+ struct page_ext *page_ext;
+ struct page *page;
+ bool anon;
+ int i;
+
+ if (!pfn_valid(pfn))
+ return;
+
+ page = pfn_to_page(pfn);
+ page_ext = lookup_page_ext(page);
+ anon = PageAnon(page);
+
+ for (i = 0; i < pgcnt; i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ if (anon) {
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0);
+ } else {
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
+ }
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+/*
+ * A new enty is added to the page table, increment the counters for that page
+ * verify that it is of correct type and is not being mapped with a different
+ * type to a different process.
+ */
+static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
+ unsigned long pfn, unsigned long pgcnt,
+ bool rw)
+{
+ struct page_ext *page_ext;
+ struct page *page;
+ bool anon;
+ int i;
+
+ if (!pfn_valid(pfn))
+ return;
+
+ page = pfn_to_page(pfn);
+ page_ext = lookup_page_ext(page);
+ anon = PageAnon(page);
+
+ for (i = 0; i < pgcnt; i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ if (anon) {
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
+ } else {
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0);
+ }
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+/*
+ * page is on free list, or is being allocated, verify that counters are zeroes
+ * crash if they are not.
+ */
+void __page_table_check_zero(struct page *page, unsigned int order)
+{
+ struct page_ext *page_ext = lookup_page_ext(page);
+ int i;
+
+ BUG_ON(!page_ext);
+ for (i = 0; i < (1 << order); i++) {
+ struct page_table_check *ptc = get_page_table_check(page_ext);
+
+ BUG_ON(atomic_read(&ptc->anon_map_count));
+ BUG_ON(atomic_read(&ptc->file_map_count));
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t pte)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pte_user_accessible_page(pte)) {
+ page_table_check_clear(mm, addr, pte_pfn(pte),
+ PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pte_clear);
+
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_clear(mm, addr, pmd_pfn(pmd),
+ PMD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pmd_clear);
+
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+ pud_t pud)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (pud_user_accessible_page(pud)) {
+ page_table_check_clear(mm, addr, pud_pfn(pud),
+ PUD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pud_clear);
+
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ pte_t old_pte;
+
+ if (&init_mm == mm)
+ return;
+
+ old_pte = *ptep;
+ if (pte_user_accessible_page(old_pte)) {
+ page_table_check_clear(mm, addr, pte_pfn(old_pte),
+ PAGE_SIZE >> PAGE_SHIFT);
+ }
+
+ if (pte_user_accessible_page(pte)) {
+ page_table_check_set(mm, addr, pte_pfn(pte),
+ PAGE_SIZE >> PAGE_SHIFT,
+ pte_write(pte));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pte_set);
+
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ pmd_t old_pmd;
+
+ if (&init_mm == mm)
+ return;
+
+ old_pmd = *pmdp;
+ if (pmd_user_accessible_page(old_pmd)) {
+ page_table_check_clear(mm, addr, pmd_pfn(old_pmd),
+ PMD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_set(mm, addr, pmd_pfn(pmd),
+ PMD_PAGE_SIZE >> PAGE_SHIFT,
+ pmd_write(pmd));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pmd_set);
+
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ pud_t old_pud;
+
+ if (&init_mm == mm)
+ return;
+
+ old_pud = *pudp;
+ if (pud_user_accessible_page(old_pud)) {
+ page_table_check_clear(mm, addr, pud_pfn(old_pud),
+ PUD_PAGE_SIZE >> PAGE_SHIFT);
+ }
+
+ if (pud_user_accessible_page(pud)) {
+ page_table_check_set(mm, addr, pud_pfn(pud),
+ PUD_PAGE_SIZE >> PAGE_SHIFT,
+ pud_write(pud));
+ }
+}
+EXPORT_SYMBOL(__page_table_check_pud_set);