From 4850798f6e8f804dca444ba5ba400313167a1356 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 10 Nov 2020 14:53:20 +0100 Subject: [PATCH 01/16] kfence: Avoid stalling work queue task without allocations To toggle the allocation gates, we set up a delayed work that calls toggle_allocation_gate(). Here we use wait_event() to await an allocation and subsequently disable the static branch again. However, if the kernel has stopped doing allocations entirely, we'd wait indefinitely, and stall the worker task. This may also result in the appropriate warnings if CONFIG_DETECT_HUNG_TASK=y. Therefore, introduce a 1 second timeout and use wait_event_timeout(). If the timeout is reached, the static branch is disabled and a new delayed work is scheduled to try setting up an allocation at a later time. Note that, this scenario is very unlikely during normal workloads once the kernel has booted and user space tasks are running. It can, however, happen during early boot after KFENCE has been enabled, when e.g. running tests that do not result in any allocations. Link: https://lkml.kernel.org/r/CADYN=9J0DQhizAGB0-jz4HOBBh+05kMBXb4c0cXMS7Qi5NAJiw@mail.gmail.com Reported-by: Anders Roxell Signed-off-by: Marco Elver [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: Ia4e37404fce3497d6c9d32cae23ffa5d106055da --- mm/kfence/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 9358f42..933b197 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -592,7 +592,11 @@ static void toggle_allocation_gate(struct work_struct *work) /* Enable static key, and await allocation to happen. */ atomic_set(&allocation_gate, 0); static_branch_enable(&kfence_allocation_key); - wait_event(allocation_wait, atomic_read(&allocation_gate) != 0); + /* + * Await an allocation. Timeout after 1 second, in case the kernel stops + * doing allocations, to avoid stalling this worker task for too long. + */ + wait_event_timeout(allocation_wait, atomic_read(&allocation_gate) != 0, HZ); /* Disable static key and reset timer. */ static_branch_disable(&kfence_allocation_key); -- 2.7.4 From 7c9346e497455ee3ac49622629b875b0f649bbf2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 16 Dec 2020 00:31:12 +0100 Subject: [PATCH 02/16] kfence: fix typo in test Fix a typo/accidental copy-paste that resulted in the obviously incorrect 'GFP_KERNEL * 2' expression. Reported-by: kernel test robot Signed-off-by: Marco Elver Acked-by: Alexander Potapenko [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I157e9378f99fe2b1db005ee402dc0cdb6fe54164 --- mm/kfence/kfence_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 1433a35..f57c61c 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -665,7 +665,7 @@ static void test_krealloc(struct kunit *test) for (; i < size * 3; i++) /* Fill to extra bytes. */ buf[i] = i + 1; - buf = krealloc(buf, size * 2, GFP_KERNEL * 2); /* Shrink. */ + buf = krealloc(buf, size * 2, GFP_KERNEL); /* Shrink. */ KUNIT_EXPECT_GE(test, ksize(buf), size * 2); for (i = 0; i < size * 2; i++) KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1)); -- 2.7.4 From 7d57216abf67f02b939504343e836d0a17762a3d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 11 Jan 2021 10:15:44 +0100 Subject: [PATCH 03/16] kfence: show access type in report MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Show the access type in KFENCE reports by plumbing through read/write information from the page fault handler. Update the documentation and test accordingly. Suggested-by: Jörn Engel Signed-off-by: Marco Elver Reviewed-by: Jörn Engel [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I9bae89e3fb99fe4c87ad17ab76c731ba91c6f830 --- Documentation/dev-tools/kfence.rst | 12 +++++----- arch/arm64/mm/fault.c | 2 +- arch/x86/mm/fault.c | 3 ++- include/linux/kfence.h | 9 ++++++-- mm/kfence/core.c | 11 +++++---- mm/kfence/kfence.h | 2 +- mm/kfence/kfence_test.c | 47 +++++++++++++++++++++++++++++++++----- mm/kfence/report.c | 27 ++++++++++++++-------- 8 files changed, 82 insertions(+), 31 deletions(-) diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst index d7329f2..06a454e 100644 --- a/Documentation/dev-tools/kfence.rst +++ b/Documentation/dev-tools/kfence.rst @@ -64,9 +64,9 @@ Error reports A typical out-of-bounds access looks like this:: ================================================================== - BUG: KFENCE: out-of-bounds in test_out_of_bounds_read+0xa3/0x22b + BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa3/0x22b - Out-of-bounds access at 0xffffffffb672efff (1B left of kfence-#17): + Out-of-bounds read at 0xffffffffb672efff (1B left of kfence-#17): test_out_of_bounds_read+0xa3/0x22b kunit_try_run_case+0x51/0x85 kunit_generic_run_threadfn_adapter+0x16/0x30 @@ -93,9 +93,9 @@ its origin. Note that, real kernel addresses are only shown for Use-after-free accesses are reported as:: ================================================================== - BUG: KFENCE: use-after-free in test_use_after_free_read+0xb3/0x143 + BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143 - Use-after-free access at 0xffffffffb673dfe0 (in kfence-#24): + Use-after-free read at 0xffffffffb673dfe0 (in kfence-#24): test_use_after_free_read+0xb3/0x143 kunit_try_run_case+0x51/0x85 kunit_generic_run_threadfn_adapter+0x16/0x30 @@ -192,9 +192,9 @@ where it was not possible to determine an associated object, e.g. if adjacent object pages had not yet been allocated:: ================================================================== - BUG: KFENCE: invalid access in test_invalid_access+0x26/0xe0 + BUG: KFENCE: invalid read in test_invalid_access+0x26/0xe0 - Invalid access at 0xffffffffb670b00a: + Invalid read at 0xffffffffb670b00a: test_invalid_access+0x26/0xe0 kunit_try_run_case+0x51/0x85 kunit_generic_run_threadfn_adapter+0x16/0x30 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a5fe9f7..8f4ceab 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -323,7 +323,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, } else if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; } else { - if (kfence_handle_page_fault(addr, regs)) + if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) return; msg = "paging request"; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b2688e8..7b707c9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -734,7 +734,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, efi_recover_from_page_fault(address); /* Only not-present faults should be handled by KFENCE. */ - if (!(error_code & X86_PF_PROT) && kfence_handle_page_fault(address, regs)) + if (!(error_code & X86_PF_PROT) && + kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs)) return; oops: diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 7624688..3a30705 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -171,6 +171,7 @@ static __always_inline __must_check bool kfence_free(void *addr) /** * kfence_handle_page_fault() - perform page fault handling for KFENCE pages * @addr: faulting address + * @is_write: is access a write * @regs: current struct pt_regs (can be NULL, but shows full stack trace) * * Return: @@ -182,7 +183,7 @@ static __always_inline __must_check bool kfence_free(void *addr) * cases KFENCE prints an error message and marks the offending page as * present, so that the kernel can proceed. */ -bool __must_check kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs); +bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs); #else /* CONFIG_KFENCE */ @@ -195,7 +196,11 @@ static inline size_t kfence_ksize(const void *addr) { return 0; } static inline void *kfence_object_start(const void *addr) { return NULL; } static inline void __kfence_free(void *addr) { } static inline bool __must_check kfence_free(void *addr) { return false; } -static inline bool __must_check kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs) { return false; } +static inline bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, + struct pt_regs *regs) +{ + return false; +} #endif diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 933b197..338ced5 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -212,7 +212,7 @@ static inline bool check_canary_byte(u8 *addr) return true; atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); - kfence_report_error((unsigned long)addr, NULL, addr_to_metadata((unsigned long)addr), + kfence_report_error((unsigned long)addr, false, NULL, addr_to_metadata((unsigned long)addr), KFENCE_ERROR_CORRUPTION); return false; } @@ -351,7 +351,8 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) { /* Invalid or double-free, bail out. */ atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); - kfence_report_error((unsigned long)addr, NULL, meta, KFENCE_ERROR_INVALID_FREE); + kfence_report_error((unsigned long)addr, false, NULL, meta, + KFENCE_ERROR_INVALID_FREE); raw_spin_unlock_irqrestore(&meta->lock, flags); return; } @@ -756,7 +757,7 @@ void __kfence_free(void *addr) kfence_guarded_free(addr, meta, false); } -bool kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs) +bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs) { const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE; struct kfence_metadata *to_report = NULL; @@ -819,11 +820,11 @@ bool kfence_handle_page_fault(unsigned long addr, struct pt_regs *regs) out: if (to_report) { - kfence_report_error(addr, regs, to_report, error_type); + kfence_report_error(addr, is_write, regs, to_report, error_type); raw_spin_unlock_irqrestore(&to_report->lock, flags); } else { /* This may be a UAF or OOB access, but we can't be sure. */ - kfence_report_error(addr, regs, NULL, KFENCE_ERROR_INVALID); + kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID); } return kfence_unprotect(addr); /* Unprotect and let access proceed. */ diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index fa3579d..97282fa 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -99,7 +99,7 @@ enum kfence_error_type { KFENCE_ERROR_INVALID_FREE, /* Invalid free. */ }; -void kfence_report_error(unsigned long address, struct pt_regs *regs, +void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs, const struct kfence_metadata *meta, enum kfence_error_type type); void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta); diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index f57c61c..db1bb59 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -71,8 +71,14 @@ struct expect_report { enum kfence_error_type type; /* The type or error. */ void *fn; /* Function pointer to expected function where access occurred. */ char *addr; /* Address at which the bad access occurred. */ + bool is_write; /* Is access a write. */ }; +static const char *get_access_type(const struct expect_report *r) +{ + return r->is_write ? "write" : "read"; +} + /* Check observed report matches information in @r. */ static bool report_matches(const struct expect_report *r) { @@ -93,16 +99,19 @@ static bool report_matches(const struct expect_report *r) end = &expect[0][sizeof(expect[0]) - 1]; switch (r->type) { case KFENCE_ERROR_OOB: - cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds"); + cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s", + get_access_type(r)); break; case KFENCE_ERROR_UAF: - cur += scnprintf(cur, end - cur, "BUG: KFENCE: use-after-free"); + cur += scnprintf(cur, end - cur, "BUG: KFENCE: use-after-free %s", + get_access_type(r)); break; case KFENCE_ERROR_CORRUPTION: cur += scnprintf(cur, end - cur, "BUG: KFENCE: memory corruption"); break; case KFENCE_ERROR_INVALID: - cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid access"); + cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid %s", + get_access_type(r)); break; case KFENCE_ERROR_INVALID_FREE: cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid free"); @@ -121,16 +130,16 @@ static bool report_matches(const struct expect_report *r) switch (r->type) { case KFENCE_ERROR_OOB: - cur += scnprintf(cur, end - cur, "Out-of-bounds access at"); + cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r)); break; case KFENCE_ERROR_UAF: - cur += scnprintf(cur, end - cur, "Use-after-free access at"); + cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r)); break; case KFENCE_ERROR_CORRUPTION: cur += scnprintf(cur, end - cur, "Corrupted memory at"); break; case KFENCE_ERROR_INVALID: - cur += scnprintf(cur, end - cur, "Invalid access at"); + cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r)); break; case KFENCE_ERROR_INVALID_FREE: cur += scnprintf(cur, end - cur, "Invalid free of"); @@ -294,6 +303,7 @@ static void test_out_of_bounds_read(struct kunit *test) struct expect_report expect = { .type = KFENCE_ERROR_OOB, .fn = test_out_of_bounds_read, + .is_write = false, }; char *buf; @@ -321,12 +331,31 @@ static void test_out_of_bounds_read(struct kunit *test) test_free(buf); } +static void test_out_of_bounds_write(struct kunit *test) +{ + size_t size = 32; + struct expect_report expect = { + .type = KFENCE_ERROR_OOB, + .fn = test_out_of_bounds_write, + .is_write = true, + }; + char *buf; + + setup_test_cache(test, size, 0, NULL); + buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT); + expect.addr = buf - 1; + WRITE_ONCE(*expect.addr, 42); + KUNIT_EXPECT_TRUE(test, report_matches(&expect)); + test_free(buf); +} + static void test_use_after_free_read(struct kunit *test) { const size_t size = 32; struct expect_report expect = { .type = KFENCE_ERROR_UAF, .fn = test_use_after_free_read, + .is_write = false, }; setup_test_cache(test, size, 0, NULL); @@ -411,6 +440,7 @@ static void test_kmalloc_aligned_oob_read(struct kunit *test) struct expect_report expect = { .type = KFENCE_ERROR_OOB, .fn = test_kmalloc_aligned_oob_read, + .is_write = false, }; char *buf; @@ -509,6 +539,7 @@ static void test_init_on_free(struct kunit *test) struct expect_report expect = { .type = KFENCE_ERROR_UAF, .fn = test_init_on_free, + .is_write = false, }; int i; @@ -598,6 +629,7 @@ static void test_invalid_access(struct kunit *test) .type = KFENCE_ERROR_INVALID, .fn = test_invalid_access, .addr = &__kfence_pool[10], + .is_write = false, }; READ_ONCE(__kfence_pool[10]); @@ -611,6 +643,7 @@ static void test_memcache_typesafe_by_rcu(struct kunit *test) struct expect_report expect = { .type = KFENCE_ERROR_UAF, .fn = test_memcache_typesafe_by_rcu, + .is_write = false, }; setup_test_cache(test, size, SLAB_TYPESAFE_BY_RCU, NULL); @@ -647,6 +680,7 @@ static void test_krealloc(struct kunit *test) .type = KFENCE_ERROR_UAF, .fn = test_krealloc, .addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY), + .is_write = false, }; char *buf = expect.addr; int i; @@ -728,6 +762,7 @@ static void test_memcache_alloc_bulk(struct kunit *test) static struct kunit_case kfence_test_cases[] = { KFENCE_KUNIT_CASE(test_out_of_bounds_read), + KFENCE_KUNIT_CASE(test_out_of_bounds_write), KFENCE_KUNIT_CASE(test_use_after_free_read), KFENCE_KUNIT_CASE(test_double_free), KFENCE_KUNIT_CASE(test_invalid_addr_free), diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 4dedc2f..1996295 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -151,7 +151,12 @@ static void print_diff_canary(unsigned long address, size_t bytes_to_show, pr_cont(" ]"); } -void kfence_report_error(unsigned long address, struct pt_regs *regs, +static const char *get_access_type(bool is_write) +{ + return is_write ? "write" : "read"; +} + +void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs, const struct kfence_metadata *meta, enum kfence_error_type type) { unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 }; @@ -189,17 +194,19 @@ void kfence_report_error(unsigned long address, struct pt_regs *regs, case KFENCE_ERROR_OOB: { const bool left_of_object = address < meta->addr; - pr_err("BUG: KFENCE: out-of-bounds in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Out-of-bounds access at 0x" PTR_FMT " (%luB %s of kfence-#%zd):\n", - (void *)address, + pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write), + (void *)stack_entries[skipnr]); + pr_err("Out-of-bounds %s at 0x" PTR_FMT " (%luB %s of kfence-#%zd):\n", + get_access_type(is_write), (void *)address, left_of_object ? meta->addr - address : address - meta->addr, left_of_object ? "left" : "right", object_index); break; } case KFENCE_ERROR_UAF: - pr_err("BUG: KFENCE: use-after-free in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Use-after-free access at 0x" PTR_FMT " (in kfence-#%zd):\n", - (void *)address, object_index); + pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write), + (void *)stack_entries[skipnr]); + pr_err("Use-after-free %s at 0x" PTR_FMT " (in kfence-#%zd):\n", + get_access_type(is_write), (void *)address, object_index); break; case KFENCE_ERROR_CORRUPTION: pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]); @@ -208,8 +215,10 @@ void kfence_report_error(unsigned long address, struct pt_regs *regs, pr_cont(" (in kfence-#%zd):\n", object_index); break; case KFENCE_ERROR_INVALID: - pr_err("BUG: KFENCE: invalid access in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Invalid access at 0x" PTR_FMT ":\n", (void *)address); + pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write), + (void *)stack_entries[skipnr]); + pr_err("Invalid %s at 0x" PTR_FMT ":\n", get_access_type(is_write), + (void *)address); break; case KFENCE_ERROR_INVALID_FREE: pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]); -- 2.7.4 From f0e2bb4198dba33f5fa962ea3ff553727fcd5c8c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Feb 2021 11:04:36 -0800 Subject: [PATCH 04/16] Merge tag 'printk-for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux Pull printk updates from Petr Mladek: - New "no_hash_pointers" kernel parameter causes that %p shows raw pointer values instead of hashed ones. It is intended only for debugging purposes. Misuse is prevented by a fat warning message that is inspired by trace_printk(). - Prevent a possible deadlock when flushing printk_safe buffers during panic(). - Fix performance regression caused by the lockless printk ringbuffer. It was visible with huge log buffer and long messages. - Documentation fix-up. * tag 'printk-for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux: lib/vsprintf: no_hash_pointers prints all addresses as unhashed kselftest: add support for skipped tests lib: use KSTM_MODULE_GLOBALS macro in kselftest drivers printk: avoid prb_first_valid_seq() where possible printk: fix deadlock when kernel panic printk: rectify kernel-doc for prb_rec_init_wr() [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I13558832bbd4970d2b35f39db052df735a6ced4f --- Documentation/admin-guide/kernel-parameters.txt | 15 +++++++++++ kernel/printk/printk_ringbuffer.h | 2 +- lib/test_bitmap.c | 3 +-- lib/test_printf.c | 12 +++++++-- lib/vsprintf.c | 36 +++++++++++++++++++++++-- tools/testing/selftests/kselftest_module.h | 18 ++++++++----- 6 files changed, 73 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4ee4dca8..4320e97 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3288,6 +3288,21 @@ in certain environments such as networked servers or real-time systems. + no_hash_pointers + Force pointers printed to the console or buffers to be + unhashed. By default, when a pointer is printed via %p + format string, that pointer is "hashed", i.e. obscured + by hashing the pointer value. This is a security feature + that hides actual kernel addresses from unprivileged + users, but it also makes debugging the kernel more + difficult since unequal pointers can no longer be + compared. However, if this command-line option is + specified, then all normal pointers will have their true + value printed. Pointers printed via %pK may still be + hashed. This option should only be specified when + debugging the kernel. Please do not use on production + kernels. + nohibernate [HIBERNATION] Disable hibernation and resume. nohz= [KNL] Boottime enable/disable dynamic ticks diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 5dc9d02..73cc80e 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -287,7 +287,7 @@ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** - * prb_rec_init_wd() - Initialize a buffer for writing records. + * prb_rec_init_wr() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 4425a1d..0ea0e82 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -16,8 +16,7 @@ #include "../tools/testing/selftests/kselftest_module.h" -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); static char pbl_buffer[PAGE_SIZE] __initdata; diff --git a/lib/test_printf.c b/lib/test_printf.c index 7ac87f1..a675579 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -30,11 +30,13 @@ #define PAD_SIZE 16 #define FILL_CHAR '$' -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); + static char *test_buffer __initdata; static char *alloced_buffer __initdata; +extern bool no_hash_pointers; + static int __printf(4, 0) __init do_test(int bufsize, const char *expect, int elen, const char *fmt, va_list ap) @@ -301,6 +303,12 @@ plain(void) { int err; + if (no_hash_pointers) { + pr_warn("skipping plain 'p' tests"); + skipped_tests += 2; + return; + } + err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8ade1a86..ed2f709 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2116,6 +2116,32 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + +static int __init no_hash_pointers_enable(char *str) +{ + no_hash_pointers = true; + + pr_warn("**********************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** This system shows unhashed kernel memory addresses **\n"); + pr_warn("** via the console, logs, and other interfaces. This **\n"); + pr_warn("** might reduce the security of your system. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging **\n"); + pr_warn("** the kernel, report this immediately to your system **\n"); + pr_warn("** administrator! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("**********************************************************\n"); + + return 0; +} +early_param("no_hash_pointers", no_hash_pointers_enable); + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2323,8 +2349,14 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } } - /* default is to _not_ leak addresses, hash before printing */ - return ptr_to_id(buf, end, ptr, spec); + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + else + return ptr_to_id(buf, end, ptr, spec); } /* diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h index e8eafaf..e2ea41d 100644 --- a/tools/testing/selftests/kselftest_module.h +++ b/tools/testing/selftests/kselftest_module.h @@ -11,7 +11,8 @@ #define KSTM_MODULE_GLOBALS() \ static unsigned int total_tests __initdata; \ -static unsigned int failed_tests __initdata +static unsigned int failed_tests __initdata; \ +static unsigned int skipped_tests __initdata #define KSTM_CHECK_ZERO(x) do { \ total_tests++; \ @@ -21,11 +22,16 @@ static unsigned int failed_tests __initdata } \ } while (0) -static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests) +static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests, + unsigned int skipped_tests) { - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else + if (failed_tests == 0) { + if (skipped_tests) { + pr_info("skipped %u tests\n", skipped_tests); + pr_info("remaining %u tests passed\n", total_tests); + } else + pr_info("all %u tests passed\n", total_tests); + } else pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); return failed_tests ? -EINVAL : 0; @@ -36,7 +42,7 @@ static int __init __module##_init(void) \ { \ pr_info("loaded.\n"); \ selftest(); \ - return kstm_report(total_tests, failed_tests); \ + return kstm_report(total_tests, failed_tests, skipped_tests); \ } \ static void __exit __module##_exit(void) \ { \ -- 2.7.4 From d6617e038823b56d99169b7f5ad174871430c4e8 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 23 Feb 2021 09:20:43 +0100 Subject: [PATCH 05/16] kfence: report sensitive information based on no_hash_pointers We cannot rely on CONFIG_DEBUG_KERNEL to decide if we're running a "debug kernel" where we can safely show potentially sensitive information in the kernel log. Instead, simply rely on the newly introduced "no_hash_pointers" to print unhashed kernel pointers, as well as decide if our reports can include other potentially sensitive information such as registers and corrupted bytes. Cc: Timur Tabi Signed-off-by: Marco Elver [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: Ie87a49f890db5a49503afe50e63d908247eb31a3 --- Documentation/dev-tools/kfence.rst | 8 ++++---- mm/kfence/core.c | 10 +++------- mm/kfence/kfence.h | 7 ------- mm/kfence/kfence_test.c | 2 +- mm/kfence/report.c | 18 ++++++++++-------- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst index 06a454e..d2da644 100644 --- a/Documentation/dev-tools/kfence.rst +++ b/Documentation/dev-tools/kfence.rst @@ -87,8 +87,8 @@ A typical out-of-bounds access looks like this:: The header of the report provides a short summary of the function involved in the access. It is followed by more detailed information about the access and -its origin. Note that, real kernel addresses are only shown for -``CONFIG_DEBUG_KERNEL=y`` builds. +its origin. Note that, real kernel addresses are only shown when using the +kernel command line option ``no_hash_pointers``. Use-after-free accesses are reported as:: @@ -183,8 +183,8 @@ invalidly written bytes (offset from the address) are shown; in this representation, '.' denote untouched bytes. In the example above ``0xac`` is the value written to the invalid address at offset 0, and the remaining '.' denote that no following bytes have been touched. Note that, real values are -only shown for ``CONFIG_DEBUG_KERNEL=y`` builds; to avoid information -disclosure for non-debug builds, '!' is used instead to denote invalidly +only shown if the kernel was booted with ``no_hash_pointers``; to avoid +information disclosure otherwise, '!' is used instead to denote invalidly written bytes. And finally, KFENCE may also report on invalid accesses to any protected page diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 338ced5..e6d2e6e 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -631,13 +631,9 @@ void __init kfence_init(void) WRITE_ONCE(kfence_enabled, true); schedule_delayed_work(&kfence_timer, 0); - pr_info("initialized - using %lu bytes for %d objects", KFENCE_POOL_SIZE, - CONFIG_KFENCE_NUM_OBJECTS); - if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) - pr_cont(" at 0x%px-0x%px\n", (void *)__kfence_pool, - (void *)(__kfence_pool + KFENCE_POOL_SIZE)); - else - pr_cont("\n"); + pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE, + CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool, + (void *)(__kfence_pool + KFENCE_POOL_SIZE)); } void kfence_shutdown_cache(struct kmem_cache *s) diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index 97282fa..b02aad2 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -10,13 +10,6 @@ #include "../slab.h" /* for struct kmem_cache */ -/* For non-debug builds, avoid leaking kernel pointers into dmesg. */ -#ifdef CONFIG_DEBUG_KERNEL -#define PTR_FMT "%px" -#else -#define PTR_FMT "%p" -#endif - /* * Get the canary byte pattern for @addr. Use a pattern that varies based on the * lower 3 bits of the address, to detect memory corruptions with higher diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index db1bb59..4acf425 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -146,7 +146,7 @@ static bool report_matches(const struct expect_report *r) break; } - cur += scnprintf(cur, end - cur, " 0x" PTR_FMT, (void *)r->addr); + cur += scnprintf(cur, end - cur, " 0x%p", (void *)r->addr); spin_lock_irqsave(&observed.lock, flags); if (!report_available()) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 1996295..0365555 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -14,6 +14,8 @@ #include "kfence.h" +extern bool no_hash_pointers; + /* Helper function to either print to a seq_file or to console. */ __printf(2, 3) static void seq_con_printf(struct seq_file *seq, const char *fmt, ...) @@ -113,7 +115,7 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met } seq_con_printf(seq, - "kfence-#%zd [0x" PTR_FMT "-0x" PTR_FMT + "kfence-#%zd [0x%p-0x%p" ", size=%d, cache=%s] allocated by task %d:\n", meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size, (cache && cache->name) ? cache->name : "", meta->alloc_track.pid); @@ -143,7 +145,7 @@ static void print_diff_canary(unsigned long address, size_t bytes_to_show, for (cur = (const u8 *)address; cur < end; cur++) { if (*cur == KFENCE_CANARY_PATTERN(cur)) pr_cont(" ."); - else if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) + else if (no_hash_pointers) pr_cont(" 0x%02x", *cur); else /* Do not leak kernel memory in non-debug builds. */ pr_cont(" !"); @@ -196,7 +198,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Out-of-bounds %s at 0x" PTR_FMT " (%luB %s of kfence-#%zd):\n", + pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n", get_access_type(is_write), (void *)address, left_of_object ? meta->addr - address : address - meta->addr, left_of_object ? "left" : "right", object_index); @@ -205,24 +207,24 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r case KFENCE_ERROR_UAF: pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Use-after-free %s at 0x" PTR_FMT " (in kfence-#%zd):\n", + pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n", get_access_type(is_write), (void *)address, object_index); break; case KFENCE_ERROR_CORRUPTION: pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Corrupted memory at 0x" PTR_FMT " ", (void *)address); + pr_err("Corrupted memory at 0x%p ", (void *)address); print_diff_canary(address, 16, meta); pr_cont(" (in kfence-#%zd):\n", object_index); break; case KFENCE_ERROR_INVALID: pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Invalid %s at 0x" PTR_FMT ":\n", get_access_type(is_write), + pr_err("Invalid %s at 0x%p:\n", get_access_type(is_write), (void *)address); break; case KFENCE_ERROR_INVALID_FREE: pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Invalid free of 0x" PTR_FMT " (in kfence-#%zd):\n", (void *)address, + pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address, object_index); break; } @@ -237,7 +239,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r /* Print report footer. */ pr_err("\n"); - if (IS_ENABLED(CONFIG_DEBUG_KERNEL) && regs) + if (no_hash_pointers && regs) show_regs(regs); else dump_stack_print_info(KERN_ERR); -- 2.7.4 From f165dc316f5e64f9061338b4cda17a25ca982dd2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 3 Mar 2021 13:11:57 +0100 Subject: [PATCH 06/16] kfence: fix printk format for ptrdiff_t Use %td for ptrdiff_t. Link: https://lkml.kernel.org/r/3abbe4c9-16ad-c168-a90f-087978ccd8f7@csgroup.eu Reported-by: Christophe Leroy Signed-off-by: Marco Elver Reviewed-by: Alexander Potapenko [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I1311862fe3bba39d7837c271a95e3513c8ef2c66 --- mm/kfence/report.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 0365555..0ab7330 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -110,12 +110,12 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met lockdep_assert_held(&meta->lock); if (meta->state == KFENCE_OBJECT_UNUSED) { - seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata); + seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata); return; } seq_con_printf(seq, - "kfence-#%zd [0x%p-0x%p" + "kfence-#%td [0x%p-0x%p" ", size=%d, cache=%s] allocated by task %d:\n", meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size, (cache && cache->name) ? cache->name : "", meta->alloc_track.pid); @@ -198,7 +198,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n", + pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n", get_access_type(is_write), (void *)address, left_of_object ? meta->addr - address : address - meta->addr, left_of_object ? "left" : "right", object_index); @@ -207,14 +207,14 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r case KFENCE_ERROR_UAF: pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n", + pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n", get_access_type(is_write), (void *)address, object_index); break; case KFENCE_ERROR_CORRUPTION: pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]); pr_err("Corrupted memory at 0x%p ", (void *)address); print_diff_canary(address, 16, meta); - pr_cont(" (in kfence-#%zd):\n", object_index); + pr_cont(" (in kfence-#%td):\n", object_index); break; case KFENCE_ERROR_INVALID: pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write), @@ -224,7 +224,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r break; case KFENCE_ERROR_INVALID_FREE: pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address, + pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address, object_index); break; } -- 2.7.4 From 5c40c2e95c37f73be6d4f9cebe2afaf7610ece60 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 4 Mar 2021 15:40:00 +0100 Subject: [PATCH 07/16] kfence: fix reports if constant function prefixes exist Some architectures prefix all functions with a constant string ('.' on ppc64). Add ARCH_FUNC_PREFIX, which may optionally be defined in , so that get_stack_skipnr() can work properly. Link: https://lkml.kernel.org/r/f036c53d-7e81-763c-47f4-6024c6c5f058@csgroup.eu Reported-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: Marco Elver [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: If0898f06ef741cc25dc0eaed3c4e2095ffe420f5 --- mm/kfence/report.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 0ab7330..89fe35e 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -14,6 +14,11 @@ #include "kfence.h" +/* May be overridden by . */ +#ifndef ARCH_FUNC_PREFIX +#define ARCH_FUNC_PREFIX "" +#endif + extern bool no_hash_pointers; /* Helper function to either print to a seq_file or to console. */ @@ -61,8 +66,9 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries for (skipnr = 0; skipnr < num_entries; skipnr++) { int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]); - if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") || - !strncmp(buf, "__slab_free", len)) { + if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") || + !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) { /* * In case of tail calls from any of the below * to any of the above. @@ -71,10 +77,10 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries } /* Also the *_bulk() variants by only checking prefixes. */ - if (str_has_prefix(buf, "kfree") || - str_has_prefix(buf, "kmem_cache_free") || - str_has_prefix(buf, "__kmalloc") || - str_has_prefix(buf, "kmem_cache_alloc")) + if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; } if (fallback < num_entries) -- 2.7.4 From 267fedaf4d03a7f4582f8945dcc45be3eea99dd6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 4 Mar 2021 21:52:56 +0100 Subject: [PATCH 08/16] kfence, slab: fix cache_alloc_debugcheck_after() for bulk allocations cache_alloc_debugcheck_after() performs checks on an object, including adjusting the returned pointer. None of this should apply to KFENCE objects. While for non-bulk allocations, the checks are skipped when we allocate via KFENCE, for bulk allocations cache_alloc_debugcheck_after() is called via cache_alloc_debugcheck_after_bulk(). Fix it by skipping cache_alloc_debugcheck_after() for KFENCE objects. Signed-off-by: Marco Elver [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I07e1131fb1659455be3ac7e65147e627de63520a --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 88e54dd..f107c46 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2991,7 +2991,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, void *objp, unsigned long caller) { WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); - if (!objp) + if (!objp || is_kfence_address(objp)) return objp; if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); -- 2.7.4 From 1a0db97ea6aa13a0e8c98d1c35b49fc10aca2cb2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 17 Mar 2021 09:47:40 +0100 Subject: [PATCH 09/16] kfence: make compatible with kmemleak Because memblock allocations are registered with kmemleak, the KFENCE pool was seen by kmemleak as one large object. Later allocations through kfence_alloc() that were registered with kmemleak via slab_post_alloc_hook() would then overlap and trigger a warning. Therefore, once the pool is initialized, we can remove (free) it from kmemleak again, since it should be treated as allocator-internal and be seen as "free memory". The second problem is that kmemleak is passed the rounded size, and not the originally requested size, which is also the size of KFENCE objects. To avoid kmemleak scanning past the end of an object and trigger a KFENCE out-of-bounds error, fix the size if it is a KFENCE object. For simplicity, to avoid a call to kfence_ksize() in slab_post_alloc_hook() (and avoid new IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) guard), just call kfence_ksize() in mm/kmemleak.c:create_object(). Reported-by: Luis Henriques Cc: Catalin Marinas Signed-off-by: Marco Elver Reviewed-by: Catalin Marinas Tested-by: Luis Henriques [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: Id820bc92f5fa8ca766b63c64101bdc7f13f96a46 --- mm/kfence/core.c | 9 +++++++++ mm/kmemleak.c | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index e6d2e6e..f2b0c00 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -476,6 +477,14 @@ static bool __init kfence_init_pool(void) addr += 2 * PAGE_SIZE; } + /* + * The pool is live and will never be deallocated from this point on. + * Remove the pool object from the kmemleak object tree, as it would + * otherwise overlap with allocations returned by kfence_alloc(), which + * are registered with kmemleak through the slab post-alloc hook. + */ + kmemleak_free(__kfence_pool); + return true; err: diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c0014d3..fe6e3ae 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -97,6 +97,7 @@ #include #include +#include #include #include @@ -589,7 +590,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, atomic_set(&object->use_count, 1); object->flags = OBJECT_ALLOCATED; object->pointer = ptr; - object->size = size; + object->size = kfence_ksize((void *)ptr) ?: size; object->excess_ref = 0; object->min_count = min_count; object->count = 0; /* white color initially */ -- 2.7.4 From 56415dab61b5616c8ddb74b72de91f7d66e6b534 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Wed, 22 Sep 2021 14:56:27 +0100 Subject: [PATCH 10/16] ARM: 9128/1: mm: Refactor the __do_page_fault() Clean up the multiple goto statements and drops local variable vm_fault_t fault, which will make the __do_page_fault() much more readability. No functional change. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) [backport of mainline commit caed89dab0ca0e73d7e016c04e1f5957650f4ec3] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: Ice8f005ae5aa4cc54a983a377d89657511bfa632 --- arch/arm/mm/fault.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 59487ee..fcfc0b8 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -205,35 +205,27 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, unsigned int flags, struct task_struct *tsk, struct pt_regs *regs) { - struct vm_area_struct *vma; - vm_fault_t fault; - - vma = find_vma(mm, addr); - fault = VM_FAULT_BADMAP; + struct vm_area_struct *vma = find_vma(mm, addr); if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > addr)) - goto check_stack; + return VM_FAULT_BADMAP; + + if (unlikely(vma->vm_start > addr)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + return VM_FAULT_BADMAP; + if (addr < FIRST_USER_ADDRESS) + return VM_FAULT_BADMAP; + if (expand_stack(vma, addr)) + return VM_FAULT_BADMAP; + } /* * Ok, we have a good vm_area for this * memory access, so we can handle it. */ -good_area: - if (access_error(fsr, vma)) { - fault = VM_FAULT_BADACCESS; - goto out; - } + if (access_error(fsr, vma)) + return VM_FAULT_BADACCESS; return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); - -check_stack: - /* Don't allow expansion below FIRST_USER_ADDRESS */ - if (vma->vm_flags & VM_GROWSDOWN && - addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) - goto good_area; -out: - return fault; } static int __kprobes -- 2.7.4 From 43eb9049e4915d3d0aedec26ae0e2d3108b30ac1 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Wed, 22 Sep 2021 14:56:28 +0100 Subject: [PATCH 11/16] ARM: 9129/1: mm: Kill task_struct argument for __do_page_fault() The __do_page_fault() won't use task_struct argument, kill it and also use current->mm directly in do_page_fault(). No functional change. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) [backport of mainline commit 488cab12c37169687015fbdfa8be7b0da42f3146] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I754182937867caad76e7fd5179eb35f697082cec --- arch/arm/mm/fault.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index fcfc0b8..cc6b4300 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -202,8 +202,7 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) static vm_fault_t __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct task_struct *tsk, - struct pt_regs *regs) + unsigned int flags, struct pt_regs *regs) { struct vm_area_struct *vma = find_vma(mm, addr); if (unlikely(!vma)) @@ -231,8 +230,7 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, static int __kprobes do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; int sig, code; vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; @@ -240,8 +238,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (kprobe_page_fault(regs, fsr)) return 0; - tsk = current; - mm = tsk->mm; /* Enable interrupts if they were enabled in the parent context. */ if (interrupts_enabled(regs)) @@ -285,7 +281,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs); + fault = __do_page_fault(mm, addr, fsr, flags, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because -- 2.7.4 From c63f938c99d38c81ffa486484bb8bfe508ccdeed Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Wed, 22 Sep 2021 14:56:29 +0100 Subject: [PATCH 12/16] ARM: 9127/1: mm: Cleanup access_error() Now the write fault check in do_page_fault() and access_error() twice, we can cleanup access_error(), and make the fault check and vma flags set into do_page_fault() directly, then pass the vma flags to __do_page_fault. No functional change. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) [backport of mainline commit f177b06ed7d56c1d8256769d1a6e1feec90153dc] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I87a54628bad2b5ed14cc2fbaaa5b8f547d90294f --- arch/arm/mm/fault.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index cc6b4300..90b3979 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -183,26 +183,9 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -/* - * Check that the permissions on the VMA allow for the fault which occurred. - * If we encountered a write fault, we must have write permission, otherwise - * we allow any permission. - */ -static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) -{ - unsigned int mask = VM_ACCESS_FLAGS; - - if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) - mask = VM_WRITE; - if (fsr & FSR_LNX_PF) - mask = VM_EXEC; - - return vma->vm_flags & mask ? false : true; -} - static vm_fault_t __kprobes -__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct pt_regs *regs) +__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int flags, + unsigned long vma_flags, struct pt_regs *regs) { struct vm_area_struct *vma = find_vma(mm, addr); if (unlikely(!vma)) @@ -218,10 +201,10 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, } /* - * Ok, we have a good vm_area for this - * memory access, so we can handle it. + * ok, we have a good vm_area for this memory access, check the + * permissions on the VMA allow for the fault which occurred. */ - if (access_error(fsr, vma)) + if (!(vma->vm_flags & vma_flags)) return VM_FAULT_BADACCESS; return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); @@ -234,6 +217,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) int sig, code; vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; + unsigned long vm_flags = VM_ACCESS_FLAGS; if (kprobe_page_fault(regs, fsr)) return 0; @@ -252,8 +236,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) + + if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) { flags |= FAULT_FLAG_WRITE; + vm_flags = VM_WRITE; + } + + if (fsr & FSR_LNX_PF) + vm_flags = VM_EXEC; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); @@ -281,7 +271,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, fsr, flags, regs); + fault = __do_page_fault(mm, addr, flags, vm_flags, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because -- 2.7.4 From e2829e5124ba0e25db2209c82b44c1a8e4239b69 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Wed, 22 Sep 2021 14:56:31 +0100 Subject: [PATCH 13/16] ARM: 9130/1: mm: Provide die_kernel_fault() helper Provide die_kernel_fault() helper to do the kernel fault reporting, which with msg argument, it could report different message in different scenes, and the later patch "ARM: mm: Fix PXN process with LPAE feature" will use it. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) [backport of mainline commit 2e707106fac7f81f780d7c76770a726c46757a84] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I4143a6323aef64f05752616f27361a2f92f33e8e --- arch/arm/mm/fault.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 90b3979..17923ef 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -100,6 +100,21 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { } #endif /* CONFIG_MMU */ +static void die_kernel_fault(const char *msg, struct mm_struct *mm, + unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + bust_spinlocks(1); + pr_alert("8<--- cut here ---\n"); + pr_alert("Unable to handle kernel %s at virtual address %08lx\n", + msg, addr); + + show_pte(KERN_ALERT, mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + /* * Oops. The kernel tried to access some page that wasn't present. */ @@ -107,6 +122,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + const char *msg; /* * Are we prepared to handle this kernel fault? */ @@ -116,16 +132,12 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, /* * No handler, we'll have to terminate things with extreme prejudice. */ - bust_spinlocks(1); - pr_alert("8<--- cut here ---\n"); - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", - (addr < PAGE_SIZE) ? "NULL pointer dereference" : - "paging request", addr); + if (addr < PAGE_SIZE) + msg = "NULL pointer dereference"; + else + msg = "paging request"; - show_pte(KERN_ALERT, mm, addr); - die("Oops", regs, fsr); - bust_spinlocks(0); - do_exit(SIGKILL); + die_kernel_fault(msg, mm, addr, fsr, regs); } /* -- 2.7.4 From a51b49b950509de09bf0bd10dd013fcce92d3a1e Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 15 Nov 2021 21:48:47 +0800 Subject: [PATCH 14/16] ARM: mm: Provide is_write_fault() The function will check whether the fault is caused by a write access, it will be called in die_kernel_fault() too in next patch, so put it before the function of die_kernel_fault(). Acked-by: Marco Elver Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/linux-arm-kernel/20211115134848.171098-3-wangkefeng.wang@huawei.com/ [port for kfence feature to rpi-5.10.95] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I489b056ed9cbd8c4dbab142f3f9e26dbd727189f --- arch/arm/mm/fault.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 17923ef..d050021 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -100,6 +100,11 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { } #endif /* CONFIG_MMU */ +static inline bool is_write_fault(unsigned int fsr) +{ + return (fsr & FSR_WRITE) && !(fsr & FSR_CM); +} + static void die_kernel_fault(const char *msg, struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -249,7 +254,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) { + if (is_write_fault(fsr)) { flags |= FAULT_FLAG_WRITE; vm_flags = VM_WRITE; } -- 2.7.4 From 8b6fcae657f78f701ee7dd20cd7df80259b9c245 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 15 Nov 2021 21:48:46 +0800 Subject: [PATCH 15/16] ARM: mm: Provide set_memory_valid() This function validates and invalidates PTE entries, it will be used in the later patch. Acked-by: Marco Elver Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/linux-arm-kernel/20211115134848.171098-2-wangkefeng.wang@huawei.com/ [port for kfence feature to rpi-5.10.95] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I2da9a932f8d0961d72afb49f23f574784ffa2b36 --- arch/arm/include/asm/set_memory.h | 1 + arch/arm/mm/pageattr.c | 42 +++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/set_memory.h b/arch/arm/include/asm/set_memory.h index a1ceff4..1e65ecb 100644 --- a/arch/arm/include/asm/set_memory.h +++ b/arch/arm/include/asm/set_memory.h @@ -11,6 +11,7 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +int set_memory_valid(unsigned long addr, int numpages, int enable); #else static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 9790ae3..c3c34fe 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -32,14 +32,31 @@ static bool in_range(unsigned long start, unsigned long size, size <= range_end - start; } +/* + * This function assumes that the range is mapped with PAGE_SIZE pages. + */ +static int __change_memory_common(unsigned long start, unsigned long size, + pgprot_t set_mask, pgprot_t clear_mask) +{ + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + return ret; +} + static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr & PAGE_MASK; unsigned long end = PAGE_ALIGN(addr) + numpages * PAGE_SIZE; unsigned long size = end - start; - int ret; - struct page_change_data data; WARN_ON_ONCE(start != addr); @@ -50,14 +67,7 @@ static int change_memory_common(unsigned long addr, int numpages, !in_range(start, size, VMALLOC_START, VMALLOC_END)) return -EINVAL; - data.set_mask = set_mask; - data.clear_mask = clear_mask; - - ret = apply_to_page_range(&init_mm, start, size, change_page_range, - &data); - - flush_tlb_kernel_range(start, end); - return ret; + return __change_memory_common(start, size, set_mask, clear_mask); } int set_memory_ro(unsigned long addr, int numpages) @@ -87,3 +97,15 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(0), __pgprot(L_PTE_XN)); } + +int set_memory_valid(unsigned long addr, int numpages, int enable) +{ + if (enable) + return __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(L_PTE_VALID), + __pgprot(0)); + else + return __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(0), + __pgprot(L_PTE_VALID)); +} -- 2.7.4 From 40496ad8911ab0174aacad32aaf7e374f5c82279 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 15 Nov 2021 21:48:48 +0800 Subject: [PATCH 16/16] ARM: Support KFENCE for ARM Add architecture specific implementation details for KFENCE and enable KFENCE on ARM. In particular, this implements the required interface in . KFENCE requires that attributes for pages from its memory pool can individually be set. Therefore, force the kfence pool to be mapped at page granularity. Testing this patch using the testcases in kfence_test.c and all passed with or without ARM_LPAE. Acked-by: Marco Elver Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/linux-arm-kernel/20211115134848.171098-4-wangkefeng.wang@huawei.com/ [port for kfence feature to rpi-5.10.95] Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: Ib2afb1c98fb5bf437f68766fe391ac9bc118f6ad --- arch/arm/Kconfig | 1 + arch/arm/include/asm/kfence.h | 53 +++++++++++++++++++++++++++++++++++++++++++ arch/arm/mm/fault.c | 9 ++++++-- 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/kfence.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a87c79a..99e02d1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -68,6 +68,7 @@ config ARM select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT + select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP diff --git a/arch/arm/include/asm/kfence.h b/arch/arm/include/asm/kfence.h new file mode 100644 index 0000000..7980d0f --- /dev/null +++ b/arch/arm/include/asm/kfence.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_ARM_KFENCE_H +#define __ASM_ARM_KFENCE_H + +#include + +#include +#include + +static inline int split_pmd_page(pmd_t *pmd, unsigned long addr) +{ + int i; + unsigned long pfn = PFN_DOWN(__pa(addr)); + pte_t *pte = pte_alloc_one_kernel(&init_mm); + + if (!pte) + return -ENOMEM; + + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte_ext(pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0); + pmd_populate_kernel(&init_mm, pmd, pte); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + return 0; +} + +static inline bool arch_kfence_init_pool(void) +{ + unsigned long addr; + pmd_t *pmd; + + for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); + addr += PAGE_SIZE) { + pmd = pmd_off_k(addr); + + if (pmd_leaf(*pmd)) { + if (split_pmd_page(pmd, addr & PMD_MASK)) + return false; + } + } + + return true; +} + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + set_memory_valid(addr, 1, !protect); + + return true; +} + +#endif /* __ASM_ARM_KFENCE_H */ diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index d050021..54952fc 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -137,10 +138,14 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, /* * No handler, we'll have to terminate things with extreme prejudice. */ - if (addr < PAGE_SIZE) + if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; - else + } else { + if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) + return; + msg = "paging request"; + } die_kernel_fault(msg, mm, addr, fsr, regs); } -- 2.7.4