// SPDX-License-Identifier: GPL-2.0
/*
 * shstk.c - Intel shadow stack support
 *
 * Copyright (c) 2021, Intel Corporation.
 * Yu-cheng Yu <yu-cheng.yu@intel.com>
 */
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/sched/signal.h>
#include <linux/compat.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <linux/syscalls.h>
#include <asm/msr.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/types.h>
#include <asm/shstk.h>
#include <asm/special_insns.h>
#include <asm/fpu/api.h>
#include <asm/prctl.h>
29 #define SS_FRAME_SIZE 8
31 static bool features_enabled(unsigned long features)
33 return current->thread.features & features;
36 static void features_set(unsigned long features)
38 current->thread.features |= features;
41 static void features_clr(unsigned long features)
43 current->thread.features &= ~features;
47 * Create a restore token on the shadow stack. A token is always 8-byte
50 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
54 /* Token must be aligned */
55 if (!IS_ALIGNED(ssp, 8))
58 addr = ssp - SS_FRAME_SIZE;
61 * SSP is aligned, so reserved bits and mode bit are a zero, just mark
66 if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
76 * VM_SHADOW_STACK will have a guard page. This helps userspace protect
77 * itself from attacks. The reasoning is as follows:
79 * The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The
80 * INCSSP instruction can increment the shadow stack pointer. It is the
81 * shadow stack analog of an instruction like:
85 * However, there is one important difference between an ADD on %rsp
86 * and INCSSP. In addition to modifying SSP, INCSSP also reads from the
87 * memory of the first and last elements that were "popped". It can be
88 * thought of as acting like this:
90 * READ_ONCE(ssp); // read+discard top element on stack
91 * ssp += nr_to_pop * 8; // move the shadow stack
92 * READ_ONCE(ssp-8); // read+discard last popped stack element
94 * The maximum distance INCSSP can move the SSP is 2040 bytes, before
95 * it would read the memory. Therefore a single page gap will be enough
96 * to prevent any operation from shifting the SSP to an adjacent stack,
97 * since it would have to land in the gap at least once, causing a
100 static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
101 unsigned long token_offset, bool set_res_tok)
103 int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
104 struct mm_struct *mm = current->mm;
105 unsigned long mapped_addr, unused;
108 flags |= MAP_FIXED_NOREPLACE;
111 mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
112 VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
113 mmap_write_unlock(mm);
115 if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
118 if (create_rstor_token(mapped_addr + token_offset, NULL)) {
119 vm_munmap(mapped_addr, size);
127 static unsigned long adjust_shstk_size(unsigned long size)
130 return PAGE_ALIGN(size);
132 return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
135 static void unmap_shadow_stack(u64 base, u64 size)
139 r = vm_munmap(base, size);
142 * mmap_write_lock_killable() failed with -EINTR. This means
143 * the process is about to die and have it's MM cleaned up.
144 * This task shouldn't ever make it back to userspace. In this
145 * case it is ok to leak a shadow stack, so just exit out.
151 * For all other types of vm_munmap() failure, either the
152 * system is out of memory or there is bug.
157 static int shstk_setup(void)
159 struct thread_shstk *shstk = ¤t->thread.shstk;
160 unsigned long addr, size;
162 /* Already enabled */
163 if (features_enabled(ARCH_SHSTK_SHSTK))
166 /* Also not supported for 32 bit and x32 */
167 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
170 size = adjust_shstk_size(0);
171 addr = alloc_shstk(0, size, 0, false);
172 if (IS_ERR_VALUE(addr))
173 return PTR_ERR((void *)addr);
175 fpregs_lock_and_load();
176 wrmsrl(MSR_IA32_PL3_SSP, addr + size);
177 wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
182 features_set(ARCH_SHSTK_SHSTK);
187 void reset_thread_features(void)
189 memset(¤t->thread.shstk, 0, sizeof(struct thread_shstk));
190 current->thread.features = 0;
191 current->thread.features_locked = 0;
194 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
195 unsigned long stack_size)
197 struct thread_shstk *shstk = &tsk->thread.shstk;
198 unsigned long addr, size;
201 * If shadow stack is not enabled on the new thread, skip any
202 * switch to a new shadow stack.
204 if (!features_enabled(ARCH_SHSTK_SHSTK))
208 * For CLONE_VM, except vfork, the child needs a separate shadow
211 if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
214 size = adjust_shstk_size(stack_size);
215 addr = alloc_shstk(0, size, 0, false);
216 if (IS_ERR_VALUE(addr))
225 static unsigned long get_user_shstk_addr(void)
227 unsigned long long ssp;
229 fpregs_lock_and_load();
231 rdmsrl(MSR_IA32_PL3_SSP, ssp);
238 #define SHSTK_DATA_BIT BIT(63)
240 static int put_shstk_data(u64 __user *addr, u64 data)
242 if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
246 * Mark the high bit so that the sigframe can't be processed as a
249 if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT))
254 static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
258 if (unlikely(get_user(ldata, addr)))
261 if (!(ldata & SHSTK_DATA_BIT))
264 *data = ldata & ~SHSTK_DATA_BIT;
269 static int shstk_push_sigframe(unsigned long *ssp)
271 unsigned long target_ssp = *ssp;
273 /* Token must be aligned */
274 if (!IS_ALIGNED(target_ssp, 8))
277 *ssp -= SS_FRAME_SIZE;
278 if (put_shstk_data((void __user *)*ssp, target_ssp))
284 static int shstk_pop_sigframe(unsigned long *ssp)
286 struct vm_area_struct *vma;
287 unsigned long token_addr;
288 bool need_to_check_vma;
292 * It is possible for the SSP to be off the end of a shadow stack by 4
293 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
294 * before it, it might be this case, so check that the address being
295 * read is actually shadow stack.
297 if (!IS_ALIGNED(*ssp, 8))
300 need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
302 if (need_to_check_vma)
303 mmap_read_lock_killable(current->mm);
305 err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
309 if (need_to_check_vma) {
310 vma = find_vma(current->mm, *ssp);
311 if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
316 mmap_read_unlock(current->mm);
319 /* Restore SSP aligned? */
320 if (unlikely(!IS_ALIGNED(token_addr, 8)))
323 /* SSP in userspace? */
324 if (unlikely(token_addr >= TASK_SIZE_MAX))
331 if (need_to_check_vma)
332 mmap_read_unlock(current->mm);
336 int setup_signal_shadow_stack(struct ksignal *ksig)
338 void __user *restorer = ksig->ka.sa.sa_restorer;
342 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
343 !features_enabled(ARCH_SHSTK_SHSTK))
349 ssp = get_user_shstk_addr();
353 err = shstk_push_sigframe(&ssp);
357 /* Push restorer address */
358 ssp -= SS_FRAME_SIZE;
359 err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
363 fpregs_lock_and_load();
364 wrmsrl(MSR_IA32_PL3_SSP, ssp);
370 int restore_signal_shadow_stack(void)
375 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
376 !features_enabled(ARCH_SHSTK_SHSTK))
379 ssp = get_user_shstk_addr();
383 err = shstk_pop_sigframe(&ssp);
387 fpregs_lock_and_load();
388 wrmsrl(MSR_IA32_PL3_SSP, ssp);
394 void shstk_free(struct task_struct *tsk)
396 struct thread_shstk *shstk = &tsk->thread.shstk;
398 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
399 !features_enabled(ARCH_SHSTK_SHSTK))
403 * When fork() with CLONE_VM fails, the child (tsk) already has a
404 * shadow stack allocated, and exit_thread() calls this function to
405 * free it. In this case the parent (current) and the child share
406 * the same mm struct.
408 if (!tsk->mm || tsk->mm != current->mm)
411 unmap_shadow_stack(shstk->base, shstk->size);
414 static int wrss_control(bool enable)
418 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
422 * Only enable WRSS if shadow stack is enabled. If shadow stack is not
423 * enabled, WRSS will already be disabled, so don't bother clearing it
426 if (!features_enabled(ARCH_SHSTK_SHSTK))
429 /* Already enabled/disabled? */
430 if (features_enabled(ARCH_SHSTK_WRSS) == enable)
433 fpregs_lock_and_load();
434 rdmsrl(MSR_IA32_U_CET, msrval);
437 features_set(ARCH_SHSTK_WRSS);
438 msrval |= CET_WRSS_EN;
440 features_clr(ARCH_SHSTK_WRSS);
441 if (!(msrval & CET_WRSS_EN))
444 msrval &= ~CET_WRSS_EN;
447 wrmsrl(MSR_IA32_U_CET, msrval);
455 static int shstk_disable(void)
457 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
460 /* Already disabled? */
461 if (!features_enabled(ARCH_SHSTK_SHSTK))
464 fpregs_lock_and_load();
465 /* Disable WRSS too when disabling shadow stack */
466 wrmsrl(MSR_IA32_U_CET, 0);
467 wrmsrl(MSR_IA32_PL3_SSP, 0);
471 features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
476 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
478 bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
479 unsigned long aligned_size;
481 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
484 if (flags & ~SHADOW_STACK_SET_TOKEN)
487 /* If there isn't space for a token */
488 if (set_tok && size < 8)
491 if (addr && addr < SZ_4G)
495 * An overflow would result in attempting to write the restore token
496 * to the wrong location. Not catastrophic, but just return the right
497 * error code and block it.
499 aligned_size = PAGE_ALIGN(size);
500 if (aligned_size < size)
503 return alloc_shstk(addr, aligned_size, size, set_tok);
506 long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
508 unsigned long features = arg2;
510 if (option == ARCH_SHSTK_STATUS) {
511 return put_user(task->thread.features, (unsigned long __user *)arg2);
514 if (option == ARCH_SHSTK_LOCK) {
515 task->thread.features_locked |= features;
519 /* Only allow via ptrace */
520 if (task != current) {
521 if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
522 task->thread.features_locked &= ~features;
528 /* Do not allow to change locked features */
529 if (features & task->thread.features_locked)
532 /* Only support enabling/disabling one feature at a time. */
533 if (hweight_long(features) > 1)
536 if (option == ARCH_SHSTK_DISABLE) {
537 if (features & ARCH_SHSTK_WRSS)
538 return wrss_control(false);
539 if (features & ARCH_SHSTK_SHSTK)
540 return shstk_disable();
544 /* Handle ARCH_SHSTK_ENABLE */
545 if (features & ARCH_SHSTK_SHSTK)
546 return shstk_setup();
547 if (features & ARCH_SHSTK_WRSS)
548 return wrss_control(true);