commit
a149180fbcf336e97ce4eb2cdc13672727feb94d upstream.
Note: needs to be in a section distinct from Retpolines such that the
Retpoline RET substitution cannot possibly use immediate jumps.
ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
little tricky but works due to the fact that zen_untrain_ret() doesn't
have any stack ops and as such will emit a single ORC entry at the
start (+0x3f).
Meanwhile, unwinding an IP, including the __x86_return_thunk() one
(+0x40) will search for the largest ORC entry smaller or equal to the
IP, these will find the one ORC entry (+0x3f) and all works.
[ Alexandre: SVM part. ]
[ bp: Build fix, massages. ]
Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
[cascardo: conflicts at arch/x86/entry/entry_64_compat.S]
[cascardo: there is no ANNOTATE_NOENDBR]
[cascardo: objtool commit
34c861e806478ac2ea4032721defbf1d6967df08 missing]
[cascardo: conflict fixup]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+ UNTRAIN_RET
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq %rdi /* Stash user RDI */
swapgs /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+ UNTRAIN_RET
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
* be retrieved from a kernel internal table.
*/
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+ UNTRAIN_RET
/*
* Handling GSBASE depends on the availability of FSGSBASE.
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ UNTRAIN_RET
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
SWAPGS
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ UNTRAIN_RET
/*
* Pretend that the exception came from user mode: set up pt_regs
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
+ UNTRAIN_RET
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
pushq 3*8(%rdx) /* pt_regs->flags */
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
pushq $__USER32_CS /* pt_regs->cs */
pushq $0 /* pt_regs->ip = 0 (placeholder) */
SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+ UNTRAIN_RET
/*
* User tracing code (ptrace or signal handlers) might assume that
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+ UNTRAIN_RET
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
pushq (%rdi) /* pt_regs->di */
.Lint80_keep_stack:
+ UNTRAIN_RET
pushq %rsi /* pt_regs->si */
xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */
#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#else
# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
(1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
- (1 << (X86_FEATURE_RETHUNK & 31)))
+ (1 << (X86_FEATURE_RETHUNK & 31)) | \
+ (1 << (X86_FEATURE_UNRET & 31)))
#endif
/* Force disable because it's broken beyond repair */
#endif
.endm
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * Doesn't clobber any registers but does require a stable stack.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
+#endif
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
".popsection\n\t"
extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
- *(.text.__x86.indirect_thunk)
+ *(.text.__x86.*)
__indirect_thunk_end = .;
#endif
} :text =0xcccc
#endif
/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize 'ret' if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
+ /*
* Clear all general purpose registers except RSP and RAX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize RET if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
*/
-SYM_CODE_START(__x86_return_thunk)
- UNWIND_HINT_EMPTY
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
ret
int3
SYM_CODE_END(__x86_return_thunk)
-__EXPORT_THUNK(__x86_return_thunk)
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
annotate_call_site(file, insn, false);
}
-static void add_return_call(struct objtool_file *file, struct instruction *insn)
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
{
/*
* Return thunk tail calls are really just returns in disguise,
insn->retpoline_safe = true;
/* Skip the non-text sections, specially .discard ones */
- if (insn->sec->text)
+ if (add && insn->sec->text)
list_add_tail(&insn->call_node, &file->return_thunk_list);
}
add_retpoline_call(file, insn);
continue;
} else if (reloc->sym->return_thunk) {
- add_return_call(file, insn);
+ add_return_call(file, insn, true);
continue;
} else if (insn->func) {
/* internal or external sibling call (with reloc) */
insn->jump_dest = find_insn(file, dest_sec, dest_off);
if (!insn->jump_dest) {
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
/*
* This is a special case where an alt instruction
if (!strcmp(insn->sec->name, ".altinstr_replacement"))
continue;
+ /*
+ * This is a special case for zen_untrain_ret().
+ * It jumps to __x86_return_thunk(), but objtool
+ * can't find the thunk's starting RET
+ * instruction, because the RET is also in the
+ * middle of another instruction. Objtool only
+ * knows about the outer instruction.
+ */
+ if (sym && sym->return_thunk) {
+ add_return_call(file, insn, false);
+ continue;
+ }
+
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);