x86: Add magic AMD return-thunk

author Peter Zijlstra <peterz@infradead.org>

Tue, 14 Jun 2022 21:15:48 +0000 (23:15 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 23 Jul 2022 10:54:01 +0000 (12:54 +0200)
author Peter Zijlstra <peterz@infradead.org>
Tue, 14 Jun 2022 21:15:48 +0000 (23:15 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 23 Jul 2022 10:54:01 +0000 (12:54 +0200)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S

index a63d097..cf08173 100644 (file)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -94,6 +94,7 @@ SYM_CODE_START(entry_SYSCALL_64)
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
  
  SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+       UNTRAIN_RET
  
         /* Construct struct pt_regs on stack */
         pushq   $__USER_DS                              /* pt_regs->ss */
@@ -688,6 +689,7 @@ native_irq_return_ldt:
         pushq   %rdi                            /* Stash user RDI */
         swapgs                                  /* to kernel GS */
         SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
+       UNTRAIN_RET
  
         movq    PER_CPU_VAR(espfix_waddr), %rdi
         movq    %rax, (0*8)(%rdi)               /* user RAX */
@@ -882,6 +884,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
          * be retrieved from a kernel internal table.
          */
         SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+       UNTRAIN_RET
  
         /*
          * Handling GSBASE depends on the availability of FSGSBASE.
@@ -992,6 +995,7 @@ SYM_CODE_START_LOCAL(error_entry)
         FENCE_SWAPGS_USER_ENTRY
         /* We have user CR3.  Change to kernel CR3. */
         SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+       UNTRAIN_RET
  
         leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
  .Lerror_entry_from_usermode_after_swapgs:
@@ -1044,6 +1048,7 @@ SYM_CODE_START_LOCAL(error_entry)
         SWAPGS
         FENCE_SWAPGS_USER_ENTRY
         SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+       UNTRAIN_RET
  
         /*
          * Pretend that the exception came from user mode: set up pt_regs
@@ -1138,6 +1143,7 @@ SYM_CODE_START(asm_exc_nmi)
         movq    %rsp, %rdx
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
         UNWIND_HINT_IRET_REGS base=%rdx offset=8
+       UNTRAIN_RET
         pushq   5*8(%rdx)       /* pt_regs->ss */
         pushq   4*8(%rdx)       /* pt_regs->rsp */
         pushq   3*8(%rdx)       /* pt_regs->flags */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S

index 0051cf5..007f3a1 100644 (file)
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -14,6 +14,7 @@
  #include <asm/irqflags.h>
  #include <asm/asm.h>
  #include <asm/smap.h>
+#include <asm/nospec-branch.h>
  #include <linux/linkage.h>
  #include <linux/err.h>
  
@@ -71,6 +72,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
         pushq   $__USER32_CS            /* pt_regs->cs */
         pushq   $0                      /* pt_regs->ip = 0 (placeholder) */
  SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+       UNTRAIN_RET
  
         /*
          * User tracing code (ptrace or signal handlers) might assume that
@@ -211,6 +213,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
  
  SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+       UNTRAIN_RET
  
         /* Construct struct pt_regs on stack */
         pushq   $__USER32_DS            /* pt_regs->ss */
@@ -377,6 +380,7 @@ SYM_CODE_START(entry_INT80_compat)
         pushq   (%rdi)                  /* pt_regs->di */
  .Lint80_keep_stack:
  
+       UNTRAIN_RET
         pushq   %rsi                    /* pt_regs->si */
         xorl    %esi, %esi              /* nospec   si */
         pushq   %rdx                    /* pt_regs->dx */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h

index b1fd846..b7218d5 100644 (file)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,6 +299,7 @@
  #define X86_FEATURE_RETPOLINE          (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
  #define X86_FEATURE_RETPOLINE_LFENCE   (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
  #define X86_FEATURE_RETHUNK            (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET              (11*32+15) /* "" AMD BTB untrain return */
  
  /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
  #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h

index d7c6e64..1905ac5 100644 (file)
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -61,7 +61,8 @@
  #else
  # define DISABLE_RETPOLINE     ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
                                  (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \
-                                (1 << (X86_FEATURE_RETHUNK & 31)))
+                                (1 << (X86_FEATURE_RETHUNK & 31)) | \
+                                (1 << (X86_FEATURE_UNRET & 31)))
  #endif
  
  /* Force disable because it's broken beyond repair */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h

index 2b0d71f..3e61777 100644 (file)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -112,6 +112,22 @@
  #endif
  .endm
  
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * Doesn't clobber any registers but does require a stable stack.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#ifdef CONFIG_RETPOLINE
+       ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
+#endif
+.endm
+
  #else /* __ASSEMBLY__ */
  
  #define ANNOTATE_RETPOLINE_SAFE                                        \
@@ -121,6 +137,7 @@
         ".popsection\n\t"
  
  extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
  
  #ifdef CONFIG_RETPOLINE
  
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S

index a99d016..c1efcd1 100644 (file)
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -142,7 +142,7 @@ SECTIONS
  
  #ifdef CONFIG_RETPOLINE
                 __indirect_thunk_start = .;
-               *(.text.__x86.indirect_thunk)
+               *(.text.__x86.*)
                 __indirect_thunk_end = .;
  #endif
         } :text =0xcccc
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S

index dfaeb47..723f853 100644 (file)
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run)
  #endif
  
         /*
+        * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+        * untrained as soon as we exit the VM and are back to the
+        * kernel. This should be done before re-enabling interrupts
+        * because interrupt handlers won't sanitize RET if the return is
+        * from the kernel.
+        */
+       UNTRAIN_RET
+
+       /*
          * Clear all general purpose registers except RSP and RAX to prevent
          * speculative use of the guest's values, even those that are reloaded
          * via the stack.  In theory, an L1 cache miss when restoring registers
@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
         FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
  #endif
  
+       /*
+        * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+        * untrained as soon as we exit the VM and are back to the
+        * kernel. This should be done before re-enabling interrupts
+        * because interrupt handlers won't sanitize RET if the return is
+        * from the kernel.
+        */
+       UNTRAIN_RET
+
         pop %_ASM_BX
  
  #ifdef CONFIG_X86_64
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S

index 01667ea..807f674 100644 (file)
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -71,10 +71,67 @@ SYM_CODE_END(__x86_indirect_thunk_array)
   * This function name is magical and is used by -mfunction-return=thunk-extern
   * for the compiler to generate JMPs to it.
   */
-SYM_CODE_START(__x86_return_thunk)
-       UNWIND_HINT_EMPTY
+       .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ *    alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ *    end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ *    from re-poisoning the BTB prediction.
+ */
+       .align 64
+       .skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+       /*
+        * As executed from zen_untrain_ret, this is:
+        *
+        *   TEST $0xcc, %bl
+        *   LFENCE
+        *   JMP __x86_return_thunk
+        *
+        * Executing the TEST instruction has a side effect of evicting any BTB
+        * prediction (potentially attacker controlled) attached to the RET, as
+        * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+        */
+       .byte   0xf6
+
+       /*
+        * As executed from __x86_return_thunk, this is a plain RET.
+        *
+        * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+        *
+        * We subsequently jump backwards and architecturally execute the RET.
+        * This creates a correct BTB prediction (type=ret), but in the
+        * meantime we suffer Straight Line Speculation (because the type was
+        * no branch) which is halted by the INT3.
+        *
+        * With SMT enabled and STIBP active, a sibling thread cannot poison
+        * RET's prediction to a type of its choice, but can evict the
+        * prediction due to competitive sharing. If the prediction is
+        * evicted, __x86_return_thunk will suffer Straight Line Speculation
+        * which will be contained safely by the INT3.
+        */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
         ret
         int3
  SYM_CODE_END(__x86_return_thunk)
  
-__EXPORT_THUNK(__x86_return_thunk)
+       /*
+        * Ensure the TEST decoding / BTB invalidation is complete.
+        */
+       lfence
+
+       /*
+        * Jump back and execute the RET in the middle of the TEST instruction.
+        * INT3 is for SLS protection.
+        */
+       jmp __x86_return_thunk
+       int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c

index a955cf4..a90f0d6 100644 (file)
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1145,7 +1145,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
         annotate_call_site(file, insn, false);
  }
  
-static void add_return_call(struct objtool_file *file, struct instruction *insn)
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
  {
         /*
          * Return thunk tail calls are really just returns in disguise,
@@ -1155,7 +1155,7 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn)
         insn->retpoline_safe = true;
  
         /* Skip the non-text sections, specially .discard ones */
-       if (insn->sec->text)
+       if (add && insn->sec->text)
                 list_add_tail(&insn->call_node, &file->return_thunk_list);
  }
  
@@ -1184,7 +1184,7 @@ static int add_jump_destinations(struct objtool_file *file)
                         add_retpoline_call(file, insn);
                         continue;
                 } else if (reloc->sym->return_thunk) {
-                       add_return_call(file, insn);
+                       add_return_call(file, insn, true);
                         continue;
                 } else if (insn->func) {
                         /* internal or external sibling call (with reloc) */
@@ -1201,6 +1201,7 @@ static int add_jump_destinations(struct objtool_file *file)
  
                 insn->jump_dest = find_insn(file, dest_sec, dest_off);
                 if (!insn->jump_dest) {
+                       struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
  
                         /*
                          * This is a special case where an alt instruction
@@ -1210,6 +1211,19 @@ static int add_jump_destinations(struct objtool_file *file)
                         if (!strcmp(insn->sec->name, ".altinstr_replacement"))
                                 continue;
  
+                       /*
+                        * This is a special case for zen_untrain_ret().
+                        * It jumps to __x86_return_thunk(), but objtool
+                        * can't find the thunk's starting RET
+                        * instruction, because the RET is also in the
+                        * middle of another instruction.  Objtool only
+                        * knows about the outer instruction.
+                        */
+                       if (sym && sym->return_thunk) {
+                               add_return_call(file, insn, false);
+                               continue;
+                       }
+
                         WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
                                   insn->sec, insn->offset, dest_sec->name,
                                   dest_off);
author	Peter Zijlstra <peterz@infradead.org>
	Tue, 14 Jun 2022 21:15:48 +0000 (23:15 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 23 Jul 2022 10:54:01 +0000 (12:54 +0200)
arch/x86/entry/entry_64.S		patch \| blob \| history
arch/x86/entry/entry_64_compat.S		patch \| blob \| history
arch/x86/include/asm/cpufeatures.h		patch \| blob \| history
arch/x86/include/asm/disabled-features.h		patch \| blob \| history
arch/x86/include/asm/nospec-branch.h		patch \| blob \| history
arch/x86/kernel/vmlinux.lds.S		patch \| blob \| history
arch/x86/kvm/svm/vmenter.S		patch \| blob \| history
arch/x86/lib/retpoline.S		patch \| blob \| history
tools/objtool/check.c		patch \| blob \| history