From 7049cf6496c9aa8e355345a3fbea30861e4d2da8 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 2 Mar 2020 13:21:53 -0800 Subject: [PATCH] [BranchAlign] Fix bug w/nop padding for SS manipulation X86 has several instructions which are documented as enabling interrupts exactly one instruction *after* the one which changes the SS segment register. Inserting a nop between these two instructions allows an interrupt to arrive before the execution of the following instruction which changes semantic behaviour. The list of instructions is documented in "Table 24-3. Format of Interruptibility State" in Volume 3c of the Intel manual. They basically all come down to different ways to write to the SS register. Differential Revision: https://reviews.llvm.org/D75359 --- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 26 ++++++++- llvm/test/MC/X86/align-branch-64-system.s | 68 ++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/X86/align-branch-64-system.s diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 067748f..ce48ea2 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -373,6 +373,27 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { return true; } +/// X86 has certain instructions which enable interrupts exactly one +/// instruction *after* the instruction which stores to SS. Return true if the +/// given instruction has such an interrupt delay slot. +static bool hasInterruptDelaySlot(const MCInst &Inst) { + switch (Inst.getOpcode()) { + case X86::POPSS16: + case X86::POPSS32: + case X86::STI: + return true; + + case X86::MOV16sr: + case X86::MOV32sr: + case X86::MOV64sr: + case X86::MOV16sm: + if (Inst.getOperand(0).getReg() == X86::SS) + return true; + break; + } + return false; +} + /// Check if the instruction operand needs to be aligned. Padding is disabled /// before intruction which may be rewritten by linker(e.g. TLSCALL). bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { @@ -401,7 +422,10 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, MCFragment *CF = OS.getCurrentFragment(); bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + if (hasInterruptDelaySlot(PrevInst)) { + // If this instruction follows an interrupt enabling instruction with a one + // instruction delay, inserting a nop would change behavior. + } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction. NOP can be emitted in PF to align fused // jcc. diff --git a/llvm/test/MC/X86/align-branch-64-system.s b/llvm/test/MC/X86/align-branch-64-system.s new file mode 100644 index 0000000..b62a4e3 --- /dev/null +++ b/llvm/test/MC/X86/align-branch-64-system.s @@ -0,0 +1,68 @@ + # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=jmp %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s + + # Exercise cases where we're enabling interrupts with one instruction delay + # and thus can't add a nop in between without changing behavior. + + .text + + # CHECK: 1e: sti + # CHECK: 1f: jmp + .p2align 5 + .rept 30 + int3 + .endr + sti + jmp baz + + # CHECK: 5c: movq %rax, %ss + # CHECK: 5f: jmp + .p2align 5 + .rept 28 + int3 + .endr + movq %rax, %ss + jmp baz + + # CHECK: 9d: movl %esi, %ss + # CHECK: 9f: jmp + .p2align 5 + .rept 29 + int3 + .endr + movl %esi, %ss + jmp baz + + # movw and movl are interchangeable since we're only using the low 16 bits. + # Both are generated as "MOV Sreg,r/m16**" (8E /r), but disassembled as movl + # CHECK: dd: movl %esi, %ss + # CHECK: df: jmp + .p2align 5 + .rept 29 + int3 + .endr + movw %si, %ss + jmp baz + + # CHECK: 11b: movw (%esi), %ss + # CHECK: 11e: jmp + .p2align 5 + .rept 27 + int3 + .endr + movw (%esi), %ss + jmp baz + + # CHECK: 15b: movw (%rsi), %ss + # CHECK: 15d: jmp + .p2align 5 + .rept 27 + int3 + .endr + movw (%rsi), %ss + jmp baz + + + int3 + .section ".text.other" +bar: + retq -- 2.7.4