Emit section containing metadata on function stack sizes
-.. option:: -fstack-clash-protection, -fno-stack-clash-protection
-
-Instrument stack allocation to prevent stack clash attacks (x86, non-Windows only).
-
.. option:: -fstandalone-debug, -fno-limit-debug-info, -fno-standalone-debug
Emit full debug info for all types used by the program
------------------
-- -fstack-clash-protection provides protection against the stack clash
-  attack on the x86 architecture through automatic probing of each page of
-  allocated stack.
-
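For context, a minimal C sketch of the kind of frame the removed documentation refers to; the function name and array size are illustrative, not taken from the patch. Built with ``clang -fstack-clash-protection`` on an x86 Linux target, the prologue is expected to probe each page of the frame as it is allocated instead of moving the stack pointer in a single step.

.. code-block:: c

   /* Illustrative only: a frame far larger than one 4096-byte page.
      Compiled with -fstack-clash-protection, the allocation is expected to
      be probed page by page rather than performed as one large adjustment. */
   int big_frame(void) {
     volatile int buf[20000]; /* roughly 80 KB of stack */
     for (int i = 0; i < 20000; ++i)
       buf[i] = i;
     return buf[1];
   }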
Deprecated Compiler Flags
-------------------------
CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled.
CODEGENOPT(NoInlineLineTables, 1, 0) ///< Whether debug info should contain
///< inline line tables.
-CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled.
CODEGENOPT(NoInfsFPMath , 1, 0) ///< Assume FP arguments, results not +-Inf.
CODEGENOPT(NoSignedZeros , 1, 0) ///< Allow ignoring the signedness of FP zero
let CategoryName = "Inline Assembly Issue" in {
def err_asm_invalid_type_in_input : Error<
"invalid type %0 in asm input for constraint '%1'">;
-
- def warn_stack_clash_protection_inline_asm : Warning<
- "Unable to protect inline asm that clobbers stack pointer against stack clash">,
- InGroup<DiagGroup<"stack-protector">>;
}
// Sema && Serialization
StringRef getNormalizedGCCRegisterName(StringRef Name,
bool ReturnCanonical = false) const;
- virtual bool isSPRegName(StringRef) const { return false; }
-
/// Extracts a register from the passed constraint (if it is a
/// single-register constraint) and the asm label expression related to a
/// variable in the input or output list of an inline asm statement.
HelpText<"Enable stack protectors">;
def stack_protector_buffer_size : Separate<["-"], "stack-protector-buffer-size">,
HelpText<"Lower bound for a buffer to be considered for stack protection">;
-def stack_clash_protection : Separate<["-"], "stack-clash-protection">,
- HelpText<"Enable stack clash protection">;
def fvisibility : Separate<["-"], "fvisibility">,
HelpText<"Default type and symbol visibility">;
def ftype_visibility : Separate<["-"], "ftype-visibility">,
def fsplit_stack : Flag<["-"], "fsplit-stack">, Group<f_Group>;
def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
HelpText<"Enable stack protectors for all functions">;
-def fstack_clash_protection : Flag<["-"], "fstack-clash-protection">, Group<f_Group>,
- HelpText<"Enable stack clash protection">;
-def fnostack_clash_protection : Flag<["-"], "fnostack-clash-protection">, Group<f_Group>,
- HelpText<"Disable stack clash protection">;
def fstack_protector_strong : Flag<["-"], "fstack-protector-strong">, Group<f_Group>,
HelpText<"Enable stack protectors for some functions vulnerable to stack smashing. "
"Compared to -fstack-protector, this uses a stronger heuristic "
ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
- bool isSPRegName(StringRef RegName) const override {
- return RegName.equals("esp") || RegName.equals("rsp");
- }
-
bool validateCpuSupports(StringRef Name) const override;
bool validateCpuIs(StringRef Name) const override;
if (Clobber == "memory")
ReadOnly = ReadNone = false;
- else if (Clobber != "cc") {
+ else if (Clobber != "cc")
Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
- if (CGM.getCodeGenOpts().StackClashProtector &&
- getTarget().isSPRegName(Clobber)) {
- CGM.getDiags().Report(S.getAsmLoc(),
- diag::warn_stack_clash_protection_inline_asm);
- }
- }
if (!Constraints.empty())
Constraints += ',';
if (CodeGenOpts.UnwindTables)
B.addAttribute(llvm::Attribute::UWTable);
- if (CodeGenOpts.StackClashProtector)
- B.addAttribute("probe-stack", "inline-asm");
-
if (!hasUnwindExceptions(LangOpts))
B.addAttribute(llvm::Attribute::NoUnwind);
}
}
-static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
- ArgStringList &CmdArgs) {
- const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
-
- if (!EffectiveTriple.isOSLinux())
- return;
-
- if (!EffectiveTriple.isX86())
- return;
-
- if (Args.hasFlag(options::OPT_fstack_clash_protection,
- options::OPT_fnostack_clash_protection, false))
- CmdArgs.push_back("-stack-clash-protection");
-}
-
static void RenderTrivialAutoVarInitOptions(const Driver &D,
const ToolChain &TC,
const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening"));
RenderSSPOptions(TC, Args, CmdArgs, KernelOrKext);
- RenderSCPOptions(TC, Args, CmdArgs);
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
// Translate -mstackrealign
Opts.NoStackArgProbe = Args.hasArg(OPT_mno_stack_arg_probe);
- Opts.StackClashProtector = Args.hasArg(OPT_stack_clash_protection);
-
if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
StringRef Name = A->getValue();
unsigned Method = llvm::StringSwitch<unsigned>(Name)
+++ /dev/null
-// check interaction between -fstack-clash-protection and dynamic allocation schemes
-// RUN: %clang -target x86_64 -O0 -o %t.out %s -fstack-clash-protection && %t.out
-
-int large_stack() __attribute__((noinline));
-
-int large_stack() {
- int stack[20000], i;
- for (i = 0; i < sizeof(stack) / sizeof(int); ++i)
- stack[i] = i;
- return stack[1];
-}
-
-int main(int argc, char **argv) {
- int volatile static_mem[8000];
- for (unsigned i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
- static_mem[i] = argc * i;
-
- int vla[argc];
- __builtin_memset(&vla[0], 0, argc);
-
- int index = large_stack();
-
- // also check allocation of 0 size
- volatile void *mem = __builtin_alloca(argc - 1);
-
- int volatile *dyn_mem = __builtin_alloca(sizeof(static_mem) * argc);
- for (unsigned i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
- dyn_mem[i] = argc * i;
-
- return static_mem[(7999 * argc) / 2] - dyn_mem[(7999 * argc) / 2] + vla[argc - index];
-}
+++ /dev/null
-// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
-// RUN: %clang -target i386-unknown-linux -fnostack-clash-protection -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
-// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -fnostack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386-NO
-// SCP-i386: "-stack-clash-protection"
-// SCP-i386-NO-NOT: "-stack-clash-protection"
-
-// RUN: %clang -target x86_64-scei-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-x86
-// SCP-x86: "-stack-clash-protection"
-
-// RUN: %clang -target armv7k-apple-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-armv7
-// SCP-armv7-NOT: "-stack-clash-protection"
-// SCP-armv7: argument unused during compilation: '-fstack-clash-protection'
-
-// RUN: %clang -target x86_64-unknown-linux -fstack-clash-protection -c %s 2>&1 | FileCheck %s -check-prefix=SCP-warn
-// SCP-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
-
-// RUN: %clang -target x86_64-pc-unknown-linux -fstack-clash-protection -S -emit-llvm -o- %s | FileCheck %s -check-prefix=SCP-ll-linux64
-// SCP-ll-linux64: attributes {{.*}} "probe-stack"="inline-asm"
-
-// RUN: %clang -target x86_64-pc-windows-msvc -fstack-clash-protection -S -emit-llvm -o- %s 2>&1 | FileCheck %s -check-prefix=SCP-ll-win64
-// SCP-ll-win64-NOT: attributes {{.*}} "probe-stack"="inline-asm"
-// SCP-ll-win64: argument unused during compilation: '-fstack-clash-protection'
-
-int foo(int c) {
- int r;
- __asm__("sub %0, %%rsp"
- :
- : "rm"(c)
- : "rsp");
- __asm__("mov %%rsp, %0"
- : "=rm"(r)::);
- return r;
-}
During this release ...
-* Functions with the probe-stack attribute set to "inline-asm" are now protected
-  against stack clash without the need for a third-party probing function and
-  with limited impact on performance.
-
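As a rough sketch of the mechanism the removed release note describes (the function below is hypothetical, not from the patch): the frontend records the request as a ``"probe-stack"="inline-asm"`` function attribute in the emitted IR, which is what the backend's inline probing keys on, so the effect can be inspected with ``-S -emit-llvm``.

.. code-block:: c

   /* Hypothetical check: with
        clang -fstack-clash-protection -S -emit-llvm probe.c
      the emitted IR is expected to carry "probe-stack"="inline-asm" in the
      function's attribute set; the backend then expands the probing inline
      instead of calling a separate probing function. */
   int probed(unsigned n) {
     volatile char pad[1 << 16]; /* multi-page frame */
     pad[0] = 1;
     pad[n & 0xFFFFu] = 2;
     return pad[0] + pad[n & 0xFFFFu];
   }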
Changes to the AMDGPU Target
-----------------------------
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
- virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
-
- virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
-
virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
return "";
}
// memory for arguments.
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
- bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
+ bool UseStackProbe =
+ !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
if (MI.getOpcode() == FrameSetupOpcode) {
- if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
+ if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
return false;
if (InsideFrameSequence)
return false;
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
-#define DEBUG_TYPE "x86-fl"
-
-STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
-STATISTIC(NumFrameExtraProbe,
- "Number of extra stack probes generated in prologue");
-
using namespace llvm;
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
uint64_t Chunk = (1LL << 31) - 1;
- MachineFunction &MF = *MBB.getParent();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86TargetLowering &TLI = *STI.getTargetLowering();
- const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
-
-  // It's OK not to take large chunks into account when probing, as the
-  // allocation is split into smaller chunks anyway.
- if (EmitInlineStackProbe && !InEpilogue) {
-
-    // Stack probing may involve looping, and control flow generation is
-    // disallowed at this point. Rely on later processing through
-    // `inlineStackProbe`.
- MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
-
- // Encode the static offset as a metadata attached to the stub.
- LLVMContext &Context = MF.getFunction().getContext();
- MachineInstrBuilder(MF, Stub).addMetadata(
- MDTuple::get(Context, {ConstantAsMetadata::get(ConstantInt::get(
- IntegerType::get(Context, 64), Offset))}));
- return;
- } else if (Offset > Chunk) {
+ if (Offset > Chunk) {
// Rather than emit a long series of instructions for large offsets,
// load the offset into a register and do one sub/add
unsigned Reg = 0;
} else {
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
- const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
- : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+ unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+ : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(AbsOffset);
const DebugLoc &DL,
bool InProlog) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
- emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
- else
- emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
-}
-
-void X86FrameLowering::emitStackProbeInlineGeneric(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
- MachineInstr &CallToInline = *std::prev(MBBI);
- assert(CallToInline.getOperand(1).isMetadata() &&
- "no metadata attached to that probe");
- uint64_t Offset =
- cast<ConstantInt>(
- cast<ConstantAsMetadata>(
- cast<MDTuple>(CallToInline.getOperand(1).getMetadata())
- ->getOperand(0))
- ->getValue())
- ->getZExtValue();
-
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86TargetLowering &TLI = *STI.getTargetLowering();
- assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
- "different expansion expected for CoreCLR 64 bit");
-
- const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
- uint64_t ProbeChunk = StackProbeSize * 8;
-
- // Synthesize a loop or unroll it, depending on the number of iterations.
- if (Offset > ProbeChunk) {
- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
- } else {
- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
- }
-}
-
-void X86FrameLowering::emitStackProbeInlineGenericBlock(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- uint64_t Offset) const {
-
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86TargetLowering &TLI = *STI.getTargetLowering();
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
- uint64_t CurrentOffset = 0;
-  // Starts at 0 thanks to the return address being saved on the stack.
- uint64_t CurrentProbeOffset = 0;
-
-  // For the first N - 1 pages, just probe. I tried to take advantage of
-  // natural probes, but it implies much more logic and there were very few
-  // interesting natural probes to interleave.
- while (CurrentOffset + StackProbeSize < Offset) {
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(StackProbeSize)
- .setMIFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
- .setMIFlag(MachineInstr::FrameSetup),
- StackPtr, false, 0)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- NumFrameExtraProbe++;
- CurrentOffset += StackProbeSize;
- CurrentProbeOffset += StackProbeSize;
- }
-
- uint64_t ChunkSize = Offset - CurrentOffset;
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ChunkSize)
- .setMIFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-}
-
-void X86FrameLowering::emitStackProbeInlineGenericLoop(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- uint64_t Offset) const {
-
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86TargetLowering &TLI = *STI.getTargetLowering();
- const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-
- // Synthesize a loop
- NumFrameLoopProbe++;
- const BasicBlock *LLVM_BB = MBB.getBasicBlock();
-
- MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
-
- MachineFunction::iterator MBBIter = ++MBB.getIterator();
- MF.insert(MBBIter, testMBB);
- MF.insert(MBBIter, tailMBB);
-
- unsigned FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D;
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FinalStackPtr)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // save loop bound
- {
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackPtr)
- .addReg(FinalStackPtr)
- .addImm(Offset / StackProbeSize * StackProbeSize)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // allocate a page
- {
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
- BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(StackProbeSize)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // touch the page
- addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
- .setMIFlag(MachineInstr::FrameSetup),
- StackPtr, false, 0)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // cmp with stack pointer bound
- BuildMI(testMBB, DL, TII.get(IsLP64 ? X86::CMP64rr : X86::CMP32rr))
- .addReg(StackPtr)
- .addReg(FinalStackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // jump
- BuildMI(testMBB, DL, TII.get(X86::JCC_1))
- .addMBB(testMBB)
- .addImm(X86::COND_NE)
- .setMIFlag(MachineInstr::FrameSetup);
- testMBB->addSuccessor(testMBB);
- testMBB->addSuccessor(tailMBB);
-
- // allocate a block and touch it
-
- tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
- tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
- MBB.addSuccessor(testMBB);
-
- if (Offset % StackProbeSize) {
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(Offset % StackProbeSize)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-}
-
-void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
assert(STI.is64Bit() && "different expansion needed for 32 bit");
assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
const TargetInstrInfo &TII = *STI.getInstrInfo();
}
}
-MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
+void X86FrameLowering::emitStackProbeInlineStub(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
assert(InProlog && "ChkStkStub called outside prolog!");
- return BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__chkstk_stub");
}
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
- const bool EmitStackProbeCall =
- STI.getTargetLowering()->hasStackProbeSymbol(MF);
+ bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
- if (has128ByteRedZone(MF) && !TRI->needsStackRealignment(MF) &&
+ if (has128ByteRedZone(MF) &&
+ !TRI->needsStackRealignment(MF) &&
!MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
- !EmitStackProbeCall && // No stack probes.
+ !UseStackProbe && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
uint64_t AlignedNumBytes = NumBytes;
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
- if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
+ if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
assert(!X86FI->getUsesRedZone() &&
"The Red Zone is not accounted for in stack probes");
void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool InProlog) const;
- void emitStackProbeInlineWindowsCoreCLR64(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const;
- void emitStackProbeInlineGeneric(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const;
-
- void emitStackProbeInlineGenericBlock(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- uint64_t Offset) const;
-
- void emitStackProbeInlineGenericLoop(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- uint64_t Offset) const;
/// Emit a stub to later inline the target stack probe.
- MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const;
+ void emitStackProbeInlineStub(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool InProlog) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
- bool EmitStackProbeCall = hasStackProbeSymbol(MF);
+ bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
- SplitStack || EmitStackProbeCall;
+ SplitStack || EmitStackProbe;
SDLoc dl(Op);
// Get the inputs.
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Align StackAlign(TFI.getStackAlignment());
- if (hasInlineStackProbe(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- Register Vreg = MRI.createVirtualRegister(AddrRegClass);
- Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
- DAG.getRegister(Vreg, SPTy));
- } else {
- SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
- Chain = SP.getValue(1);
- Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
- }
+ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Alignment && Alignment > StackAlign)
Result =
DAG.getNode(ISD::AND, dl, VT, Result,
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
- NODE_NAME_CASE(PROBED_ALLOCA)
NODE_NAME_CASE(RDRAND)
NODE_NAME_CASE(RDSEED)
NODE_NAME_CASE(RDPKRU)
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
- MachineBasicBlock *BB) const {
- MachineFunction *MF = BB->getParent();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
- DebugLoc DL = MI.getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
-
- const unsigned ProbeSize = getStackProbeSize(*MF);
-
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-
- MachineFunction::iterator MBBIter = ++BB->getIterator();
- MF->insert(MBBIter, testMBB);
- MF->insert(MBBIter, blockMBB);
- MF->insert(MBBIter, tailMBB);
-
- unsigned sizeVReg = MI.getOperand(1).getReg();
-
- const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);
-
- unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
- unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);
-
- unsigned physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;
-
- // test rsp size
- BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
- .addReg(sizeVReg)
- .addMBB(BB)
- .addReg(tmpSizeVReg2)
- .addMBB(blockMBB);
-
- BuildMI(testMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::CMP64ri32 : X86::CMP32ri))
- .addReg(tmpSizeVReg)
- .addImm(ProbeSize);
-
- BuildMI(testMBB, DL, TII->get(X86::JCC_1))
- .addMBB(tailMBB)
- .addImm(X86::COND_L);
- testMBB->addSuccessor(blockMBB);
- testMBB->addSuccessor(tailMBB);
-
- // allocate a block and touch it
-
- BuildMI(blockMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
- tmpSizeVReg2)
- .addReg(tmpSizeVReg)
- .addImm(ProbeSize);
-
- BuildMI(blockMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
- physSPReg)
- .addReg(physSPReg)
- .addImm(ProbeSize);
-
- const unsigned MovMIOpc =
- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
- .addImm(0);
-
- BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
- blockMBB->addSuccessor(testMBB);
-
- // allocate the tail and continue
- BuildMI(tailMBB, DL,
- TII->get(TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr),
- physSPReg)
- .addReg(physSPReg)
- .addReg(tmpSizeVReg);
- BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
- .addReg(physSPReg);
-
- tailMBB->splice(tailMBB->end(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- tailMBB->transferSuccessorsAndUpdatePHIs(BB);
- BB->addSuccessor(testMBB);
-
- // Delete the original pseudo instruction.
- MI.eraseFromParent();
-
- // And we're done.
- return tailMBB;
-}
-
-MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
- case X86::PROBED_ALLOCA_32:
- case X86::PROBED_ALLOCA_64:
- return EmitLoweredProbedAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
return Subtarget.is64Bit();
}
-/// Returns true if stack probing through a function call is requested.
-bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
- return !getStackProbeSymbolName(MF).empty();
-}
-
-/// Returns true if stack probing through inline assembly is requested.
-bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
-
-  // No inline stack probe for Windows; it has its own mechanism.
- if (Subtarget.isOSWindows() ||
- MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
- return false;
-
- // If the function specifically requests inline stack probes, emit them.
- if (MF.getFunction().hasFnAttribute("probe-stack"))
- return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
- "inline-asm";
-
- return false;
-}
-
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef
X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
-  // Inline stack probes disable the stack probe call.
- if (hasInlineStackProbe(MF))
- return "";
-
// If the function specifically requests stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
// falls back to heap allocation if not.
SEG_ALLOCA,
- // For allocating stack space when using stack clash protector.
- // Allocation is performed by block, and each block is probed.
- PROBED_ALLOCA,
-
// Memory barriers.
MEMBARRIER,
MFENCE,
bool supportSwiftError() const override;
- bool hasStackProbeSymbol(MachineFunction &MF) const override;
- bool hasInlineStackProbe(MachineFunction &MF) const override;
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
unsigned getStackProbeSize(MachineFunction &MF) const;
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const;
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
-
-// To protect against stack clash, dynamic allocation should perform a memory
-// probe at each page.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
- "# variable sized alloca with probing",
- [(set GR32:$dst,
- (X86ProbedAlloca GR32:$size))]>,
- Requires<[NotLP64]>;
-
-let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
-def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
- "# variable sized alloca with probing",
- [(set GR64:$dst,
- (X86ProbedAlloca GR64:$size))]>,
- Requires<[In64BitMode]>;
}
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
[SDNPHasChain]>;
-def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
- [SDNPHasChain]>;
-
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo(i32 %n) local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset %rbp, -16
-; CHECK-NEXT: movq %rsp, %rbp
-; CHECK-NEXT: .cfi_def_cfa_register %rbp
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: leaq 15(,%rax,4), %rax
-; CHECK-NEXT: andq $-16, %rax
-; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
-; CHECK-NEXT: jl .LBB0_3
-; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $4096, %rax # imm = 0x1000
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
-; CHECK-NEXT: jge .LBB0_2
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: subq %rax, %rsp
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: movl $1, 4792(%rax)
-; CHECK-NEXT: movl (%rax), %eax
-; CHECK-NEXT: movq %rbp, %rsp
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-; CHECK-NEXT: retq
-
- %a = alloca i32, i32 %n, align 16
- %b = getelementptr inbounds i32, i32* %a, i64 1198
- store volatile i32 1, i32* %b
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo() local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rsp, %r11
-; CHECK-NEXT: subq $69632, %r11 # imm = 0x11000
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: cmpq %r11, %rsp
-; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: subq $2248, %rsp # imm = 0x8C8
-; CHECK-NEXT: .cfi_def_cfa_offset 71888
-; CHECK-NEXT: movl $1, 264(%rsp)
-; CHECK-NEXT: movl $1, 28664(%rsp)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $71880, %rsp # imm = 0x118C8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
-
- %a = alloca i32, i64 18000, align 16
- %b0 = getelementptr inbounds i32, i32* %a, i64 98
- %b1 = getelementptr inbounds i32, i32* %a, i64 7198
- store volatile i32 1, i32* %b0
- store volatile i32 1, i32* %b1
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo() local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: .cfi_def_cfa_offset 5888
-; CHECK-NEXT: movl $1, 2088(%rsp)
-; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8
-; CHECK-NEXT: movl $2, 672(%rsp)
-; CHECK-NEXT: movl 1872(%rsp), %eax
-; CHECK-NEXT: addq $5880, %rsp # imm = 0x16F8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
-
- %a = alloca i32, i64 1000, align 16
- %b = alloca i32, i64 500, align 16
- %a0 = getelementptr inbounds i32, i32* %a, i64 500
- %b0 = getelementptr inbounds i32, i32* %b, i64 200
- store volatile i32 1, i32* %a0
- store volatile i32 2, i32* %b0
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo() local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
-; CHECK-NEXT: .cfi_def_cfa_offset 7888
-; CHECK-NEXT: movl $1, 264(%rsp)
-; CHECK-NEXT: movl $1, 4664(%rsp)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
-
-
- %a = alloca i32, i64 2000, align 16
- %b0 = getelementptr inbounds i32, i32* %a, i64 98
- %b1 = getelementptr inbounds i32, i32* %a, i64 1198
- store i32 1, i32* %b0
- store i32 1, i32* %b1
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo() local_unnamed_addr #0 {
-
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
-; CHECK-NEXT: .cfi_def_cfa_offset 7888
-; CHECK-NEXT: movl $1, 672(%rsp)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
-
-
- %a = alloca i32, i64 2000, align 16
- %b = getelementptr inbounds i32, i32* %a, i64 200
- store volatile i32 1, i32* %b
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo(i64 %i) local_unnamed_addr #0 {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
-; CHECK-NEXT: .cfi_def_cfa_offset 7888
-; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
- %a = alloca i32, i32 2000, align 16
- %b = getelementptr inbounds i32, i32* %a, i64 %i
- store volatile i32 1, i32* %b
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
-
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @foo() local_unnamed_addr #0 {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $280, %rsp # imm = 0x118
-; CHECK-NEXT: .cfi_def_cfa_offset 288
-; CHECK-NEXT: movl $1, 264(%rsp)
-; CHECK-NEXT: movl -128(%rsp), %eax
-; CHECK-NEXT: addq $280, %rsp # imm = 0x118
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-
- %a = alloca i32, i64 100, align 16
- %b = getelementptr inbounds i32, i32* %a, i64 98
- store volatile i32 1, i32* %b
- %c = load volatile i32, i32* %a
- ret i32 %c
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}
+++ /dev/null
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg);
-
-define void @foo() local_unnamed_addr #0 {
-
-;CHECK-LABEL: foo:
-;CHECK: # %bb.0:
-;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; it's important that we don't use the call as a probe here
-;CHECK-NEXT: movq $0, (%rsp)
-;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48
-;CHECK-NEXT: .cfi_def_cfa_offset 8016
-;CHECK-NEXT: movq %rsp, %rdi
-;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40
-;CHECK-NEXT: xorl %esi, %esi
-;CHECK-NEXT: callq memset
-;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48
-;CHECK-NEXT: .cfi_def_cfa_offset 8
-;CHECK-NEXT: retq
-
- %a = alloca i8, i64 8000, align 16
- call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false)
- ret void
-}
-
-attributes #0 = {"probe-stack"="inline-asm"}