From 65f9d9cd324fc12f8bf90fa67477e2ddd90803d6 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 2 Mar 2016 19:20:59 +0000 Subject: [PATCH] Revert "[X86] Elide references to _chkstk for dynamic allocas" This reverts commit r262370. It turns out there is code out there that does sequences of allocas greater than 4K: http://crbug.com/591404 The goal of this change was to improve the code size of inalloca call sequences, but we got tangled up in the mess of dynamic allocas. Instead, we should come back later with a separate MI pass that uses dominance to optimize the full sequence. This should also be able to remove the often unneeded stacksave/stackrestore pairs around the call. llvm-svn: 262505 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 38 ++++++------------------ llvm/test/CodeGen/X86/cleanuppad-inalloca.ll | 5 ++-- llvm/test/CodeGen/X86/dynamic-alloca-in-entry.ll | 5 +--- llvm/test/CodeGen/X86/inalloca-ctor.ll | 9 +++--- llvm/test/CodeGen/X86/inalloca-invoke.ll | 8 ++--- llvm/test/CodeGen/X86/inalloca-stdcall.ll | 10 +++---- llvm/test/CodeGen/X86/inalloca.ll | 30 +++++++++---------- llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll | 2 +- 8 files changed, 40 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e86c00b..596f487 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16378,8 +16378,9 @@ SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); bool SplitStack = MF.shouldSplitStack(); + bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) || + SplitStack; SDLoc dl(Op); // Get the inputs. @@ -16393,45 +16394,21 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // pointer when other instructions are using the stack. Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); - const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool Is64Bit = Subtarget.is64Bit(); MVT SPTy = getPointerTy(DAG.getDataLayout()); - bool CheckStack = SplitStack; - if (!CheckStack && Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) { - // The Windows ABI requires us to probe the stack for allocations beyond - // the probe size. - if (auto *SizeC = dyn_cast(Size)) { - // Try to elide the probe if we can prove that this dynamic allocation is - // smaller than the probe size. - unsigned StackProbeSize = 4096; - if (F->hasFnAttribute("stack-probe-size")) - F->getFnAttribute("stack-probe-size") - .getValueAsString() - .getAsInteger(0, StackProbeSize); - unsigned AlignedAlloc = SizeC->getZExtValue(); - // Round the dynamic alloca's size up to it's alignment. - if (Align) - AlignedAlloc = alignTo(AlignedAlloc, Align); - - // If the aligned allocation is smaller than the probe size, then we don't - // need to probe the stack. - CheckStack = AlignedAlloc >= StackProbeSize; - } else { - // We cannot tell how big this dynamic alloca will be, probe the stack. - CheckStack = true; - } - } - SDValue Result; - if (!CheckStack) { + if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); + EVT VT = Node->getValueType(0); + SDValue Tmp3 = Node->getOperand(2); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); + unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value @@ -16445,6 +16422,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, if (Is64Bit) { // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. + const Function *F = MF.getFunction(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->hasNestAttr()) @@ -16467,6 +16446,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); diff --git a/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll b/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll index bb00f14..2e34ada 100644 --- a/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll +++ b/llvm/test/CodeGen/X86/cleanuppad-inalloca.ll @@ -38,9 +38,8 @@ ehcleanup: ; preds = %entry ; CHECK: pushl %ebp ; CHECK: movl %esp, %ebp ; CHECK: subl ${{[0-9]+}}, %esp -; CHECK: movl %esp, %[[tmp_sp1:.*]] -; CHECK: leal -8(%[[tmp_sp1]]), %[[tmp_sp2:.*]] -; CHECK: %[[tmp_sp2]], %esp +; CHECK: movl $8, %eax +; CHECK: calll __chkstk ; CHECK: calll "??0A@@QAE@XZ" ; CHECK: calll "??0A@@QAE@XZ" ; CHECK: calll _takes_two diff --git a/llvm/test/CodeGen/X86/dynamic-alloca-in-entry.ll b/llvm/test/CodeGen/X86/dynamic-alloca-in-entry.ll index 5a34286..7ed471c 100644 --- a/llvm/test/CodeGen/X86/dynamic-alloca-in-entry.ll +++ b/llvm/test/CodeGen/X86/dynamic-alloca-in-entry.ll @@ -15,8 +15,5 @@ define void @bar() { ret void } ; CHECK-LABEL: _bar: -; CHECK: movl %esp, %ebp -; CHECK: movl %esp, %[[sp_tmp:.*]] -; CHECK: addl $-4, %[[sp_tmp]] -; CHECK: movl %[[sp_tmp]], %esp +; CHECK: calll __chkstk ; CHECK: retl diff --git a/llvm/test/CodeGen/X86/inalloca-ctor.ll b/llvm/test/CodeGen/X86/inalloca-ctor.ll index df9224f..eba4e72 100644 --- a/llvm/test/CodeGen/X86/inalloca-ctor.ll +++ b/llvm/test/CodeGen/X86/inalloca-ctor.ll @@ -10,14 +10,13 @@ declare void @Foo_ctor(%Foo* %this) define void @g() { entry: -; CHECK: movl %esp, %ebp %args = alloca inalloca %frame %c = getelementptr %frame, %frame* %args, i32 0, i32 2 -; CHECK: movl %esp, %[[tmp_sp1:.*]] -; CHECK: leal -20(%[[tmp_sp1]]), %[[tmp_sp2:.*]] -; CHECK: movl %[[tmp_sp2]], %esp +; CHECK: movl $20, %eax +; CHECK: calll __chkstk +; CHECK: movl %esp, call void @Foo_ctor(%Foo* %c) -; CHECK: leal -8(%[[tmp_sp1]]), +; CHECK: leal 12(%{{.*}}), ; CHECK-NEXT: pushl ; CHECK-NEXT: calll _Foo_ctor ; CHECK: addl $4, %esp diff --git a/llvm/test/CodeGen/X86/inalloca-invoke.ll b/llvm/test/CodeGen/X86/inalloca-invoke.ll index 73b939b..9a184e5 100644 --- a/llvm/test/CodeGen/X86/inalloca-invoke.ll +++ b/llvm/test/CodeGen/X86/inalloca-invoke.ll @@ -12,7 +12,6 @@ declare void @plus(%Iter* sret, %Iter*, i32) declare void @reverse(%frame.reverse* inalloca align 4) define i32 @main() personality i32 (...)* @pers { -; CHECK: movl %esp, %ebp %temp.lvalue = alloca %Iter br label %blah @@ -22,10 +21,9 @@ blah: %beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0 %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1 -; CHECK: movl %esp, %[[end:.*]] -; CHECK: leal -24(%[[end]]), %[[beg:.*]] -; CHECK: movl %[[beg]], %esp -; CHECK: addl $-12, %[[end]] +; CHECK: calll __chkstk +; CHECK: movl %esp, %[[beg:[^ ]*]] +; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]] call void @begin(%Iter* sret %temp.lvalue) ; CHECK: calll _begin diff --git a/llvm/test/CodeGen/X86/inalloca-stdcall.ll b/llvm/test/CodeGen/X86/inalloca-stdcall.ll index ad2271b..4f7e409 100644 --- a/llvm/test/CodeGen/X86/inalloca-stdcall.ll +++ b/llvm/test/CodeGen/X86/inalloca-stdcall.ll @@ -7,16 +7,16 @@ declare x86_stdcallcc void @i(i32 %a) define void @g() { ; CHECK-LABEL: _g: -; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl %esp, %[[tmp_sp:.*]] -; CHECK: leal -8(%[[tmp_sp]]), %esp +; CHECK: movl $8, %eax +; CHECK: calll __chkstk %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, -8(%[[tmp_sp]]) -; CHECK: movl $42, -4(%[[tmp_sp]]) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call x86_stdcallcc void @f(%Foo* inalloca %b) ; CHECK: calll _f@8 ; CHECK-NOT: %esp diff --git a/llvm/test/CodeGen/X86/inalloca.ll b/llvm/test/CodeGen/X86/inalloca.ll index b4ee79f..e523c94 100644 --- a/llvm/test/CodeGen/X86/inalloca.ll +++ b/llvm/test/CodeGen/X86/inalloca.ll @@ -7,16 +7,16 @@ declare void @f(%Foo* inalloca %b) define void @a() { ; CHECK-LABEL: _a: entry: -; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl %esp, %[[tmp_sp:.*]] -; CHECK: leal -8(%[[tmp_sp]]), %esp +; CHECK: movl $8, %eax +; CHECK: calll __chkstk %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, -8(%[[tmp_sp]]) -; CHECK: movl $42, -4(%[[tmp_sp]]) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call void @f(%Foo* inalloca %b) ; CHECK: calll _f ret void @@ -27,16 +27,16 @@ declare void @inreg_with_inalloca(i32 inreg %a, %Foo* inalloca %b) define void @b() { ; CHECK-LABEL: _b: entry: -; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl %esp, %[[tmp_sp:.*]] -; CHECK: leal -8(%[[tmp_sp]]), %esp +; CHECK: movl $8, %eax +; CHECK: calll __chkstk %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, -8(%[[tmp_sp]]) -; CHECK: movl $42, -4(%[[tmp_sp]]) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b) ; CHECK: movl $1, %eax ; CHECK: calll _inreg_with_inalloca @@ -48,16 +48,16 @@ declare x86_thiscallcc void @thiscall_with_inalloca(i8* %a, %Foo* inalloca %b) define void @c() { ; CHECK-LABEL: _c: entry: -; CHECK: movl %esp, %ebp %b = alloca inalloca %Foo -; CHECK: movl %esp, %[[tmp_sp:.*]] -; CHECK: leal -8(%[[tmp_sp]]), %esp +; CHECK: movl $8, %eax +; CHECK: calll __chkstk %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK-DAG: movl $13, -8(%[[tmp_sp]]) -; CHECK-DAG: movl $42, -4(%[[tmp_sp]]) +; CHECK: movl %esp, %eax +; CHECK-DAG: movl $13, (%eax) +; CHECK-DAG: movl $42, 4(%eax) call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b) ; CHECK-DAG: xorl %ecx, %ecx ; CHECK: calll _thiscall_with_inalloca diff --git a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll index 42f47bb..aecae89 100644 --- a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll +++ b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -9,7 +9,7 @@ target triple = "i686-pc-windows-msvc18.0.0" %struct.S = type { [12 x i8] } -define x86_thiscallcc void @call_inalloca(i1 %x) "stack-probe-size"="12" { +define x86_thiscallcc void @call_inalloca(i1 %x) { entry: %argmem = alloca inalloca <{ %struct.S }>, align 4 %argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0 -- 2.7.4