From: Ulrich Weigand Date: Mon, 23 Jun 2014 13:47:52 +0000 (+0000) Subject: [PowerPC] Allow stack frames without parameter save area X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f316e1db75b74821cd1d495a7bc8fabd14a19da9;p=platform%2Fupstream%2Fllvm.git [PowerPC] Allow stack frames without parameter save area The PPCFrameLowering::determineFrameLayout routine currently ensures that every function that allocates a stack frame provides space for the parameter save area (via PPCFrameLowering::getMinCallFrameSize). This is actually not necessary. There may be functions that never call another routine but still allocate a frame; those do not require the parameter save area. In the future, with the ELFv2 ABI, even some routines that do call other functions do not need to allocate the parameter save area. While it is not a bug to allocate the parameter area when it is not needed, it is better to avoid it to save stack space. Note that when any particular function call requires the parameter save area, this space will already have been included by ABI code in the size the CALLSEQ_START insn is annotated with, and therefore included in the size returned by MFI->getMaxCallFrameSize(). This means that determineFrameLayout simply does not need to care about the parameter save area. (It still needs to ensure that every frame provides the linkage area.) This is implemented by this patch. Note that this exposed a bug in the new fast-isel code where the parameter area was *not* included in the CALLSEQ_START size; this is also fixed. A couple of test cases needed to be adapted for the new (smaller) stack frame size those tests now see. 
llvm-svn: 211495 --- diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 924a07c..1b4f12e 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -1197,6 +1197,10 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, bool IsVarArg) { SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + + // Reserve space for the linkage area on the stack. + CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(true, false), 8); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); // Bail out if we can't handle any of the arguments. @@ -1218,6 +1222,14 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Get a count of how many bytes are to be pushed onto the stack. NumBytes = CCInfo.getNextStackOffset(); + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + NumBytes = std::max(NumBytes, + PPCFrameLowering::getMinCallFrameSize(true, false)); + // Issue CALLSEQ_START. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 9c5e588..ef4ea77 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -422,9 +422,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Get the maximum call frame size of all the calls. unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - // Maximum call frame needs to be at least big enough for linkage and 8 args. 
- unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isDarwinABI()); + // Maximum call frame needs to be at least big enough for linkage area. + unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(), + Subtarget.isDarwinABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so diff --git a/llvm/test/CodeGen/PowerPC/Frames-alloca.ll b/llvm/test/CodeGen/PowerPC/Frames-alloca.ll index 4588bc0..c701fef 100644 --- a/llvm/test/CodeGen/PowerPC/Frames-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-alloca.ll @@ -12,15 +12,15 @@ ; CHECK-PPC32-NOFP: stw r31, -4(r1) ; CHECK-PPC32-NOFP: lwz r1, 0(r1) ; CHECK-PPC32-NOFP: lwz r31, -4(r1) -; CHECK-PPC32-RS: stwu r1, -80(r1) -; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1) +; CHECK-PPC32-RS: stwu r1, -48(r1) +; CHECK-PPC32-RS-NOFP: stwu r1, -48(r1) ; CHECK-PPC64: std r31, -8(r1) -; CHECK-PPC64: stdu r1, -128(r1) +; CHECK-PPC64: stdu r1, -64(r1) ; CHECK-PPC64: ld r1, 0(r1) ; CHECK-PPC64: ld r31, -8(r1) ; CHECK-PPC64-NOFP: std r31, -8(r1) -; CHECK-PPC64-NOFP: stdu r1, -128(r1) +; CHECK-PPC64-NOFP: stdu r1, -64(r1) ; CHECK-PPC64-NOFP: ld r1, 0(r1) ; CHECK-PPC64-NOFP: ld r31, -8(r1) diff --git a/llvm/test/CodeGen/PowerPC/Frames-large.ll b/llvm/test/CodeGen/PowerPC/Frames-large.ll index d07fea7..0ccea42 100644 --- a/llvm/test/CodeGen/PowerPC/Frames-large.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-large.ll @@ -15,9 +15,9 @@ define i32* @f1() nounwind { ; PPC32-NOFP: _f1: ; PPC32-NOFP: lis r0, -1 -; PPC32-NOFP: ori r0, r0, 32704 +; PPC32-NOFP: ori r0, r0, 32736 ; PPC32-NOFP: stwux r1, r1, r0 -; PPC32-NOFP: addi r3, r1, 68 +; PPC32-NOFP: addi r3, r1, 36 ; PPC32-NOFP: lwz r1, 0(r1) ; PPC32-NOFP: blr @@ -25,10 +25,10 @@ define i32* @f1() nounwind { ; PPC32-FP: _f1: ; PPC32-FP: lis r0, -1 ; PPC32-FP: stw r31, -4(r1) -; PPC32-FP: ori r0, r0, 32704 +; PPC32-FP: ori r0, r0, 32736 ; PPC32-FP: stwux r1, r1, r0 ; PPC32-FP: mr 
r31, r1 -; PPC32-FP: addi r3, r31, 64 +; PPC32-FP: addi r3, r31, 32 ; PPC32-FP: lwz r1, 0(r1) ; PPC32-FP: lwz r31, -4(r1) ; PPC32-FP: blr @@ -36,9 +36,9 @@ define i32* @f1() nounwind { ; PPC64-NOFP: _f1: ; PPC64-NOFP: lis r0, -1 -; PPC64-NOFP: ori r0, r0, 32656 +; PPC64-NOFP: ori r0, r0, 32720 ; PPC64-NOFP: stdux r1, r1, r0 -; PPC64-NOFP: addi r3, r1, 116 +; PPC64-NOFP: addi r3, r1, 52 ; PPC64-NOFP: ld r1, 0(r1) ; PPC64-NOFP: blr @@ -46,10 +46,10 @@ define i32* @f1() nounwind { ; PPC64-FP: _f1: ; PPC64-FP: lis r0, -1 ; PPC64-FP: std r31, -8(r1) -; PPC64-FP: ori r0, r0, 32640 +; PPC64-FP: ori r0, r0, 32704 ; PPC64-FP: stdux r1, r1, r0 ; PPC64-FP: mr r31, r1 -; PPC64-FP: addi r3, r31, 124 +; PPC64-FP: addi r3, r31, 60 ; PPC64-FP: ld r1, 0(r1) ; PPC64-FP: ld r31, -8(r1) ; PPC64-FP: blr diff --git a/llvm/test/CodeGen/PowerPC/Frames-small.ll b/llvm/test/CodeGen/PowerPC/Frames-small.ll index 0f6bd10..28c1a5b 100644 --- a/llvm/test/CodeGen/PowerPC/Frames-small.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-small.ll @@ -1,25 +1,25 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 ; RUN: not grep "stw r31, -4(r1)" %t1 -; RUN: grep "stwu r1, -16448(r1)" %t1 -; RUN: grep "addi r1, r1, 16448" %t1 +; RUN: grep "stwu r1, -16416(r1)" %t1 +; RUN: grep "addi r1, r1, 16416" %t1 ; RUN: llc < %s -march=ppc32 | \ ; RUN: not grep "lwz r31, -4(r1)" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ ; RUN: -o %t2 ; RUN: grep "stw r31, -4(r1)" %t2 -; RUN: grep "stwu r1, -16448(r1)" %t2 -; RUN: grep "addi r1, r1, 16448" %t2 +; RUN: grep "stwu r1, -16416(r1)" %t2 +; RUN: grep "addi r1, r1, 16416" %t2 ; RUN: grep "lwz r31, -4(r1)" %t2 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 ; RUN: not grep "std r31, -8(r1)" %t3 -; RUN: grep "stdu r1, -16496(r1)" %t3 -; RUN: grep "addi r1, r1, 16496" %t3 +; RUN: grep "stdu r1, -16432(r1)" %t3 +; RUN: grep "addi r1, r1, 16432" %t3 ; RUN: not grep "ld r31, -8(r1)" %t3 ; RUN: llc < %s 
-march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ ; RUN: -o %t4 ; RUN: grep "std r31, -8(r1)" %t4 -; RUN: grep "stdu r1, -16512(r1)" %t4 -; RUN: grep "addi r1, r1, 16512" %t4 +; RUN: grep "stdu r1, -16448(r1)" %t4 +; RUN: grep "addi r1, r1, 16448" %t4 ; RUN: grep "ld r31, -8(r1)" %t4 define i32* @f1() { diff --git a/llvm/test/CodeGen/PowerPC/svr4-redzone.ll b/llvm/test/CodeGen/PowerPC/svr4-redzone.ll index 7c51b67..bee3ac3 100644 --- a/llvm/test/CodeGen/PowerPC/svr4-redzone.ll +++ b/llvm/test/CodeGen/PowerPC/svr4-redzone.ll @@ -36,4 +36,4 @@ entry: ; PPC32: stwu 1, -240(1) ; PPC64-LABEL: bigstack: -; PPC64: stdu 1, -352(1) +; PPC64: stdu 1, -288(1)