From: Sean Fertile Date: Mon, 15 Oct 2018 19:05:57 +0000 (+0000) Subject: [PPC64] Add split - stack support. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=795cc9332b771e8f578f5ae14855d4152c72bdec;p=platform%2Fupstream%2Fllvm.git [PPC64] Add split - stack support. This support is slightly different than the X86_64 implementation in that calls to __morestack don't need to get rewritten to calls to __morestack_non_split when a split-stack caller calls a non-split-stack callee. Instead the size of the stack frame requested by the caller is adjusted prior to the call to __morestack. The size the stack-frame will be adjusted by is tune-able through a new --split-stack-adjust-size option. Differential Revision: https://reviews.llvm.org/D52099 llvm-svn: 344544 --- diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index eb01955..ac516d5 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -120,6 +120,9 @@ public: void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; + + bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const override; }; } // namespace @@ -213,6 +216,8 @@ PPC64::PPC64() { TlsGotRel = R_PPC64_TPREL64; + NeedsMoreStackNonSplit = false; + // We need 64K pages (at least under glibc/Linux, the loader won't // set different permissions on a finer granularity than that). DefaultMaxPageSize = 65536; @@ -761,7 +766,115 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { } } +// The prologue for a split-stack function is expected to look roughly +// like this: +// .Lglobal_entry_point: +// # TOC pointer initialization. +// ... +// .Llocal_entry_point: +// # load the __private_ss member of the thread's tcbhead. 
+// ld r0,-0x7000-64(r13) +// # subtract the function's stack size from the stack pointer. +// addis r12, r1, ha(-stack-frame size) +// addi r12, r12, l(-stack-frame size) +// # compare needed to actual and branch to allocate_more_stack if more +// # space is needed, otherwise fallthrough to 'normal' function body. +// cmpld cr7,r12,r0 +// blt- .Lallocate_more_stack +// +// -) The allocate_more_stack block might be placed after the split-stack +// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body` +// instead. +// -) If either the addis or addi is not needed due to the stack size being +// smaller than 32K or a multiple of 64K they will be replaced with a nop, +// but there will always be 2 instructions the linker can overwrite for the +// adjusted stack size. +// +// The linker's job here is to increase the stack size used in the addis/addi +// pair by split-stack-adjust-size. +// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size) +// addi r12, r12, l(-stack-frame size - split-stack-adjust-size) +bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const { + // If the caller has a global entry point adjust the buffer past it. The start + // of the split-stack prologue will be at the local entry point. + Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther); + + // At the very least we expect to see a load of some split-stack data from the + // tcb, and 2 instructions that calculate the ending stack address this + // function will require. If there is not enough room for at least 3 + // instructions it can't be a split-stack prologue. + if (Loc + 12 >= End) + return false; + + // First instruction must be `ld r0, -0x7000-64(r13)` + if (read32(Loc) != 0xe80d8fc0) + return false; + + int16_t HiImm = 0; + int16_t LoImm = 0; + // First instruction can be either an addis if the frame size is larger than + // 32K, or an addi if the size is less than 32K. 
+ int32_t FirstInstr = read32(Loc + 4); + if (getPrimaryOpCode(FirstInstr) == 15) { + HiImm = FirstInstr & 0xFFFF; + } else if (getPrimaryOpCode(FirstInstr) == 14) { + LoImm = FirstInstr & 0xFFFF; + } else { + return false; + } + + // Second instruction is either an addi or a nop. If the first instruction was + // an addi then LoImm is set and the second instruction must be a nop. + uint32_t SecondInstr = read32(Loc + 8); + if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) { + LoImm = SecondInstr & 0xFFFF; + } else if (SecondInstr != 0x60000000) { + return false; + } + + // The register operands of the first instruction should be the stack-pointer + // (r1) as the input (RA) and r12 as the output (RT). If the second + // instruction is not a nop, then it should use r12 as both input and output. + auto CheckRegOperands = + [](uint32_t Instr, uint8_t ExpectedRT, uint8_t ExpectedRA) { + return ((Instr & 0x3E00000) >> 21 == ExpectedRT) && + ((Instr & 0x1F0000) >> 16 == ExpectedRA); + }; + if (!CheckRegOperands(FirstInstr, 12, 1)) + return false; + if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12)) + return false; + + int32_t StackFrameSize = (HiImm << 16) + LoImm; + // Check that the adjusted size doesn't overflow what we can represent with 2 + // instructions. + if (StackFrameSize < -2147483648 + Config->SplitStackAdjustSize) { + error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows"); + return false; + } + + int32_t AdjustedStackFrameSize = + StackFrameSize - Config->SplitStackAdjustSize; + + LoImm = AdjustedStackFrameSize & 0xFFFF; + HiImm = (AdjustedStackFrameSize + 0x8000) >> 16; + if (HiImm) { + write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm); + // If the low immediate is zero the second instruction will be a nop. + SecondInstr = + LoImm ? 
0x398C0000 | (uint16_t)LoImm : 0x60000000; + write32(Loc + 8, SecondInstr); + } else { + // addi r12, r1, imm + write32(Loc + 4, (0x39810000) | (uint16_t)LoImm); + write32(Loc + 8, 0x60000000); + } + + return true; +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; -} + } diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 960bf78..6421ec4 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -43,8 +43,8 @@ public: void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; - bool adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const override; + bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const override; private: void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op, @@ -482,7 +482,8 @@ namespace { // B) Or a load of a stack pointer offset with an lea to r10 or r11. template <> bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const { + uint8_t *End, + uint8_t StOther) const { if (Loc + 8 >= End) return false; @@ -509,7 +510,8 @@ bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, template <> bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const { + uint8_t *End, + uint8_t StOther) const { llvm_unreachable("Target doesn't support split stacks."); } diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index f9fc10c..de751af 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -217,6 +217,7 @@ struct Configuration { unsigned LTOO; unsigned Optimize; unsigned ThinLTOJobs; + int32_t SplitStackAdjustSize; // The following config options do not directly correspond to any // particualr command line options. 
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 1878d95..147675e 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -829,6 +829,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->SingleRoRx = Args.hasArg(OPT_no_rosegment); Config->SoName = Args.getLastArgValue(OPT_soname); Config->SortSection = getSortSection(Args); + Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384); Config->Strip = getStrip(Args); Config->Sysroot = Args.getLastArgValue(OPT_sysroot); Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); @@ -901,6 +902,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { if (Config->ThinLTOJobs == 0) error("--thinlto-jobs: number of threads must be > 0"); + if (Config->SplitStackAdjustSize < 0) + error("--split-stack-adjust-size: size must be >= 0"); + // Parse ELF{32,64}{LE,BE} and CPU type. if (auto *Arg = Args.getLastArg(OPT_m)) { StringRef S = Arg->getValue(); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 9da4af6..15fa2dc 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -1009,7 +1009,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf, if (Defined *F = getEnclosingFunction(Rel.Offset)) { Prologues.insert(F); if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value), - End)) + End, F->StOther)) continue; if (!getFile()->SomeNoSplitStack) error(lld::toString(this) + ": " + F->getName() + @@ -1017,7 +1017,9 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf, " (without -fsplit-stack), but couldn't adjust its prologue"); } } - switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls); + + if (Target->NeedsMoreStackNonSplit) + switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls); } template void InputSection::writeTo(uint8_t *Buf) { diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 67d3dc6..1ff1abe0 100644 --- 
a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -42,6 +42,12 @@ defm compress_debug_sections: defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<expression>">; +defm split_stack_adjust_size + : Eq<"split-stack-adjust-size", + "Specify adjustment to stack size when a split-stack function calls a " + "non-split-stack function">, + MetaVarName<"<value>">; + defm library_path: Eq<"library-path", "Add a directory to the library search path">, MetaVarName<"<dir>">; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 0c8c940..4db7939 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -132,12 +132,11 @@ bool TargetInfo::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, return false; } -bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const { +bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const { llvm_unreachable("Target doesn't support split stacks."); } - bool TargetInfo::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { return true; } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 1764963..4f310e3 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -69,8 +69,10 @@ public: // The function with a prologue starting at Loc was compiled with // -fsplit-stack and it calls a function compiled without. Adjust the prologue // to do the right thing. See https://gcc.gnu.org/wiki/SplitStacks. - virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, - uint8_t *End) const; + // The symbol's st_other flags are needed on PowerPC64 for determining the + // offset to the split-stack prologue. + virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const; // Return true if we can reach Dst from Src with Relocation RelocType virtual bool inBranchRange(RelType Type, uint64_t Src, @@ -130,6 +132,11 @@ public: // executable OutputSections. 
uint32_t TrapInstr = 0; + // If a target needs to rewrite calls to __morestack to instead call + // __morestack_non_split when a split-stack enabled caller calls a + // non-split-stack callee this will return true. Otherwise returns false. + bool NeedsMoreStackNonSplit = true; + virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; diff --git a/lld/test/ELF/Inputs/ppc64-no-split-stack.s b/lld/test/ELF/Inputs/ppc64-no-split-stack.s new file mode 100644 index 0000000..8371767 --- /dev/null +++ b/lld/test/ELF/Inputs/ppc64-no-split-stack.s @@ -0,0 +1,8 @@ + .abiversion 2 + .p2align 2 + .global nss_callee + .type nss_callee, @function +nss_callee: + li 3, 1 + blr + diff --git a/lld/test/ELF/ppc64-split-stack-adjust-fail.s b/lld/test/ELF/ppc64-split-stack-adjust-fail.s new file mode 100644 index 0000000..4ad1a99 --- /dev/null +++ b/lld/test/ELF/ppc64-split-stack-adjust-fail.s @@ -0,0 +1,53 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: not ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t 2>&1 | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: not ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t 2>&1 | FileCheck %s + +# CHECK: error: {{.*}}.o:(.text): wrong_regs (with -fsplit-stack) calls nss_callee (without -fsplit-stack), but couldn't adjust its prologue + + .abiversion 2 + .section ".text" + + .p2align 2 + .global wrong_regs + .type wrong_regs, @function + +wrong_regs: +.Lwr_gep: + addis 2, 12, .TOC.-.Lwr_gep@ha + addi 2, 2, .TOC.-.Lwr_gep@l + .localentry wrong_regs, .-wrong_regs + ld 0, -0x7040(13) + addis 5, 2, -1 + addi 5, 5, -32 + addi 12, 1, 
-32 + nop + cmpld 7, 12, 0 + blt- 7, .Lwr_alloc_more +.Lwr_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl nss_callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lwr_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lwr_body + .size wrong_regs, .-wrong_regs + + .section .note.GNU-split-stack,"",@progbits diff --git a/lld/test/ELF/ppc64-split-stack-adjust-overflow.s b/lld/test/ELF/ppc64-split-stack-adjust-overflow.s new file mode 100644 index 0000000..bc958c7 --- /dev/null +++ b/lld/test/ELF/ppc64-split-stack-adjust-overflow.s @@ -0,0 +1,64 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: not ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 2>&1 | \ +# RUN: FileCheck -check-prefix=OVERFLOW %s +# RUN: not ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 \ +# RUN: -split-stack-adjust-size 4097 2>&1 | FileCheck -check-prefix=OVERFLOW %s +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 4096 +# RUN: llvm-objdump -d %t | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: not ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 2>&1 | \ +# RUN: FileCheck -check-prefix=OVERFLOW %s +# RUN: not ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 \ +# RUN: -split-stack-adjust-size 4097 2>&1 | FileCheck -check-prefix=OVERFLOW %s +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 4096 +# RUN: llvm-objdump -d %t | FileCheck %s + +# OVERFLOW: error: {{.*}}.o:(function caller): split-stack prologue adjustment overflows + + .p2align 2 + .global caller + .type caller, @function +caller: +.Lcaller_gep: + addis 2, 12, 
.TOC.-.Lcaller_gep@ha + addi 2, 2, .TOC.-.Lcaller_gep@l + .localentry caller, .-caller + ld 0, -0x7040(13) + addis 12, 1, -32768 + addi 12, 12, 4096 + cmpld 7, 12, 0 + blt- 7, .Lcaller_alloc_more +.Lcaller_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl nss_callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lcaller_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcaller_body + .size caller, .-caller + +# CHECK-LABEL: caller +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -32768 +# CHECK-NEXT: nop +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+36 + +.section .note.GNU-split-stack,"",@progbits diff --git a/lld/test/ELF/ppc64-split-stack-adjust-size-success.s b/lld/test/ELF/ppc64-split-stack-adjust-size-success.s new file mode 100644 index 0000000..eeaac12 --- /dev/null +++ b/lld/test/ELF/ppc64-split-stack-adjust-size-success.s @@ -0,0 +1,106 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 32768 +# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 4096 +# RUN: llvm-objdump -d %t | FileCheck %s -check-prefix=SMALL +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 0 +# RUN: llvm-objdump -d %t | FileCheck %s -check-prefix=ZERO + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o + +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 32768 +# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 4096 +# RUN: 
llvm-objdump -d %t | FileCheck %s -check-prefix=SMALL +# RUN: ld.lld %t1.o %t2.o -o %t --defsym __morestack=0x10010000 -split-stack-adjust-size 0 +# RUN: llvm-objdump -d %t | FileCheck %s -check-prefix=ZERO + .p2align 2 + .global caller + .type caller, @function +caller: +.Lcaller_gep: + addis 2, 12, .TOC.-.Lcaller_gep@ha + addi 2, 2, .TOC.-.Lcaller_gep@l + .localentry caller, .-caller + ld 0, -0x7040(13) + addi 12, 1, -32 + nop + cmpld 7, 12, 0 + blt- 7, .Lcaller_alloc_more +.Lcaller_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl nss_callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lcaller_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcaller_body + .size caller, .-caller + +# CHECK-LABEL: caller +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -1 +# CHECK-NEXT: addi 12, 12, 32736 +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+36 + +# SMALL-LABEL: caller +# SMALL: ld 0, -28736(13) +# SMALL-NEXT: addi 12, 1, -4128 +# SMALL-NEXT: nop +# SMALL-NEXT: cmpld 7, 12, 0 +# SMALL-NEXT: bt- 28, .+36 + +# ZERO-LABEL: caller +# ZERO: ld 0, -28736(13) +# ZERO-NEXT: addi 12, 1, -32 +# ZERO-NEXT: nop +# ZERO-NEXT: cmpld 7, 12, 0 +# ZERO-NEXT: bt- 28, .+36 + .p2align 2 + .global main + .type main, @function +main: +.Lmain_gep: + addis 2,12,.TOC.-.Lmain_gep@ha + addi 2,2,.TOC.-.Lmain_gep@l + .localentry main,.-main + ld 0,-0x7040(13) + addi 12,1,-32 + nop + cmpld 7,12,0 + blt- 7, .Lmain_morestack +.Lmain_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl caller + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lmain_morestack: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lmain_body + .size main,.-main + + .section .note.GNU-split-stack,"",@progbits diff --git a/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s b/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s new file mode 100644 index 0000000..197df15 --- /dev/null +++ 
b/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s @@ -0,0 +1,224 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o +# RUN: ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o +# RUN: ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + + .abiversion 2 + .section ".text" + + +# A caller with a stack that is small enough that the addis instruction +# from the split-stack prologue is unneeded, and after the prologue adjustment +# the stack size still fits within 16 bits. + .p2align 2 + .global caller_small_stack + .type caller_small_stack, @function +caller_small_stack: +.Lcss_gep: + addis 2, 12, .TOC.-.Lcss_gep@ha + addi 2, 2, .TOC.-.Lcss_gep@l + .localentry caller_small_stack, .-caller_small_stack + ld 0, -0x7040(13) + addi 12, 1, -32 + nop + cmpld 7, 12, 0 + blt- 7, .Lcss_alloc_more +.Lcss_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl nss_callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lcss_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcss_body + .size caller_small_stack, .-caller_small_stack + +# CHECK-LABEL: caller_small_stack +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addi 12, 1, -16416 +# CHECK-NEXT: nop +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+36 + +# A caller that has a stack size that fits within 16 bits, but the adjusted +# stack size after prologue adjustment now overflows 16 bits needing both addis +# and addi instructions. 
+ .p2align 2 + .global caller_med_stack + .type caller_med_stack, @function +caller_med_stack: +.Lcms_gep: + addis 2, 12, .TOC.-.Lcms_gep@ha + addi 12, 12, .TOC.-.Lcms_gep@l + .localentry caller_med_stack, .-caller_med_stack + ld 0, -0x7040(13) + addi 12, 1, -32764 + nop + cmpld 7, 12, 0 + blt- 7, .Lcms_alloc_more +.Lcms_body: + mflr 0 + std 0, 16(1) + stdu 1, -32764(1) + bl nss_callee + addi 1, 1, 32764 + ld 0, 16(1) + mtlr 0 + blr +.Lcms_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcms_body + .size caller_med_stack, .-caller_med_stack + +# A caller with a large enough stack frame that both the addis and +# addi instructions are used in the split-stack prologue. + .p2align 2 + .global caller_large_stack + .type caller_large_stack, @function +caller_large_stack: +.Lcls_gep: + addis 2, 12, .TOC.-.Lcls_gep@ha + addi 12, 12, .TOC.-.Lcls_gep@l + .localentry caller_large_stack, .-caller_large_stack + ld 0, -0x7040(13) + addis 12, 1, -1 + addi 12, 12, -32 + cmpld 7, 12, 0 + blt- 7, .Lcls_alloc_more +.Lcls_body: + mflr 0 + std 0, 16(1) + lis 0, -1 + addi 0, 0, -32 + stdux 1, 0, 1 + bl nss_callee + ld 1, 0(1) + ld 0, 16(1) + mtlr 0 + blr +.Lcls_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lcls_body + .size caller_large_stack, .-caller_large_stack + +# CHECK-LABEL: caller_large_stack +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -1 +# CHECK-NEXT: addi 12, 12, -16416 +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+44 + +# A caller with a stack size that is larger than 16 bits, but aligned such that +# the addi instruction is unneeded. 
+ .p2align 2 + .global caller_large_aligned_stack + .type caller_large_aligned_stack, @function +caller_large_aligned_stack: +.Lclas_gep: + addis 2, 12, .TOC.-.Lclas_gep@ha + addi 12, 12, .TOC.-.Lclas_gep@l + .localentry caller_large_aligned_stack, .-caller_large_aligned_stack + ld 0, -0x7040(13) + addis 12, 1, -2 + nop + cmpld 7, 12, 0 + blt- 7, .Lclas_alloc_more +.Lclas_body: + mflr 0 + std 0, 16(1) + lis 0, -2 + stdux 1, 0, 1 + bl nss_callee + ld 1, 0(1) + ld 0, 16(1) + mtlr 0 + blr +.Lclas_alloc_more: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lclas_body + .size caller_large_aligned_stack, .-caller_large_aligned_stack + +# CHECK-LABEL: caller_large_aligned_stack +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addis 12, 1, -2 +# CHECK-NEXT: addi 12, 12, -16384 +# CHECK-NEXT: cmpld 7, 12, 0 +# CHECK-NEXT: bt- 28, .+40 + +# main only calls split-stack functions or __morestack so +# there should be no adjustment of its split-stack prologue. + .p2align 2 + .global main + .type main, @function +main: +.Lmain_gep: + addis 2, 12,.TOC.-.Lmain_gep@ha + addi 2, 2,.TOC.-.Lmain_gep@l + .localentry main,.-main + ld 0, -0x7040(13) + addi 12,1,-32 + nop + cmpld 7, 12,0 + blt- 7, .Lmain_morestack +.Lmain_body: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl caller_small_stack + nop + bl caller_med_stack + nop + bl caller_large_stack + nop + bl caller_large_aligned_stack + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr +.Lmain_morestack: + mflr 0 + std 0, 16(1) + bl __morestack + ld 0, 16(1) + mtlr 0 + blr + b .Lmain_body + .size main,.-main +# CHECK-LABEL: main +# CHECK: ld 0, -28736(13) +# CHECK-NEXT: addi 12, 1, -32 +# CHECK-NEXT: nop +# CHECK-NEXT: cmpld 7, 12, 0 + + .section .note.GNU-split-stack,"",@progbits