From 6c4b40def77622a5cf62a219ef4af63dc876e144 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Wed, 8 Apr 2020 08:07:35 -0500 Subject: [PATCH] [PowerPC][Future] Add Support For Functions That Do Not Use A TOC. On PowerPC most functions require a valid TOC pointer. This is the case because either the function itself needs to use this pointer to access the TOC or because other functions that are called from that function expect a valid TOC pointer in the register R2. The main exception to this is leaf functions that do not access the TOC since they are guaranteed not to need a valid TOC pointer. This patch introduces a feature that will allow more functions to not require a valid TOC pointer in R2. Differential Revision: https://reviews.llvm.org/D73664 --- llvm/include/llvm/BinaryFormat/ELF.h | 6 - .../llvm/BinaryFormat/ELFRelocs/PowerPC64.def | 2 + llvm/include/llvm/MC/MCExpr.h | 1 + llvm/lib/MC/MCExpr.cpp | 2 + .../Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 5 + .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 5 + .../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 4 + .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 4 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 35 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 41 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 34 +- llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 + llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 14 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 4 + llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 7 +- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 49 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 9 +- llvm/lib/Target/PowerPC/PPCScheduleP9.td | 4 +- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 5 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 1 + .../CodeGen/PowerPC/pcrel-call-linkage-leaf.ll | 176 +++++++ .../CodeGen/PowerPC/pcrel-call-linkage-simple.ll | 42 ++ .../PowerPC/pcrel-call-linkage-with-calls.ll | 521 +++++++++++++++++++++ llvm/test/MC/PowerPC/ppc64-localentry-error1.s | 6 +- 
llvm/test/MC/PowerPC/ppc64-localentry-error2.s | 6 +- 25 files changed, 950 insertions(+), 35 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 9bf5a3c..1b0412b 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -393,12 +393,6 @@ static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) { unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT; return ((1 << Val) >> 2) << 2; } -static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) { - unsigned Val = - (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4) - : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 2 : 0))); - return Val << STO_PPC64_LOCAL_BIT; -} // ELF Relocation types for PPC64 enum { diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def index 719d0c9..f8c330e 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -96,6 +96,7 @@ #undef R_PPC64_TPREL16_HIGHA #undef R_PPC64_DTPREL16_HIGH #undef R_PPC64_DTPREL16_HIGHA +#undef R_PPC64_REL24_NOTOC #undef R_PPC64_IRELATIVE #undef R_PPC64_REL16 #undef R_PPC64_REL16_LO @@ -190,6 +191,7 @@ ELF_RELOC(R_PPC64_TPREL16_HIGH, 112) ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113) ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114) ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115) +ELF_RELOC(R_PPC64_REL24_NOTOC, 116) ELF_RELOC(R_PPC64_IRELATIVE, 248) ELF_RELOC(R_PPC64_REL16, 249) ELF_RELOC(R_PPC64_REL16_LO, 250) diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index c0060c8..386fa9c 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -284,6 
+284,7 @@ public: VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha VK_PPC_TLSLD, // symbol@tlsld VK_PPC_LOCAL, // symbol@local + VK_PPC_NOTOC, // symbol@notoc VK_COFF_IMGREL32, // symbol@imgrel (image-relative) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 2ddbf94..0918802 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -319,6 +319,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha"; case VK_PPC_TLSLD: return "tlsld"; case VK_PPC_LOCAL: return "local"; + case VK_PPC_NOTOC: return "notoc"; case VK_COFF_IMGREL32: return "IMGREL"; case VK_Hexagon_LO16: return "LO16"; case VK_Hexagon_HI16: return "HI16"; @@ -432,6 +433,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO) .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI) .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA) + .Case("notoc", VK_PPC_NOTOC) .Case("gdgot", VK_Hexagon_GD_GOT) .Case("gdplt", VK_Hexagon_GD_PLT) .Case("iegot", VK_Hexagon_IE_GOT) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 7320c1e..cb7d429 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -39,6 +39,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { return Value & 0xfffc; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: return Value & 0x3fffffc; case PPC::fixup_ppc_half16: return Value & 0xffff; @@ -62,6 +63,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case PPC::fixup_ppc_brcond14abs: case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: return 4; case FK_Data_8: return 8; @@ -88,6 +90,7 @@ public: const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 6, 24, 
MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_br24abs", 6, 24, 0 }, { "fixup_ppc_brcond14abs", 16, 14, 0 }, @@ -98,6 +101,7 @@ public: const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_br24abs", 2, 24, 0 }, { "fixup_ppc_brcond14abs", 2, 14, 0 }, @@ -151,6 +155,7 @@ public: return Kind >= FirstLiteralRelocationKind; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: // If the target symbol has a local entry point we must not attempt // to resolve the fixup directly. Emit a relocation and leave // resolution of the final target address to the linker. diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index c037962..99a8207 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -86,6 +86,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, llvm_unreachable("Unimplemented"); case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: + case PPC::fixup_ppc_br24_notoc: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_None: @@ -97,6 +98,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, case MCSymbolRefExpr::VK_PPC_LOCAL: Type = ELF::R_PPC_LOCAL24PC; break; + case MCSymbolRefExpr::VK_PPC_NOTOC: + Type = ELF::R_PPC64_REL24_NOTOC; + break; } break; case PPC::fixup_ppc_brcond14: @@ -431,6 +435,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, return false; case 
ELF::R_PPC_REL24: + case ELF::R_PPC64_REL24_NOTOC: // If the target symbol has a local entry point, we must keep the // target symbol to preserve that information for the linker. // The "other" values are stored in the last 6 bits of the second byte. diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 84548978..efa7f0a 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -19,6 +19,10 @@ enum Fixups { // 24-bit PC relative relocation for direct branches like 'b' and 'bl'. fixup_ppc_br24 = FirstTargetFixupKind, + // 24-bit PC relative relocation for direct branches like 'b' and 'bl' where + // the caller does not use the TOC. + fixup_ppc_br24_notoc, + /// 14-bit PC relative relocation for conditional branches. fixup_ppc_brcond14, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 672f6e0..06df3bd 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -48,7 +48,9 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the branch target. Fixups.push_back(MCFixup::create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_br24)); + ((MI.getOpcode() == PPC::BL8_NOTOC) + ? 
(MCFixupKind)PPC::fixup_ppc_br24_notoc + : (MCFixupKind)PPC::fixup_ppc_br24))); return 0; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 9d3ecc0..c85b08a 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -179,13 +179,9 @@ public: void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); - int64_t Res; - if (!LocalOffset->evaluateAsAbsolute(Res, MCA)) - report_fatal_error(".localentry expression must be absolute."); - - unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); - if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) - report_fatal_error(".localentry expression cannot be encoded."); + // encodePPC64LocalEntryOffset will report an error if it cannot + // encode LocalOffset. + unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset); unsigned Other = S->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; @@ -230,6 +226,31 @@ private: D->setOther(Other); return true; } + + unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) { + MCAssembler &MCA = getStreamer().getAssembler(); + int64_t Offset; + if (!LocalOffset->evaluateAsAbsolute(Offset, MCA)) + MCA.getContext().reportFatalError( + LocalOffset->getLoc(), ".localentry expression must be absolute."); + + switch (Offset) { + default: + MCA.getContext().reportFatalError( + LocalOffset->getLoc(), + ".localentry expression is not a valid power of 2."); + case 0: + return 0; + case 1: + return 1 << ELF::STO_PPC64_LOCAL_BIT; + case 4: + case 8: + case 16: + case 32: + case 64: + return (int)Log2(Offset) << (int)ELF::STO_PPC64_LOCAL_BIT; + } + } }; class PPCTargetMachOStreamer : public PPCTargetStreamer { diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 0a256cb..e0bbb8f 100644 --- 
a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1460,14 +1460,16 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() { // // This ensures we have r2 set up correctly while executing the function // body, no matter which entry point is called. - if (Subtarget->isELFv2ABI() - // Only do all that if the function uses r2 in the first place. - && !MF->getRegInfo().use_empty(PPC::X2)) { + const PPCFunctionInfo *PPCFI = MF->getInfo(); + const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) || + !MF->getRegInfo().use_empty(PPC::R2); + // Only do all that if the function uses R2 as the TOC pointer + // in the first place. We don't need the global entry point if the + // function uses R2 as an allocatable register. + if (Subtarget->isELFv2ABI() && UsesX2OrR2 && PPCFI->usesTOCBasePtr()) { // Note: The logic here must be synchronized with the code in the // branch-selection pass which sets the offset of the first block in the // function. This matters because it affects the alignment. - const PPCFunctionInfo *PPCFI = MF->getInfo(); - MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(); OutStreamer->emitLabel(GlobalEntryLabel); const MCSymbolRefExpr *GlobalEntryLabelExp = @@ -1519,6 +1521,35 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() { if (TS) TS->emitLocalEntry(cast(CurrentFnSym), LocalOffsetExp); + } else if (Subtarget->isELFv2ABI()) { + // When generating the entry point for a function we have a few scenarios + // based on whether or not that function uses R2 and whether or not that + // function makes calls (or is a leaf function). + // 1) A leaf function that does not use R2 (or treats it as callee-saved + // and preserves it). In this case st_other=0 and both + // the local and global entry points for the function are the same. + // No special entry point code is required. + // 2) A function uses the TOC pointer R2. This function may or may not have + // calls. 
In this case st_other=[2,6] and the global and local entry + // points are different. Code to correctly set up the TOC pointer in R2 + // is put between the global and local entry points. This case is + // covered by the if statement above. + // 3) A function does not use the TOC pointer R2 but does have calls. + // In this case st_other=1 since we do not know whether or not any + // of the callees clobber R2. This case is dealt with in this else if + // block. + // 4) The function does not use the TOC pointer but R2 is used inside + // the function. In this case st_other=1 once again. + // 5) This function uses inline asm. We mark R2 as reserved if the function + // has inline asm so we have to assume that it may be used. + if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() || + (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { + PPCTargetStreamer *TS = + static_cast(OutStreamer->getTargetStreamer()); + if (TS) + TS->emitLocalEntry(cast(CurrentFnSym), + MCConstantExpr::create(1, OutContext)); + } } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 3d84419..db81a6c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1404,6 +1404,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; + case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; @@ -4689,6 +4690,16 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( SelectionDAG& DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; + // FIXME: Tail calls are currently disabled when using PC Relative addressing. 
+ // The issue is that PC Relative is only partially implemented and so there + // is currently a mix of functions that require the TOC and functions that do + // not require it. If we have A calls B calls C and both A and B require the + // TOC and C does not and is marked as clobbering R2 then it is not safe for + // B to tail call C. Since we do not have the information of whether or not + // a function needs to use the TOC here in this function we need to be + // conservatively safe and disable all tail calls for now. + if (Subtarget.isUsingPCRelativeCalls()) return false; + if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. @@ -5085,6 +5096,17 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, return PPCISD::BCTRL; } + // FIXME: At this moment indirect calls are treated ahead of the + // PC Relative condition because binaries can still contain a possible + // mix of functions that use a TOC and functions that do not use a TOC. + // Once the PC Relative feature is complete this condition should be moved + // up ahead of the indirect calls and should return a PPCISD::BCTRL for + // that case. + if (Subtarget.isUsingPCRelativeCalls()) { + assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."); + return PPCISD::CALL_NOTOC; + } + // The ABIs that maintain a TOC pointer accross calls need to have a nop // immediately following the call instruction if the caller and callee may // have different TOC bases. At link time if the linker determines the calls @@ -5094,8 +5116,8 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, // will rewrite the nop to be a load of the TOC pointer from the linkage area // into gpr2. if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) - return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL - : PPCISD::CALL_NOP; + return callsShareTOCBase(&Caller, Callee, TM) ? 
PPCISD::CALL + : PPCISD::CALL_NOP; return PPCISD::CALL; } @@ -5372,7 +5394,7 @@ buildCallOperands(SmallVectorImpl &Ops, // no way to mark dependencies as implicit here. // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && - !CFlags.IsPatchPoint) + !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls()) Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls @@ -5398,7 +5420,8 @@ SDValue PPCTargetLowering::FinishCall( unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals, ImmutableCallSite CS) const { - if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) + if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || + Subtarget.isAIXABI()) setUsesTOCBasePtr(DAG); unsigned CallOpc = @@ -11373,7 +11396,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.is64BitELFABI() && - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT && + !Subtarget.isUsingPCRelativeCalls()) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 4107d2b..3a1001c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -165,9 +165,11 @@ namespace llvm { /// CALL - A direct function call. /// CALL_NOP is a call with the special NOP which follows 64-bit + /// CALL_NOTOC the caller does not use the TOC. /// SVR4 calls and 32-bit/64-bit AIX calls. 
CALL, CALL_NOP, + CALL_NOTOC, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 542225c..0734361 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -140,6 +140,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { (outs), (ins abscalltarget:$func), "bla $func\n\tnop", IIC_BrB, [(PPCcall_nop (i64 imm:$func))]>; + let Predicates = [PCRelativeMemops] in { + // BL8_NOTOC means that the caller does not use the TOC pointer and if + // it does use R2 then it is just a caller saved register. Therefore it is + // safe to emit only the bl and not the nop for this instruction. The + // linker will not try to restore R2 after the call. + def BL8_NOTOC : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), + (ins calltarget:$func), + "bl $func", IIC_BrB, []>; + } } let Uses = [CTR8, RM] in { let isPredicable = 1 in @@ -194,6 +203,11 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)), def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; +def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)), + (BL8_NOTOC tglobaladdr:$dst)>; +def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)), + (BL8_NOTOC texternalsym:$dst)>; + // Calls for AIX def : Pat<(PPCcall (i64 mcsym:$dst)), (BL8 mcsym:$dst)>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 189dd77..7a7128d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -252,6 +252,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, 
SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -994,6 +997,7 @@ def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; def HasFPU : Predicate<"PPCSubTarget->hasFPU()">; +def PCRelativeMemops : Predicate<"PPCSubTarget->hasPCRelativeMemops()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index e9856d4..253f71a 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -81,7 +81,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, if (MO.getTargetFlags() == PPCII::MO_PLT) RefKind = MCSymbolRefExpr::VK_PLT; - const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + const MachineInstr *MI = MO.getParent(); + + if (MI->getOpcode() == PPC::BL8_NOTOC) + RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; + + const MachineFunction *MF = MI->getMF(); const Module *M = MF->getFunction().getParent(); const PPCSubtarget *Subtarget = &(MF->getSubtarget()); const TargetMachine &TM = Printer.TM; diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 70d18ed..ff2329a 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed, "Number of pairs of rotate left, clear left/right collapsed"); STATISTIC(NumEXTSWAndSLDICombined, "Number of pairs of EXTSW and SLDI combined as EXTSWSLI"); +STATISTIC(NumX2FoundForPCRel, "Number of times the X2 TOC pointer has been " + "found when PC relative NOTOC is being used."); static cl::opt FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), @@ -99,6 +101,11 @@ private: // Initialize class 
variables. void initialize(MachineFunction &MFParm); + // Perform peepholes that cannot be skipped. + // Some peephole simplifications are required for correctness and will not + // be skipped even if skipFunction(MF.getFunction()) returns true. + void unskipableSimplifyCode(void); + // Perform peepholes. bool simplifyCode(void); @@ -124,9 +131,14 @@ public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { + initialize(MF); + // FIXME: This introduces another complete traversal of the instructions + // in the function in the common case (function is not skipped). Although + // this is less than ideal for compile time, this code will go away once + // our PC-Rel implementation is complete. + unskipableSimplifyCode(); if (skipFunction(MF.getFunction())) return false; - initialize(MF); return simplifyCode(); } }; @@ -260,6 +272,41 @@ void PPCMIPeephole::UpdateTOCSaves( TOCSaves[MI] = Keep; } +void PPCMIPeephole::unskipableSimplifyCode(void) { + // If this function has no uses of R2 there is nothing to do here. + if(MF->getRegInfo().use_empty(PPC::X2)) + return; + + // This is only for PCRelative calls. + if (!MF->getSubtarget().isUsingPCRelativeCalls()) { + return; + } + + // This function has R2 so we need to mark an implicit def for it. + PPCFunctionInfo *FuncInfo = MF->getInfo(); + FuncInfo->setUsesTOCBasePtr(); + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == PPC::BL8_NOTOC) { + // At this point the BL8_NOTOC instruction is not really safe because it + // assumes that the caller does not need the TOC. It will be safe + // later once the full PC relative implementation is complete but it is + // not now. + // Here we are looking for X2. Since this is Pre-RA the only uses of X2 + // would indicate the use of the TOC. We want to detect all uses of the + // TOC. Once the work is done we should not see any uses of the TOC. 
+ // TODO: Once the implementation is complete this should be turned into + // an assert + Register Reg = MF->getSubtarget().getTOCPointerRegister(); + MachineOperand MO = MachineOperand::CreateReg(Reg, false, true); + MI.addOperand(*MF, MO); + MI.setDesc(TII->get(PPC::BL8_NOP)); + ++NumX2FoundForPCRel; + } + } + } +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 4fcfb79..ae1aa66 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -153,7 +153,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SRV464_TLS_PE_SaveList; // On PPC64, we might need to save r2 (but only if it is not reserved). - bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); + // We do not need to treat R2 as callee-saved when using PC-Relative calls + // because any direct uses of R2 will cause it to be reserved. If the function + // is a leaf or the only uses of R2 are implicit uses for calls, the calls + // will use the @notoc relocation which will cause this function to set the + // st_other bit to 1, thereby communicating to its caller that it arbitrarily + // clobbers the TOC. + bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) && + !Subtarget.isUsingPCRelativeCalls(); // Cold calling convention CSRs. if (MF->getFunction().getCallingConv() == CallingConv::Cold) { diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index 4f00fb1..e779abe 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -41,8 +41,8 @@ def P9Model : SchedMachineModel { let CompleteModel = 1; // Do not support QPX (Quad Processing eXtension), SPE (Signal Procesing - // Engine) or prefixed instructions on Power 9. 
- let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs]; + // Engine), prefixed instructions on Power 9 or PC relative mem ops. + let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops]; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index b0c0f30..7819874 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -227,3 +227,8 @@ bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); } + +bool PPCSubtarget::isUsingPCRelativeCalls() const { + return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() && + CodeModel::Medium == getTargetMachine().getCodeModel(); +} diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index be061d9c..b7b09319 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -333,6 +333,7 @@ public: bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); } bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); } + bool isUsingPCRelativeCalls() const; /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. 
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll new file mode 100644 index 0000000..01024d4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -0,0 +1,176 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-S + +@global_int = common dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local signext i32 @NoTOC() local_unnamed_addr { +; CHECK-S-LABEL: NoTOC: +; CHECK-S-NOT: .localentry +; CHECK-S: li r3, 42 +; CHECK-S-NEXT: blr +entry: + ret i32 42 +} + +define dso_local signext i32 @AsmClobberX2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberX2: +; CHECK-S: .localentry AsmClobberX2, 1 +; CHECK-S: add r3, r4, r3 +; CHECK-S: #APP +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: #NO_APP +; CHECK-S: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "nop", "~{r2}"() + ret i32 %add +} + +; FIXME: This is actually a test case that shows a bug. On power9 and earlier +; this test should not compile. On later CPUs (like this test) the @toc +; should be replaced with @pcrel and we won't need R2 and so the problem +; goes away. 
+define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberX2WithTOC: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l +; CHECK-S: .localentry AsmClobberX2WithTOC, .Lfunc_lep2-.Lfunc_gep2 +; CHECK-S: #APP +; CHECK-S-NEXT: li r2, 0 +; CHECK-S-NEXT: #NO_APP +; CHECK-S-NEXT: addis r5, r2, global_int@toc@ha +; CHECK-S-NEXT: lwz r5, global_int@toc@l(r5) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: add r3, r3, r5 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "li 2, 0", "~{r2}"() + %0 = load i32, i32* @global_int, align 4 + %add1 = add nsw i32 %add, %0 + ret i32 %add1 +} + +define dso_local signext i32 @AsmClobberX5(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberX5: +; CHECK-S: .localentry AsmClobberX5, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: #APP +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: #NO_APP +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "nop", "~{r5}"() + ret i32 %add +} + +; Clobber all GPRs except R2. 
+define dso_local signext i32 @AsmClobberNotR2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: AsmClobberNotR2: +; CHECK-S: .localentry AsmClobberNotR2, 1 +; CHECK-S: add r3, r4, r3 +; CHECK-S: stw r3, -148(r1) # 4-byte Folded Spill +; CHECK-S-NEXT: #APP +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: #NO_APP +; CHECK-S-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload +; CHECK-S: blr +entry: + %add = add nsw i32 %b, %a + tail call void asm sideeffect "nop", "~{r0},~{r1},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() + ret i32 %add +} + +; Increase register pressure enough to force the register allocator to +; make use of R2. +define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) local_unnamed_addr { +; CHECK-S-LABEL: X2IsCallerSaved: +; CHECK-S: .localentry X2IsCallerSaved, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: add r11, r4, r3 +; CHECK-S-NEXT: subf r29, r9, r8 +; CHECK-S-NEXT: add r9, r10, r9 +; CHECK-S-NEXT: subf r10, r3, r10 +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: mullw r3, r3, r11 +; CHECK-S-NEXT: mullw r3, r3, r5 +; CHECK-S-NEXT: subf r12, r5, r4 +; CHECK-S-NEXT: mullw r3, r3, r6 +; CHECK-S-NEXT: add r0, r6, r5 +; CHECK-S-NEXT: mullw r3, r3, r12 +; CHECK-S-NEXT: mullw r3, r3, r0 +; CHECK-S-NEXT: mullw r3, r3, r7 +; CHECK-S-NEXT: subf r2, r7, r6 +; CHECK-S-NEXT: mullw r3, r3, r8 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: add r30, r8, r7 +; CHECK-S-NEXT: mullw r3, r3, r2 +; CHECK-S-NEXT: mullw r3, r3, r30 +; CHECK-S-NEXT: mullw r3, r3, r29 +; CHECK-S-NEXT: mullw r3, r3, r9 +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: ld r29, -24(r1) # 8-byte Folded 
Reload +; CHECK-S-NEXT: mullw r3, r3, r10 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %sub = sub nsw i32 %b, %c + %add1 = add nsw i32 %d, %c + %sub2 = sub nsw i32 %d, %e + %add3 = add nsw i32 %f, %e + %sub4 = sub nsw i32 %f, %g + %add5 = add nsw i32 %h, %g + %sub6 = sub nsw i32 %h, %a + %mul = mul i32 %b, %a + %mul7 = mul i32 %mul, %add + %mul8 = mul i32 %mul7, %c + %mul9 = mul i32 %mul8, %d + %mul10 = mul i32 %mul9, %sub + %mul11 = mul i32 %mul10, %add1 + %mul12 = mul i32 %mul11, %e + %mul13 = mul i32 %mul12, %f + %mul14 = mul i32 %mul13, %sub2 + %mul15 = mul i32 %mul14, %add3 + %mul16 = mul i32 %mul15, %sub4 + %mul17 = mul i32 %mul16, %add5 + %mul18 = mul i32 %mul17, %sub6 + ret i32 %mul18 +} + + +define dso_local signext i32 @UsesX2AsTOC() local_unnamed_addr { +; CHECK-S-LABEL: UsesX2AsTOC: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep6@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep6@l +; CHECK-S: .localentry UsesX2AsTOC, .Lfunc_lep6-.Lfunc_gep6 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: addis r3, r2, global_int@toc@ha +; CHECK-S-NEXT: lwa r3, global_int@toc@l(r3) +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @global_int, align 4 + ret i32 %0 +} + + +define dso_local double @UsesX2AsConstPoolTOC() local_unnamed_addr { +; CHECK-S-LABEL: UsesX2AsConstPoolTOC: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l +; CHECK-S: .localentry UsesX2AsConstPoolTOC, .Lfunc_lep7-.Lfunc_gep7 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-S-NEXT: lfd f1, .LCPI7_0@toc@l(r3) +; CHECK-S-NEXT: blr +entry: + ret double 0x404124A4EBDD334C +} + + diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll new file mode 100644 index 0000000..c145b5c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs 
-mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-S +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names --filetype=obj < %s | \ +; RUN: llvm-objdump -dr - | FileCheck %s --check-prefix=CHECK-O + + +; CHECK-S-LABEL: caller +; CHECK-S: bl callee@notoc +; CHECK-S: blr + +; CHECK-O-LABEL: caller +; CHECK-O: bl +; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee +; CHECK-O: blr +define dso_local signext i32 @caller() local_unnamed_addr { +entry: + %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)() + ret i32 %call +} + +declare signext i32 @callee(...) local_unnamed_addr + + +; Some calls can be considered External Symbols. +; CHECK-S-LABEL: ExternalSymbol +; CHECK-S: bl memcpy@notoc +; CHECK-S: blr + +; CHECK-O-LABEL: ExternalSymbol +; CHECK-O: bl +; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy +; CHECK-O: blr +define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll new file mode 100644 index 0000000..69b8853 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -0,0 +1,521 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-S + +@globalVar = common dso_local local_unnamed_addr global i32 0, align 4 +@externGlobalVar = external local_unnamed_addr global i32, align 4 +@indirectCall = common dso_local local_unnamed_addr global i32
(i32)* null, align 8 + +; This function needs to remain as noinline. +; The compiler needs to know this function is local but must be forced to call +; it. The only thing we really need to check here is that st_other=0 and +; so we make sure that there is no .localentry. +define dso_local signext i32 @localCall(i32 signext %a) local_unnamed_addr #0 { +; CHECK-S-LABEL: localCall: +; CHECK-S-NOT: .localentry +; CHECK-S: addi r3, r3, 5 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %a, 5 + ret i32 %add +} + +define dso_local signext i32 @DirectCallLocal1(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallLocal1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep1@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep1@l +; CHECK-S: .localentry DirectCallLocal1, .Lfunc_lep1-.Lfunc_gep1 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @localCall(i32 signext %add) + %0 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallLocal2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l +; CHECK-S: .localentry DirectCallLocal2, .Lfunc_lep2-.Lfunc_gep2 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1)
+; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @localCall(i32 signext %add) + %0 = load i32, i32* @externGlobalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @DirectCallLocalNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallLocalNoGlobal: +; CHECK-S: .localentry DirectCallLocalNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: .cfi_def_cfa_offset 48 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: .cfi_offset r30, -16 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -48(r1) +; CHECK-S-NEXT: mr r30, r4 +; CHECK-S-NEXT: bl localCall@notoc +; CHECK-S-NEXT: add r3, r3, r30 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 48 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @localCall(i32 signext %a) + %add = add nsw i32 %call, %b + ret i32 %add +} + +define dso_local signext i32 @DirectCallExtern1(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallExtern1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep4@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep4@l +; CHECK-S: .localentry DirectCallExtern1, .Lfunc_lep4-.Lfunc_gep4 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 
16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @externCall(i32 signext %add) + %0 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +declare signext i32 @externCall(i32 signext) local_unnamed_addr + +define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: DirectCallExtern2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep5@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep5@l +; CHECK-S: .localentry DirectCallExtern2, .Lfunc_lep5-.Lfunc_gep5 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 @externCall(i32 signext %add) + %0 = load i32, i32* @externGlobalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @DirectCallExternNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: 
DirectCallExternNoGlobal: +; CHECK-S: .localentry DirectCallExternNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: .cfi_def_cfa_offset 48 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: .cfi_offset r30, -16 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -48(r1) +; CHECK-S-NEXT: mr r30, r4 +; CHECK-S-NEXT: bl externCall@notoc +; CHECK-S-NEXT: add r3, r3, r30 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 48 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @externCall(i32 signext %a) + %add = add nsw i32 %call, %b + ret i32 %add +} + +define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallLocal1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l +; CHECK-S: .localentry TailCallLocal1, .Lfunc_lep7-.Lfunc_gep7 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @globalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @localCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallLocal2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep8@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep8@l +; CHECK-S: .localentry TailCallLocal2, 
.Lfunc_lep8-.Lfunc_gep8 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl localCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @externGlobalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @localCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallLocalNoGlobal: +; CHECK-S: .localentry TailCallLocalNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: bl localCall@notoc +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @localCall(i32 signext %a) + ret i32 %call +} + +define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallExtern1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep10@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep10@l +; CHECK-S: .localentry TailCallExtern1, .Lfunc_lep10-.Lfunc_gep10 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl 
externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @globalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @externCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallExtern2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep11@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep11@l +; CHECK-S: .localentry TailCallExtern2, .Lfunc_lep11-.Lfunc_gep11 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: bl externCall +; CHECK-S-NEXT: nop +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %0 = load i32, i32* @externGlobalVar, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @externCall(i32 signext %add) + ret i32 %call +} + +define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnamed_addr { +; CHECK-S-LABEL: TailCallExternNoGlobal: +; CHECK-S: .localentry TailCallExternNoGlobal, 1 +; CHECK-S-NEXT: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: bl externCall@notoc +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 @externCall(i32 signext %a) + ret i32 %call +} + +define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) 
local_unnamed_addr { +; CHECK-S-LABEL: IndirectCall1: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep13@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep13@l +; CHECK-S: .localentry IndirectCall1, .Lfunc_lep13-.Lfunc_gep13 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha +; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: mtctr r12 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8 + %call = tail call signext i32 %0(i32 signext %add) + %1 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %1, %call + ret i32 %mul +} + +define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCall2: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep14@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep14@l +; CHECK-S: .localentry IndirectCall2, .Lfunc_lep14-.Lfunc_gep14 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha +; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5) +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: mtctr r12 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addis r4, r2, 
.LC0@toc@ha +; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: lwz r4, 0(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8 + %call = tail call signext i32 %0(i32 signext %add) + %1 = load i32, i32* @externGlobalVar, align 4 + %mul = mul nsw i32 %1, %call + ret i32 %mul +} + +define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCall3: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep15@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep15@l +; CHECK-S: .localentry IndirectCall3, .Lfunc_lep15-.Lfunc_gep15 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: mtctr r5 +; CHECK-S-NEXT: mr r12, r5 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha +; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: mullw r3, r4, r3 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %add = add nsw i32 %b, %a + %call = tail call signext i32 %call_param(i32 signext %add) + %0 = load i32, i32* @globalVar, align 4 + %mul = mul nsw i32 %0, %call + ret i32 %mul +} + +define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCallNoGlobal: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep16@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep16@l +; CHECK-S: .localentry 
IndirectCallNoGlobal, .Lfunc_lep16-.Lfunc_gep16 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: .cfi_def_cfa_offset 48 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: .cfi_offset r30, -16 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -48(r1) +; CHECK-S-NEXT: mtctr r5 +; CHECK-S-NEXT: mr r12, r5 +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: mr r30, r4 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: add r3, r3, r30 +; CHECK-S-NEXT: extsw r3, r3 +; CHECK-S-NEXT: addi r1, r1, 48 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 %call_param(i32 signext %a) + %add = add nsw i32 %call, %b + ret i32 %add +} + +define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr { +; CHECK-S-LABEL: IndirectCallOnly: +; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep17@ha +; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep17@l +; CHECK-S: .localentry IndirectCallOnly, .Lfunc_lep17-.Lfunc_gep17 +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: mflr r0 +; CHECK-S-NEXT: std r0, 16(r1) +; CHECK-S-NEXT: stdu r1, -32(r1) +; CHECK-S-NEXT: std r2, 24(r1) +; CHECK-S-NEXT: .cfi_def_cfa_offset 32 +; CHECK-S-NEXT: .cfi_offset lr, 16 +; CHECK-S-NEXT: mtctr r4 +; CHECK-S-NEXT: mr r12, r4 +; CHECK-S-NEXT: bctrl +; CHECK-S-NEXT: ld 2, 24(r1) +; CHECK-S-NEXT: addi r1, r1, 32 +; CHECK-S-NEXT: ld r0, 16(r1) +; CHECK-S-NEXT: mtlr r0 +; CHECK-S-NEXT: blr +entry: + %call = tail call signext i32 %call_param(i32 signext %a) + ret i32 %call +} + +attributes #0 = { noinline } + diff --git a/llvm/test/MC/PowerPC/ppc64-localentry-error1.s b/llvm/test/MC/PowerPC/ppc64-localentry-error1.s index c028da8..3c54606 100644 --- a/llvm/test/MC/PowerPC/ppc64-localentry-error1.s +++ b/llvm/test/MC/PowerPC/ppc64-localentry-error1.s @@ -1,11 
+1,11 @@ -# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s -# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s sym: .localentry sym, 123 -# CHECK: LLVM ERROR: .localentry expression cannot be encoded. +# CHECK: error: .localentry expression is not a valid power of 2. diff --git a/llvm/test/MC/PowerPC/ppc64-localentry-error2.s b/llvm/test/MC/PowerPC/ppc64-localentry-error2.s index 89a30ee..8603dfb 100644 --- a/llvm/test/MC/PowerPC/ppc64-localentry-error2.s +++ b/llvm/test/MC/PowerPC/ppc64-localentry-error2.s @@ -1,12 +1,12 @@ -# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s -# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t +# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t # RUN: FileCheck < %t %s .globl remote_sym sym: .localentry sym, remote_sym -# CHECK: LLVM ERROR: .localentry expression must be absolute. +# CHECK: error: .localentry expression must be absolute. -- 2.7.4