From fca55129b16251c89f2cebaf4ea322da58609fc8 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 24 Jul 2018 17:42:11 +0000 Subject: [PATCH] [MachineOutliner][NFC] Make Candidates own their call information Before this, TCI contained all the call information for each Candidate. This moves that information onto the Candidates. As a result, each Candidate can now supply how it ought to be called. Thus, Candidates will be able to, say, call the same function in cheaper ways when possible. This also removes that information from TCI, since it's no longer used there. A follow-up patch for the AArch64 outliner will demonstrate this. llvm-svn: 337840 --- llvm/include/llvm/CodeGen/MachineOutliner.h | 44 ++++++++++++++---------- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 2 +- llvm/lib/CodeGen/MachineOutliner.cpp | 2 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 41 +++++++++++++--------- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 2 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 51 +++++++++++++++------------- llvm/lib/Target/X86/X86InstrInfo.h | 2 +- 7 files changed, 83 insertions(+), 61 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 7391358..d12fedc 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -39,27 +39,18 @@ struct TargetCostInfo { /// widely in size, so just counting the instructions isn't very useful.) unsigned SequenceSize; - /// Number of instructions to call an outlined function for this candidate. - unsigned CallOverhead; - /// Number of instructions to construct an outlined function frame /// for this candidate. unsigned FrameOverhead; /// Represents the specific instructions that must be emitted to - /// construct a call to this candidate. - unsigned CallConstructionID; - - /// Represents the specific instructions that must be emitted to /// construct a frame for this candidate's outlined function. unsigned FrameConstructionID; TargetCostInfo() {} - TargetCostInfo(unsigned SequenceSize, unsigned CallOverhead, - unsigned FrameOverhead, unsigned CallConstructionID, + TargetCostInfo(unsigned SequenceSize, unsigned FrameOverhead, unsigned FrameConstructionID) - : SequenceSize(SequenceSize), CallOverhead(CallOverhead), - FrameOverhead(FrameOverhead), CallConstructionID(CallConstructionID), + : SequenceSize(SequenceSize), FrameOverhead(FrameOverhead), FrameConstructionID(FrameConstructionID) {} }; @@ -82,6 +73,10 @@ private: // The basic block that contains this Candidate. MachineBasicBlock *MBB; + /// Cost of calling an outlined function from this point as defined by the + /// target. + unsigned CallOverhead; + public: /// The index of this \p Candidate's \p OutlinedFunction in the list of /// \p OutlinedFunctions. @@ -90,8 +85,9 @@ public: /// Set to false if the candidate overlapped with another candidate. bool InCandidateList = true; - /// Contains all target-specific information for this \p Candidate. - TargetCostInfo TCI; + /// Identifier denoting the instructions to emit to call an outlined function + /// from this point. Defined by the target. + unsigned CallConstructionID; /// Contains physical register liveness information for the MBB containing /// this \p Candidate. @@ -109,6 +105,18 @@ public: /// Return the end index of this candidate. unsigned getEndIdx() const { return StartIdx + Len - 1; } + /// Set the CallConstructionID and CallOverhead of this candidate to CID and + /// CO respectively. + void setCallInfo(unsigned CID, unsigned CO) { + CallConstructionID = CID; + CallOverhead = CO; + } + + /// Returns the call overhead of this candidate if it is in the list. + unsigned getCallOverhead() const { + return InCandidateList ? CallOverhead : 0; + } + MachineBasicBlock::iterator &front() { return FirstInst; } MachineBasicBlock::iterator &back() { return LastInst; } MachineFunction *getMF() const { return MBB->getParent(); } @@ -193,8 +201,10 @@ public: /// Return the number of bytes it would take to outline this /// function. unsigned getOutliningCost() { - return (OccurrenceCount * TCI.CallOverhead) + TCI.SequenceSize + - TCI.FrameOverhead; + unsigned CallOverhead = 0; + for (std::shared_ptr &C : Candidates) + CallOverhead += C->getCallOverhead(); + return CallOverhead + TCI.SequenceSize + TCI.FrameOverhead; } /// Return the size in bytes of the unoutlined sequences. @@ -217,10 +227,8 @@ public: Candidates.push_back(std::make_shared(C)); unsigned B = getBenefit(); - for (std::shared_ptr &C : Candidates) { + for (std::shared_ptr &C : Candidates) C->Benefit = B; - C->TCI = TCI; - } } }; } // namespace outliner diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index efed728..3b270ab 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1637,7 +1637,7 @@ public: virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::TargetCostInfo &TCI) const { + const outliner::Candidate &C) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 9c68591..99cc5f7 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -1279,7 +1279,7 @@ bool MachineOutliner::outline( const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert a call to the new function and erase the old sequence. - auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C.TCI); + auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C); // If the caller tracks liveness, then we need to make sure that anything // we outline doesn't break liveness assumptions. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 9c7782d..9d16348 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4936,10 +4936,6 @@ AArch64InstrInfo::getOutliningCandidateInfo( 0, [this](unsigned Sum, const MachineInstr &MI) { return Sum + getInstSizeInBytes(MI); }); - unsigned CallID = MachineOutlinerDefault; - unsigned FrameID = MachineOutlinerDefault; - unsigned NumBytesForCall = 12; - unsigned NumBytesToCreateFrame = 4; // Compute liveness information for each candidate. const TargetRegisterInfo &TRI = getRegisterInfo(); @@ -4976,21 +4972,29 @@ AArch64InstrInfo::getOutliningCandidateInfo( unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + // Helper lambda which sets call information for every candidate. + auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + unsigned FrameID = MachineOutlinerDefault; + unsigned NumBytesToCreateFrame = 4; + // If the last instruction in any candidate is a terminator, then we should // tail call all of the candidates. if (RepeatedSequenceLocs[0].back()->isTerminator()) { - CallID = MachineOutlinerTailCall; FrameID = MachineOutlinerTailCall; - NumBytesForCall = 4; NumBytesToCreateFrame = 0; + SetCandidateCallInfo(MachineOutlinerTailCall, 4); } else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) { // FIXME: Do we need to check if the code after this uses the value of LR? - CallID = MachineOutlinerThunk; FrameID = MachineOutlinerThunk; - NumBytesForCall = 4; NumBytesToCreateFrame = 0; + SetCandidateCallInfo(MachineOutlinerThunk, 4); } // Make sure that LR isn't live on entry to this candidate. The only @@ -5002,10 +5006,16 @@ AArch64InstrInfo::getOutliningCandidateInfo( [](outliner::Candidate &C) { return C.LRU.available(AArch64::LR); })) { - CallID = MachineOutlinerNoLRSave; FrameID = MachineOutlinerNoLRSave; - NumBytesForCall = 4; NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerNoLRSave, 4); + } + + // LR is live, so we need to save it to the stack. + else { + FrameID = MachineOutlinerDefault; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerDefault, 12); } // Check if the range contains a call. These require a save + restore of the @@ -5024,8 +5034,7 @@ AArch64InstrInfo::getOutliningCandidateInfo( RepeatedSequenceLocs[0].back()->isCall()) NumBytesToCreateFrame += 8; - return outliner::TargetCostInfo(SequenceSize, NumBytesForCall, - NumBytesToCreateFrame, CallID, FrameID); + return outliner::TargetCostInfo(SequenceSize, NumBytesToCreateFrame, FrameID); } bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( @@ -5420,10 +5429,10 @@ void AArch64InstrInfo::buildOutlinedFrame( MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF, const outliner::TargetCostInfo &TCI) const { + MachineFunction &MF, const outliner::Candidate &C) const { // Are we tail calling? - if (TCI.CallConstructionID == MachineOutlinerTailCall) { + if (C.CallConstructionID == MachineOutlinerTailCall) { // If yes, then we can just branch to the label. It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) .addGlobalAddress(M.getNamedValue(MF.getName())) @@ -5432,8 +5441,8 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( } // Are we saving the link register? - if (TCI.CallConstructionID == MachineOutlinerNoLRSave || - TCI.CallConstructionID == MachineOutlinerThunk) { + if (C.CallConstructionID == MachineOutlinerNoLRSave || + C.CallConstructionID == MachineOutlinerThunk) { // No, so just insert the call. It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) .addGlobalAddress(M.getNamedValue(MF.getName()))); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 980f96b..7249657 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -248,7 +248,7 @@ public: MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::TargetCostInfo &TCI) const override; + const outliner::Candidate &C) const override; /// Returns true if the instruction sets to an immediate value that can be /// executed more efficiently. bool isExynosResetFast(const MachineInstr &MI) const; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index c5d10fc..e830123 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7557,30 +7557,35 @@ enum MachineOutlinerClass { MachineOutlinerTailCall }; -outliner::TargetCostInfo -X86InstrInfo::getOutliningCandidateInfo( - std::vector &RepeatedSequenceLocs) const { - unsigned SequenceSize = std::accumulate( - RepeatedSequenceLocs[0].front(), std::next(RepeatedSequenceLocs[0].back()), - 0, [](unsigned Sum, const MachineInstr &MI) { - // FIXME: x86 doesn't implement getInstSizeInBytes, so we can't - // tell the cost. Just assume each instruction is one byte. - if (MI.isDebugInstr() || MI.isKill()) - return Sum; - return Sum + 1; - }); +outliner::TargetCostInfo X86InstrInfo::getOutliningCandidateInfo( + std::vector &RepeatedSequenceLocs) const { + unsigned SequenceSize = + std::accumulate(RepeatedSequenceLocs[0].front(), + std::next(RepeatedSequenceLocs[0].back()), 0, + [](unsigned Sum, const MachineInstr &MI) { + // FIXME: x86 doesn't implement getInstSizeInBytes, so + // we can't tell the cost. Just assume each instruction + // is one byte. + if (MI.isDebugInstr() || MI.isKill()) + return Sum; + return Sum + 1; + }); // FIXME: Use real size in bytes for call and ret instructions. - if (RepeatedSequenceLocs[0].back()->isTerminator()) + if (RepeatedSequenceLocs[0].back()->isTerminator()) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(MachineOutlinerTailCall, 1); + return outliner::TargetCostInfo(SequenceSize, - 1, // Number of bytes to emit call. - 0, // Number of bytes to emit frame. - MachineOutlinerTailCall, // Type of call. - MachineOutlinerTailCall // Type of frame. - ); - - return outliner::TargetCostInfo(SequenceSize, 1, 1, MachineOutlinerDefault, - MachineOutlinerDefault); + 0, // Number of bytes to emit frame. + MachineOutlinerTailCall // Type of frame. + ); + } + + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(MachineOutlinerDefault, 1); + + return outliner::TargetCostInfo(SequenceSize, 1, MachineOutlinerDefault); } bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, @@ -7683,9 +7688,9 @@ MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::TargetCostInfo &TCI) const { + const outliner::Candidate &C) const { // Is it a tail call? - if (TCI.CallConstructionID == MachineOutlinerTailCall) { + if (C.CallConstructionID == MachineOutlinerTailCall) { // Yes, just insert a JMP. It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::JMP_1)) diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index b97acf6..b1e510a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -559,7 +559,7 @@ public: MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const outliner::TargetCostInfo &TCI) const override; + const outliner::Candidate &C) const override; protected: /// Commutes the operands in the given instruction by changing the operands -- 2.7.4