From 15e27b0b6d9d51362fad85dbe95ac5b3fadf0a06 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Fri, 20 Sep 2019 08:57:46 +0000 Subject: [PATCH] [MachinePipeliner] Improve the TargetInstrInfo API analyzeLoop/reduceLoopCount The way MachinePipeliner uses these target hooks is stateful - we reduce trip count by one per call to reduceLoopCount. It's a little overfit for hardware loops, where we don't have to worry about stitching a loop induction variable across prologs and epilogs (the induction variable is implicit). This patch introduces a new API: /// Analyze loop L, which must be a single-basic-block loop, and if the /// conditions can be understood enough produce a PipelinerLoopInfo object. virtual std::unique_ptr analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const; The return value is expected to be an implementation of the abstract class: /// Object returned by analyzeLoopForPipelining. Allows software pipelining /// implementations to query attributes of the loop being pipelined. class PipelinerLoopInfo { public: virtual ~PipelinerLoopInfo(); /// Return true if the given instruction should not be pipelined and should /// be ignored. An example could be a loop comparison, or induction variable /// update with no users being pipelined. virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; /// Create a condition to determine if the trip count of the loop is greater /// than TC. /// /// If the trip count is statically known to be greater than TC, return /// true. If the trip count is statically known to be not greater than TC, /// return false. Otherwise return nullopt and fill out Cond with the test /// condition. virtual Optional createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, SmallVectorImpl &Cond) = 0; /// Modify the loop such that the trip count is /// OriginalTC + TripCountAdjust. virtual void adjustTripCount(int TripCountAdjust) = 0; /// Called when the loop's preheader has been modified to NewPreheader. virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0; /// Called when the loop is being removed. virtual void disposed() = 0; }; The Pipeliner (ModuloSchedule.cpp) can use this object to modify the loop while allowing the target to hold its own state across all calls. This API, in particular the disjunction of creating a trip count check condition and adjusting the loop, improves the code quality in ModuloSchedule.cpp. llvm-svn: 372376 --- llvm/include/llvm/CodeGen/ModuloSchedule.h | 2 + llvm/include/llvm/CodeGen/TargetInstrInfo.h | 44 ++++++++ llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +- llvm/lib/CodeGen/ModuloSchedule.cpp | 40 +++---- llvm/lib/CodeGen/TargetInstrInfo.cpp | 2 + llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 152 +++++++++++++-------------- llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 19 +--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 139 ++++++++++++++---------- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 28 ++--- llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll | 4 +- 10 files changed, 238 insertions(+), 194 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h index 530cefd..36cc843 100644 --- a/llvm/include/llvm/CodeGen/ModuloSchedule.h +++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h @@ -62,6 +62,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include @@ -168,6 +169,7 @@ private: MachineBasicBlock *BB; MachineBasicBlock *Preheader; MachineBasicBlock *NewKernel = nullptr; + std::unique_ptr LoopInfo; /// Map for each register and the max difference between its uses and def. /// The first element in the pair is the max difference in stages. The diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index a01d851..f16ec7a 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -662,6 +662,50 @@ public: BytesAdded); } + /// Object returned by analyzeLoopForPipelining. Allows software pipelining + /// implementations to query attributes of the loop being pipelined and to + /// apply target-specific updates to the loop once pipelining is complete. + class PipelinerLoopInfo { + public: + virtual ~PipelinerLoopInfo(); + /// Return true if the given instruction should not be pipelined and should + /// be ignored. An example could be a loop comparison, or induction variable + /// update with no users being pipelined. + virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; + + /// Create a condition to determine if the trip count of the loop is greater + /// than TC. + /// + /// If the trip count is statically known to be greater than TC, return + /// true. If the trip count is statically known to be not greater than TC, + /// return false. Otherwise return nullopt and fill out Cond with the test + /// condition. + virtual Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) = 0; + + /// Modify the loop such that the trip count is + /// OriginalTC + TripCountAdjust. + virtual void adjustTripCount(int TripCountAdjust) = 0; + + /// Called when the loop's preheader has been modified to NewPreheader. + virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0; + + /// Called when the loop is being removed. Any instructions in the preheader + /// should be removed. + /// + /// Once this function is called, no other functions on this object are + /// valid; the loop has been removed. + virtual void disposed() = 0; + }; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + virtual std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + return nullptr; + } + /// Analyze the loop code, return true if it cannot be understoo. Upon /// success, this function returns false and returns information about the /// induction variable and compare instruction used at the end. diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index fc16ebf..b3d97c6 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -326,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) { + if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { LLVM_DEBUG( dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); NumFailLoop++; diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 8bea403..aa5e86e 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -105,6 +105,9 @@ void ModuloScheduleExpander::expand() { } void ModuloScheduleExpander::generatePipelinedLoop() { + LoopInfo = TII->analyzeLoopForPipelining(BB); + assert(LoopInfo && "Must be able to analyze loop!"); + // Create a new basic block for the kernel and add it to the CFG. MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); @@ -847,10 +850,6 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &EpilogBBs, ValueMapTy *VRMap) { assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); - MachineInstr *IndVar; - MachineInstr *Cmp; - if (TII->analyzeLoop(*Schedule.getLoop(), IndVar, Cmp)) - llvm_unreachable("Must be able to analyze loop!"); MachineBasicBlock *LastPro = KernelBB; MachineBasicBlock *LastEpi = KernelBB; @@ -858,32 +857,20 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, // to the first prolog and the last epilog blocks. SmallVector PrevInsts; unsigned MaxIter = PrologBBs.size() - 1; - unsigned LC = UINT_MAX; - unsigned LCMin = UINT_MAX; for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { // Add branches to the prolog that go to the corresponding // epilog, and the fall-thru prolog/kernel block. MachineBasicBlock *Prolog = PrologBBs[j]; MachineBasicBlock *Epilog = EpilogBBs[i]; - // We've executed one iteration, so decrement the loop count and check for - // the loop end. - SmallVector Cond; - // Check if the LOOP0 has already been removed. If so, then there is no need - // to reduce the trip count. - if (LC != 0) - LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond, - PrevInsts, j, MaxIter); - - // Record the value of the first trip count, which is used to determine if - // branches and blocks can be removed for constant trip counts. - if (LCMin == UINT_MAX) - LCMin = LC; + SmallVector Cond; + Optional StaticallyGreater = + LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); unsigned numAdded = 0; - if (Register::isVirtualRegister(LC)) { + if (!StaticallyGreater.hasValue()) { Prolog->addSuccessor(Epilog); numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); - } else if (j >= LCMin) { + } else if (*StaticallyGreater == false) { Prolog->addSuccessor(Epilog); Prolog->removeSuccessor(LastPro); LastEpi->removeSuccessor(Epilog); @@ -894,10 +881,12 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, LastEpi->clear(); LastEpi->eraseFromParent(); } + if (LastPro == KernelBB) { + LoopInfo->disposed(); + NewKernel = nullptr; + } LastPro->clear(); LastPro->eraseFromParent(); - if (LastPro == KernelBB) - NewKernel = nullptr; } else { numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); removePhis(Epilog, Prolog); @@ -909,6 +898,11 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, I != E && numAdded > 0; ++I, --numAdded) updateInstruction(&*I, false, j, 0, VRMap); } + + if (NewKernel) { + LoopInfo->setPreheader(PrologBBs[MaxIter]); + LoopInfo->adjustTripCount(-(MaxIter + 1)); + } } /// Return true if we can compute the amount the instruction changes diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 0406649..11872e1 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1257,3 +1257,5 @@ bool TargetInstrInfo::getInsertSubregInputs( InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); return true; } + +TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 97318a8..f9d464b 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -674,86 +674,84 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -/// Analyze the loop code to find the loop induction variable and compare used -/// to compute the number of iterations. Currently, we analyze loop that are -/// controlled using hardware loops. In this case, the induction variable -/// instruction is null. For all other cases, this function returns true, which -/// means we're unable to analyze it. -bool HexagonInstrInfo::analyzeLoop(MachineLoop &L, - MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const { - - MachineBasicBlock *LoopEnd = L.getBottomBlock(); - MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); - // We really "analyze" only hardware loops right now. - if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) { - IndVarInst = nullptr; - CmpInst = &*I; - return false; +class HexagonPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *Loop, *EndLoop; + MachineFunction *MF; + const HexagonInstrInfo *TII; + +public: + HexagonPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop) + : Loop(Loop), EndLoop(EndLoop), MF(Loop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()) {} + + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. + return MI == EndLoop; } - return true; -} -/// Generate code to reduce the loop iteration by one and check if the loop is -/// finished. Return the value/register of the new loop count. this function -/// assumes the nth iteration is peeled first. -unsigned HexagonInstrInfo::reduceLoopCount( - MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, - MachineInstr &Cmp, SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, unsigned Iter, - unsigned MaxIter) const { - // We expect a hardware loop currently. This means that IndVar is set - // to null, and the compare is the ENDLOOP instruction. - assert((!IndVar) && isEndLoopN(Cmp.getOpcode()) - && "Expecting a hardware loop"); - MachineFunction *MF = MBB.getParent(); - DebugLoc DL = Cmp.getDebugLoc(); - SmallPtrSet VisitedBBs; - MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), - Cmp.getOperand(0).getMBB(), VisitedBBs); - if (!Loop) - return 0; - // If the loop trip count is a compile-time value, then just change the - // value. - if (Loop->getOpcode() == Hexagon::J2_loop0i || - Loop->getOpcode() == Hexagon::J2_loop1i) { - int64_t Offset = Loop->getOperand(1).getImm(); - if (Offset <= 1) - Loop->eraseFromParent(); - else - Loop->getOperand(1).setImm(Offset - 1); - return Offset - 1; + Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + if (Loop->getOpcode() == Hexagon::J2_loop0r) { + Register LoopCount = Loop->getOperand(1).getReg(); + // Check if we're done with the loop. + unsigned Done = TII->createVR(MF, MVT::i1); + MachineInstr *NewCmp = BuildMI(&MBB, Loop->getDebugLoc(), + TII->get(Hexagon::C2_cmpgtui), Done) + .addReg(LoopCount) + .addImm(TC); + Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); + Cond.push_back(NewCmp->getOperand(0)); + return {}; + } + + int64_t TripCount = Loop->getOperand(1).getImm(); + return TripCount > TC; } - // The loop trip count is a run-time value. We generate code to subtract - // one from the trip count, and update the loop instruction. - assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction"); - Register LoopCount = Loop->getOperand(1).getReg(); - // Check if we're done with the loop. - unsigned LoopEnd = createVR(MF, MVT::i1); - MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd). - addReg(LoopCount).addImm(1); - unsigned NewLoopCount = createVR(MF, MVT::i32); - MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount). - addReg(LoopCount).addImm(-1); - const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); - // Update the previously generated instructions with the new loop counter. - for (SmallVectorImpl::iterator I = PrevInsts.begin(), - E = PrevInsts.end(); I != E; ++I) - (*I)->substituteRegister(LoopCount, NewLoopCount, 0, HRI); - PrevInsts.clear(); - PrevInsts.push_back(NewCmp); - PrevInsts.push_back(NewAdd); - // Insert the new loop instruction if this is the last time the loop is - // decremented. - if (Iter == MaxIter) - BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)). - addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount); - // Delete the old loop instruction. - if (Iter == 0) - Loop->eraseFromParent(); - Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); - Cond.push_back(NewCmp->getOperand(0)); - return NewLoopCount; + + void setPreheader(MachineBasicBlock *NewPreheader) override { + NewPreheader->splice(NewPreheader->getFirstTerminator(), Loop->getParent(), + Loop); + } + + void adjustTripCount(int TripCountAdjust) override { + // If the loop trip count is a compile-time value, then just change the + // value. + if (Loop->getOpcode() == Hexagon::J2_loop0i || + Loop->getOpcode() == Hexagon::J2_loop1i) { + int64_t TripCount = Loop->getOperand(1).getImm() + TripCountAdjust; + assert(TripCount > 0 && "Can't create an empty or negative loop!"); + Loop->getOperand(1).setImm(TripCount); + return; + } + + // The loop trip count is a run-time value. We generate code to subtract + // one from the trip count, and update the loop instruction. + Register LoopCount = Loop->getOperand(1).getReg(); + Register NewLoopCount = TII->createVR(MF, MVT::i32); + BuildMI(*Loop->getParent(), Loop, Loop->getDebugLoc(), + TII->get(Hexagon::A2_addi), NewLoopCount) + .addReg(LoopCount) + .addImm(TripCountAdjust); + Loop->getOperand(1).setReg(NewLoopCount); + } + + void disposed() override { Loop->eraseFromParent(); } +}; + +std::unique_ptr +HexagonInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + // We really "analyze" only hardware loops right now. + MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); + + if (I != LoopBB->end() && isEndLoopN(I->getOpcode())) { + SmallPtrSet VisitedBBs; + MachineInstr *LoopInst = findLoopInstr( + LoopBB, I->getOpcode(), I->getOperand(0).getMBB(), VisitedBBs); + if (LoopInst) + return std::make_unique(LoopInst, &*I); + } + return nullptr; } bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index e0a999d..e863400 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -129,21 +129,10 @@ public: const DebugLoc &DL, int *BytesAdded = nullptr) const override; - /// Analyze the loop code, return true if it cannot be understood. Upon - /// success, this function returns false and returns information about the - /// induction variable and compare instruction used at the end. - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the nth iteration is peeled first. - unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; /// Return true if it's profitable to predicate /// instructions with accumulated instruction latency of "NumCycles" diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index e4540d6..455fbdf 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3930,21 +3930,92 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ)); } -bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const { - MachineBasicBlock *LoopEnd = L.getBottomBlock(); - MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); - // We really "analyze" only CTR loops right now. - if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) { - IndVarInst = nullptr; - CmpInst = &*I; - return false; +class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *Loop, *EndLoop, *LoopCount; + MachineFunction *MF; + const TargetInstrInfo *TII; + +public: + PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop, + MachineInstr *LoopCount) + : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount), + MF(Loop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()) {} + + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. + return MI == EndLoop; + } + + Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + bool IsConstantTripCount = + LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI; + if (!IsConstantTripCount) { + // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, + // so we don't need to generate any thing here. + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(MachineOperand::CreateReg( + MF->getSubtarget().isPPC64() ? PPC::CTR8 : PPC::CTR, + true)); + return {}; + } + + int64_t TripCount = LoopCount->getOperand(1).getImm(); + return TripCount > TC; } - return true; + + void setPreheader(MachineBasicBlock *NewPreheader) override { + // Do nothing. We want the LOOP setup instruction to stay in the *old* + // preheader, so we can use BDZ in the prologs to adapt the loop trip count. + } + + void adjustTripCount(int TripCountAdjust) override { + // If the loop trip count is a compile-time value, then just change the + // value. + if (LoopCount->getOpcode() == PPC::LI8 || + LoopCount->getOpcode() == PPC::LI) { + int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust; + LoopCount->getOperand(1).setImm(TripCount); + return; + } + + // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, + // so we don't need to generate any thing here. + } + + void disposed() override { + Loop->eraseFromParent(); + // Ensure the loop setup instruction is deleted too. + LoopCount->eraseFromParent(); + } +}; + +std::unique_ptr +PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + // We really "analyze" only hardware loops right now. + MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); + MachineBasicBlock *Preheader = *LoopBB->pred_begin(); + if (Preheader == LoopBB) + Preheader = *std::next(LoopBB->pred_begin()); + MachineFunction *MF = Preheader->getParent(); + + if (I != LoopBB->end() && isBDNZ(I->getOpcode())) { + SmallPtrSet Visited; + if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) { + Register LoopCountReg = LoopInst->getOperand(0).getReg(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); + return std::make_unique(LoopInst, &*I, LoopCount); + } + } + return nullptr; } -MachineInstr * -PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { +MachineInstr *PPCInstrInfo::findLoopInstr( + MachineBasicBlock &PreHeader, + SmallPtrSet &Visited) const { unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop); @@ -3955,50 +4026,6 @@ PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { return nullptr; } -unsigned PPCInstrInfo::reduceLoopCount( - MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, - MachineInstr &Cmp, SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, unsigned Iter, - unsigned MaxIter) const { - // We expect a hardware loop currently. This means that IndVar is set - // to null, and the compare is the ENDLOOP instruction. - assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop"); - MachineFunction *MF = MBB.getParent(); - DebugLoc DL = Cmp.getDebugLoc(); - MachineInstr *Loop = findLoopInstr(PreHeader); - if (!Loop) - return 0; - Register LoopCountReg = Loop->getOperand(0).getReg(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); - - if (!LoopCount) - return 0; - // If the loop trip count is a compile-time value, then just change the - // value. - if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) { - int64_t Offset = LoopCount->getOperand(1).getImm(); - if (Offset <= 1) { - LoopCount->eraseFromParent(); - Loop->eraseFromParent(); - return 0; - } - LoopCount->getOperand(1).setImm(Offset - 1); - return Offset - 1; - } - - // The loop trip count is a run-time value. - // We need to subtract one from the trip count, - // and insert branch later to check if we're done with the loop. - - // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, - // so we don't need to generate any thing here. - Cond.push_back(MachineOperand::CreateImm(0)); - Cond.push_back(MachineOperand::CreateReg( - Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true)); - return LoopCountReg; -} - // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. bool PPCInstrInfo::getMemOperandWithOffsetWidth( diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 5150650..13eecdf 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -486,26 +486,14 @@ public: /// On PPC, we have two instructions used to set-up the hardware loop /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8) /// instructions to indicate the end of a loop. - MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const; - - /// Analyze the loop code to find the loop induction variable and compare used - /// to compute the number of iterations. Currently, we analyze loop that are - /// controlled using hardware loops. In this case, the induction variable - /// instruction is null. For all other cases, this function returns true, - /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will - /// return new values when we can analyze the readonly loop \p L, otherwise, - /// nothing got changed - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the last iteration is peeled first. - unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + MachineInstr * + findLoopInstr(MachineBasicBlock &PreHeader, + SmallPtrSet &Visited) const; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; }; } diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll index 8cad174..89a04d9 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll @@ -7,8 +7,8 @@ ; CHECK: if ({{.*}}) jump ; CHECK: [[VREG:v([0-9]+)]]{{.*}} = {{.*}}vmem(r{{[0-9]+}}++#1) -; CHECK: if ({{.*}}) {{jump|jump:nt}} [[EPLOG1:(.*)]] -; CHECK: if ({{.*}}) {{jump|jump:nt}} [[EPLOG:(.*)]] +; CHECK: if ({{.*}}) {{jump|jump:nt|jump:t}} [[EPLOG1:(.*)]] +; CHECK: if ({{.*}}) {{jump|jump:nt|jump:t}} [[EPLOG:(.*)]] ; CHECK: [[EPLOG]]: ; CHECK: [[VREG1:v([0-9]+)]] = [[VREG]] ; CHECK: [[VREG]] = v{{[0-9]+}} -- 2.7.4