From 408e300933f2c5e8aaffb2539e47b89a2112b81b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 15 Jul 2016 21:34:02 +0000 Subject: [PATCH] [Hexagon] Handle instruction latency for 0 or 2 cycles The Hexagon schedulers need to handle instructions with a latency of 0 or 2 more accurately. The problem, in v60, is that a dependence between two instructions with a 2 cycle latency can use a .cur version of the source to achieve a 0 cycle latency when the use is in the same packet. Any other use must be at least 2 packets later, or a stall occurs. In other words, the compiler does not want to schedule the dependent instructions 1 cycle later. To achieve this, the latency adjustment code allows only a single dependence to have a zero latency. All other instructions have the other value, which is typically 2 cycles. We use a heuristic to determine which instruction gets the 0 latency. The Hexagon machine scheduler was also changed to increase the cost associated with 0 latency dependences that can be scheduled in the same packet. Patch by Brendon Cahoon. llvm-svn: 275625 --- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 32 +++++ llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 4 + .../lib/Target/Hexagon/HexagonMachineScheduler.cpp | 22 +++ llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 159 +++++++++++++++++++++ llvm/lib/Target/Hexagon/HexagonSubtarget.h | 10 ++ 5 files changed, 227 insertions(+) diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 72cffe2..fe9f97d 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2515,6 +2515,28 @@ bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { } +// Schedule this ASAP. 
+bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr *MI1, + const MachineInstr *MI2) const { + if (!MI1 || !MI2) + return false; + if (mayBeCurLoad(MI1)) { + // Return true if MI1's operand-0 register appears among MI2's operands. + unsigned DstReg = MI1->getOperand(0).getReg(); + int N = MI2->getNumOperands(); + for (int I = 0; I < N; I++) + if (MI2->getOperand(I).isReg() && DstReg == MI2->getOperand(I).getReg()) + return true; + } + if (mayBeNewStore(MI2)) + if (MI2->getOpcode() == Hexagon::V6_vS32b_pi) + if (MI1->getOperand(0).isReg() && MI2->getOperand(3).isReg() && + MI1->getOperand(0).getReg() == MI2->getOperand(3).getReg()) + return true; + return false; +} + + bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { if (!MI) return false; @@ -2839,6 +2861,16 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { } +// Return true if latency must be added between two V60 vector instructions. +bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr *MI1, + const MachineInstr *MI2) const { + if (isV60VectorInstruction(MI1) && isV60VectorInstruction(MI2)) + if (!isVecUsableNextPacket(MI1, MI2)) + return true; + return false; +} + + /// \brief Can these instructions execute at the same time in a bundle. 
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, const MachineInstr *Second) const { diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 74cbc62..66b6883 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -314,6 +314,8 @@ public: bool isTC2(const MachineInstr *MI) const; bool isTC2Early(const MachineInstr *MI) const; bool isTC4x(const MachineInstr *MI) const; + bool isToBeScheduledASAP(const MachineInstr *MI1, + const MachineInstr *MI2) const; bool isV60VectorInstruction(const MachineInstr *MI) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; @@ -323,6 +325,8 @@ public: const MachineInstr *ConsMI) const; bool isZeroExtendingLoad(const MachineInstr &MI) const; + bool addLatencyToSchedule(const MachineInstr *MI1, + const MachineInstr *MI2) const; bool canExecuteInBundle(const MachineInstr *First, const MachineInstr *Second) const; bool hasEHLabel(const MachineBasicBlock *B) const; diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 2ff1b53..d1f0013 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -609,6 +609,28 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, auto &QST = DAG->MF.getSubtarget(); auto &QII = *QST.getInstrInfo(); + // Give preference to a zero latency instruction if the dependent + // instruction is in the current packet. 
+ if (Q.getID() == TopQID) { + for (const SDep &PI : SU->Preds) { + if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && + PI.getLatency() == 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount += PriorityTwo; + DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } else { + for (const SDep &SI : SU->Succs) { + if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && + SI.getLatency() == 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount += PriorityTwo; + DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } + // Give less preference to an instruction that will cause a stall with // an instruction in the previous packet. if (QII.isV60VectorInstruction(Instr)) { diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 0771cbebb..8d0571e 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -51,6 +51,16 @@ static cl::opt EnableHexagonHVX("enable-hexagon-hvx", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable Hexagon Vector eXtensions")); +static cl::opt EnableTCLatencySched("enable-tc-latency-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt EnableDotCurSched("enable-cur-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true), + cl::desc("Enable the scheduler to generate .cur")); + +static cl::opt EnableVecFrwdSched("enable-evec-frwd-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + static cl::opt DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling")); @@ -185,3 +195,152 @@ bool HexagonSubtarget::enableSubRegLiveness() const { return EnableSubregLiveness; } +// This helper function is responsible for increasing the latency only. 
+void HexagonSubtarget::updateLatency(MachineInstr *SrcInst, + MachineInstr *DstInst, SDep &Dep) const { + if (!hasV60TOps()) + return; + + auto &QII = static_cast(*getInstrInfo()); + + if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) { + // Vec frwd scheduling. + Dep.setLatency(Dep.getLatency() + 1); + } else if (useBSBScheduling() && + QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) { + // BSB scheduling. + Dep.setLatency(Dep.getLatency() + 1); + } else if (EnableTCLatencySched) { + // TClass latency scheduling. + // Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B. + if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst)) + if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst)) + Dep.setLatency(Dep.getLatency() + 1); + } +} + +// Return true if these are the best two instructions to schedule +// together with a zero latency. Only one dependence should have a zero +// latency. If there are multiple choices, choose the best, and change +// the others, if needed. +bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, + const HexagonInstrInfo *TII) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + // Check if the instructions can be scheduled together. + assert((TII->isToBeScheduledASAP(SrcInst, DstInst) || + TII->canExecuteInBundle(SrcInst, DstInst)) && + "Unable to schedule instructions together."); + + if (SrcInst->isPHI() || DstInst->isPHI()) + return false; + + // Look for the best candidate to schedule together. If there are + // multiple choices, then the best candidate is the one with the + // greatest height, i.e., longest critical path. 
+ SUnit *Best = Dst; + SUnit *PrevBest = nullptr; + for (const SDep &SI : Src->Succs) { + if (!SI.isAssignedRegDep()) + continue; + if (SI.getLatency() == 0) + PrevBest = SI.getSUnit(); + MachineInstr *Inst = SI.getSUnit()->getInstr(); + if (!TII->isToBeScheduledASAP(SrcInst, Inst) || + !TII->canExecuteInBundle(SrcInst, Inst)) + continue; + if (SI.getSUnit()->getHeight() > Best->getHeight()) + Best = SI.getSUnit(); + } + + // Reassign the latency for the previous best, which requires setting + // the dependence edge in both directions. + if (Best != PrevBest) { + for (SDep &SI : Src->Succs) { + if (SI.getSUnit() != PrevBest) + continue; + SI.setLatency(1); + updateLatency(SrcInst, DstInst, SI); + // Update the latency of the predecessor edge too. + for (SDep &PI : PrevBest->Preds) { + if (PI.getSUnit() != Src || !PI.isAssignedRegDep()) + continue; + PI.setLatency(1); + updateLatency(SrcInst, DstInst, PI); + } + } + } + + return Best == Dst; +} + +// Update the latency of a Phi when the Phi bridges two instructions that +// require a multi-cycle latency. +void HexagonSubtarget::changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, + SDep &Dep) const { + if (!SrcInst->isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) + return; + + for (const SDep &PI : Dst->Preds) { + if (PI.getLatency() != 0) + continue; + Dep.setLatency(2); + break; + } +} + +/// \brief Perform target specific adjustments to the latency of a schedule +/// dependency. +void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, + SDep &Dep) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + if (!Src->isInstr() || !Dst->isInstr()) + return; + + const HexagonInstrInfo *QII = static_cast(getInstrInfo()); + + // Instructions with .new operands have zero latency. 
+ if (QII->canExecuteInBundle(SrcInst, DstInst) && + isBestZeroLatency(Src, Dst, QII)) { + Dep.setLatency(0); + return; + } + + if (!hasV60TOps()) + return; + + // Don't adjust the latency of the post-increment part of the instruction. + if (QII->isPostIncrement(SrcInst) && Dep.isAssignedRegDep()) { + if (SrcInst->mayStore()) + return; + if (Dep.getReg() != SrcInst->getOperand(0).getReg()) + return; + } else if (QII->isPostIncrement(DstInst) && Dep.getKind() == SDep::Anti) { + if (DstInst->mayStore()) + return; + if (Dep.getReg() != DstInst->getOperand(0).getReg()) + return; + } else if (QII->isPostIncrement(DstInst) && DstInst->mayStore() && + Dep.isAssignedRegDep()) { + MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); + if (Op.isReg() && Dep.getReg() != Op.getReg()) + return; + } + + // Check if we need to change any of the latency values when Phis are added. + if (useBSBScheduling() && SrcInst->isPHI()) { + changePhiLatency(SrcInst, Dst, Dep); + return; + } + + // Try to schedule uses near definitions to generate .cur. + if (EnableDotCurSched && QII->isToBeScheduledASAP(SrcInst, DstInst) && + isBestZeroLatency(Src, Dst, QII)) { + Dep.setLatency(0); + return; + } + + updateLatency(SrcInst, DstInst, Dep); +} + diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 1922df1..143f1d3 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -127,6 +127,16 @@ public: void getPostRAMutations( std::vector> &Mutations) const override; + + /// \brief Perform target specific adjustments to the latency of a schedule + /// dependency. + void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; + +private: + // Helper function responsible for increasing the latency only. 
+ void updateLatency(MachineInstr *SrcInst, MachineInstr *DstInst, SDep &Dep) const; + bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII) const; + void changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, SDep &Dep) const; }; } // end namespace llvm -- 2.7.4