From b6c7fce67add2769cb5f3e07d4a70ae09dc12836 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Mon, 9 Sep 2019 13:17:55 +0000 Subject: [PATCH] [DFAPacketizer] Reapply: Track resources for packetized instructions Reapply with fix to reduce resources required by the compiler - use unsigned[2] instead of std::pair. This causes clang and gcc to compile the generated file multiple times faster, and hopefully will reduce the resource requirements on Visual Studio also. This fix is a little ugly, but it's clearly the same issue the previous author of DFAPacketizer faced (the previous tables also use unsigned[2], which is equally ugly). This patch allows the DFAPacketizer to be queried after a packet is formed to work out which resources were allocated to the packetized instructions. This is particularly important for targets that do their own bundle packing - it's not sufficient to know simply that instructions can share a packet; which slots are used is also required for encoding. This extends the emitter to emit a side-table containing resource usage diffs for each state transition. The packetizer maintains a set of all possible resource states in its current state. After packetization is complete, all remaining resource states are possible packetization strategies. The side-table is only ~500K for Hexagon, but the extra tracking is disabled by default (most uses of the packetizer like MachinePipeliner don't care and don't need the extra maintained state). 
Differential Revision: https://reviews.llvm.org/D66936 llvm-svn: 371399 --- llvm/include/llvm/CodeGen/DFAPacketizer.h | 44 ++++++- llvm/lib/CodeGen/DFAPacketizer.cpp | 65 ++++++++-- llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 11 ++ llvm/test/CodeGen/Hexagon/packetizer-resources.ll | 29 +++++ llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 140 ++++++++++++++-------- 5 files changed, 229 insertions(+), 60 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/packetizer-resources.ll diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h index cf58ee0..dad19fe 100644 --- a/llvm/include/llvm/CodeGen/DFAPacketizer.h +++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h @@ -82,20 +82,53 @@ private: int CurrentState = 0; const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; + const unsigned (*DFAResourceTransitionTable)[2]; + const unsigned *DFAResourceTransitionEntryTable; // CachedTable is a map from to ToState. DenseMap CachedTable; + // CachedResourceTransitions is a map from to a list of + // resource transitions. + DenseMap> + CachedResourceTransitions; // Read the DFA transition table and update CachedTable. void ReadTable(unsigned state); + bool TrackResources = false; + // State for the current packet. Every entry is a possible packing of the + // bundle, indexed by cumulative resource state. Each entry is a list of the + // cumulative resource states after packing each instruction. For example if + // we pack I0: [0x4] and I1: [0x2] we will end up with: + // ResourceStates[0x6] = [0x4, 0x6] + DenseMap> ResourceStates; + public: DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], - const unsigned *SET); + const unsigned *SET, + const unsigned (*RTT)[2] = nullptr, + const unsigned *RTET = nullptr); // Reset the current state to make all resources available. 
void clearResources() { CurrentState = 0; + ResourceStates.clear(); + ResourceStates[0] = {}; + } + + // Set whether this packetizer should track not just whether instructions + // can be packetized, but also which functional units each instruction ends up + // using after packetization. + void setTrackResources(bool Track) { + if (Track != TrackResources) { + TrackResources = Track; + if (Track) { + CachedTable.clear(); + assert(DFAResourceTransitionEntryTable); + assert(DFAResourceTransitionTable); + } + } + assert(CurrentState == 0 && "Can only change TrackResources on an empty packetizer!"); } // Return the DFAInput for an instruction class. @@ -120,6 +153,15 @@ public: // current state to reflect that change. void reserveResources(MachineInstr &MI); + // Return the resources used by the InstIdx'th instruction added to this + // packet. The resources are returned as a bitvector of functional units. + // + // Note that a bundle may be packed in multiple valid ways. This function + // returns one arbitrary valid packing. + // + // Requires setTrackResources(true) to have been called. 
+ unsigned getUsedResources(unsigned InstIdx); + const InstrItineraryData *getInstrItins() const { return InstrItins; } }; diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index b99be5d..500ae68 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -23,6 +23,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -72,9 +73,11 @@ static DFAInput getDFAInsnInput(const std::vector &InsnClass) { // -------------------------------------------------------------------- DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, - const DFAStateInput (*SIT)[2], - const unsigned *SET): - InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { + const DFAStateInput (*SIT)[2], const unsigned *SET, + const unsigned (*RTT)[2], + const unsigned *RTET) + : InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET), + DFAResourceTransitionTable(RTT), DFAResourceTransitionEntryTable(RTET) { // Make sure DFA types are large enough for the number of terms & resources. static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)), @@ -82,6 +85,7 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, static_assert( (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); + clearResources(); } // Read the DFA transition table and update CachedTable. 
@@ -93,16 +97,26 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, // for the ith state // void DFAPacketizer::ReadTable(unsigned int state) { - unsigned ThisState = DFAStateEntryTable[state]; - unsigned NextStateInTable = DFAStateEntryTable[state+1]; + unsigned ThisStateIdx = DFAStateEntryTable[state]; + unsigned NextStateIdxInTable = DFAStateEntryTable[state + 1]; // Early exit in case CachedTable has already contains this // state's transitions. - if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0]))) + if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisStateIdx][0]))) return; - for (unsigned i = ThisState; i < NextStateInTable; i++) - CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = - DFAStateInputTable[i][1]; + for (unsigned TransitionIdx = ThisStateIdx; + TransitionIdx < NextStateIdxInTable; TransitionIdx++) { + auto TransitionPair = + UnsignPair(state, DFAStateInputTable[TransitionIdx][0]); + CachedTable[TransitionPair] = DFAStateInputTable[TransitionIdx][1]; + + if (TrackResources) { + unsigned I = DFAResourceTransitionEntryTable[TransitionIdx]; + unsigned E = DFAResourceTransitionEntryTable[TransitionIdx + 1]; + CachedResourceTransitions[TransitionPair] = makeArrayRef( + &DFAResourceTransitionTable[I], &DFAResourceTransitionTable[E]); + } + } } // Return the DFAInput for an instruction class. 
@@ -141,6 +155,16 @@ void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); + + if (TrackResources) { + DenseMap> NewResourceStates; + for (const auto &KV : CachedResourceTransitions[StateTrans]) { + assert(ResourceStates.count(KV[0])); + NewResourceStates[KV[1]] = ResourceStates[KV[0]]; + NewResourceStates[KV[1]].push_back(KV[1]); + } + ResourceStates = NewResourceStates; + } assert(CachedTable.count(StateTrans) != 0); CurrentState = CachedTable[StateTrans]; } @@ -159,6 +183,21 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) { reserveResources(&MID); } +unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) { + assert(TrackResources && "getUsedResources requires resource tracking!"); + // Assert that there is at least one example of a valid bundle format. + assert(!ResourceStates.empty() && "Invalid bundle!"); + SmallVectorImpl &RS = ResourceStates.begin()->second; + + // RS stores the cumulative resources used up to and including the I'th + // instruction. The 0th instruction is the base case. + if (InstIdx == 0) + return RS[0]; + // Return the difference between the cumulative resources used by InstIdx and + // its predecessor. 
+ return RS[InstIdx] ^ RS[InstIdx - 1]; +} + namespace llvm { // This class extends ScheduleDAGInstrs and overrides the schedule method @@ -210,6 +249,7 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, MachineLoopInfo &mli, AliasAnalysis *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + ResourceTracker->setTrackResources(true); VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } @@ -224,8 +264,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, LLVM_DEBUG({ if (!CurrentPacketMIs.empty()) { dbgs() << "Finalizing packet:\n"; - for (MachineInstr *MI : CurrentPacketMIs) - dbgs() << " * " << *MI; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } } }); if (CurrentPacketMIs.size() > 1) { diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c89c0b3..7024daf 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -1763,6 +1764,16 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndMI) { // Replace VLIWPacketizerList::endPacket(MBB, EndMI). 
+ LLVM_DEBUG({ + if (!CurrentPacketMIs.empty()) { + dbgs() << "Finalizing packet:\n"; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } + } + }); bool memShufDisabled = getmemShufDisabled(); if (memShufDisabled && !foundLSInPacket()) { diff --git a/llvm/test/CodeGen/Hexagon/packetizer-resources.ll b/llvm/test/CodeGen/Hexagon/packetizer-resources.ll new file mode 100644 index 0000000..9bd0cb1 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/packetizer-resources.ll @@ -0,0 +1,29 @@ +; RUN: llc -O2 -march=hexagon < %s -debug-only=packets 2>&1 | FileCheck %s +; REQUIRES: asserts + +; CHECK: Finalizing packet: +; CHECK-NEXT: * [res:0x8] renamable $r1 = S2_vsplatrb renamable $r0 +; CHECK-NEXT: * [res:0x4] renamable $d1 = S2_vsplatrh killed renamable $r0 + +target triple = "hexagon" + +; Function Attrs: nounwind readnone +define i64 @f0(i64 %a0) #0 { +b0: + %v0 = trunc i64 %a0 to i32 + %v1 = and i32 %v0, 65535 + %v2 = tail call i64 @llvm.hexagon.S2.vsplatrh(i32 %v1) + %v3 = and i32 %v0, 255 + %v4 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v3) + %v5 = sext i32 %v4 to i64 + %v6 = add nsw i64 %v5, %v2 + ret i64 %v6 +} + +; Function Attrs: nounwind readnone +declare i64 @llvm.hexagon.S2.vsplatrh(i32) #0 + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.S2.vsplatrb(i32) #0 + +attributes #0 = { nounwind readnone } diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index 19a6580..8ac187e 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -192,7 +192,14 @@ class State { const int stateNum; mutable bool isInitial; mutable std::set stateInfo; - typedef std::map, const State *> TransitionMap; + + struct TransitionInfo { + // Maps from a resource bitmask in this state to the equivalent resource + // bitmap in the 
transitioned-to state. This is a 1-to-N mapping. + std::vector> ResourceTransitions; + const State *S; + }; + using TransitionMap = std::map, TransitionInfo>; mutable TransitionMap Transitions; State(); @@ -221,9 +228,14 @@ class State { // PossibleStates is the set of valid resource states that ensue from valid // transitions. // - void AddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - std::set &PossibleStates) const; + // TransitionInfo maps from a resource bitmask B in this state to a resource + // bitmask B' in PossibleStates. This is a one-to-many (or none) mapping. + // + void AddInsnClass( + std::vector &InsnClass, + std::map &ComboBitToBitsMap, + std::set &PossibleStates, + std::vector> &TransitionInfo) const; // // AddInsnClassStages - Return all combinations of resource reservation @@ -231,16 +243,17 @@ class State { // which are possible from this state (PossibleStates). // void AddInsnClassStages(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - unsigned chkstage, unsigned numstages, - unsigned prevState, unsigned origState, - DenseSet &VisitedResourceStates, - std::set &PossibleStates) const; + std::map &ComboBitToBitsMap, + unsigned chkstage, unsigned numstages, + unsigned prevState, unsigned origState, + DenseSet &VisitedResourceStates) const; // - // addTransition - Add a transition from this state given the input InsnClass + // addTransition - Add a transition from this state given the input InsnClass. 
// - void addTransition(std::vector InsnClass, const State *To) const; + void addTransition( + std::vector InsnClass, const State *To, + const std::vector> &TransitionInfo) const; // // hasTransition - Returns true if there is a transition from this state @@ -329,11 +342,12 @@ State::State() : // // addTransition - Add a transition from this state given the input InsnClass // -void State::addTransition(std::vector InsnClass, const State *To) - const { +void State::addTransition( + std::vector InsnClass, const State *To, + const std::vector> &TransitionInfo) const { assert(!Transitions.count(InsnClass) && "Cannot have multiple transitions for the same input"); - Transitions[InsnClass] = To; + Transitions[InsnClass] = {TransitionInfo, To}; } // @@ -351,9 +365,11 @@ bool State::hasTransition(std::vector InsnClass) const { // PossibleStates is the set of valid resource states that ensue from valid // transitions. // -void State::AddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - std::set &PossibleStates) const { +void State::AddInsnClass( + std::vector &InsnClass, + std::map &ComboBitToBitsMap, + std::set &PossibleStates, + std::vector> &TransitionInfo) const { // // Iterate over all resource states in currentState. 
// @@ -362,25 +378,26 @@ void State::AddInsnClass(std::vector &InsnClass, for (std::set::iterator SI = stateInfo.begin(); SI != stateInfo.end(); ++SI) { - unsigned thisState = *SI; + unsigned ThisState = *SI; DenseSet VisitedResourceStates; - LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(thisState) + LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(ThisState) << "\n"); - AddInsnClassStages(InsnClass, ComboBitToBitsMap, - numstages - 1, numstages, - thisState, thisState, - VisitedResourceStates, PossibleStates); + AddInsnClassStages(InsnClass, ComboBitToBitsMap, numstages - 1, numstages, + ThisState, ThisState, VisitedResourceStates); + for (unsigned NewState : VisitedResourceStates) { + PossibleStates.insert(NewState); + TransitionInfo.emplace_back(ThisState, NewState); + } } } -void State::AddInsnClassStages(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - unsigned chkstage, unsigned numstages, - unsigned prevState, unsigned origState, - DenseSet &VisitedResourceStates, - std::set &PossibleStates) const { +void State::AddInsnClassStages( + std::vector &InsnClass, + std::map &ComboBitToBitsMap, unsigned chkstage, + unsigned numstages, unsigned prevState, unsigned origState, + DenseSet &VisitedResourceStates) const { assert((chkstage < numstages) && "AddInsnClassStages: stage out of range"); unsigned thisStage = InsnClass[chkstage]; @@ -438,7 +455,6 @@ void State::AddInsnClassStages(std::vector &InsnClass, if (ResultingResourceState != prevState) { if (VisitedResourceStates.count(ResultingResourceState) == 0) { VisitedResourceStates.insert(ResultingResourceState); - PossibleStates.insert(ResultingResourceState); LLVM_DEBUG(dbgs() << "\tResultingResourceState: 0x" << Twine::utohexstr(ResultingResourceState) << "\n"); @@ -456,10 +472,9 @@ void State::AddInsnClassStages(std::vector &InsnClass, // if (ResultingResourceState != prevState) { LLVM_DEBUG(dbgs() << "\n"); - AddInsnClassStages(InsnClass, ComboBitToBitsMap, - chkstage - 1, 
numstages, - ResultingResourceState, origState, - VisitedResourceStates, PossibleStates); + AddInsnClassStages(InsnClass, ComboBitToBitsMap, chkstage - 1, + numstages, ResultingResourceState, origState, + VisitedResourceStates); } else { LLVM_DEBUG(dbgs() << "\tSkipped Add - no resources available\n"); } @@ -578,17 +593,10 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, II = SI->Transitions.begin(), IE = SI->Transitions.end(); II != IE; ++II) { OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", " - << II->second->stateNum << "},\t"; + << II->second.S->stateNum << "},\t"; } ValidTransitions += SI->Transitions.size(); - // If there are no valid transitions from this stage, we need a sentinel - // transition. - if (ValidTransitions == StateEntry[i]) { - OS << SentinelEntry << ",\t"; - ++ValidTransitions; - } - OS << " // state " << i << ": " << StateEntry[i]; if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition. OS << "-" << (ValidTransitions-1); @@ -610,8 +618,6 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, OS << "// " << numStates << " states\n"; OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n"; - // Multiply i by 2 since each entry in DFAStateInputTable is a set of - // two numbers. unsigned lastState = 0; for (unsigned i = 0; i < numStates; ++i) { if (i && ((i % 10) == 0)) { @@ -620,11 +626,44 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, } OS << StateEntry[i] << ", "; } - // Print out the index to the sentinel entry in StateInputTable OS << ValidTransitions << ", "; OS << " // states " << (lastState+1) << ":" << numStates << "\n"; OS << "};\n"; + + // Generate the resource transition table. 
+ OS << "const unsigned " << TargetName + << "DFAResourceTransitionTable[][2] = { \n"; + int N = 0; + StateEntry.clear(); + for (const State &S : states) { + for (auto &KV : S.Transitions) { + StateEntry.push_back(N); + for (std::pair &T : KV.second.ResourceTransitions) { + OS << "{0x" << utohexstr(T.first) << ", 0x" << utohexstr(T.second) + << "}, "; + ++N; + } + } + OS << "\n "; + } + // Add a sentinel entry to terminate the search. + StateEntry.push_back(N); + OS << "\n {~0U,~0U}\n};\n\n"; + + OS << "// " << TargetName << "DFAResourceTransitionEntryTable[i] = " + << "Index of the first entry in DFAResourceTransitionTable for\n"; + OS << "// the ith transition.\n"; + OS << "const unsigned int " << TargetName + << "DFAResourceTransitionEntryTable[] = { \n"; + + N = 0; + for (int S : StateEntry) { + OS << S << ","; + if (N++ % 10 == 0) + OS << "\n "; + } + OS << "\n ~0U\n};\n"; } // @@ -946,7 +985,9 @@ void DFAPacketizerEmitter::emitForItineraries( if (!current->hasTransition(InsnClass) && current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) { const State *NewState = nullptr; - current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources); + std::vector> TransitionInfo; + current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources, + TransitionInfo); if (NewStateResources.empty()) { LLVM_DEBUG(dbgs() << " Skipped - no new states generated\n"); continue; @@ -982,7 +1023,7 @@ void DFAPacketizerEmitter::emitForItineraries( }); } - current->addTransition(InsnClass, NewState); + current->addTransition(InsnClass, NewState, TransitionInfo); } } } @@ -1000,7 +1041,10 @@ void DFAPacketizerEmitter::emitForItineraries( << "DFAPacketizer(const InstrItineraryData *IID) const {\n" << " return new DFAPacketizer(IID, " << TargetName << DFAName << "DFAStateInputTable, " << TargetName << DFAName - << "DFAStateEntryTable);\n}\n\n"; + << "DFAStateEntryTable, " << TargetName << DFAName + << "DFAResourceTransitionTable, " << TargetName << DFAName + << 
"DFAResourceTransitionEntryTable" + << ");\n}\n\n"; } namespace llvm { -- 2.7.4