From 258a425c69f0f611ae237ad507252ad18048d2ab Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 17 Apr 2019 15:05:29 +0000 Subject: [PATCH] [ScheduleDAGRRList] Recompute topological ordering on demand. Currently there is a single point in ScheduleDAGRRList, where we actually query the topological order (besides init code). Currently we are recomputing the order after adding a node (which does not have predecessors) and then we add predecessors edge-by-edge. We can avoid adding edges one-by-one after we added a new node. In that case, we can just rebuild the order from scratch after adding the edges to the DAG and avoid all the updates to the ordering. Also, we can delay updating the DAG until we query the DAG, if we keep a list of added edges. Depending on the number of updates, we can either apply them when needed or recompute the order from scratch. This brings down the geomean compile time of CTMark with -O1 by 0.3% on X86, with no regressions. Reviewers: MatzeB, atrick, efriedma, niravd, paquette Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D60125 llvm-svn: 358583 --- llvm/include/llvm/CodeGen/ScheduleDAG.h | 19 +++++++ llvm/lib/CodeGen/ScheduleDAG.cpp | 32 ++++++++++++ .../lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 60 +++++++++++++--------- 3 files changed, 87 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index 17cdb4b..e004f3b 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -691,6 +691,12 @@ class TargetRegisterInfo; std::vector<SUnit> &SUnits; SUnit *ExitSU; + // Have any new nodes been added? + bool Dirty = false; + + // Outstanding added edges that have not been applied to the ordering. + SmallVector<std::pair<SUnit *, SUnit *>, 16> Updates; + /// Maps topological index to the node number. std::vector<int> Index2Node; /// Maps the node number to its topological index. 
@@ -710,6 +716,11 @@ class TargetRegisterInfo; /// Assigns the topological index to the node n. void Allocate(int n, int index); + /// Fix the ordering, by either recomputing from scratch or by applying + /// any outstanding updates. Uses a heuristic to estimate what will be + /// cheaper. + void FixOrder(); + public: ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU); @@ -734,11 +745,19 @@ class TargetRegisterInfo; /// added from SUnit \p X to SUnit \p Y. void AddPred(SUnit *Y, SUnit *X); + /// Queues an update to the topological ordering to accommodate an edge to + /// be added from SUnit \p X to SUnit \p Y. + void AddPredQueued(SUnit *Y, SUnit *X); + /// Updates the topological ordering to accommodate an an edge to be /// removed from the specified node \p N from the predecessors of the /// current node \p M. void RemovePred(SUnit *M, SUnit *N); + /// Mark the ordering as temporarily broken, after a new node has been + /// added. + void MarkDirty() { Dirty = true; } + typedef std::vector<int>::iterator iterator; typedef std::vector<int>::const_iterator const_iterator; iterator begin() { return Index2Node.begin(); } diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp index 5294c5b..dc3a116 100644 --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -462,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { // On insertion of the edge X->Y, the algorithm first marks by calling DFS // the nodes reachable from Y, and then shifts them using Shift to lie // immediately after X in Index2Node. + + // Cancel pending updates, mark as valid. + Dirty = false; + Updates.clear(); + unsigned DAGSize = SUnits.size(); std::vector<SUnit*> WorkList; WorkList.reserve(DAGSize); @@ -515,6 +520,31 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } +void ScheduleDAGTopologicalSort::FixOrder() { + // Recompute from scratch after new nodes have been added. 
+ if (Dirty) { + InitDAGTopologicalSorting(); + return; + } + + // Otherwise apply updates one-by-one. + for (auto &U : Updates) + AddPred(U.first, U.second); + Updates.clear(); +} + +void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) { + // Recomputing the order from scratch is likely more efficient than applying + // updates one-by-one for too many updates. The current cut-off is arbitrarily + // chosen. + Dirty = Dirty || Updates.size() > 10; + + if (Dirty) + return; + + Updates.emplace_back(Y, X); +} + void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; LowerBound = Node2Index[Y->NodeNum]; @@ -672,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, } bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { + FixOrder(); // Is SU reachable from TargetSU via successor edges? if (IsReachable(SU, TargetSU)) return true; @@ -684,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, const SUnit *TargetSU) { + FixOrder(); // If insertion of the edge SU->TargetSU would create a cycle // then there is a path from TargetSU to SU. int UpperBound, LowerBound; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 75d9eb2..34b4c85 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -219,6 +219,14 @@ public: return Topo.WillCreateCycle(SU, TargetSU); } + /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU. + /// The edge is also added to SU's predecessor list immediately. + /// Does *NOT* update the topological ordering! It just queues an update. + void AddPredQueued(SUnit *SU, const SDep &D) { + Topo.AddPredQueued(SU, D.getSUnit()); + SU->addPred(D); + } + /// AddPred - adds a predecessor edge to SUnit SU. 
/// This returns true if this is a new predecessor. /// Updates the topological ordering if required. @@ -266,24 +274,22 @@ private: void ListScheduleBottomUp(); /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. - /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); return NewNode; } /// CreateClone - Creates a new SUnit from an existing one. - /// Updates the topological ordering if required. SUnit *CreateClone(SUnit *N) { unsigned NumSUnits = SUnits.size(); SUnit *NewNode = Clone(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); return NewNode; } @@ -365,7 +371,7 @@ void ScheduleDAGRRList::Schedule() { BuildSchedGraph(nullptr); LLVM_DEBUG(dump()); - Topo.InitDAGTopologicalSorting(); + Topo.MarkDirty(); AvailableQueue->initNodes(SUnits); @@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { NewSU = &SUnits[N->getNodeId()]; // If NewSU has already been scheduled, we need to clone it, but this // negates the benefit to unfolding so just return SU. 
- if (NewSU->isScheduled) + if (NewSU->isScheduled) { return SU; + } isNewN = false; } else { NewSU = CreateNewSUnit(N); @@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { for (const SDep &Pred : ChainPreds) { RemovePred(SU, Pred); if (isNewLoad) - AddPred(LoadSU, Pred); + AddPredQueued(LoadSU, Pred); } for (const SDep &Pred : LoadPreds) { RemovePred(SU, Pred); if (isNewLoad) - AddPred(LoadSU, Pred); + AddPredQueued(LoadSU, Pred); } for (const SDep &Pred : NodePreds) { RemovePred(SU, Pred); - AddPred(NewSU, Pred); + AddPredQueued(NewSU, Pred); } for (SDep D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); D.setSUnit(NewSU); - AddPred(SuccDep, D); + AddPredQueued(SuccDep, D); // Balance register pressure. if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) @@ -1099,7 +1106,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { RemovePred(SuccDep, D); if (isNewLoad) { D.setSUnit(LoadSU); - AddPred(SuccDep, D); + AddPredQueued(SuccDep, D); } } @@ -1107,7 +1114,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { // by LoadSU. SDep D(LoadSU, SDep::Data, 0); D.setLatency(LoadSU->Latency); - AddPred(NewSU, D); + AddPredQueued(NewSU, D); if (isNewLoad) AvailableQueue->addNode(LoadSU); @@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // New SUnit has the exact same predecessors. for (SDep &Pred : SU->Preds) if (!Pred.isArtificial()) - AddPred(NewSU, Pred); + AddPredQueued(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. 
@@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(NewSU); - AddPred(SuccSU, D); + AddPredQueued(SuccSU, D); D.setSUnit(SU); DelDeps.push_back(std::make_pair(SuccSU, D)); } @@ -1230,14 +1237,14 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(CopyToSU); - AddPred(SuccSU, D); + AddPredQueued(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, Succ)); } else { // Avoid scheduling the def-side copy before other successors. Otherwise // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. - AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); + AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } for (auto &DelDep : DelDeps) @@ -1245,10 +1252,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); - AddPred(CopyFromSU, FromDep); + AddPredQueued(CopyFromSU, FromDep); SDep ToDep(CopyFromSU, SDep::Data, 0); ToDep.setLatency(CopyFromSU->Latency); - AddPred(CopyToSU, ToDep); + AddPredQueued(CopyToSU, ToDep); AvailableQueue->updateNode(SU); AvailableQueue->addNode(CopyFromSU); @@ -1478,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (CurSU) return CurSU; + // We query the topological order in the loop body, so make sure outstanding + // updates are applied before entering it (we only enter the loop if there + // are some interferences). If we make changes to the ordering, we exit + // the loop. + // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies // to resolve it. 
@@ -1507,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { } LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" << TrySU->NodeNum << ")\n"); - AddPred(TrySU, SDep(BtSU, SDep::Artificial)); + AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current // node is no longer available. @@ -1561,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); + AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Artificial)); + AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -3017,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { if (SuccSU != &SU) { Edge.setSUnit(PredSU); scheduleDAG->RemovePred(SuccSU, Edge); - scheduleDAG->AddPred(&SU, Edge); + scheduleDAG->AddPredQueued(&SU, Edge); Edge.setSUnit(&SU); - scheduleDAG->AddPred(SuccSU, Edge); + scheduleDAG->AddPredQueued(SuccSU, Edge); --i; } } @@ -3101,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { LLVM_DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); + scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); } } } -- 2.7.4