From: Andrea Di Biagio Date: Fri, 3 Aug 2018 12:55:28 +0000 (+0000) Subject: [llvm-mca] Speed up the computation of the wait/ready/issued sets in the Scheduler. X-Git-Tag: llvmorg-8.0.0-rc1~11784 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1c3bcc6ce51bd4e6d9a344b22752df8f600ba969;p=platform%2Fupstream%2Fllvm.git [llvm-mca] Speed up the computation of the wait/ready/issued sets in the Scheduler. This patch is a follow-up to r338702. We don't need to use a map to model the wait/ready/issued sets. It is much more efficient to use a vector instead. This patch gives us an average 7.5% speedup (on top of the ~12% speedup obtained after r338702). llvm-svn: 338883 --- diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index 188b31e..a2ca1d6 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -645,32 +645,32 @@ available units from the group; by default, the resource manager uses a round-robin selector to guarantee that resource usage is uniformly distributed between all units of a group. -:program:`llvm-mca`'s scheduler implements three instruction queues: +:program:`llvm-mca`'s scheduler internally groups instructions into three sets: -* WaitQueue: a queue of instructions whose operands are not ready. -* ReadyQueue: a queue of instructions ready to execute. -* IssuedQueue: a queue of instructions executing. +* WaitSet: a set of instructions whose operands are not ready. +* ReadySet: a set of instructions ready to execute. +* IssuedSet: a set of instructions executing. -Depending on the operand availability, instructions that are dispatched to the -scheduler are either placed into the WaitQueue or into the ReadyQueue. +Depending on the operands availability, instructions that are dispatched to the +scheduler are either placed into the WaitSet or into the ReadySet. -Every cycle, the scheduler checks if instructions can be moved from the -WaitQueue to the ReadyQueue, and if instructions from the ReadyQueue can be -issued to the underlying pipelines. The algorithm prioritizes older instructions -over younger instructions. +Every cycle, the scheduler checks if instructions can be moved from the WaitSet +to the ReadySet, and if instructions from the ReadySet can be issued to the +underlying pipelines. The algorithm prioritizes older instructions over younger +instructions. Write-Back and Retire Stage """"""""""""""""""""""""""" -Issued instructions are moved from the ReadyQueue to the IssuedQueue. There, +Issued instructions are moved from the ReadySet to the IssuedSet. There, instructions wait until they reach the write-back stage. At that point, they get removed from the queue and the retire control unit is notified. -When instructions are executed, the retire control unit flags the -instruction as "ready to retire." +When instructions are executed, the retire control unit flags the instruction as +"ready to retire." -Instructions are retired in program order. The register file is notified of -the retirement so that it can free the physical registers that were allocated -for the instruction during the register renaming stage. +Instructions are retired in program order. The register file is notified of the +retirement so that it can free the physical registers that were allocated for +the instruction during the register renaming stage. Load/Store Unit and Memory Consistency Model """""""""""""""""""""""""""""""""""""""""""" diff --git a/llvm/tools/llvm-mca/ExecuteStage.cpp b/llvm/tools/llvm-mca/ExecuteStage.cpp index 437f864..1bc9c4a 100644 --- a/llvm/tools/llvm-mca/ExecuteStage.cpp +++ b/llvm/tools/llvm-mca/ExecuteStage.cpp @@ -37,7 +37,7 @@ void ExecuteStage::reclaimSchedulerResources() { // Update the scheduler's instruction queues. void ExecuteStage::updateSchedulerQueues() { SmallVector InstructionIDs; - HWS.updateIssuedQueue(InstructionIDs); + HWS.updateIssuedSet(InstructionIDs); for (const InstRef &IR : InstructionIDs) notifyInstructionExecuted(IR); InstructionIDs.clear(); @@ -65,9 +65,9 @@ void ExecuteStage::issueReadyInstructions() { // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during // this same cycle if operands have ReadAdvance entries. Promote those - // instructions to the ReadyQueue and tell to the caller that we need + // instructions to the ReadySet and tell to the caller that we need // another round of 'issue()'. - HWS.promoteToReadyQueue(InstructionIDs); + HWS.promoteToReadySet(InstructionIDs); for (const InstRef &I : InstructionIDs) notifyInstructionReady(I); InstructionIDs.clear(); diff --git a/llvm/tools/llvm-mca/Instruction.h b/llvm/tools/llvm-mca/Instruction.h index 3b2f905..66153cd 100644 --- a/llvm/tools/llvm-mca/Instruction.h +++ b/llvm/tools/llvm-mca/Instruction.h @@ -384,9 +384,12 @@ public: Instruction *getInstruction() { return second; } const Instruction *getInstruction() const { return second; } - /// Returns true if this InstRef has been populated. + /// Returns true if this references a valid instruction. bool isValid() const { return second != nullptr; } + /// Invalidate this reference. + void invalidate() { second = nullptr; } + #ifndef NDEBUG void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); } #endif diff --git a/llvm/tools/llvm-mca/Scheduler.cpp b/llvm/tools/llvm-mca/Scheduler.cpp index 553cd00..764cd23 100644 --- a/llvm/tools/llvm-mca/Scheduler.cpp +++ b/llvm/tools/llvm-mca/Scheduler.cpp @@ -250,9 +250,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) { #ifndef NDEBUG void Scheduler::dump() const { - dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n'; - dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n'; - dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n'; + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; Resources->dump(); } #endif @@ -296,7 +296,7 @@ void Scheduler::issueInstructionImpl( IS->execute(); if (IS->isExecuting()) - IssuedQueue[IR.getSourceIndex()] = IS; + IssuedSet.emplace_back(IR); } // Release the buffered resources and issue the instruction. @@ -308,92 +308,109 @@ void Scheduler::issueInstruction( issueInstructionImpl(IR, UsedResources); } -void Scheduler::promoteToReadyQueue(SmallVectorImpl &Ready) { +void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { // Scan the set of waiting instructions and promote them to the // ready queue if operands are all ready. - for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) { - const unsigned IID = I->first; - Instruction *IS = I->second; + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E; ) { + InstRef &IR = *I; + if (!IR.isValid()) + break; // Check if this instruction is now ready. In case, force // a transition in state using method 'update()'. - if (!IS->isReady()) - IS->update(); + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady()) + IS.update(); - const InstrDesc &Desc = IS->getDesc(); + const InstrDesc &Desc = IS.getDesc(); bool IsMemOp = Desc.MayLoad || Desc.MayStore; - if (!IS->isReady() || (IsMemOp && !LSU->isReady({IID, IS}))) { + if (!IS.isReady() || (IsMemOp && !LSU->isReady(IR))) { ++I; continue; } - Ready.emplace_back(IID, IS); - ReadyQueue[IID] = IS; - auto ToRemove = I; - ++I; - WaitQueue.erase(ToRemove); + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); } + + WaitSet.resize(WaitSet.size() - RemovedElements); } InstRef Scheduler::select() { - // Find the oldest ready-to-issue instruction in the ReadyQueue. - auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(), - [&](const QueueEntryTy &Entry) { - const InstrDesc &D = Entry.second->getDesc(); - return Resources->canBeIssued(D); - }); - - if (It == ReadyQueue.end()) - return {0, nullptr}; - - // We want to prioritize older instructions over younger instructions to - // minimize the pressure on the reorder buffer. We also want to - // rank higher the instructions with more users to better expose ILP. - - // Compute a rank value based on the age of an instruction (i.e. its source - // index) and its number of users. The lower the rank value, the better. - int Rank = It->first - It->second->getNumUsers(); - for (auto I = It, E = ReadyQueue.end(); I != E; ++I) { - int CurrentRank = I->first - I->second->getNumUsers(); - if (CurrentRank < Rank) { - const InstrDesc &D = I->second->getDesc(); + unsigned QueueIndex = ReadySet.size(); + int Rank = std::numeric_limits::max(); + + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + const InstRef &IR = ReadySet[I]; + const unsigned IID = IR.getSourceIndex(); + const Instruction &IS = *IR.getInstruction(); + + // Compute a rank value based on the age of an instruction (i.e. its source + // index) and its number of users. The lower the rank value, the better. + int CurrentRank = IID - IS.getNumUsers(); + + // We want to prioritize older instructions over younger instructions to + // minimize the pressure on the reorder buffer. We also want to + // rank higher the instructions with more users to better expose ILP. + if (CurrentRank == Rank) + if (IID > ReadySet[QueueIndex].getSourceIndex()) + continue; + + if (CurrentRank <= Rank) { + const InstrDesc &D = IS.getDesc(); if (Resources->canBeIssued(D)) { Rank = CurrentRank; - It = I; + QueueIndex = I; } } } + if (QueueIndex == ReadySet.size()) + return InstRef(); + // We found an instruction to issue. - InstRef IR(It->first, It->second); - ReadyQueue.erase(It); + + InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); return IR; } void Scheduler::updatePendingQueue(SmallVectorImpl &Ready) { // Notify to instructions in the pending queue that a new cycle just // started. - for (QueueEntryTy Entry : WaitQueue) - Entry.second->cycleEvent(); - promoteToReadyQueue(Ready); + for (InstRef &Entry : WaitSet) + Entry.getInstruction()->cycleEvent(); + promoteToReadySet(Ready); } -void Scheduler::updateIssuedQueue(SmallVectorImpl &Executed) { - for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) { - const QueueEntryTy Entry = *I; - Instruction *IS = Entry.second; - IS->cycleEvent(); - if (IS->isExecuted()) { - Executed.push_back({Entry.first, Entry.second}); - auto ToRemove = I; - ++I; - IssuedQueue.erase(ToRemove); - } else { - LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << Entry.first +void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E; ) { + InstRef &IR = *I; + if (!IR.isValid()) + break; + Instruction &IS = *IR.getInstruction(); + IS.cycleEvent(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR << " is still executing.\n"); ++I; + continue; } + + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); } void Scheduler::onInstructionExecuted(const InstRef &IR) { @@ -408,9 +425,8 @@ bool Scheduler::reserveResources(InstRef &IR) { // If necessary, reserve queue entries in the load-store unit (LSU). const bool Reserved = LSU->reserve(IR); if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR - << " to the Wait Queue\n"); - WaitQueue[IR.getSourceIndex()] = IR.getInstruction(); + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); return false; } return true; @@ -419,9 +435,8 @@ bool Scheduler::reserveResources(InstRef &IR) { bool Scheduler::issueImmediately(InstRef &IR) { const InstrDesc &Desc = IR.getInstruction()->getDesc(); if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR - << " to the Ready Queue\n"); - ReadyQueue[IR.getSourceIndex()] = IR.getInstruction(); + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); return false; } return true; diff --git a/llvm/tools/llvm-mca/Scheduler.h b/llvm/tools/llvm-mca/Scheduler.h index b0d2dbc..8d44f61 100644 --- a/llvm/tools/llvm-mca/Scheduler.h +++ b/llvm/tools/llvm-mca/Scheduler.h @@ -342,30 +342,28 @@ public: }; // namespace mca /// Class Scheduler is responsible for issuing instructions to pipeline -/// resources. Internally, it delegates to a ResourceManager the management of -/// processor resources. -/// This class is also responsible for tracking the progress of instructions -/// from the dispatch stage, until the write-back stage. +/// resources. /// -/// An nstruction dispatched to the Scheduler is initially placed into either -/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the -/// input operands. Instructions in the WaitQueue are ordered by instruction -/// index. An instruction is moved from the WaitQueue to the ReadyQueue when -/// register operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitQueue to the ReadyQueue transition -/// from state 'IS_AVAILABLE' to state 'IS_READY'. +/// Internally, it delegates to a ResourceManager the management of processor +/// resources. This class is also responsible for tracking the progress of +/// instructions from the dispatch stage, until the write-back stage. /// -/// At the beginning of each cycle, the Scheduler checks if there are -/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the -/// ReadyQueue is not empty, then older instructions from the queue are issued -/// to the processor pipelines, and the underlying ResourceManager is updated -/// accordingly. The ReadyQueue is ordered by instruction index to guarantee -/// that the first instructions in the set are also the oldest. +/// An instruction dispatched to the Scheduler is initially placed into either +/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input +/// operands. /// -/// An Instruction is moved from the ReadyQueue the `IssuedQueue` when it is -/// issued to a (one or more) pipeline(s). This event also causes an instruction -/// state transition (i.e. from state IS_READY, to state IS_EXECUTING). -/// An Instruction leaves the IssuedQueue when it reaches the write-back stage. +/// An instruction is moved from the WaitSet to the ReadySet when register +/// operands become available, and all memory dependencies are met. +/// Instructions that are moved from the WaitSet to the ReadySet transition +/// in state from 'IS_AVAILABLE' to 'IS_READY'. +/// +/// On every cycle, the Scheduler checks if it can promote instructions from the +/// WaitSet to the ReadySet. +/// +/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued +/// to a (one or more) pipeline(s). This event also causes an instruction state +/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction +/// leaves the IssuedSet when it reaches the write-back stage. class Scheduler : public HardwareUnit { const llvm::MCSchedModel &SM; @@ -373,10 +371,9 @@ class Scheduler : public HardwareUnit { std::unique_ptr Resources; std::unique_ptr LSU; - using QueueEntryTy = std::pair; - std::map WaitQueue; - std::map ReadyQueue; - std::map IssuedQueue; + std::vector WaitSet; + std::vector ReadySet; + std::vector IssuedSet; /// Issue an instruction without updating the ready queue. void issueInstructionImpl( @@ -412,7 +409,7 @@ public: /// zero-latency instructions). /// /// Returns true if the instruction is issued immediately. If this does not - /// occur, then the instruction will be added to the Scheduler's ReadyQueue. + /// occur, then the instruction will be added to the Scheduler's ReadySet. bool issueImmediately(InstRef &IR); /// Reserve one entry in each buffered resource. @@ -431,15 +428,15 @@ public: /// once they have been fully consumed. void reclaimSimulatedResources(llvm::SmallVectorImpl &Freed); - /// Move instructions from the WaitQueue to the ReadyQueue if input operands + /// Move instructions from the WaitSet to the ReadySet if input operands /// are all available. - void promoteToReadyQueue(llvm::SmallVectorImpl &Ready); + void promoteToReadySet(llvm::SmallVectorImpl &Ready); /// Update the ready queue. void updatePendingQueue(llvm::SmallVectorImpl &Ready); /// Update the issued queue. - void updateIssuedQueue(llvm::SmallVectorImpl &Executed); + void updateIssuedSet(llvm::SmallVectorImpl &Executed); /// Updates the Scheduler's resources to reflect that an instruction has just /// been executed. @@ -456,7 +453,7 @@ public: /// execute the given instruction immediately. bool reserveResources(InstRef &IR); - /// Select the next instruction to issue from the ReadyQueue. + /// Select the next instruction to issue from the ReadySet. /// This method gives priority to older instructions. InstRef select(); @@ -467,10 +464,9 @@ public: // This routine performs a sanity check. This routine should only be called // when we know that 'IR' is not in the scheduler's instruction queues. void sanityCheck(const InstRef &IR) const { - const unsigned Idx = IR.getSourceIndex(); - assert(WaitQueue.find(Idx) == WaitQueue.end()); - assert(ReadyQueue.find(Idx) == ReadyQueue.end()); - assert(IssuedQueue.find(Idx) == IssuedQueue.end()); + assert(llvm::find(WaitSet, IR) == WaitSet.end()); + assert(llvm::find(ReadySet, IR) == ReadySet.end()); + assert(llvm::find(IssuedSet, IR) == IssuedSet.end()); } #endif // !NDEBUG };