From 3de2f0330f4b7b4b099b21a030e19c442f536a83 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 15 Aug 2019 15:27:40 +0000 Subject: [PATCH] [MCA] Slightly refactor class RetireControlUnit, and add the ability to override the mask of used buffered resources in class mca::Instruction. NFCI This patch teaches the RCU how to peek 'next' RCUTokens. A new method has been added to the RetireControlUnit class with the goal of minimizing the complexity of follow-up patches that will enable macro-fusion support in mca. This patch also adds method Instruction::getNumMicroOpcodes() to simplify common interactions with the instruction descriptor (a pattern quite common in some pipeline stages). Added the ability to override the default set of consumed scheduler resources (this -again- is to simplify future patches that add support for macro-op fusion). No functional change intended. llvm-svn: 369010 --- .../llvm/MCA/HardwareUnits/RetireControlUnit.h | 33 +++++++---- llvm/include/llvm/MCA/Instruction.h | 17 +++++- llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp | 65 +++++++++++++--------- llvm/lib/MCA/HardwareUnits/Scheduler.cpp | 10 ++-- llvm/lib/MCA/Stages/DispatchStage.cpp | 17 +++--- llvm/lib/MCA/Stages/ExecuteStage.cpp | 8 ++- llvm/lib/MCA/Stages/RetireStage.cpp | 4 +- 7 files changed, 96 insertions(+), 58 deletions(-) diff --git a/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h index 0629014..acbd454 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h @@ -57,34 +57,43 @@ struct RetireControlUnit : public HardwareUnit { private: unsigned NextAvailableSlotIdx; unsigned CurrentInstructionSlotIdx; - unsigned AvailableSlots; + unsigned NumROBEntries; + unsigned AvailableEntries; unsigned MaxRetirePerCycle; // 0 means no limit. std::vector Queue; -public: - RetireControlUnit(const MCSchedModel &SM); - - bool isEmpty() const { return AvailableSlots == Queue.size(); } - bool isAvailable(unsigned Quantity = 1) const { + unsigned normalizeQuantity(unsigned Quantity) const { // Some instructions may declare a number of uOps which exceeds the size // of the reorder buffer. To avoid problems, cap the amount of slots to // the size of the reorder buffer. - Quantity = std::min(Quantity, static_cast(Queue.size())); + Quantity = std::min(Quantity, NumROBEntries); // Further normalize the number of micro opcodes for instructions that // declare zero opcodes. This should match the behavior of method // reserveSlot(). - Quantity = std::max(Quantity, 1U); - return AvailableSlots >= Quantity; + return std::max(Quantity, 1U); + } + + unsigned computeNextSlotIdx() const; + +public: + RetireControlUnit(const MCSchedModel &SM); + + bool isEmpty() const { return AvailableEntries == NumROBEntries; } + + bool isAvailable(unsigned Quantity = 1) const { + return AvailableEntries >= normalizeQuantity(Quantity); } unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } - // Reserves a number of slots, and returns a new token. - unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); + // Reserves a number of slots, and returns a new token reference. + unsigned dispatch(const InstRef &IS); // Return the current token from the RCU's circular token queue. - const RUToken &peekCurrentToken() const; + const RUToken &getCurrentToken() const; + + const RUToken &peekNextToken() const; // Advance the pointer to the next token in the circular token queue. void consumeCurrentToken(); diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h index 016a048..572c3ba 100644 --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -417,6 +417,7 @@ public: const InstrDesc &getDesc() const { return Desc; } unsigned getLatency() const { return Desc.MaxLatency; } + unsigned getNumMicroOps() const { return Desc.NumMicroOps; } bool hasDependentUsers() const { return any_of(Defs, @@ -466,6 +467,12 @@ class Instruction : public InstructionBase { // operation. unsigned LSUTokenID; + // A resource mask which identifies buffered resources consumed by this + // instruction at dispatch stage. In the absence of macro-fusion, this value + // should always match the value of field `UsedBuffers` from the instruction + // descriptor (see field InstrBase::Desc). + uint64_t UsedBuffers; + // Critical register dependency. CriticalDependency CriticalRegDep; @@ -483,12 +490,18 @@ class Instruction : public InstructionBase { public: Instruction(const InstrDesc &D) : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), - RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(), - CriticalResourceMask(0), IsEliminated(false) {} + RCUTokenID(0), LSUTokenID(0), UsedBuffers(D.UsedBuffers), + CriticalRegDep(), CriticalMemDep(), CriticalResourceMask(0), + IsEliminated(false) {} unsigned getRCUTokenID() const { return RCUTokenID; } unsigned getLSUTokenID() const { return LSUTokenID; } void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; } + + uint64_t getUsedBuffers() const { return UsedBuffers; } + void setUsedBuffers(uint64_t Mask) { UsedBuffers = Mask; } + void clearUsedBuffers() { UsedBuffers = 0ULL; } + int getCyclesLeft() const { return CyclesLeft; } // Transition to the dispatch stage, and assign a RCUToken to this diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp index 068c506..de519d7 100644 --- a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -21,65 +21,78 @@ namespace mca { RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + NumROBEntries(SM.MicroOpBufferSize), + AvailableEntries(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { // Check if the scheduling model provides extra information about the machine // processor. If so, then use that information to set the reorder buffer size // and the maximum number of instructions retired per cycle. if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; + AvailableEntries = EPI.ReorderBufferSize; MaxRetirePerCycle = EPI.MaxRetirePerCycle; } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); + NumROBEntries = AvailableEntries; + assert(NumROBEntries && "Invalid reorder buffer size!"); + Queue.resize(2 * NumROBEntries); } // Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast(Queue.size())); - // Zero latency instructions may have zero uOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. - NormalizedQuantity = std::max(NormalizedQuantity, 1U); +unsigned RetireControlUnit::dispatch(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + unsigned Entries = normalizeQuantity(Inst.getNumMicroOps()); + assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!"); + unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; + Queue[NextAvailableSlotIdx] = {IR, Entries, false}; + NextAvailableSlotIdx += std::max(1U, Entries); NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; + + AvailableEntries -= Entries; return TokenID; } -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; +const RetireControlUnit::RUToken &RetireControlUnit::getCurrentToken() const { + const RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; +#ifndef NDEBUG + const Instruction *Inst = Current.IR.getInstruction(); + assert(Inst && "Invalid RUToken in the RCU queue."); +#endif + return Current; +} + +unsigned RetireControlUnit::computeNextSlotIdx() const { + const RetireControlUnit::RUToken &Current = getCurrentToken(); + unsigned NextSlotIdx = CurrentInstructionSlotIdx + std::max(1U, Current.NumSlots); + return NextSlotIdx % Queue.size(); +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekNextToken() const { + return Queue[computeNextSlotIdx()]; } void RetireControlUnit::consumeCurrentToken() { RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR && "Invalid RUToken in the RCU queue."); Current.IR.getInstruction()->retire(); // Update the slot index to be the next item in the circular queue. - CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx += std::max(1U, Current.NumSlots); CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; + AvailableEntries += Current.NumSlots; + Current = { InstRef(), 0U, false }; } void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + assert(Queue[TokenID].IR.getInstruction() && "Instruction was not dispatched!"); + assert(Queue[TokenID].Executed == false && "Instruction already executed!"); Queue[TokenID].Executed = true; } #ifndef NDEBUG void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; + dbgs() << "Retire Unit: { Total ROB Entries =" << NumROBEntries + << ", Available ROB entries=" << AvailableEntries << " }\n"; } #endif diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp index 2d6b22c..c793130 100644 --- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp @@ -38,9 +38,8 @@ void Scheduler::dump() const { #endif Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - ResourceStateEvent RSE = Resources->canBeDispatched(Desc.UsedBuffers); + ResourceStateEvent RSE = + Resources->canBeDispatched(IR.getInstruction()->getUsedBuffers()); HadTokenStall = RSE != RS_BUFFER_AVAILABLE; switch (RSE) { @@ -106,7 +105,7 @@ void Scheduler::issueInstruction( bool HasDependentUsers = Inst.hasDependentUsers(); HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); - Resources->releaseBuffers(Inst.getDesc().UsedBuffers); + Resources->releaseBuffers(Inst.getUsedBuffers()); issueInstructionImpl(IR, UsedResources); // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during @@ -300,8 +299,7 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const { bool Scheduler::dispatch(InstRef &IR) { Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - Resources->reserveBuffers(Desc.UsedBuffers); + Resources->reserveBuffers(IS.getUsedBuffers()); // If necessary, reserve queue entries in the load-store unit (LSU). if (IS.isMemOp()) diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp index 7334a26..88a7311 100644 --- a/llvm/lib/MCA/Stages/DispatchStage.cpp +++ b/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -60,7 +60,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) const { } bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + const unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps(); if (RCU.isAvailable(NumMicroOps)) return true; notifyEvent( @@ -79,7 +79,7 @@ Error DispatchStage::dispatch(InstRef IR) { assert(!CarryOver && "Cannot dispatch another instruction!"); Instruction &IS = *IR.getInstruction(); const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; + const unsigned NumMicroOps = IS.getNumMicroOps(); if (NumMicroOps > DispatchWidth) { assert(AvailableEntries == DispatchWidth); AvailableEntries = 0; @@ -123,9 +123,10 @@ Error DispatchStage::dispatch(InstRef IR) { for (WriteState &WS : IS.getDefs()) PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + // Reserve entries in the reorder buffer. + unsigned RCUTokenID = RCU.dispatch(IR); + // Notify the instruction that it has been dispatched. + IS.dispatch(RCUTokenID); // Notify listeners of the "instruction dispatched" event, // and move IR to the next stage. @@ -155,8 +156,10 @@ Error DispatchStage::cycleStart() { } bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + const Instruction &Inst = *IR.getInstruction(); + unsigned NumMicroOps = Inst.getNumMicroOps(); + const InstrDesc &Desc = Inst.getDesc(); + unsigned Required = std::min(NumMicroOps, DispatchWidth); if (Required > AvailableEntries) return false; diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp index 9166f07..2284ed7 100644 --- a/llvm/lib/MCA/Stages/ExecuteStage.cpp +++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp @@ -56,12 +56,13 @@ Error ExecuteStage::issueInstruction(InstRef &IR) { SmallVector Ready; HWS.issueInstruction(IR, Used, Pending, Ready); - NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps; + Instruction &IS = *IR.getInstruction(); + NumIssuedOpcodes += IS.getNumMicroOps(); notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { + if (IS.isExecuted()) { notifyInstructionExecuted(IR); // FIXME: add a buffer of executed instructions. if (Error S = moveToTheNextStage(IR)) @@ -199,7 +200,8 @@ Error ExecuteStage::execute(InstRef &IR) { // units have been consumed. bool IsReadyInstruction = HWS.dispatch(IR); const Instruction &Inst = *IR.getInstruction(); - NumDispatchedOpcodes += Inst.getDesc().NumMicroOps; + unsigned NumMicroOps = Inst.getNumMicroOps(); + NumDispatchedOpcodes += NumMicroOps; notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); if (!IsReadyInstruction) { diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp index e1789dd..7354445 100644 --- a/llvm/lib/MCA/Stages/RetireStage.cpp +++ b/llvm/lib/MCA/Stages/RetireStage.cpp @@ -31,11 +31,11 @@ llvm::Error RetireStage::cycleStart() { while (!RCU.isEmpty()) { if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + const RetireControlUnit::RUToken &Current = RCU.getCurrentToken(); if (!Current.Executed) break; - RCU.consumeCurrentToken(); notifyInstructionRetired(Current.IR); + RCU.consumeCurrentToken(); NumRetired++; } -- 2.7.4