From 7aa0dbb664ea05e233c503c009052f8682fc0f62 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 15 Aug 2019 12:39:55 +0000 Subject: [PATCH] [MCA] Slightly refactor the logic in ResourceManager. NFCI This patch slightly changes the API in the attempt to simplify resource buffer queries. It is done in preparation for a patch that will enable support for macro fusion. llvm-svn: 368994 --- .../llvm/MCA/HardwareUnits/ResourceManager.h | 51 +++++++++++++------- llvm/include/llvm/MCA/HardwareUnits/Scheduler.h | 3 ++ llvm/include/llvm/MCA/Instruction.h | 11 +++-- llvm/lib/MCA/HardwareUnits/ResourceManager.cpp | 55 +++++++++++++--------- llvm/lib/MCA/HardwareUnits/Scheduler.cpp | 6 +-- llvm/lib/MCA/InstrBuilder.cpp | 30 +++++------- llvm/lib/MCA/Stages/ExecuteStage.cpp | 14 ++++-- 7 files changed, 102 insertions(+), 68 deletions(-) diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h index 2f91185..917af37 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -33,8 +33,7 @@ namespace mca { /// with a buffer size of -1 is always available if it is not reserved. /// /// Values of type ResourceStateEvent are returned by method -/// ResourceState::isBufferAvailable(), which is used to query the internal -/// state of a resource. +/// ResourceManager::canBeDispatched() /// /// The naming convention for resource state events is: /// * Event names start with prefix RS_ @@ -263,16 +262,26 @@ public: /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. ResourceStateEvent isBufferAvailable() const; - /// Reserve a slot in the buffer. - void reserveBuffer() { - if (AvailableSlots) - AvailableSlots--; + /// Reserve a buffer slot. + /// + /// Returns true if the buffer is not full. + /// It always returns true if BufferSize is set to zero. 
+ bool reserveBuffer() { + if (BufferSize <= 0) + return true; + + --AvailableSlots; + assert(AvailableSlots <= static_cast(BufferSize)); + return AvailableSlots; } - /// Release a slot in the buffer. + /// Releases a slot in the buffer. void releaseBuffer() { - if (BufferSize > 0) - AvailableSlots++; + // Ignore dispatch hazards or invalid buffer sizes. + if (BufferSize <= 0) + return; + + ++AvailableSlots; assert(AvailableSlots <= static_cast(BufferSize)); } @@ -351,9 +360,16 @@ class ResourceManager { // Set of processor resource units that are available during this cycle. uint64_t AvailableProcResUnits; - // Set of processor resource groups that are currently reserved. + // Set of processor resources that are currently reserved. uint64_t ReservedResourceGroups; + // Set of available scheduler buffer resources. This is used internally to + // speed up `canBeDispatched()` queries. + uint64_t AvailableBuffers; + + // Set of dispatch hazard buffer resources that are currently unavailable. + uint64_t ReservedBuffers; + // Returns the actual resource unit that will be used. ResourceRef selectPipe(uint64_t ResourceID); @@ -382,17 +398,20 @@ public: // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if // there are enough available slots in the buffers. - ResourceStateEvent canBeDispatched(ArrayRef Buffers) const; + ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; // Return the processor resource identifier associated to this Mask. unsigned resolveResourceMask(uint64_t Mask) const; - // Consume a slot in every buffered resource from array 'Buffers'. Resource - // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. - void reserveBuffers(ArrayRef Buffers); + // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. + // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. 
+ void reserveBuffers(uint64_t ConsumedBuffers); - // Release buffer entries previously allocated by method reserveBuffers. - void releaseBuffers(ArrayRef Buffers); + // Releases a slot from every buffered resource in mask `ConsumedBuffers`. + // ConsumedBuffers is a bitmask of previously acquired buffers (using method + // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are + // not automatically unreserved by this method. + void releaseBuffers(uint64_t ConsumedBuffers); // Reserve a processor resource. A reserved resource is not available for // instruction issue until it is released. diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h index 27beb84..36d0bd4 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h +++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -228,6 +228,9 @@ public: SmallVectorImpl &Ready); /// Convert a resource mask into a valid llvm processor resource identifier. + /// + /// Only the most significant bit of the Mask is used by this method to + /// identify the processor resource. unsigned getResourceID(uint64_t Mask) const { return Resources->resolveResourceMask(Mask); } diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h index d4d3f227..016a048 100644 --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -352,11 +352,14 @@ struct InstrDesc { // reports the number of "consumed cycles". SmallVector, 4> Resources; - // A list of buffered resources consumed by this instruction. - SmallVector Buffers; + // A bitmask of used hardware buffers. + uint64_t UsedBuffers; - unsigned UsedProcResUnits; - unsigned UsedProcResGroups; + // A bitmask of used processor resource units. + uint64_t UsedProcResUnits; + + // A bitmask of used processor resource groups. + uint64_t UsedProcResGroups; unsigned MaxLatency; // Number of MicroOps for this instruction. 
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp index 06f2476..5c91198 100644 --- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -114,7 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) Resource2Groups(SM.getNumProcResourceKinds() - 1, 0), ProcResID2Mask(SM.getNumProcResourceKinds(), 0), ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0), - ProcResUnitMask(0), ReservedResourceGroups(0) { + ProcResUnitMask(0), ReservedResourceGroups(0), + AvailableBuffers(~0ULL), ReservedBuffers(0) { computeProcResourceMasks(SM, ProcResID2Mask); // initialize vector ResIndex2ProcResID. @@ -241,33 +242,41 @@ void ResourceManager::release(const ResourceRef &RR) { } ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; +ResourceManager::canBeDispatched(uint64_t ConsumedBuffers) const { + if (ConsumedBuffers & ReservedBuffers) + return ResourceStateEvent::RS_RESERVED; + if (ConsumedBuffers & (~AvailableBuffers)) + return ResourceStateEvent::RS_BUFFER_UNAVAILABLE; + return ResourceStateEvent::RS_BUFFER_AVAILABLE; } -void ResourceManager::reserveBuffers(ArrayRef Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; +void ResourceManager::reserveBuffers(uint64_t ConsumedBuffers) { + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - 
+ if (!RS.reserveBuffer()) + AvailableBuffers ^= CurrentBuffer; if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); + // Reserve this buffer now, and release it once pipeline resources + // consumed by the instruction become available again. + // We do this to simulate an in-order dispatch/issue of instructions. + ReservedBuffers ^= CurrentBuffer; } } } -void ResourceManager::releaseBuffers(ArrayRef Buffers) { - for (const uint64_t R : Buffers) - Resources[getResourceStateIndex(R)]->releaseBuffer(); +void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) { + AvailableBuffers |= ConsumedBuffers; + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; + RS.releaseBuffer(); + // Do not unreserve dispatch hazard resource buffers. Wait until all + // pipeline resources have been freed too. + } } uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { @@ -322,7 +331,6 @@ void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { if (countPopulation(RR.first) == 1) release(RR); - releaseResource(RR.first); ResourcesFreed.push_back(RR); } @@ -336,7 +344,7 @@ void ResourceManager::reserveResource(uint64_t ResourceID) { const unsigned Index = getResourceStateIndex(ResourceID); ResourceState &Resource = *Resources[Index]; assert(Resource.isAResourceGroup() && !Resource.isReserved() && - "Unexpected resource found!"); + "Unexpected resource state found!"); Resource.setReserved(); ReservedResourceGroups ^= 1ULL << Index; } @@ -347,6 +355,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) { Resource.clearReserved(); if (Resource.isAResourceGroup()) ReservedResourceGroups ^= 1ULL << Index; + // Now it is safe to release dispatch/issue resources. 
+ if (Resource.isADispatchHazard()) + ReservedBuffers ^= 1ULL << Index; } } // namespace mca diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp index 0f0f2ff..2d6b22c 100644 --- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp @@ -40,7 +40,7 @@ void Scheduler::dump() const { Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { const InstrDesc &Desc = IR.getInstruction()->getDesc(); - ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers); + ResourceStateEvent RSE = Resources->canBeDispatched(Desc.UsedBuffers); HadTokenStall = RSE != RS_BUFFER_AVAILABLE; switch (RSE) { @@ -106,7 +106,7 @@ void Scheduler::issueInstruction( bool HasDependentUsers = Inst.hasDependentUsers(); HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); - Resources->releaseBuffers(Inst.getDesc().Buffers); + Resources->releaseBuffers(Inst.getDesc().UsedBuffers); issueInstructionImpl(IR, UsedResources); // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during @@ -301,7 +301,7 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const { bool Scheduler::dispatch(InstRef &IR) { Instruction &IS = *IR.getInstruction(); const InstrDesc &Desc = IS.getDesc(); - Resources->reserveBuffers(Desc.Buffers); + Resources->reserveBuffers(Desc.UsedBuffers); // If necessary, reserve queue entries in the load-store unit (LSU). 
if (IS.isMemOp()) diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 8299203..ee9e2bcd 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -80,7 +80,7 @@ static void initializeUsedResources(InstrDesc &ID, if (PR.BufferSize < 0) { AllInOrderResources = false; } else { - Buffers.setBit(PRE->ProcResourceIdx); + Buffers.setBit(getResourceStateIndex(Mask)); AnyDispatchHazards |= (PR.BufferSize == 0); AllInOrderResources &= (PR.BufferSize <= 1); } @@ -139,9 +139,6 @@ static void initializeUsedResources(InstrDesc &ID, } } - ID.UsedProcResUnits = UsedResourceUnits; - ID.UsedProcResGroups = UsedResourceGroups; - // A SchedWrite may specify a number of cycles in which a resource group // is reserved. For example (on target x86; cpu Haswell): // @@ -177,20 +174,13 @@ static void initializeUsedResources(InstrDesc &ID, uint64_t Mask = ProcResourceMasks[I]; if (Mask != SR.first && ((Mask & SR.first) == SR.first)) - Buffers.setBit(I); + Buffers.setBit(getResourceStateIndex(Mask)); } } - // Now set the buffers. 
- if (unsigned NumBuffers = Buffers.countPopulation()) { - ID.Buffers.resize(NumBuffers); - for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { - if (Buffers[I]) { - --NumBuffers; - ID.Buffers[NumBuffers] = ProcResourceMasks[I]; - } - } - } + ID.UsedBuffers = Buffers.getZExtValue(); + ID.UsedProcResUnits = UsedResourceUnits; + ID.UsedProcResGroups = UsedResourceGroups; LLVM_DEBUG({ for (const std::pair &R : ID.Resources) @@ -198,8 +188,12 @@ static void initializeUsedResources(InstrDesc &ID, << "Reserved=" << R.second.isReserved() << ", " << "#Units=" << R.second.NumUnits << ", " << "cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n'; + uint64_t BufferIDs = ID.UsedBuffers; + while (BufferIDs) { + uint64_t Current = BufferIDs & (-BufferIDs); + dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; + BufferIDs ^= Current; + } dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) << '\n'; @@ -493,7 +487,7 @@ Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, return ErrorSuccess(); bool UsesMemory = ID.MayLoad || ID.MayStore; - bool UsesBuffers = !ID.Buffers.empty(); + bool UsesBuffers = ID.UsedBuffers; bool UsesResources = !ID.Resources.empty(); if (!UsesMemory && !UsesBuffers && !UsesResources) return ErrorSuccess(); diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp index a2b361f..9166f07 100644 --- a/llvm/lib/MCA/Stages/ExecuteStage.cpp +++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp @@ -269,13 +269,17 @@ void ExecuteStage::notifyInstructionIssued( void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.Buffers.empty()) + uint64_t UsedBuffers = IR.getInstruction()->getDesc().UsedBuffers; + if (!UsedBuffers) return; - 
SmallVector BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); - std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); + SmallVector BufferIDs(countPopulation(UsedBuffers), 0); + for (unsigned I = 0, E = BufferIDs.size(); I < E; ++I) { + uint64_t CurrentBufferMask = UsedBuffers & (-UsedBuffers); + BufferIDs[I] = HWS.getResourceID(CurrentBufferMask); + UsedBuffers ^= CurrentBufferMask; + } + if (Reserved) { for (HWEventListener *Listener : getListeners()) Listener->onReservedBuffers(IR, BufferIDs); -- 2.7.4