From: Andrea Di Biagio Date: Thu, 1 Nov 2018 18:04:39 +0000 (+0000) Subject: [llvm-mca] Add extra counters for move elimination in view RegisterFileStatistics. X-Git-Tag: llvmorg-8.0.0-rc1~5221 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fe3bc1b9bf7603fcd4984b7e6c721f95171a56f8;p=platform%2Fupstream%2Fllvm.git [llvm-mca] Add extra counters for move elimination in view RegisterFileStatistics. This patch teaches view RegisterFileStatistics how to report events for optimizable register moves. For each processor register file, view RegisterFileStatistics reports the following extra information: - Number of optimizable register moves - Number of register moves eliminated - Number of zero moves (i.e. register moves that propagate a zero) - Max Number of moves eliminated per cycle. Differential Revision: https://reviews.llvm.org/D53976 llvm-svn: 345865 --- diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s index 3b38173..0c27d2c 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s @@ -39,6 +39,10 @@ vaddps %xmm1, %xmm1, %xmm2 # CHECK-NEXT: Number of physical registers: 72 # CHECK-NEXT: Total number of mappings created: 3 # CHECK-NEXT: Max number of mappings used: 3 +# CHECK-NEXT: Number of optimizable moves: 3 +# CHECK-NEXT: Number of moves eliminated: 3 (100.0%) +# CHECK-NEXT: Number of zero moves: 3 (100.0%) +# CHECK-NEXT: Max moves eliminated per cycle: 1 # CHECK: * Register File #2 -- JIntegerPRF: # CHECK-NEXT: Number of physical registers: 64 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s index 096fe6c..08465f9 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s @@ -49,6 +49,10 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: Number of physical registers: 72 # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 +# CHECK-NEXT: Number of optimizable moves: 21 +# CHECK-NEXT: Number of moves eliminated: 21 (100.0%) +# CHECK-NEXT: Number of zero moves: 21 (100.0%) +# CHECK-NEXT: Max moves eliminated per cycle: 2 # CHECK: * Register File #2 -- JIntegerPRF: # CHECK-NEXT: Number of physical registers: 64 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s index 3d64bfd..f3d850f 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s @@ -44,6 +44,10 @@ vmovdqu %xmm5, %xmm0 # CHECK-NEXT: Number of physical registers: 72 # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 +# CHECK-NEXT: Number of optimizable moves: 18 +# CHECK-NEXT: Number of moves eliminated: 18 (100.0%) +# CHECK-NEXT: Number of zero moves: 18 (100.0%) +# CHECK-NEXT: Max moves eliminated per cycle: 2 # CHECK: * Register File #2 -- JIntegerPRF: # CHECK-NEXT: Number of physical registers: 64 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s index 223b4c2..c2df1ba 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s @@ -45,6 +45,10 @@ mov %edx, %eax # CHECK-NEXT: Number of physical registers: 64 # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 +# CHECK-NEXT: Number of optimizable moves: 12 +# CHECK-NEXT: Number of moves eliminated: 12 (100.0%) +# CHECK-NEXT: Number of zero moves: 12 (100.0%) +# CHECK-NEXT: Max moves eliminated per cycle: 2 # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s index ab873c7..277293e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s @@ -45,6 +45,10 @@ mov %rdx, %rax # CHECK-NEXT: Number of physical registers: 64 # CHECK-NEXT: Total number of mappings created: 0 # CHECK-NEXT: Max number of mappings used: 0 +# CHECK-NEXT: Number of optimizable moves: 12 +# CHECK-NEXT: Number of moves eliminated: 12 (100.0%) +# CHECK-NEXT: Number of zero moves: 12 (100.0%) +# CHECK-NEXT: Max moves eliminated per cycle: 2 # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp index bd638d9..06202bc 100644 --- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp @@ -21,10 +21,12 @@ namespace mca { RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti) : STI(sti) { const MCSchedModel &SM = STI.getSchedModel(); - RegisterFileUsage Empty = {0, 0, 0}; + RegisterFileUsage RFUEmpty = {0, 0, 0}; + MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0}; if (!SM.hasExtraProcessorInfo()) { // Assume a single register file. - RegisterFiles.emplace_back(Empty); + PRFUsage.emplace_back(RFUEmpty); + MoveElimInfo.emplace_back(MEIEmpty); return; } @@ -35,8 +37,42 @@ RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti) // be skipped. If there are no user defined register files, then reserve a // single entry for the default register file at index #0. unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U); - RegisterFiles.resize(NumRegFiles); - std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty); + + PRFUsage.resize(NumRegFiles); + std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty); + + MoveElimInfo.resize(NumRegFiles); + std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty); +} + +void RegisterFileStatistics::updateRegisterFileUsage( + ArrayRef UsedPhysRegs) { + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) { + RegisterFileUsage &RFU = PRFUsage[I]; + unsigned NumUsedPhysRegs = UsedPhysRegs[I]; + RFU.CurrentlyUsedMappings += NumUsedPhysRegs; + RFU.TotalMappings += NumUsedPhysRegs; + RFU.MaxUsedMappings = + std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); + } +} + +void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) { + if (!Inst.isOptimizableMove()) + return; + + assert(Inst.getDefs().size() == 1 && "Expected a single definition!"); + assert(Inst.getUses().size() == 1 && "Expected a single register use!"); + const WriteState &WS = Inst.getDefs()[0]; + const ReadState &RS = Inst.getUses()[0]; + + MoveEliminationInfo &Info = + MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()]; + Info.TotalMoveEliminationCandidates++; + if (WS.isEliminated()) + Info.CurrentMovesEliminated++; + if (WS.isWriteZero() && RS.isReadZero()) + Info.TotalMovesThatPropagateZero++; } void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { @@ -45,21 +81,24 @@ void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { break; case HWInstructionEvent::Retired: { const auto &RE = static_cast(Event); - for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) - RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) + PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; break; } case HWInstructionEvent::Dispatched: { const auto &DE = static_cast(Event); - for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) { - RegisterFileUsage &RFU = RegisterFiles[I]; - unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I]; - RFU.CurrentlyUsedMappings += NumUsedPhysRegs; - RFU.TotalMappings += NumUsedPhysRegs; - RFU.MaxUsedMappings = - std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); - } + updateRegisterFileUsage(DE.UsedPhysRegs); + updateMoveElimInfo(*DE.IR.getInstruction()); + } } +} + +void RegisterFileStatistics::onCycleEnd() { + for (MoveEliminationInfo &MEI : MoveElimInfo) { + unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle; + CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated); + MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated; + MEI.CurrentMovesEliminated = 0; } } @@ -68,14 +107,14 @@ void RegisterFileStatistics::printView(raw_ostream &OS) const { raw_string_ostream TempStream(Buffer); TempStream << "\n\nRegister File statistics:"; - const RegisterFileUsage &GlobalUsage = RegisterFiles[0]; + const RegisterFileUsage &GlobalUsage = PRFUsage[0]; TempStream << "\nTotal number of mappings created: " << GlobalUsage.TotalMappings; TempStream << "\nMax number of mappings used: " << GlobalUsage.MaxUsedMappings << '\n'; - for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) { - const RegisterFileUsage &RFU = RegisterFiles[I]; + for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) { + const RegisterFileUsage &RFU = PRFUsage[I]; // Obtain the register file descriptor from the scheduling model. assert(STI.getSchedModel().hasExtraProcessorInfo() && "Unable to find register file info!"); @@ -98,6 +137,27 @@ void RegisterFileStatistics::printView(raw_ostream &OS) const { << RFU.TotalMappings; TempStream << "\n Max number of mappings used: " << RFU.MaxUsedMappings << '\n'; + const MoveEliminationInfo &MEI = MoveElimInfo[I]; + + if (MEI.TotalMoveEliminationCandidates) { + TempStream << " Number of optimizable moves: " + << MEI.TotalMoveEliminationCandidates; + double EliminatedMovProportion = (double)MEI.TotalMovesEliminated / + MEI.TotalMoveEliminationCandidates * + 100.0; + double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero / + MEI.TotalMoveEliminationCandidates * 100.0; + TempStream << "\n Number of moves eliminated: " + << MEI.TotalMovesEliminated << " " + << format("(%.1f%%)", + floor((EliminatedMovProportion * 10) + 0.5) / 10); + TempStream << "\n Number of zero moves: " + << MEI.TotalMovesThatPropagateZero << " " + << format("(%.1f%%)", + floor((ZeroMovProportion * 10) + 0.5) / 10); + TempStream << "\n Max moves eliminated per cycle: " + << MEI.MaxMovesEliminatedPerCycle << '\n'; + } } TempStream.flush(); diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h index 86858d8..a2c52a6 100644 --- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -21,6 +21,10 @@ /// Number of physical registers: 72 /// Total number of mappings created: 0 /// Max number of mappings used: 0 +/// Number of optimizable moves: 200 +/// Number of moves eliminated: 200 (100.0%) +/// Number of zero moves: 200 (100.0%) +/// Max moves eliminated per cycle: 2 /// /// * Register File #2 -- IntegerPRF: /// Number of physical registers: 64 @@ -49,12 +53,25 @@ class RegisterFileStatistics : public View { unsigned CurrentlyUsedMappings; }; + struct MoveEliminationInfo { + unsigned TotalMoveEliminationCandidates; + unsigned TotalMovesEliminated; + unsigned TotalMovesThatPropagateZero; + unsigned MaxMovesEliminatedPerCycle; + unsigned CurrentMovesEliminated; + }; + // There is one entry for each register file implemented by the processor. - llvm::SmallVector RegisterFiles; + llvm::SmallVector PRFUsage; + llvm::SmallVector MoveElimInfo; + + void updateRegisterFileUsage(ArrayRef UsedPhysRegs); + void updateMoveElimInfo(const Instruction &Inst); public: RegisterFileStatistics(const llvm::MCSubtargetInfo &sti); + void onCycleEnd() override; void onEvent(const HWInstructionEvent &Event) override; void printView(llvm::raw_ostream &OS) const override; }; diff --git a/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h b/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h index 1cca8b5..d9949bf 100644 --- a/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h +++ b/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h @@ -173,6 +173,11 @@ class RegisterFile : public HardwareUnit { void freePhysRegs(const RegisterRenamingInfo &Entry, MutableArrayRef FreedPhysRegs); + // Collects writes that are in a RAW dependency with RS. + // This method is called from `addRegisterRead()`. + void collectWrites(const ReadState &RS, + SmallVectorImpl &Writes) const; + // Create an instance of RegisterMappingTracker for every register file // specified by the processor model. // If no register file is specified, then this method creates a default @@ -189,6 +194,10 @@ public: // No physical regiser is allocated if this write is from a zero-idiom. void addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs); + // Collect writes that are in a data dependency with RS, and update RS + // internal state. + void addRegisterRead(ReadState &RS, SmallVectorImpl &Writes) const; + // Removes write \param WS from the register mappings. // Physical registers may be released to reflect this update. // No registers are released if this write is from a zero-idiom. @@ -200,7 +209,7 @@ public: // If RS is a read from a zero register, and WS is eliminated, then // `WS.WritesZero` is also set, so that method addRegisterWrite() would not // reserve a physical register for it. - bool tryEliminateMove(WriteState &WS, const ReadState &RS); + bool tryEliminateMove(WriteState &WS, ReadState &RS); // Checks if there are enough physical registers in the register files. // Returns a "response mask" where each bit represents the response from a @@ -212,7 +221,8 @@ public: // Current implementation can simulate up to 32 register files (including the // special register file at index #0). unsigned isAvailable(ArrayRef Regs) const; - void collectWrites(SmallVectorImpl &Writes, unsigned RegID) const; + + // Returns the number of PRFs implemented by this processor. unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } // Notify each PRF that a new cycle just started. diff --git a/llvm/tools/llvm-mca/include/Instruction.h b/llvm/tools/llvm-mca/include/Instruction.h index f83be1f..7407283 100644 --- a/llvm/tools/llvm-mca/include/Instruction.h +++ b/llvm/tools/llvm-mca/include/Instruction.h @@ -101,6 +101,9 @@ class WriteState { // field RegisterID from WD. unsigned RegisterID; + // Physical register file that serves register RegisterID. + unsigned PRFID; + // True if this write implicitly clears the upper portion of RegisterID's // super-registers. bool ClearsSuperRegs; @@ -135,7 +138,7 @@ public: WriteState(const WriteDescriptor &Desc, unsigned RegID, bool clearsSuperRegs = false, bool writesZero = false) : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), - ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), + PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {} WriteState(const WriteState &Other) = default; @@ -144,6 +147,7 @@ public: int getCyclesLeft() const { return CyclesLeft; } unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; } unsigned getRegisterID() const { return RegisterID; } + unsigned getRegisterFileID() const { return PRFID; } unsigned getLatency() const { return WD->Latency; } void addUser(ReadState *Use, int ReadAdvance); @@ -168,6 +172,8 @@ public: IsEliminated = true; } + void setPRF(unsigned PRF) { PRFID = PRF; } + // On every cycle, update CyclesLeft and notify dependent users. void cycleEvent(); void onInstructionIssued(); @@ -185,6 +191,8 @@ class ReadState { const ReadDescriptor *RD; // Physical register identified associated to this read. unsigned RegisterID; + // Physical register file that serves register RegisterID. + unsigned PRFID; // Number of writes that contribute to the definition of RegisterID. // In the absence of partial register updates, the number of DependentWrites // cannot be more than one. @@ -201,18 +209,21 @@ class ReadState { // This field is set to true only if there are no dependent writes, and // there are no `CyclesLeft' to wait. bool IsReady; + // True if this is a read from a known zero register. + bool IsZero; // True if this register read is from a dependency-breaking instruction. bool IndependentFromDef; public: ReadState(const ReadDescriptor &Desc, unsigned RegID) - : RD(&Desc), RegisterID(RegID), DependentWrites(0), + : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0), CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true), - IndependentFromDef(false) {} + IsZero(false), IndependentFromDef(false) {} const ReadDescriptor &getDescriptor() const { return *RD; } unsigned getSchedClass() const { return RD->SchedClassID; } unsigned getRegisterID() const { return RegisterID; } + unsigned getRegisterFileID() const { return PRFID; } bool isReady() const { return IsReady; } bool isImplicitRead() const { return RD->isImplicitRead(); } @@ -226,6 +237,10 @@ public: DependentWrites = Writes; IsReady = !Writes; } + + bool isReadZero() const { return IsZero; } + void setReadZero() { IsZero = true; } + void setPRF(unsigned ID) { PRFID = ID; } }; /// A sequence of cycles. diff --git a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h index 3595f31..29cace1 100644 --- a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h +++ b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h @@ -68,10 +68,6 @@ class DispatchStage final : public Stage { ArrayRef UsedPhysRegs, unsigned uOps) const; - void collectWrites(SmallVectorImpl &Vec, unsigned RegID) const { - return PRF.collectWrites(Vec, RegID); - } - public: DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI, unsigned MaxDispatchWidth, RetireControlUnit &R, diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp index 71aec49..6bc63a0 100644 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -173,6 +173,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, bool IsEliminated = WS.isEliminated(); bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + WS.setPRF(RRI.IndexPlusCost.first); if (RRI.RenameAs && RRI.RenameAs != RegID) { RegID = RRI.RenameAs; @@ -217,9 +218,9 @@ void RegisterFile::addRegisterWrite(WriteRef Write, RegisterMappings[*I].second.AliasRegID = 0U; } - // No physical registers are allocated for instructions that are optimized in - // hardware. For example, zero-latency data-dependency breaking instructions - // don't consume physical registers. + // No physical registers are allocated for instructions that are optimized + // in hardware. For example, zero-latency data-dependency breaking + // instructions don't consume physical registers. if (ShouldAllocatePhysRegs) allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); } @@ -288,7 +289,7 @@ void RegisterFile::removeRegisterWrite( } } -bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) { +bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; @@ -349,15 +350,18 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) { } RMT.NumMoveEliminated++; - if (IsZeroMove) + if (IsZeroMove) { WS.setWriteZero(); + RS.setReadZero(); + } WS.setEliminated(); return true; } -void RegisterFile::collectWrites(SmallVectorImpl &Writes, - unsigned RegID) const { +void RegisterFile::collectWrites(const ReadState &RS, + SmallVectorImpl &Writes) const { + unsigned RegID = RS.getRegisterID(); assert(RegID && RegID < RegisterMappings.size()); LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " << MRI.getName(RegID) << '\n'); @@ -379,11 +383,13 @@ void RegisterFile::collectWrites(SmallVectorImpl &Writes, } // Remove duplicate entries and resize the input vector. - sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { - return Lhs.getWriteState() < Rhs.getWriteState(); - }); - auto It = std::unique(Writes.begin(), Writes.end()); - Writes.resize(std::distance(Writes.begin(), It)); + if (Writes.size() > 1) { + sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + } LLVM_DEBUG({ for (const WriteRef &WR : Writes) { @@ -395,6 +401,20 @@ void RegisterFile::collectWrites(SmallVectorImpl &Writes, }); } +void RegisterFile::addRegisterRead(ReadState &RS, + SmallVectorImpl &Defs) const { + unsigned RegID = RS.getRegisterID(); + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + RS.setPRF(RRI.IndexPlusCost.first); + if (RS.isIndependentFromDef()) + return; + + if (ZeroRegisters[RS.getRegisterID()]) + RS.setReadZero(); + collectWrites(RS, Defs); + RS.setDependentWrites(Defs.size()); +} + unsigned RegisterFile::isAvailable(ArrayRef Regs) const { SmallVector NumPhysRegs(getNumRegisterFiles()); diff --git a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp index 104446e..838dbad 100644 --- a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp @@ -67,8 +67,9 @@ void DispatchStage::updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI) { SmallVector DependentWrites; - collectWrites(DependentWrites, RS.getRegisterID()); - RS.setDependentWrites(DependentWrites.size()); + // Collect all the dependent writes, and update RS internal state. + PRF.addRegisterRead(RS, DependentWrites); + // We know that this read depends on all the writes in DependentWrites. // For each write, check if we have ReadAdvance information, and use it // to figure out in how many cycles this read becomes available. @@ -116,10 +117,8 @@ Error DispatchStage::dispatch(InstRef IR) { // We also don't update data dependencies for instructions that have been // eliminated at register renaming stage. if (!IsEliminated) { - for (ReadState &RS : IS.getUses()) { - if (!RS.isIndependentFromDef()) - updateRAWDependencies(RS, STI); - } + for (ReadState &RS : IS.getUses()) + updateRAWDependencies(RS, STI); } // By default, a dependency-breaking zero-idiom is expected to be optimized @@ -127,8 +126,7 @@ Error DispatchStage::dispatch(InstRef IR) { // to the instruction. SmallVector RegisterFiles(PRF.getNumRegisterFiles()); for (WriteState &WS : IS.getDefs()) - PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), - RegisterFiles); + PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); // Reserve slots in the RCU, and notify the instruction that it has been // dispatched to the schedulers for execution.