# CHECK-NEXT: Number of physical registers: 72
# CHECK-NEXT: Total number of mappings created: 3
# CHECK-NEXT: Max number of mappings used: 3
+# CHECK-NEXT: Number of optimizable moves: 3
+# CHECK-NEXT: Number of moves eliminated: 3 (100.0%)
+# CHECK-NEXT: Number of zero moves: 3 (100.0%)
+# CHECK-NEXT: Max moves eliminated per cycle: 1
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
# CHECK-NEXT: Number of physical registers: 72
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0
+# CHECK-NEXT: Number of optimizable moves: 21
+# CHECK-NEXT: Number of moves eliminated: 21 (100.0%)
+# CHECK-NEXT: Number of zero moves: 21 (100.0%)
+# CHECK-NEXT: Max moves eliminated per cycle: 2
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
# CHECK-NEXT: Number of physical registers: 72
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0
+# CHECK-NEXT: Number of optimizable moves: 18
+# CHECK-NEXT: Number of moves eliminated: 18 (100.0%)
+# CHECK-NEXT: Number of zero moves: 18 (100.0%)
+# CHECK-NEXT: Max moves eliminated per cycle: 2
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
# CHECK-NEXT: Number of physical registers: 64
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0
+# CHECK-NEXT: Number of optimizable moves: 12
+# CHECK-NEXT: Number of moves eliminated: 12 (100.0%)
+# CHECK-NEXT: Number of zero moves: 12 (100.0%)
+# CHECK-NEXT: Max moves eliminated per cycle: 2
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: Number of physical registers: 64
# CHECK-NEXT: Total number of mappings created: 0
# CHECK-NEXT: Max number of mappings used: 0
+# CHECK-NEXT: Number of optimizable moves: 12
+# CHECK-NEXT: Number of moves eliminated: 12 (100.0%)
+# CHECK-NEXT: Number of zero moves: 12 (100.0%)
+# CHECK-NEXT: Max moves eliminated per cycle: 2
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
: STI(sti) {
const MCSchedModel &SM = STI.getSchedModel();
- RegisterFileUsage Empty = {0, 0, 0};
+ RegisterFileUsage RFUEmpty = {0, 0, 0};
+ MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
if (!SM.hasExtraProcessorInfo()) {
// Assume a single register file.
- RegisterFiles.emplace_back(Empty);
+ PRFUsage.emplace_back(RFUEmpty);
+ MoveElimInfo.emplace_back(MEIEmpty);
return;
}
// be skipped. If there are no user defined register files, then reserve a
// single entry for the default register file at index #0.
unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
- RegisterFiles.resize(NumRegFiles);
- std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty);
+
+ PRFUsage.resize(NumRegFiles);
+ std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
+
+ MoveElimInfo.resize(NumRegFiles);
+ std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
+}
+
// Accumulates register-mapping statistics for every tracked register file.
//
// For each index I in PRFUsage, UsedPhysRegs[I] is added to both the number
// of currently used mappings and the running total of mappings created, and
// the high-water mark (MaxUsedMappings) is raised if the new current value
// exceeds it. onEvent() calls this for "Dispatched" events, passing the
// event's UsedPhysRegs.
//
// NOTE(review): UsedPhysRegs is indexed without a bounds check, so it is
// assumed to hold at least PRFUsage.size() entries -- confirm against callers.
+void RegisterFileStatistics::updateRegisterFileUsage(
+ ArrayRef<unsigned> UsedPhysRegs) {
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
+ RegisterFileUsage &RFU = PRFUsage[I];
+ unsigned NumUsedPhysRegs = UsedPhysRegs[I];
+ RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
+ RFU.TotalMappings += NumUsedPhysRegs;
// Track the peak number of simultaneously used mappings.
+ RFU.MaxUsedMappings =
+ std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
+ }
+}
+
// Updates the move-elimination counters for instruction Inst.
//
// Instructions for which isOptimizableMove() is false are ignored. For the
// remaining ones, the counters of the register file that serves the (single)
// register definition are updated. onEvent() calls this for "Dispatched"
// events.
+void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
+ if (!Inst.isOptimizableMove())
+ return;
+
// The code below only inspects the first definition and the first use.
+ assert(Inst.getDefs().size() == 1 && "Expected a single definition!");
+ assert(Inst.getUses().size() == 1 && "Expected a single register use!");
+ const WriteState &WS = Inst.getDefs()[0];
+ const ReadState &RS = Inst.getUses()[0];
+
// Statistics are attributed to the register file of the definition.
+ MoveEliminationInfo &Info =
+ MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
+ Info.TotalMoveEliminationCandidates++;
// Eliminated moves are counted per cycle here; onCycleEnd() folds
// CurrentMovesEliminated into the totals and resets it.
+ if (WS.isEliminated())
+ Info.CurrentMovesEliminated++;
// A "zero move" is counted only when both the write and the read are
// flagged as zero.
+ if (WS.isWriteZero() && RS.isReadZero())
+ Info.TotalMovesThatPropagateZero++;
}
void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
break;
case HWInstructionEvent::Retired: {
const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I)
- RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
+ PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
break;
}
case HWInstructionEvent::Dispatched: {
const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
- RegisterFileUsage &RFU = RegisterFiles[I];
- unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I];
- RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
- RFU.TotalMappings += NumUsedPhysRegs;
- RFU.MaxUsedMappings =
- std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
- }
+ updateRegisterFileUsage(DE.UsedPhysRegs);
+ updateMoveElimInfo(*DE.IR.getInstruction());
+ }
}
+}
+
// End-of-cycle bookkeeping for the move-elimination statistics.
//
// For every register file, the number of moves eliminated during the cycle
// that just ended is used to update the per-cycle maximum, is added to the
// running total, and is then reset to zero for the next cycle.
+void RegisterFileStatistics::onCycleEnd() {
+ for (MoveEliminationInfo &MEI : MoveElimInfo) {
+ unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
+ CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
+ MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
// Start the next cycle with a clean per-cycle counter.
+ MEI.CurrentMovesEliminated = 0;
}
}
raw_string_ostream TempStream(Buffer);
TempStream << "\n\nRegister File statistics:";
- const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
+ const RegisterFileUsage &GlobalUsage = PRFUsage[0];
TempStream << "\nTotal number of mappings created: "
<< GlobalUsage.TotalMappings;
TempStream << "\nMax number of mappings used: "
<< GlobalUsage.MaxUsedMappings << '\n';
- for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
- const RegisterFileUsage &RFU = RegisterFiles[I];
+ for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
+ const RegisterFileUsage &RFU = PRFUsage[I];
// Obtain the register file descriptor from the scheduling model.
assert(STI.getSchedModel().hasExtraProcessorInfo() &&
"Unable to find register file info!");
<< RFU.TotalMappings;
TempStream << "\n Max number of mappings used: "
<< RFU.MaxUsedMappings << '\n';
+ const MoveEliminationInfo &MEI = MoveElimInfo[I];
+
+ if (MEI.TotalMoveEliminationCandidates) {
+ TempStream << " Number of optimizable moves: "
+ << MEI.TotalMoveEliminationCandidates;
+ double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
+ MEI.TotalMoveEliminationCandidates *
+ 100.0;
+ double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
+ MEI.TotalMoveEliminationCandidates * 100.0;
+ TempStream << "\n Number of moves eliminated: "
+ << MEI.TotalMovesEliminated << " "
+ << format("(%.1f%%)",
+ floor((EliminatedMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Number of zero moves: "
+ << MEI.TotalMovesThatPropagateZero << " "
+ << format("(%.1f%%)",
+ floor((ZeroMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Max moves eliminated per cycle: "
+ << MEI.MaxMovesEliminatedPerCycle << '\n';
+ }
}
TempStream.flush();
/// Number of physical registers: 72
/// Total number of mappings created: 0
/// Max number of mappings used: 0
+/// Number of optimizable moves: 200
+/// Number of moves eliminated: 200 (100.0%)
+/// Number of zero moves: 200 (100.0%)
+/// Max moves eliminated per cycle: 2
///
/// * Register File #2 -- IntegerPRF:
/// Number of physical registers: 64
unsigned CurrentlyUsedMappings;
};
// Move-elimination counters tracked for a single register file.
+ struct MoveEliminationInfo {
// Number of optimizable moves seen by updateMoveElimInfo(); printed as
// "Number of optimizable moves".
+ unsigned TotalMoveEliminationCandidates;
// Sum of the per-cycle eliminated-move counts, accumulated in onCycleEnd();
// printed as "Number of moves eliminated".
+ unsigned TotalMovesEliminated;
// Number of optimizable moves whose write and read are both flagged as zero;
// printed as "Number of zero moves".
+ unsigned TotalMovesThatPropagateZero;
// Largest value reached by CurrentMovesEliminated at the end of a cycle;
// printed as "Max moves eliminated per cycle".
+ unsigned MaxMovesEliminatedPerCycle;
// Moves eliminated since the last call to onCycleEnd(), which resets this
// counter to zero.
+ unsigned CurrentMovesEliminated;
+ };
+
// There is one entry for each register file implemented by the processor.
- llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
+ llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
+ llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo;
+
+ void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs);
+ void updateMoveElimInfo(const Instruction &Inst);
public:
RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
+ void onCycleEnd() override;
void onEvent(const HWInstructionEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
};
void freePhysRegs(const RegisterRenamingInfo &Entry,
MutableArrayRef<unsigned> FreedPhysRegs);
+ // Collects writes that are in a RAW dependency with RS.
+ // This method is called from `addRegisterRead()`.
+ void collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const;
+
// Create an instance of RegisterMappingTracker for every register file
// specified by the processor model.
// If no register file is specified, then this method creates a default
// No physical register is allocated if this write is from a zero-idiom.
void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
+ // Collect writes that are in a data dependency with RS, and update RS
+ // internal state.
+ void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
+
// Removes write \param WS from the register mappings.
// Physical registers may be released to reflect this update.
// No registers are released if this write is from a zero-idiom.
// If RS is a read from a zero register, and WS is eliminated, then
// `WS.WritesZero` is also set, so that method addRegisterWrite() would not
// reserve a physical register for it.
- bool tryEliminateMove(WriteState &WS, const ReadState &RS);
+ bool tryEliminateMove(WriteState &WS, ReadState &RS);
// Checks if there are enough physical registers in the register files.
// Returns a "response mask" where each bit represents the response from a
// Current implementation can simulate up to 32 register files (including the
// special register file at index #0).
unsigned isAvailable(ArrayRef<unsigned> Regs) const;
- void collectWrites(SmallVectorImpl<WriteRef> &Writes, unsigned RegID) const;
+
+ // Returns the number of PRFs implemented by this processor.
unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
// Notify each PRF that a new cycle just started.
// field RegisterID from WD.
unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
+
// True if this write implicitly clears the upper portion of RegisterID's
// super-registers.
bool ClearsSuperRegs;
WriteState(const WriteDescriptor &Desc, unsigned RegID,
bool clearsSuperRegs = false, bool writesZero = false)
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
- ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+ PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
WriteState(const WriteState &Other) = default;
int getCyclesLeft() const { return CyclesLeft; }
unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
unsigned getRegisterID() const { return RegisterID; }
+ unsigned getRegisterFileID() const { return PRFID; }
unsigned getLatency() const { return WD->Latency; }
void addUser(ReadState *Use, int ReadAdvance);
IsEliminated = true;
}
+ void setPRF(unsigned PRF) { PRFID = PRF; }
+
// On every cycle, update CyclesLeft and notify dependent users.
void cycleEvent();
void onInstructionIssued();
const ReadDescriptor *RD;
// Physical register identifier associated with this read.
unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
// Number of writes that contribute to the definition of RegisterID.
// In the absence of partial register updates, the number of DependentWrites
// cannot be more than one.
// This field is set to true only if there are no dependent writes, and
// there are no `CyclesLeft' to wait.
bool IsReady;
+ // True if this is a read from a known zero register.
+ bool IsZero;
// True if this register read is from a dependency-breaking instruction.
bool IndependentFromDef;
public:
ReadState(const ReadDescriptor &Desc, unsigned RegID)
- : RD(&Desc), RegisterID(RegID), DependentWrites(0),
+ : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
- IndependentFromDef(false) {}
+ IsZero(false), IndependentFromDef(false) {}
const ReadDescriptor &getDescriptor() const { return *RD; }
unsigned getSchedClass() const { return RD->SchedClassID; }
unsigned getRegisterID() const { return RegisterID; }
+ unsigned getRegisterFileID() const { return PRFID; }
bool isReady() const { return IsReady; }
bool isImplicitRead() const { return RD->isImplicitRead(); }
DependentWrites = Writes;
IsReady = !Writes;
}
+
+ bool isReadZero() const { return IsZero; }
+ void setReadZero() { IsZero = true; }
+ void setPRF(unsigned ID) { PRFID = ID; }
};
/// A sequence of cycles.
ArrayRef<unsigned> UsedPhysRegs,
unsigned uOps) const;
- void collectWrites(SmallVectorImpl<WriteRef> &Vec, unsigned RegID) const {
- return PRF.collectWrites(Vec, RegID);
- }
-
public:
DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
unsigned MaxDispatchWidth, RetireControlUnit &R,
bool IsEliminated = WS.isEliminated();
bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ WS.setPRF(RRI.IndexPlusCost.first);
if (RRI.RenameAs && RRI.RenameAs != RegID) {
RegID = RRI.RenameAs;
RegisterMappings[*I].second.AliasRegID = 0U;
}
- // No physical registers are allocated for instructions that are optimized in
- // hardware. For example, zero-latency data-dependency breaking instructions
- // don't consume physical registers.
+ // No physical registers are allocated for instructions that are optimized
+ // in hardware. For example, zero-latency data-dependency breaking
+ // instructions don't consume physical registers.
if (ShouldAllocatePhysRegs)
allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
}
}
}
-bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) {
+bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
}
RMT.NumMoveEliminated++;
- if (IsZeroMove)
+ if (IsZeroMove) {
WS.setWriteZero();
+ RS.setReadZero();
+ }
WS.setEliminated();
return true;
}
-void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
- unsigned RegID) const {
+void RegisterFile::collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const {
+ unsigned RegID = RS.getRegisterID();
assert(RegID && RegID < RegisterMappings.size());
LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
<< MRI.getName(RegID) << '\n');
}
// Remove duplicate entries and resize the input vector.
- sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
- return Lhs.getWriteState() < Rhs.getWriteState();
- });
- auto It = std::unique(Writes.begin(), Writes.end());
- Writes.resize(std::distance(Writes.begin(), It));
+ if (Writes.size() > 1) {
+ sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
+ return Lhs.getWriteState() < Rhs.getWriteState();
+ });
+ auto It = std::unique(Writes.begin(), Writes.end());
+ Writes.resize(std::distance(Writes.begin(), It));
+ }
LLVM_DEBUG({
for (const WriteRef &WR : Writes) {
});
}
// Collects into Defs the writes that register read RS depends on, and
// updates the internal state of RS accordingly.
//
// RS is always tagged with the index of the register file taken from the
// renaming info of its register. If RS is independent from its definition,
// nothing else happens: no writes are collected and the dependent-write
// count of RS is left untouched.
+void RegisterFile::addRegisterRead(ReadState &RS,
+ SmallVectorImpl<WriteRef> &Defs) const {
+ unsigned RegID = RS.getRegisterID();
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ RS.setPRF(RRI.IndexPlusCost.first);
+ if (RS.isIndependentFromDef())
+ return;
+
// Flag the read as a zero read when its register has an entry set in
// ZeroRegisters.
+ if (ZeroRegisters[RS.getRegisterID()])
+ RS.setReadZero();
+ collectWrites(RS, Defs);
// NOTE(review): the count is Defs.size(), so Defs is presumably expected to
// be empty on entry -- confirm against callers.
+ RS.setDependentWrites(Defs.size());
+}
+
unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
const MCSubtargetInfo &STI) {
SmallVector<WriteRef, 4> DependentWrites;
- collectWrites(DependentWrites, RS.getRegisterID());
- RS.setDependentWrites(DependentWrites.size());
+ // Collect all the dependent writes, and update RS internal state.
+ PRF.addRegisterRead(RS, DependentWrites);
+
// We know that this read depends on all the writes in DependentWrites.
// For each write, check if we have ReadAdvance information, and use it
// to figure out in how many cycles this read becomes available.
// We also don't update data dependencies for instructions that have been
// eliminated at register renaming stage.
if (!IsEliminated) {
- for (ReadState &RS : IS.getUses()) {
- if (!RS.isIndependentFromDef())
- updateRAWDependencies(RS, STI);
- }
+ for (ReadState &RS : IS.getUses())
+ updateRAWDependencies(RS, STI);
}
// By default, a dependency-breaking zero-idiom is expected to be optimized
// to the instruction.
SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
for (WriteState &WS : IS.getDefs())
- PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS),
- RegisterFiles);
+ PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
// Reserve slots in the RCU, and notify the instruction that it has been
// dispatched to the schedulers for execution.