}
};
+/// Specify the cost of a register definition in terms of number of physical
+/// registers allocated at register renaming stage. For example, AMD Jaguar
+/// natively supports 128-bit data types, and operations on 256-bit registers
+/// (i.e. YMM registers) are internally split into two COPs (complex operations)
+/// and each COP updates a physical register. Basically, on Jaguar, a YMM
+/// register write effectively consumes two physical registers. That means,
+/// the cost of a YMM write in the BtVer2 model is 2.
+struct MCRegisterCostEntry {
+ unsigned RegisterClassID; // Register class this cost applies to.
+ unsigned Cost; // Physical registers consumed by one register write.
+};
+
+/// A register file descriptor.
+///
+/// This struct describes a processor register file. In particular, it
+/// describes the size of the register file, as well as the cost of
+/// allocating a register from that file at register renaming stage.
+/// FIXME: this struct can be extended to provide information about the number
+/// of read/write ports to the register file. A value of zero for field
+/// 'NumPhysRegs' means: this register file has an unbounded number of physical
+/// registers.
+struct MCRegisterFileDesc {
+ const char *Name; // Register file name, as printed in reports.
+ uint16_t NumPhysRegs; // Number of physical registers in this file.
+ uint16_t NumRegisterCostEntries; // Length of this file's run of cost entries.
+ // Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
+ uint16_t RegisterCostEntryIdx;
+};
+
+/// Provide extra details about the machine processor.
+///
+/// This is a collection of "optional" processor information that is not
+/// normally used by the LLVM machine schedulers, but that can be consumed by
+/// external tools like llvm-mca to improve the quality of the performance
+/// analysis.
+/// In future, the plan is to extend this struct with extra information (for
+/// example: maximum number of instructions retired per cycle; actual size of
+/// the reorder buffer; etc.).
+struct MCExtraProcessorInfo {
+ const MCRegisterFileDesc *RegisterFiles; // Array of NumRegisterFiles descriptors.
+ unsigned NumRegisterFiles;
+ const MCRegisterCostEntry *RegisterCostTable; // Cost entries shared by all register files.
+ unsigned NumRegisterCostEntries; // Presumably the length of RegisterCostTable; verify against the emitter.
+};
+
/// Machine model for scheduling, bundling, and heuristics.
///
/// The machine model directly provides basic information about the
friend class InstrItineraryData;
const InstrItinerary *InstrItineraries;
+ const MCExtraProcessorInfo *ExtraProcessorInfo;
+
+ /// Does this machine model provide extra processor information?
+ bool hasExtraProcessorInfo() const { return ExtraProcessorInfo != nullptr; }
+
unsigned getProcessorID() const { return ProcID; }
/// Does this machine model include instruction-level scheduling.
bool hasInstrSchedModel() const { return SchedClassTable; }
+ /// Return the extra processor information attached to this model.
+ /// Callers must check hasExtraProcessorInfo() first; this precondition is
+ /// only enforced by an assertion.
+ const MCExtraProcessorInfo &getExtraProcessorInfo() const {
+   assert(ExtraProcessorInfo != nullptr &&
+          "No extra information available for this model");
+   return *ExtraProcessorInfo;
+ }
+
/// Return true if this machine model data for all instructions with a
/// scheduling class (itinerary class or SchedRW list).
bool isComplete() const { return CompleteModel; }
SchedReadWrite AliasRW = alias;
SchedMachineModel SchedModel = ?;
}
+
+// Allow the definition of processor register files.
+// Each processor register file declares the number of physical registers, as
+// well as optional register cost information. The cost of a register R is the
+// number of physical registers used to rename R (at register renaming stage).
+// That value defaults to 1 for all the registers contained in the register
+// file. The set of target register files is inferred from the list of register
+// classes. Register costs are defined at register class granularity. An empty
+// list of register classes means that this register file contains all the
+// registers defined by the target.
+class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [],
+ list<int> Costs = []> {
+ list<RegisterClass> RegClasses = Classes; // Register classes renamed by this file.
+ list<int> RegCosts = Costs; // Per-class costs; presumably parallel to RegClasses (confirm in the emitter).
+ int NumPhysRegs = numPhysRegs; // Size of the register file.
+ SchedMachineModel SchedModel = ?; // Processor model this register file belongs to.
+}
def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
+// Jaguar's integer PRF has 64 entries; it holds both the architectural and the
+// speculative versions of the 64-bit integer registers.
+// Reference: www.realworldtech.com/jaguar/4/
+def IntegerPRF : RegisterFile<64, [GR8, GR16, GR32, GR64, CCR]>;
+
+// Jaguar's FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
+// registers. Operations on 256-bit data types are cracked into two COPs.
+// Reference: www.realworldtech.com/jaguar/4/
+def FpuPRF : RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
+
// Integer Pipe Scheduler
def JALU01 : ProcResGroup<[JALU0, JALU1]> {
let BufferSize=20;
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
+
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 10
+
+# CHECK: * Register File #1 -- FpuPRF:
+# CHECK-NEXT: Number of physical registers: 72
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 10
+
+# CHECK: * Register File #2 -- IntegerPRF:
+# CHECK-NEXT: Number of physical registers: 64
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
-# CHECK: Register File statistics.
-# CHECK-NEXT: Register File #0
-# CHECK-NEXT: Total number of mappings created: 10
-# CHECK-NEXT: Max number of mappings used: 10
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
-# CHECK: Dynamic Dispatch Stall Cycles:
+
+# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 13
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 5
+
+# CHECK: * Register File #1 -- FpuPRF:
+# CHECK-NEXT: Number of physical registers: 72
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 5
+
+# CHECK: * Register File #2 -- IntegerPRF:
+# CHECK-NEXT: Number of physical registers: 64
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
-# CHECK: Register File statistics.
-# CHECK-NEXT: Register File #0
-# CHECK-NEXT: Total number of mappings created: 10
-# CHECK-NEXT: Max number of mappings used: 5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: 2 25 25.00 * idivl %eax
-# CHECK: RAT - Register unavailable: 26
-
-# CHECK: Register File statistics.
-# CHECK-NEXT: Register File #0
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 26
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 6
+# CHECK-NEXT: Max number of mappings used: 3
+
+# CHECK: * Register File #1 -- FpuPRF:
+# CHECK-NEXT: Number of physical registers: 72
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
+
+# CHECK: * Register File #2 -- IntegerPRF:
+# CHECK-NEXT: Number of physical registers: 64
# CHECK-NEXT: Total number of mappings created: 6
# CHECK-NEXT: Max number of mappings used: 3
--- /dev/null
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=22 -verbose -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+idiv %eax
+
+# CHECK: Iterations: 22
+# CHECK-NEXT: Instructions: 22
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 25 25.00 * idivl %eax
+
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 6
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 66
+# CHECK-NEXT: Max number of mappings used: 63
+
+# CHECK: * Register File #1 -- FpuPRF:
+# CHECK-NEXT: Number of physical registers: 72
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
+
+# CHECK: * Register File #2 -- IntegerPRF:
+# CHECK-NEXT: Number of physical registers: 64
+# CHECK-NEXT: Total number of mappings created: 66
+# CHECK-NEXT: Max number of mappings used: 63
+
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax
+# CHECK: [1,0] .D========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
+# CHECK: [2,0] . D================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
--- /dev/null
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -verbose -timeline < %s | FileCheck %s
+
+ vdivps %ymm0, %ymm0, %ymm1
+ vaddps %ymm0, %ymm0, %ymm2
+ vaddps %ymm0, %ymm0, %ymm3
+ vaddps %ymm0, %ymm0, %ymm4
+ vaddps %ymm0, %ymm0, %ymm5
+ vaddps %ymm0, %ymm0, %ymm6
+ vaddps %ymm0, %ymm0, %ymm7
+ vaddps %ymm0, %ymm0, %ymm8
+ vaddps %ymm0, %ymm0, %ymm9
+ vaddps %ymm0, %ymm0, %ymm10
+ vaddps %ymm0, %ymm0, %ymm11
+ vaddps %ymm0, %ymm0, %ymm12
+ vaddps %ymm0, %ymm0, %ymm13
+ vaddps %ymm0, %ymm0, %ymm14
+ vaddps %ymm0, %ymm0, %ymm15
+ vaddps %ymm2, %ymm0, %ymm0
+ vaddps %ymm2, %ymm0, %ymm3
+ vaddps %ymm2, %ymm0, %ymm4
+ vaddps %ymm2, %ymm0, %ymm5
+ vaddps %ymm2, %ymm0, %ymm6
+ vaddps %ymm2, %ymm0, %ymm7
+ vaddps %ymm2, %ymm0, %ymm8
+ vaddps %ymm2, %ymm0, %ymm9
+ vaddps %ymm2, %ymm0, %ymm10
+ vaddps %ymm2, %ymm0, %ymm11
+ vaddps %ymm2, %ymm0, %ymm12
+ vaddps %ymm2, %ymm0, %ymm13
+ vaddps %ymm2, %ymm0, %ymm14
+ vaddps %ymm2, %ymm0, %ymm15
+ vaddps %ymm3, %ymm0, %ymm2
+ vaddps %ymm3, %ymm0, %ymm4
+ vaddps %ymm3, %ymm0, %ymm5
+ vaddps %ymm3, %ymm0, %ymm6
+
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 33
+# CHECK-NEXT: Total Cycles: 70
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.47
+
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 8
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 66
+# CHECK-NEXT: Max number of mappings used: 64
+
+# CHECK: * Register File #1 -- FpuPRF:
+# CHECK-NEXT: Number of physical registers: 72
+# CHECK-NEXT: Total number of mappings created: 66
+# CHECK-NEXT: Max number of mappings used: 64
+
+# CHECK: * Register File #2 -- IntegerPRF:
+# CHECK-NEXT: Number of physical registers: 64
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
+
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] .DeeeE----------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm2
+# CHECK-NEXT: [0,2] . D=eeeE--------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm3
+# CHECK-NEXT: [0,3] . D==eeeE------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm4
+# CHECK-NEXT: [0,4] . D===eeeE----------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm5
+# CHECK-NEXT: [0,5] . D====eeeE--------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm6
+# CHECK-NEXT: [0,6] . .D=====eeeE------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm7
+# CHECK-NEXT: [0,7] . . D======eeeE----------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm8
+# CHECK-NEXT: [0,8] . . D=======eeeE--------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm9
+# CHECK-NEXT: [0,9] . . D========eeeE------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm10
+# CHECK-NEXT: [0,10] . . D=========eeeE----------------R . . . . . . vaddps %ymm0, %ymm0, %ymm11
+# CHECK-NEXT: [0,11] . . .D==========eeeE--------------R . . . . . . vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: [0,12] . . . D===========eeeE------------R . . . . . . vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: [0,13] . . . D============eeeE----------R . . . . . . vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: [0,14] . . . D=============eeeE--------R . . . . . . vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: [0,15] . . . D==============eeeE------R . . . . . . vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: [0,16] . . . .D================eeeE---R . . . . . . vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: [0,17] . . . . D=================eeeE-R . . . . . . vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: [0,18] . . . . D==================eeeER . . . . . . vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: [0,19] . . . . D===================eeeER . . . . . . vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: [0,20] . . . . D====================eeeER . . . . . vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: [0,21] . . . . .D=====================eeeER . . . . . vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: [0,22] . . . . . D======================eeeER. . . . . vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: [0,23] . . . . . D=======================eeeER . . . . vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: [0,24] . . . . . D========================eeeER . . . . vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: [0,25] . . . . . D=========================eeeER . . . vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: [0,26] . . . . . .D==========================eeeER . . . vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: [0,27] . . . . . . D===========================eeeER. . . vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: [0,28] . . . . . . D============================eeeER . . vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: [0,29] . . . . . . D=============================eeeER . . vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: [0,30] . . . . . . D==============================eeeER . vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: [0,31] . . . . . . .D===============================eeeER . vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: [0,32] . . . . . . . . D========================eeeER vaddps %ymm3, %ymm0, %ymm6
LoadQueueSize, StoreQueueSize,
AssumeNoAlias)),
DU(llvm::make_unique<DispatchUnit>(
- this, MRI, Subtarget.getSchedModel().MicroOpBufferSize,
+ this, STI, MRI, Subtarget.getSchedModel().MicroOpBufferSize,
RegisterFileSize, MaxRetirePerCycle, DispatchWidth, HWS.get())),
SM(Source), Cycles(0) {
HWS->setDispatchUnit(DU.get());
namespace mca {
+/// Populate RegisterFiles with one zeroed usage record per register file.
+void BackendStatistics::initializeRegisterFileInfo() {
+  const RegisterFileUsage ZeroUsage = {0, 0, 0};
+  const MCSchedModel &Model = STI.getSchedModel();
+
+  if (Model.hasExtraProcessorInfo()) {
+    // One record for every user defined register file, plus the default
+    // register file which is always at index #0. Tablegen always emits an
+    // "InvalidRegisterFile" entry, which can be skipped. If there are no user
+    // defined register files, a single record is still reserved for the
+    // default register file at index #0.
+    const MCExtraProcessorInfo &Info = Model.getExtraProcessorInfo();
+    RegisterFiles.assign(std::max(Info.NumRegisterFiles, 1U), ZeroUsage);
+    return;
+  }
+
+  // No extra processor information: assume a single register file.
+  RegisterFiles.emplace_back(ZeroUsage);
+}
+
void BackendStatistics::onInstructionEvent(const HWInstructionEvent &Event) {
switch (Event.Type) {
default:
std::string Buffer;
raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nRegister File statistics.";
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
+ TempStream << "\n\nRegister File statistics:";
+ const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
+ TempStream << "\nTotal number of mappings created: "
+ << GlobalUsage.TotalMappings;
+ TempStream << "\nMax number of mappings used: "
+ << GlobalUsage.MaxUsedMappings << '\n';
+
+ for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
const RegisterFileUsage &RFU = RegisterFiles[I];
- TempStream << "\nRegister File #" << I;
- TempStream << "\n Total number of mappings created: " << RFU.TotalMappings;
- TempStream << "\n Max number of mappings used: "
- << RFU.MaxUsedMappings;
+ // Obtain the register file descriptor from the scheduling model.
+ assert(STI.getSchedModel().hasExtraProcessorInfo() &&
+ "Unable to find register file info!");
+ const MCExtraProcessorInfo &PI =
+ STI.getSchedModel().getExtraProcessorInfo();
+ assert(I <= PI.NumRegisterFiles && "Unexpected register file index!");
+ const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
+ // Skip invalid register files.
+ if (!RFDesc.NumPhysRegs)
+ continue;
+
+ TempStream << "\n* Register File #" << I;
+ TempStream << " -- " << StringRef(RFDesc.Name) << ':';
+ TempStream << "\n Number of physical registers: ";
+ if (!RFDesc.NumPhysRegs)
+ TempStream << "unbounded";
+ else
+ TempStream << RFDesc.NumPhysRegs;
+ TempStream << "\n Total number of mappings created: " << RFU.TotalMappings;
+ TempStream << "\n Max number of mappings used: "
+ << RFU.MaxUsedMappings << '\n';
}
TempStream.flush();
// There is one entry for each register file implemented by the processor.
llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
+ void initializeRegisterFileInfo();
+
void printRetireUnitStatistics(llvm::raw_ostream &OS) const;
void printDispatchUnitStatistics(llvm::raw_ostream &OS) const;
void printSchedulerStatistics(llvm::raw_ostream &OS) const;
public:
BackendStatistics(const llvm::MCSubtargetInfo &sti)
: STI(sti), NumDispatched(0), NumIssued(0), NumRetired(0), NumCycles(0),
- HWStalls(HWStallEvent::LastGenericEvent),
- // TODO: The view currently assumes a single register file. This will
- // change in future.
- RegisterFiles(1) {}
+ HWStalls(HWStallEvent::LastGenericEvent) {
+ initializeRegisterFileInfo(); // Size RegisterFiles from the scheduling model.
+ }
void onInstructionEvent(const HWInstructionEvent &Event) override;
namespace mca {
-void RegisterFile::addRegisterFile(ArrayRef<unsigned> RegisterClasses,
- unsigned NumTemps) {
+void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
+  // Register file #0 is the default one: it "sees" every machine register
+  // declared by the target and holds `NumRegs` physical registers. A value of
+  // zero for `NumRegs` means that it has an unbounded number of physical
+  // registers.
+  addRegisterFile({} /* all registers */, NumRegs);
+
+  // Without extra processor information there are no user defined register
+  // files to add.
+  if (!SM.hasExtraProcessorInfo())
+    return;
+
+  // Allocate a RegisterMappingTracker for each user defined register file. The
+  // size of every register file, and the mapping between register files and
+  // register classes, are specified via tablegen.
+  const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
+  const ArrayRef<MCRegisterFileDesc> Descriptors(Info.RegisterFiles,
+                                                 Info.NumRegisterFiles);
+  for (const MCRegisterFileDesc &Desc : Descriptors) {
+    // Skip invalid register files with zero physical registers.
+    if (Desc.NumPhysRegs == 0)
+      continue;
+
+    // The cost of a register definition is equivalent to the number of
+    // physical registers that are allocated at register renaming stage.
+    const MCRegisterCostEntry *FirstCost =
+        Info.RegisterCostTable + Desc.RegisterCostEntryIdx;
+    addRegisterFile(
+        ArrayRef<MCRegisterCostEntry>(FirstCost, Desc.NumRegisterCostEntries),
+        Desc.NumPhysRegs);
+  }
+}
+
+/// Append a register file and record which physical registers it owns.
+///
+/// \param Entries      register classes contained in the new register file,
+///                     each paired with the cost of renaming its registers.
+///                     An empty list means "all the target registers".
+/// \param NumPhysRegs  value forwarded to the new RegisterMappingTracker.
+void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
+                                   unsigned NumPhysRegs) {
+  // A default register file is always allocated at index #0. That register file
+  // is mainly used to count the total number of mappings created by all
+  // register files at runtime. Users can limit the number of available physical
+  // registers in register file #0 through the command line flag
+  // `-register-file-size`.
unsigned RegisterFileIndex = RegisterFiles.size();
- assert(RegisterFileIndex < 32 && "Too many register files!");
- RegisterFiles.emplace_back(NumTemps);
-
- // Special case where there are no register classes specified.
- // An empty register class set means *all* registers.
- if (RegisterClasses.empty()) {
- for (std::pair<WriteState *, unsigned> &Mapping : RegisterMappings)
- Mapping.second |= 1U << RegisterFileIndex;
- } else {
- for (const unsigned RegClassIndex : RegisterClasses) {
- const MCRegisterClass &RC = MRI.getRegClass(RegClassIndex);
- for (const MCPhysReg Reg : RC)
- RegisterMappings[Reg].second |= 1U << RegisterFileIndex;
+  RegisterFiles.emplace_back(NumPhysRegs);
+
+  // Special case where there is no register class identifier in the set.
+  // An empty set of register classes means: this register file contains all
+  // the physical registers specified by the target. Every register is given a
+  // cost of 1.
+  if (Entries.empty()) {
+    for (std::pair<WriteState *, IndexPlusCostPairTy> &Mapping :
+         RegisterMappings)
+      Mapping.second = std::make_pair(RegisterFileIndex, 1U);
+    return;
+  }
+
+  // Now update the owner and the cost of individual registers.
+  for (const MCRegisterCostEntry &RCE : Entries) {
+    const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
+    for (const MCPhysReg Reg : RC) {
+      IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second;
+      if (Entry.first) {
+        // The only register file that is allowed to overlap is the default
+        // register file at index #0. The analysis is inaccurate if register
+        // files overlap. Terminate the line so that consecutive warnings do
+        // not run together on stderr.
+        errs() << "warning: register " << MRI.getName(Reg)
+               << " defined in multiple register files.\n";
+      }
+      // The most recently added register file becomes the owner.
+      Entry.first = RegisterFileIndex;
+      Entry.second = RCE.Cost;
}
}
}
-void RegisterFile::createNewMappings(unsigned RegisterFileMask,
+void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
MutableArrayRef<unsigned> UsedPhysRegs) {
- assert(RegisterFileMask && "RegisterFileMask cannot be zero!");
- // Notify each register file that contains RegID.
- do {
- unsigned NextRegisterFile = llvm::PowerOf2Floor(RegisterFileMask);
- unsigned RegisterFileIndex = llvm::countTrailingZeros(NextRegisterFile);
+ unsigned RegisterFileIndex = Entry.first; // Owning register file; 0 means the default file only.
+ unsigned Cost = Entry.second; // Physical registers consumed by this definition.
+ if (RegisterFileIndex) { // Charge the user defined register file, if any.
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
- RMT.NumUsedMappings++;
- UsedPhysRegs[RegisterFileIndex]++;
- RegisterFileMask ^= NextRegisterFile;
- } while (RegisterFileMask);
+ RMT.NumUsedMappings += Cost;
+ UsedPhysRegs[RegisterFileIndex] += Cost;
+ }
+
+ // Register file #0 is charged for every definition, whichever file owns it.
+ RegisterFiles[0].NumUsedMappings += Cost;
+ UsedPhysRegs[0] += Cost;
}
-void RegisterFile::removeMappings(unsigned RegisterFileMask,
+/// Release the physical registers previously allocated for a definition.
+///
+/// \param Entry          owning register file index and cost of the definition.
+/// \param FreedPhysRegs  per register file counters; the entry for the owning
+///                       file (if any) and the entry for register file #0 are
+///                       both incremented by the cost.
+void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
MutableArrayRef<unsigned> FreedPhysRegs) {
- assert(RegisterFileMask && "RegisterFileMask cannot be zero!");
- // Notify each register file that contains RegID.
- do {
- unsigned NextRegisterFile = llvm::PowerOf2Floor(RegisterFileMask);
- unsigned RegisterFileIndex = llvm::countTrailingZeros(NextRegisterFile);
+  unsigned RegisterFileIndex = Entry.first;
+  unsigned Cost = Entry.second;
+  if (RegisterFileIndex) {
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
- assert(RMT.NumUsedMappings);
- RMT.NumUsedMappings--;
- FreedPhysRegs[RegisterFileIndex]++;
- RegisterFileMask ^= NextRegisterFile;
- } while (RegisterFileMask);
+    // Releasing more than was allocated would drive the counter below zero
+    // and corrupt every later availability check.
+    assert(RMT.NumUsedMappings >= Cost &&
+           "Releasing more mappings than were allocated!");
+    RMT.NumUsedMappings -= Cost;
+    FreedPhysRegs[RegisterFileIndex] += Cost;
+  }
+
+  // Now update the default register mapping tracker, which counts the
+  // mappings of every register file.
+  assert(RegisterFiles[0].NumUsedMappings >= Cost &&
+         "Releasing more mappings than were allocated!");
+  RegisterFiles[0].NumUsedMappings -= Cost;
+  FreedPhysRegs[0] += Cost;
}
void RegisterFile::addRegisterMapping(WriteState &WS,
}
unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
- SmallVector<unsigned, 4> NumTemporaries(getNumRegisterFiles());
+ SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
// Find how many new mappings must be created for each register file.
for (const unsigned RegID : Regs) {
- unsigned RegisterFileMask = RegisterMappings[RegID].second;
- do {
- unsigned NextRegisterFileID = llvm::PowerOf2Floor(RegisterFileMask);
- NumTemporaries[llvm::countTrailingZeros(NextRegisterFileID)]++;
- RegisterFileMask ^= NextRegisterFileID;
- } while (RegisterFileMask);
+ const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second;
+ if (Entry.first)
+ NumPhysRegs[Entry.first] += Entry.second;
+ NumPhysRegs[0] += Entry.second;
}
unsigned Response = 0;
for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
- unsigned Temporaries = NumTemporaries[I];
- if (!Temporaries)
+ unsigned NumRegs = NumPhysRegs[I];
+ if (!NumRegs)
continue;
const RegisterMappingTracker &RMT = RegisterFiles[I];
if (!RMT.TotalMappings) {
- // The register file has an unbound number of microarchitectural
+ // The register file has an unbounded number of microarchitectural
// registers.
continue;
}
- if (RMT.TotalMappings < Temporaries) {
+ if (RMT.TotalMappings < NumRegs) {
// The current register file is too small. This may occur if the number of
// microarchitectural registers in register file #0 was changed by the
// users via flag -reg-file-size. Alternatively, the scheduling model
"Not enough microarchitectural registers in the register file");
}
- if (RMT.TotalMappings < RMT.NumUsedMappings + Temporaries)
+ if (RMT.TotalMappings < (RMT.NumUsedMappings + NumRegs))
Response |= (1U << I);
}
void RegisterFile::dump() const {
for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
const RegisterMapping &RM = RegisterMappings[I];
- dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second << ", ";
+ dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second.first
+ << ", ";
if (RM.first)
RM.first->dump();
else
// This is where information related to the various register files is kept.
// This set always contains at least one register file at index #0. That
// register file "sees" all the physical registers declared by the target, and
- // (by default) it allows an unbound number of mappings.
+ // (by default) it allows an unbounded number of mappings.
// Users can limit the number of mappings that can be created by register file
// #0 through the command line flag `-register-file-size`.
llvm::SmallVector<RegisterMappingTracker, 4> RegisterFiles;
+ // This pair is used to identify the owner of a physical register, as well as
+ // the cost of using that register file.
+ using IndexPlusCostPairTy = std::pair<unsigned, unsigned>;
+
// RegisterMapping objects are mainly used to track physical register
// definitions. A WriteState object describes a register definition, and it is
// used to track RAW dependencies (see Instruction.h). A RegisterMapping
// object also specifies the set of register files. The mapping between
// physreg and register files is done using a "register file mask".
//
- // A register file mask identifies a set of register files. Each bit of the
- // mask representation references a specific register file.
- // For example:
- // 0b0001 --> Register file #0
- // 0b0010 --> Register file #1
- // 0b0100 --> Register file #2
+ // A register file index identifies a user defined register file.
+ // There is one index per RegisterMappingTracker, and index #0 is reserved to
+ // the default unified register file.
//
- // Note that this implementation allows register files to overlap.
- // The maximum number of register files allowed by this implementation is 32.
- using RegisterMapping = std::pair<WriteState *, unsigned>;
+ // This implementation does not allow overlapping register files. The only
+ // register file that is allowed to overlap with other register files is
+ // register file #0.
+ using RegisterMapping = std::pair<WriteState *, IndexPlusCostPairTy>;
// This map contains one entry for each physical register defined by the
// processor scheduling model.
// The list of register classes is then converted by the tablegen backend into
// a list of register class indices. That list, along with the number of
// available mappings, is then used to create a new RegisterMappingTracker.
- void addRegisterFile(llvm::ArrayRef<unsigned> RegisterClasses,
- unsigned NumTemps);
+ // Each entry pairs a register class identifier with the cost of renaming a
+ // register of that class. An empty list means: all the target registers.
+ void addRegisterFile(llvm::ArrayRef<llvm::MCRegisterCostEntry> Entries,
+                      unsigned NumPhysRegs);
- // Allocates a new register mapping in every register file specified by the
- // register file mask. This method is called from addRegisterMapping.
- void createNewMappings(unsigned RegisterFileMask,
+ // Allocates register mappings in register file specified by the
+ // IndexPlusCostPairTy object. This method is called from addRegisterMapping.
+ void createNewMappings(IndexPlusCostPairTy IPC,
llvm::MutableArrayRef<unsigned> UsedPhysRegs);
- // Removes a previously allocated mapping from each register file in the
- // RegisterFileMask set. This method is called from invalidateRegisterMapping.
- void removeMappings(unsigned RegisterFileMask,
+ // Removes a previously allocated mapping from the register file referenced
+ // by the IndexPlusCostPairTy object. This method is called from
+ // invalidateRegisterMapping.
+ void removeMappings(IndexPlusCostPairTy IPC,
llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+ // Create an instance of RegisterMappingTracker for every register file
+ // specified by the processor model.
+ // If no register file is specified, then this method creates a single
+ // register file with an unbounded number of registers.
+ void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs);
+
public:
- RegisterFile(const llvm::MCRegisterInfo &mri, unsigned TempRegs = 0)
- : MRI(mri), RegisterMappings(MRI.getNumRegs(), {nullptr, 0U}) {
- addRegisterFile({}, TempRegs);
- // TODO: teach the scheduling models how to specify multiple register files.
+ RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri,
+ unsigned NumRegs = 0)
+ : MRI(mri), RegisterMappings(mri.getNumRegs(), {nullptr, {0, 0}}) { // No writer; register file #0, cost 0.
+ initialize(SM, NumRegs); // Adds register file #0 plus any register files from the model.
}
// Creates a new register mapping for RegID.
std::unique_ptr<RetireControlUnit> RCU;
Backend *Owner;
- bool checkRAT(unsigned Index, const Instruction &Desc);
+ bool checkRAT(unsigned Index, const Instruction &Inst);
bool checkRCU(unsigned Index, const InstrDesc &Desc);
bool checkScheduler(unsigned Index, const InstrDesc &Desc);
llvm::ArrayRef<unsigned> UsedPhysRegs);
public:
- DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
- unsigned MicroOpBufferSize, unsigned RegisterFileSize,
- unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
- Scheduler *Sched)
+ DispatchUnit(Backend *B, const llvm::MCSubtargetInfo &STI,
+ const llvm::MCRegisterInfo &MRI, unsigned MicroOpBufferSize,
+ unsigned RegisterFileSize, unsigned MaxRetirePerCycle,
+ unsigned MaxDispatchWidth, Scheduler *Sched)
: DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
CarryOver(0U), SC(Sched),
- RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
+ RAT(llvm::make_unique<RegisterFile>(STI.getSchedModel(), MRI,
+ RegisterFileSize)),
RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
MaxRetirePerCycle, this)),
Owner(B) {}
DEBUG(dbgs() << "\n+++ RESOURCE DEFINITIONS (collectProcResources) +++\n");
collectProcResources();
+ // Find register file definitions for each processor.
+ collectRegisterFiles();
+
checkCompleteness();
}
}
}
+// Gather every "RegisterFile" record defined by the target and attach a
+// CodeGenRegisterFile describing it to the scheduling model named by the
+// record's "SchedModel" field.
+void CodeGenSchedModels::collectRegisterFiles() {
+  RecVec FileDefs = Records.getAllDerivedDefinitions("RegisterFile");
+
+  for (Record *FileDef : FileDefs) {
+    // Append a fresh descriptor to the owning processor model, then fill it in
+    // through a reference to the element that was just added.
+    CodeGenProcModel &Model = getProcModel(FileDef->getValueAsDef("SchedModel"));
+    Model.RegisterFiles.emplace_back(
+        CodeGenRegisterFile(FileDef->getName(), FileDef));
+    CodeGenRegisterFile &File = Model.RegisterFiles.back();
+
+    // Read the register file size and the (register class, cost) lists.
+    File.NumPhysRegs = FileDef->getValueAsInt("NumPhysRegs");
+    RecVec ClassDefs = FileDef->getValueAsListOfDefs("RegClasses");
+    std::vector<int64_t> CostValues = FileDef->getValueAsListOfInts("RegCosts");
+
+    // Pair every register class with its cost. A class that has no matching
+    // entry in "RegCosts" gets a cost of 1.
+    unsigned Position = 0;
+    for (Record *ClassDef : ClassDefs) {
+      int Cost = 1;
+      if (CostValues.size() > Position)
+        Cost = CostValues[Position];
+      File.Costs.emplace_back(ClassDef, Cost);
+      ++Position;
+    }
+  }
+}
+
// Collect and sort WriteRes, ReadAdvance, and ProcResources.
void CodeGenSchedModels::collectProcResources() {
ProcResourceDefs = Records.getAllDerivedDefinitions("ProcResourceUnits");
class CodeGenTarget;
class CodeGenSchedModels;
class CodeGenInstruction;
+class CodeGenRegisterClass;
using RecVec = std::vector<Record*>;
using RecIter = std::vector<Record*>::const_iterator;
#endif
};
+/// Cost of allocating a register that belongs to register class RCDef.
+///
+/// The cost is the number of physical registers used by the register renamer
+/// for such an allocation. Costs are specified per register class, not per
+/// individual register.
+struct CodeGenRegisterCost {
+  Record *RCDef;
+  unsigned Cost;
+
+  CodeGenRegisterCost(Record *ClassDef, unsigned PhysRegsUsed)
+      : RCDef(ClassDef), Cost(PhysRegsUsed) {}
+
+  // Copy construction is allowed; copy assignment is explicitly disabled.
+  CodeGenRegisterCost(const CodeGenRegisterCost &) = default;
+  CodeGenRegisterCost &operator=(const CodeGenRegisterCost &) = delete;
+};
+
+/// Description of a single processor register file.
+///
+/// Register file information is currently consumed by external tools like
+/// llvm-mca to predict dispatch stalls due to register pressure.
+struct CodeGenRegisterFile {
+  std::string Name;
+  Record *RegisterFileDef;
+
+  // Starts at zero; the code that builds the register file sets the real
+  // value afterwards.
+  unsigned NumPhysRegs = 0;
+  // One entry per register class with a cost attached to this register file.
+  std::vector<CodeGenRegisterCost> Costs;
+
+  CodeGenRegisterFile(StringRef name, Record *def)
+      : Name(name), RegisterFileDef(def) {}
+
+  // True when no register cost entry has been recorded for this file.
+  bool hasDefaultCosts() const { return Costs.empty(); }
+};
+
// Processor model.
//
// ModelName is a unique name used to name an instantiation of MCSchedModel.
// Per-operand machine model resources associated with this processor.
RecVec ProcResourceDefs;
+ // List of Register Files.
+ std::vector<CodeGenRegisterFile> RegisterFiles;
+
CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef,
Record *IDef) :
Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef) {}
return !WriteResDefs.empty() || !ItinRWDefs.empty();
}
+  // True when at least one register file has been attached to this model.
+  bool hasExtraProcessorInfo() const { return !RegisterFiles.empty(); }
+
unsigned getProcResourceIdx(Record *PRDef) const;
bool isUnsupported(const CodeGenInstruction &Inst) const;
void collectSchedClasses();
+ void collectRegisterFiles();
+
std::string createSchedClassName(Record *ItinClassDef,
ArrayRef<unsigned> OperWrites,
ArrayRef<unsigned> OperReads);
void EmitItineraries(raw_ostream &OS,
std::vector<std::vector<InstrItinerary>>
&ProcItinLists);
+ void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS);
void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
char Separator);
void EmitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel,
OS << "};\n";
}
+// Emit the tables that back the MCExtraProcessorInfo descriptor of ProcModel:
+//  - <Model>RegisterCosts: one {RegisterClassID, Cost} entry per register cost
+//    (only emitted when at least one register file defines costs);
+//  - <Model>RegisterFiles: one descriptor per register file, preceded by an
+//    "InvalidRegisterFile" entry at index 0;
+//  - <Model>ExtraInfo: the descriptor that points at the two tables above.
+//
+// Nothing is emitted when ProcModel has no register files. Otherwise
+// <Model>ExtraInfo is always defined, because the processor model table
+// references it whenever the model has register files, including the case
+// where every register file uses the default (empty) cost list.
+void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
+                                              raw_ostream &OS) {
+  if (ProcModel.RegisterFiles.empty())
+    return;
+
+  // Total number of entries that end up in the register cost table.
+  unsigned NumCostEntries = 0;
+  for (const CodeGenRegisterFile &RF : ProcModel.RegisterFiles)
+    NumCostEntries += RF.Costs.size();
+
+  // Print the RegisterCost table first. Skip it when there are no entries:
+  // an array of unknown bound with an empty initializer is not valid C++.
+  if (NumCostEntries) {
+    OS << "\n// {RegisterClassID, Register Cost}\n";
+    OS << "static const llvm::MCRegisterCostEntry " << ProcModel.ModelName
+       << "RegisterCosts"
+       << "[] = {\n";
+
+    for (const CodeGenRegisterFile &RF : ProcModel.RegisterFiles) {
+      // Register files with a default cost table contribute no entries.
+      for (const CodeGenRegisterCost &RC : RF.Costs) {
+        OS << " { ";
+        Record *Rec = RC.RCDef;
+        if (Rec->getValue("Namespace"))
+          OS << Rec->getValueAsString("Namespace") << "::";
+        OS << Rec->getName() << "RegClassID, " << RC.Cost << "},\n";
+      }
+    }
+    OS << "};\n";
+  }
+
+  // Now generate a table with register file info. The leading invalid entry
+  // means this table is never empty.
+  OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl}\n";
+  OS << "static const llvm::MCRegisterFileDesc " << ProcModel.ModelName
+     << "RegisterFiles"
+     << "[] = {\n"
+     << " { \"InvalidRegisterFile\", 0, 0, 0 },\n";
+
+  // Running index of the first cost entry that belongs to the current file.
+  unsigned CostTblIndex = 0;
+  for (const CodeGenRegisterFile &RD : ProcModel.RegisterFiles) {
+    OS << " { ";
+    OS << '"' << RD.Name << '"' << ", " << RD.NumPhysRegs << ", ";
+    unsigned NumFileCostEntries = RD.Costs.size();
+    OS << NumFileCostEntries << ", " << CostTblIndex << "},\n";
+    CostTblIndex += NumFileCostEntries;
+  }
+  OS << "};\n";
+
+  // Now generate a table for the extra processor info. Without cost entries
+  // there is no cost table to point at, so emit a null pointer with a count
+  // of zero.
+  OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName
+     << "ExtraInfo = {\n " << ProcModel.ModelName << "RegisterFiles,\n "
+     << (1 + ProcModel.RegisterFiles.size())
+     << ", // Number of register files.\n ";
+  if (NumCostEntries)
+    OS << ProcModel.ModelName << "RegisterCosts";
+  else
+    OS << "nullptr";
+  OS << ",\n " << NumCostEntries
+     << " // Number of register cost entries.\n"
+     << "};\n";
+}
+
void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
raw_ostream &OS) {
EmitProcessorResourceSubUnits(ProcModel, OS);
void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
// For each processor model.
for (const CodeGenProcModel &PM : SchedModels.procModels()) {
+ // Emit extra processor info if available.
+ if (PM.hasExtraProcessorInfo())
+ EmitExtraProcessorInfo(PM, OS);
// Emit processor resource table.
if (PM.hasInstrSchedModel())
EmitProcessorResources(PM, OS);
OS << " nullptr, nullptr, 0, 0,"
<< " // No instruction-level machine model.\n";
if (PM.hasItineraries())
- OS << " " << PM.ItinsDef->getName() << "\n";
+ OS << " " << PM.ItinsDef->getName() << ",\n";
+ else
+ OS << " nullptr, // No Itinerary\n";
+ if (PM.hasExtraProcessorInfo())
+ OS << " &" << PM.ModelName << "ExtraInfo\n";
else
- OS << " nullptr // No Itinerary\n";
+ OS << " nullptr // No extra processor descriptor\n";
OS << "};\n";
}
}