--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=armv7-unknown-unknown -mcpu=swift -timeline -iterations=5 < %s | FileCheck %s
+
+# Register r1 is updated in one cycle by instruction vld1.32, so the add.w can
+# start one cycle later.
+
+add.w r1, r1, r12
+vld1.32 {d16, d17}, [r1]!
+
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 16
+# CHECK-NEXT: Total uOps: 15
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.94
+# CHECK-NEXT: IPC: 0.63
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 add r1, r1, r12
+# CHECK-NEXT: 2 4 1.00 * vld1.32 {d16, d17}, [r1]!
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SwiftUnitDiv
+# CHECK-NEXT: [1] - SwiftUnitP0
+# CHECK-NEXT: [2] - SwiftUnitP1
+# CHECK-NEXT: [3] - SwiftUnitP2
+# CHECK-NEXT: [4.0] - SwiftUnitP01
+# CHECK-NEXT: [4.1] - SwiftUnitP01
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1]
+# CHECK-NEXT: - - - 1.00 1.00 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] Instructions:
+# CHECK-NEXT: - - - - - 1.00 add r1, r1, r12
+# CHECK-NEXT: - - - 1.00 1.00 - vld1.32 {d16, d17}, [r1]!
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . add r1, r1, r12
+# CHECK-NEXT: [0,1] D=eeeeER . . vld1.32 {d16, d17}, [r1]!
+# CHECK-NEXT: [1,0] .D=eE--R . . add r1, r1, r12
+# CHECK-NEXT: [1,1] .D==eeeeER. . vld1.32 {d16, d17}, [r1]!
+# CHECK-NEXT: [2,0] . D==eE--R. . add r1, r1, r12
+# CHECK-NEXT: [2,1] . D===eeeeER . vld1.32 {d16, d17}, [r1]!
+# CHECK-NEXT: [3,0] . D===eE--R . add r1, r1, r12
+# CHECK-NEXT: [3,1] . D====eeeeER . vld1.32 {d16, d17}, [r1]!
+# CHECK-NEXT: [4,0] . D====eE--R . add r1, r1, r12
+# CHECK-NEXT: [4,1] . D=====eeeeER vld1.32 {d16, d17}, [r1]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 3.0 0.2 1.6 add r1, r1, r12
+# CHECK-NEXT: 1. 5 4.0 0.0 0.0 vld1.32 {d16, d17}, [r1]!
ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}
-Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
- unsigned SchedClassID) {
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+ // Variadic opcodes are not correctly supported.
+ if (MCDesc.isVariadic()) {
+ if (MCI.getNumOperands() - MCDesc.getNumOperands()) {
+ return make_error<InstructionError<MCInst>>(
+ "Don't know how to process this variadic opcode.", MCI);
+ }
+ }
+
+ // Count register definitions, and skip non register operands in the process.
+ unsigned I, E;
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+ const MCOperand &Op = MCI.getOperand(I);
+ if (Op.isReg())
+ --NumExplicitDefs;
+ }
+
+ if (NumExplicitDefs) {
+ return make_error<InstructionError<MCInst>>(
+ "Expected more register operand definitions.", MCI);
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ // Always assume that the optional definition is the last operand.
+ const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+ if (I == MCI.getNumOperands() || !Op.isReg()) {
+ std::string Message =
+ "expected a register operand for an optional definition. Instruction "
+ "has not been correctly analyzed.";
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+ }
+ }
+
+ return ErrorSuccess();
+}
+
+void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
+ unsigned SchedClassID) {
const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
const MCSchedModel &SM = STI.getSchedModel();
const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
- // These are for now the (strong) assumptions made by this algorithm:
- // * The number of explicit and implicit register definitions in a MCInst
- // matches the number of explicit and implicit definitions according to
- // the opcode descriptor (MCInstrDesc).
- // * Register definitions take precedence over register uses in the operands
- // list.
- // * If an opcode specifies an optional definition, then the optional
- // definition is always the last operand in the sequence, and it can be
- // set to zero (i.e. "no register").
+ // Assumptions made by this algorithm:
+ // 1. The number of explicit and implicit register definitions in a MCInst
+ // matches the number of explicit and implicit definitions according to
+ // the opcode descriptor (MCInstrDesc).
+ // 2. Uses start at index #(MCDesc.getNumDefs()).
+ // 3. There can only be a single optional register definition, an it is
+ // always the last operand of the sequence (excluding extra operands
+ // contributed by variadic opcodes).
//
// These assumptions work quite well for most out-of-order in-tree targets
// like x86. This is mainly because the vast majority of instructions is
// expanded to MCInst using a straightforward lowering logic that preserves
// the ordering of the operands.
+ //
+ // About assumption 1.
+ // The algorithm allows non-register operands between register operand
+ // definitions. This helps to handle some special ARM instructions with
+ // implicit operand increment (-mtriple=armv7):
+ //
+ // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
+ // @ <MCOperand Reg:59>
+ // @ <MCOperand Imm:0> (!!)
+ // @ <MCOperand Reg:67>
+ // @ <MCOperand Imm:0>
+ // @ <MCOperand Imm:14>
+ // @ <MCOperand Reg:0>>
+ //
+ // MCDesc reports:
+ // 6 explicit operands.
+ // 1 optional definition
+ // 2 explicit definitions (!!)
+ //
+ // The presence of an 'Imm' operand between the two register definitions
+ // breaks the assumption that "register definitions are always at the
+ // beginning of the operand sequence".
+ //
+ // To workaround this issue, this algorithm ignores (i.e. skips) any
+ // non-register operands between register definitions. The optional
+ // definition is still at index #(NumOperands-1).
+ //
+ // According to assumption 2. register reads start at #(NumExplicitDefs-1).
+ // That means, register R1 from the example is both read and written.
unsigned NumExplicitDefs = MCDesc.getNumDefs();
unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
if (MCDesc.hasOptionalDef())
TotalDefs++;
+
ID.Writes.resize(TotalDefs);
// Iterate over the operands list, and skip non-register operands.
// The first NumExplictDefs register operands are expected to be register
}
Write.IsOptionalDef = false;
LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
<< ", Latency=" << Write.Latency
<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
});
CurrentDef++;
}
- if (CurrentDef != NumExplicitDefs) {
- return make_error<InstructionError<MCInst>>(
- "Expected more register operand definitions.", MCI);
- }
-
- CurrentDef = 0;
+ assert(CurrentDef == NumExplicitDefs &&
+ "Expected more register operand definitions.");
for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
unsigned Index = NumExplicitDefs + CurrentDef;
WriteDescriptor &Write = ID.Writes[Index];
Write.IsOptionalDef = false;
assert(Write.RegisterID != 0 && "Expected a valid phys register!");
LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
<< ", PhysReg=" << MRI.getName(Write.RegisterID)
<< ", Latency=" << Write.Latency
<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
}
if (MCDesc.hasOptionalDef()) {
- // Always assume that the optional definition is the last operand of the
- // MCInst sequence.
- const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
- if (i == MCI.getNumOperands() || !Op.isReg()) {
- std::string Message =
- "expected a register operand for an optional definition. Instruction "
- "has not been correctly analyzed.";
- return make_error<InstructionError<MCInst>>(Message, MCI);
- }
-
- WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
- Write.OpIndex = MCI.getNumOperands() - 1;
+ WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+ Write.OpIndex = MCDesc.getNumOperands() - 1;
// Assign a default latency for this write.
Write.Latency = ID.MaxLatency;
Write.SClassOrWriteResourceID = 0;
Write.IsOptionalDef = true;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
}
-
- return ErrorSuccess();
}
-Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
- unsigned SchedClassID) {
+void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
+ unsigned SchedClassID) {
const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
- unsigned NumExplicitDefs = MCDesc.getNumDefs();
-
- // Skip explicit definitions.
- unsigned i = 0;
- for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
- const MCOperand &Op = MCI.getOperand(i);
- if (Op.isReg())
- NumExplicitDefs--;
- }
-
- if (NumExplicitDefs) {
- return make_error<InstructionError<MCInst>>(
- "Expected more register operand definitions.", MCI);
- }
-
- unsigned NumExplicitUses = MCI.getNumOperands() - i;
+ unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
- if (MCDesc.hasOptionalDef()) {
- assert(NumExplicitUses);
- NumExplicitUses--;
- }
+ // Remove the optional definition.
+ if (MCDesc.hasOptionalDef())
+ --NumExplicitUses;
unsigned TotalUses = NumExplicitUses + NumImplicitUses;
- if (!TotalUses)
- return ErrorSuccess();
ID.Reads.resize(TotalUses);
- for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
- ReadDescriptor &Read = ID.Reads[CurrentUse];
- Read.OpIndex = i + CurrentUse;
- Read.UseIndex = CurrentUse;
+ for (unsigned I = 0; I < NumExplicitUses; ++I) {
+ ReadDescriptor &Read = ID.Reads[I];
+ Read.OpIndex = MCDesc.getNumDefs() + I;
+ Read.UseIndex = I;
Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
<< ", UseIndex=" << Read.UseIndex << '\n');
}
- for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
- ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
- Read.OpIndex = ~CurrentUse;
- Read.UseIndex = NumExplicitUses + CurrentUse;
- Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
+ // For the purpose of ReadAdvance, implicit uses come directly after explicit
+ // uses. The "UseIndex" must be updated according to that implicit layout.
+ for (unsigned I = 0; I < NumImplicitUses; ++I) {
+ ReadDescriptor &Read = ID.Reads[NumExplicitUses + I];
+ Read.OpIndex = ~I;
+ Read.UseIndex = NumExplicitUses + I;
+ Read.RegisterID = MCDesc.getImplicitUses()[I];
Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
+ LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << ", RegisterID="
<< MRI.getName(Read.RegisterID) << '\n');
}
- return ErrorSuccess();
}
Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
computeMaxLatency(*ID, MCDesc, SCDesc, STI);
- if (auto Err = populateWrites(*ID, MCI, SchedClassID))
- return std::move(Err);
- if (auto Err = populateReads(*ID, MCI, SchedClassID))
+
+ if (Error Err = verifyOperands(MCDesc, MCI))
return std::move(Err);
+ populateWrites(*ID, MCI, SchedClassID);
+ populateReads(*ID, MCI, SchedClassID);
+
LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
}
assert(RegID && "Expected a valid register ID!");
- NewIS->getDefs().emplace_back(
- WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex],
- /* WritesZero */ IsZeroIdiom);
+ NewIS->getDefs().emplace_back(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
++WriteIndex;
}