--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=5 < %s | FileCheck %s
+
+sete %r9b
+movzbl %al, %eax
+shll $2, %eax
+imull %ecx, %eax
+cmpl $1025, %eax
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.19
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 sete %r9b
+# CHECK-NEXT: 1 1 0.50 movzbl %al, %eax
+# CHECK-NEXT: 1 1 0.50 shll $2, %eax
+# CHECK-NEXT: 2 3 1.00 imull %ecx, %eax
+# CHECK-NEXT: 1 1 0.50 cmpl $1025, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 2.00 3.00 - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - sete %r9b
+# CHECK-NEXT: 0.01 0.99 - - - - - - - - - - - - movzbl %al, %eax
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - shll $2, %eax
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %eax
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - cmpl $1025, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeER . . . . . . sete %r9b
+# CHECK-NEXT: [0,1] DeER . . . . . . movzbl %al, %eax
+# CHECK-NEXT: [0,2] .DeER. . . . . . shll $2, %eax
+# CHECK-NEXT: [0,3] . DeeeER . . . . . imull %ecx, %eax
+# CHECK-NEXT: [0,4] . D==eER . . . . . cmpl $1025, %eax
+# CHECK-NEXT: [1,0] . D===eER. . . . . sete %r9b
+# CHECK-NEXT: [1,1] . D=eE-R. . . . . movzbl %al, %eax
+# CHECK-NEXT: [1,2] . D==eE-R . . . . shll $2, %eax
+# CHECK-NEXT: [1,3] . D==eeeER . . . . imull %ecx, %eax
+# CHECK-NEXT: [1,4] . .D====eER . . . . cmpl $1025, %eax
+# CHECK-NEXT: [2,0] . .D=====eER. . . . sete %r9b
+# CHECK-NEXT: [2,1] . . D===eE-R. . . . movzbl %al, %eax
+# CHECK-NEXT: [2,2] . . D====eE-R . . . shll $2, %eax
+# CHECK-NEXT: [2,3] . . D====eeeER . . . imull %ecx, %eax
+# CHECK-NEXT: [2,4] . . D======eER . . . cmpl $1025, %eax
+# CHECK-NEXT: [3,0] . . D=======eER. . . sete %r9b
+# CHECK-NEXT: [3,1] . . D=====eE-R. . . movzbl %al, %eax
+# CHECK-NEXT: [3,2] . . D======eE-R . . shll $2, %eax
+# CHECK-NEXT: [3,3] . . .D======eeeER . . imull %ecx, %eax
+# CHECK-NEXT: [3,4] . . . D========eER . . cmpl $1025, %eax
+# CHECK-NEXT: [4,0] . . . D=========eER. . sete %r9b
+# CHECK-NEXT: [4,1] . . . D=======eE-R. . movzbl %al, %eax
+# CHECK-NEXT: [4,2] . . . D========eE-R . shll $2, %eax
+# CHECK-NEXT: [4,3] . . . D========eeeER. imull %ecx, %eax
+# CHECK-NEXT: [4,4] . . . D==========eER cmpl $1025, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 5.8 0.2 0.0 sete %r9b
+# CHECK-NEXT: 1. 5 4.2 0.2 0.8 movzbl %al, %eax
+# CHECK-NEXT: 2. 5 5.0 0.0 0.8 shll $2, %eax
+# CHECK-NEXT: 3. 5 5.0 0.0 0.0 imull %ecx, %eax
+# CHECK-NEXT: 4. 5 7.0 0.0 0.0 cmpl $1025, %eax
// that we don't break the WAW, and the two writes can be merged together.
const WriteState *DependentWrite;
- // Number of writes that are in a WAW dependency with this write.
- unsigned NumWriteUsers;
+ // A partial write that is in a false dependency with this write.
+ WriteState *PartialWrite;
+
+ unsigned DependentWriteCyclesLeft; // Cycles left for the write this one depends on; set by writeStartEvent(), counted down to zero by cycleEvent().
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
bool clearsSuperRegs = false, bool writesZero = false)
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
- IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
+ IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
+ DependentWriteCyclesLeft(0) {}
WriteState(const WriteState &Other) = default;
WriteState &operator=(const WriteState &Other) = default;
unsigned getLatency() const { return WD->Latency; }
void addUser(ReadState *Use, int ReadAdvance);
+ void addUser(WriteState *Use);
+
+ unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; } // Cycles still pending from the last writeStartEvent(); 0 when nothing is pending.
+
+ unsigned getNumUsers() const { // Counts the dependent reads in Users plus the partial write, if one is registered.
+ unsigned NumUsers = Users.size();
+ if (PartialWrite) // A single PartialWrite pointer is tracked, so it contributes exactly one user.
+ ++NumUsers;
+ return NumUsers;
+ }
- unsigned getNumUsers() const { return Users.size() + NumWriteUsers; }
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
bool isWriteZero() const { return WritesZero; }
bool isEliminated() const { return IsEliminated; }
}
const WriteState *getDependentWrite() const { return DependentWrite; }
- void setDependentWrite(WriteState *Other) {
- DependentWrite = Other;
- ++Other->NumWriteUsers;
+ void setDependentWrite(WriteState *Other) { DependentWrite = Other; } // Records Other as the write this one must wait for; writeStartEvent() resets it to nullptr.
+ void writeStartEvent(unsigned Cycles) { // Receives the number of cycles the write this one depends on still has left.
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr; // From here on the dependency is tracked only through the cycle counter.
}
+
void setWriteZero() { WritesZero = true; }
void setEliminated() {
assert(Users.empty() && "Write is in an inconsistent state.");
unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
RS->writeStartEvent(ReadCycles);
}
+
+ // Notify any writes that are in a false dependency with this write.
+ if (PartialWrite)
+ PartialWrite->writeStartEvent(CyclesLeft);
}
void WriteState::addUser(ReadState *User, int ReadAdvance) {
Users.insert(NewPair);
}
+void WriteState::addUser(WriteState *User) { // Makes User (a write in a false dependency with this one) wait on this write.
+ if (CyclesLeft != UNKNOWN_CYCLES) { // Remaining cycles already known: notify User right away instead of registering it.
+ User->writeStartEvent(std::max(0, CyclesLeft)); // CyclesLeft may be negative; clamp because the event takes an unsigned count.
+ return;
+ }
+
+ assert(!PartialWrite && "PartialWrite already set!"); // Only one partial write can be tracked per WriteState.
+ PartialWrite = User; // Remembered so that it can be notified through writeStartEvent() later on.
+ User->setDependentWrite(this); // User stays dependent on this write until that notification arrives.
+}
+
void WriteState::cycleEvent() {
// Note: CyclesLeft can be a negative number. It is an error to
// make it an unsigned quantity because users of this write may
// specify a negative ReadAdvance.
if (CyclesLeft != UNKNOWN_CYCLES)
CyclesLeft--;
+
+ if (DependentWriteCyclesLeft) // Unsigned countdown: stop at zero rather than wrapping around.
+ DependentWriteCyclesLeft--;
}
void ReadState::cycleEvent() {
// A partial register write cannot complete before a dependent write.
auto IsDefReady = [&](const WriteState &Def) {
- if (const WriteState *Write = Def.getDependentWrite()) {
- int WriteLatency = Write->getCyclesLeft();
- if (WriteLatency == UNKNOWN_CYCLES)
- return false;
- return static_cast<unsigned>(WriteLatency) < getLatency();
+ if (!Def.getDependentWrite()) { // Null when Def never had a dependent write or writeStartEvent() already cleared it.
+ unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
+ return !CyclesLeft || CyclesLeft < getLatency(); // Ready if nothing is pending, or the pending cycles are fewer than getLatency().
}
- return true;
+ return false; // Def still points at a dependent write, so its remaining cycles are not known yet.
};
if (all_of(getDefs(), IsDefReady))
for (ReadState &Use : getUses())
Use.cycleEvent();
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
+
update();
return;
}