// If this move has been eliminated, then method tryEliminateMoveOrSwap should
// have already updated all the register mappings.
if (!IsEliminated) {
+ // Check if this is one of multiple writes performed by this
+ // instruction to register RegID.
+ const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+ const WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && OtherWrite.getSourceIndex() == Write.getSourceIndex()) {
+ if (OtherWS->getLatency() > WS.getLatency()) {
+ // Conservatively keep the slowest write to RegID.
+ return;
+ }
+ }
+
// Update the mapping for register RegID including its sub-registers.
RegisterMappings[RegID].first = Write;
RegisterMappings[RegID].second.AliasRegID = 0U;
let hasSideEffects = 0 in {
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V, Sched<[sched, WriteIMulH]>;
+ []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, VEX_4V,
- Sched<[sched.Folded, WriteIMulHLd,
+ Sched<[WriteIMulHLd, sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Implicit read of EDX/RDX
// Integer-multiply scheduling entries for the model referenced below as
// BroadwellModel.
defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
// WriteMULX32: the third template argument goes from 4 to 3; the port list and
// the remaining arguments are unchanged.
// NOTE(review): that argument is presumably the latency -- confirm against the
// BWWriteResPair definition.
-defm : BWWriteResPair<WriteMULX32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteMULX32, [BWPort1,BWPort06,BWPort0156], 3, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
// WriteMULX64: same 4 -> 3 adjustment as WriteMULX32 above.
-defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 3, [1,1], 2>;
defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
// WriteIMulH consumes no resources (empty list); only its Latency is set, and
// that goes from 3 to 4.
-def BWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def BWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// The load-folded variant is not hard-coded: it is BWWriteIMulH.Latency plus
// the model's LoadLatency, so it tracks the value set above.
def : WriteRes<WriteIMulHLd, []> {
let Latency = !add(BWWriteIMulH.Latency, BroadwellModel.LoadLatency);
}
// Integer-multiply scheduling entries for the model referenced below as
// HaswellModel.
defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
// WriteMULX32: the third template argument goes from 4 to 3; the port list and
// the remaining arguments are unchanged.
// NOTE(review): that argument is presumably the latency -- confirm against the
// HWWriteResPair definition.
-defm : HWWriteResPair<WriteMULX32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteMULX32, [HWPort1,HWPort06,HWPort0156], 3, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
// WriteMULX64: same 4 -> 3 adjustment as WriteMULX32 above.
-defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 3, [1,1], 2>;
defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
// WriteIMulH consumes no resources (empty list); only its Latency is set, and
// that goes from 3 to 4.
-def HWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def HWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// The load-folded variant is not hard-coded: it is HWWriteIMulH.Latency plus
// the model's LoadLatency, so it tracks the value set above.
def : WriteRes<WriteIMulHLd, []> {
let Latency = !add(HWWriteIMulH.Latency, HaswellModel.LoadLatency);
}
// Integer-multiply scheduling entries for the model referenced below as
// SandyBridgeModel.
defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
// WriteMULX32: the third template argument goes from 4 to 3; the port list and
// the remaining arguments are unchanged.
// NOTE(review): that argument is presumably the latency -- confirm against the
// SBWriteResPair definition.
-defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 3, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
// WriteMULX64: same 4 -> 3 adjustment as WriteMULX32 above.
-defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 3, [1,1], 2>;
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
// WriteIMulH consumes no resources (empty list); only its Latency is set, and
// that goes from 3 to 4.
-def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// The load-folded variant is not hard-coded: it is SBWriteIMulH.Latency plus
// the model's LoadLatency, so it tracks the value set above.
def : WriteRes<WriteIMulHLd, []> {
let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
}
// Integer-multiply scheduling entries for the model referenced below as
// SkylakeClientModel.
defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
// WriteMULX32: the third template argument goes from 4 to 3; the port list and
// the remaining arguments are unchanged.
// NOTE(review): that argument is presumably the latency -- confirm against the
// SKLWriteResPair definition.
-defm : SKLWriteResPair<WriteMULX32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteMULX32, [SKLPort1,SKLPort06,SKLPort0156], 3, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
// WriteMULX64: same 4 -> 3 adjustment as WriteMULX32 above.
-defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 3, [1,1], 2>;
defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
// WriteIMulH consumes no resources (empty list); only its Latency is set, and
// that goes from 3 to 4.
-def SKLWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SKLWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// The load-folded variant is not hard-coded: it is SKLWriteIMulH.Latency plus
// the model's LoadLatency, so it tracks the value set above.
def : WriteRes<WriteIMulHLd, []> {
let Latency = !add(SKLWriteIMulH.Latency, SkylakeClientModel.LoadLatency);
}
// Integer-multiply scheduling entries for the model referenced below as
// SkylakeServerModel.
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
// WriteMULX32: the third template argument goes from 4 to 3; the port list and
// the remaining arguments are unchanged.
// NOTE(review): that argument is presumably the latency -- confirm against the
// SKXWriteResPair definition.
-defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
// WriteMULX64: same 4 -> 3 adjustment as WriteMULX32 above.
-defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
// WriteIMulH consumes no resources (empty list); only its Latency is set, and
// that goes from 3 to 4.
-def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// The load-folded variant is not hard-coded: it is SKXWriteIMulH.Latency plus
// the model's LoadLatency, so it tracks the value set above.
def : WriteRes<WriteIMulHLd, []> {
let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
}
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 8
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.80
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.73
+# CHECK-NEXT: IPC: 0.18
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 - mulxl %eax, %eax, %eax
# CHECK: Timeline view:
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %eax
-# CHECK-NEXT: [1,0] .D==eeeeER mulxl %eax, %eax, %eax
+# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %eax
+# CHECK-NEXT: [1,0] .D===eeeeER mulxl %eax, %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxl %eax, %eax, %eax
+# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxl %eax, %eax, %eax
# CHECK: [1] Code Region
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 6
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.18
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: - - - 1.00 - - - - 1.00 - mulxq %rax, %rax, %rax
# CHECK: Timeline view:
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rax
-# CHECK-NEXT: [1,0] .D==eeeeER mulxq %rax, %rax, %rax
+# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rax
+# CHECK-NEXT: [1,0] .D===eeeeER mulxq %rax, %rax, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxq %rax, %rax, %rax
+# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq %rax, %rax, %rax
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 8
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 0.80
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.73
+# CHECK-NEXT: IPC: 0.18
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 - mulxl %eax, %eax, %eax
# CHECK: Timeline view:
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %eax
-# CHECK-NEXT: [1,0] .D==eeeeER mulxl %eax, %eax, %eax
+# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %eax
+# CHECK-NEXT: [1,0] .D===eeeeER mulxl %eax, %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxl %eax, %eax, %eax
+# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxl %eax, %eax, %eax
# CHECK: [1] Code Region
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 6
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.18
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - mulxq %rax, %rax, %rax
# CHECK: Timeline view:
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rax
-# CHECK-NEXT: [1,0] D===eeeeER mulxq %rax, %rax, %rax
+# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rax
+# CHECK-NEXT: [1,0] D====eeeeER mulxq %rax, %rax, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq %rax, %rax, %rax
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxq %rax, %rax, %rax