As suggested by Craig Topper, I'm going to look at cleaning up the RMW sequences instead.
The uop count differs slightly from the register variant, so the folded variant requires a +1 uop tweak.
llvm-svn: 342969
multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 5, int LoadUOps = 1> {
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
def : WriteRes<SchedRW.Folded, !listconcat([BWPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = !add(UOps, LoadUOps);
+ let NumMicroOps = !add(UOps, 1);
}
}
// Integer shifts and rotates.
defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
-defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3, 5, 2>;
+defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
defm : BWWriteResPair<WriteRotate, [BWPort06], 2, [2], 2>;
-defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3, 5, 2>;
+defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
let ResourceCycles = [1,1,1,2,1];
}
def : SchedAlias<WriteADCRMW, BWWriteResGroup100>;
+def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 9;
multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 5, int LoadUOps = 1> {
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
def : WriteRes<SchedRW.Folded, !listconcat([HWPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = !add(UOps, LoadUOps);
+ let NumMicroOps = !add(UOps, 1);
}
}
// Integer shifts and rotates.
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
-defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3, 6, 2>;
+defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
defm : HWWriteResPair<WriteRotate, [HWPort06], 2, [2], 2>;
-defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3, 6, 2>;
+defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,2,1];
}
+def: InstRW<[HWWriteResGroup69], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
def: SchedAlias<WriteADCRMW, HWWriteResGroup69>;
def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort1]> {
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 5, int LoadUOps = 1> {
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = !add(UOps, LoadUOps);
+ let NumMicroOps = !add(UOps, 1);
}
}
defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
-defm : SBWriteResPair<WriteShift, [SBPort05], 1, [1], 1, 6, 2>;
-defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3, 6, 2>;
-defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2, 6, 2>;
-defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3, 6, 2>;
+defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3>;
+defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2>;
+defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
}
def: InstRW<[SBWriteResGroup69], (instregex "BTC(16|32|64)mi8",
"BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8")>;
+ "BTS(16|32|64)mi8",
+ "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
}
def: InstRW<[SBWriteResGroup84], (instrs FLDCW16m)>;
+def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
+
def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 8;
let NumMicroOps = 5;
def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m",
"IST_FP(16|32|64)m")>;
+def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,2,3];
+}
+def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
+
def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 9;
let NumMicroOps = 6;
multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 5, int LoadUOps = 1> {
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
def : WriteRes<SchedRW.Folded, !listconcat([SKLPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = !add(UOps, LoadUOps);
+ let NumMicroOps = !add(UOps, 1);
}
}
defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
-defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
-defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3, 5, 2>;
-defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>;
-defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3, 5, 2>;
+defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3>;
+defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>;
+defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
+def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[SKLWriteResGroup117], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
+
def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 5, int LoadUOps = 1> {
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = !add(UOps, LoadUOps);
+ let NumMicroOps = !add(UOps, 1);
}
}
def : WriteRes<WriteBitTest, [SKXPort06]>; //
// Integer shifts and rotates.
-defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
-defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3, 5, 2>;
-defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>;
-defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3, 5, 2>;
+defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
+defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>;
+defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
+def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
+
def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
; GENERIC-LABEL: test_rorx_i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: rorxl $5, (%rdx), %eax # sched: [7:0.50]
+; GENERIC-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_rorx_i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: rorxq $5, (%rdx), %rax # sched: [7:0.50]
+; GENERIC-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_sarx_i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: sarxl %esi, (%rdx), %eax # sched: [7:0.50]
+; GENERIC-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_sarx_i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: sarxq %rsi, (%rdx), %rax # sched: [7:0.50]
+; GENERIC-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_shlx_i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: shlxl %esi, (%rdx), %eax # sched: [7:0.50]
+; GENERIC-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_shlx_i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: shlxq %rsi, (%rdx), %rax # sched: [7:0.50]
+; GENERIC-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_shrx_i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: shrxl %esi, (%rdx), %eax # sched: [7:0.50]
+; GENERIC-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50]
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_shrx_i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: shrxq %rsi, (%rdx), %rax # sched: [7:0.50]
+; GENERIC-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
# CHECK-NEXT: 1 1 0.33 pextq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * pextq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 1 0.50 rorxl $1, %eax, %ecx
-# CHECK-NEXT: 3 7 0.50 * rorxl $1, (%rax), %ecx
+# CHECK-NEXT: 2 6 0.50 * rorxl $1, (%rax), %ecx
# CHECK-NEXT: 1 1 0.50 rorxq $1, %rax, %rcx
-# CHECK-NEXT: 3 7 0.50 * rorxq $1, (%rax), %rcx
+# CHECK-NEXT: 2 6 0.50 * rorxq $1, (%rax), %rcx
# CHECK-NEXT: 1 1 0.50 sarxl %eax, %ebx, %ecx
-# CHECK-NEXT: 3 7 0.50 * sarxl %eax, (%rbx), %ecx
+# CHECK-NEXT: 2 6 0.50 * sarxl %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 sarxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 7 0.50 * sarxq %rax, (%rbx), %rcx
+# CHECK-NEXT: 2 6 0.50 * sarxq %rax, (%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 shlxl %eax, %ebx, %ecx
-# CHECK-NEXT: 3 7 0.50 * shlxl %eax, (%rbx), %ecx
+# CHECK-NEXT: 2 6 0.50 * shlxl %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 shlxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 7 0.50 * shlxq %rax, (%rbx), %rcx
+# CHECK-NEXT: 2 6 0.50 * shlxq %rax, (%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 shrxl %eax, %ebx, %ecx
-# CHECK-NEXT: 3 7 0.50 * shrxl %eax, (%rbx), %ecx
+# CHECK-NEXT: 2 6 0.50 * shrxl %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 shrxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 7 0.50 * shrxq %rax, (%rbx), %rcx
+# CHECK-NEXT: 2 6 0.50 * shrxq %rax, (%rbx), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider