let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instrs IMUL32r, IMUL64r, MUL32r, MUL64r)>;
-def: InstRW<[BWWriteResGroup42], (instrs MULX64rr)>;
+def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr)>;
def: InstRW<[BWWriteResGroup42], (instregex "CVTDQ2PDrr",
"CVTPD2DQrr",
"CVTPD2PSrr",
def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 5;
+ let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup52], (instrs MULX32rr)>;
+def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup91], (instrs IMUL64m, MUL64m)>;
def: InstRW<[BWWriteResGroup91], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi8, IMUL64rmi32)>;
def: InstRW<[BWWriteResGroup91], (instrs IMUL8m, MUL8m)>;
def: InstRW<[BWWriteResGroup91], (instregex "ADDPDrm",
}
def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>;
-def BWWriteResGroup91_32 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup91_32], (instrs IMUL32m, MUL32m)>;
-
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup107], (instrs MULX64rm)>;
+def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm)>;
def: InstRW<[BWWriteResGroup107], (instregex "CVTDQ2PDrm",
"CVTPD2DQrm",
"CVTPD2PSrm",
def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
- let Latency = 10;
+ let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[BWWriteResGroup121], (instrs MULX32rm)>;
+def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
def BWWriteResGroup122 : SchedWriteRes<[BWPort0]> {
let Latency = 11;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m, MUL32m, MUL64m,
- IMUL8m, IMUL16m, IMUL32m, IMUL64m,
+def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m,
+ IMUL8m, IMUL16m,
IMUL16rm, IMUL16rmi, IMUL16rmi8, IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>;
def: InstRW<[HWWriteResGroup12], (instregex "BSF(16|32|64)rm",
"BSR(16|32|64)rm",
}
def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>;
-def HWWriteResGroup74_32 : SchedWriteRes<[HWPort1,HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
-}
-def: InstRW<[HWWriteResGroup74_32], (instrs IMUL32r, MUL32r)>;
-
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup79], (instrs MULX64rm)>;
+def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 5;
+ let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup95], (instrs MULX32rr)>;
+def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>;
def HWWriteResGroup96 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 11;
def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> {
- let Latency = 10;
+ let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[HWWriteResGroup98], (instrs MULX32rm)>;
+def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>;
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
"UCOM_FIPr",
"UCOM_FIr")>;
+// FIXME: this is probably incorrect.
def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
let Latency = 4;
let NumMicroOps = 2;
"CVTTSD2SIrm",
"CVTTSS2SI64rm",
"CVTTSS2SIrm")>;
+// FIXME: this is probably incorrect.
def: InstRW<[SBWriteResGroup93], (instrs MUL16m, MUL32m, MUL64m)>;
def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>;
def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 5;
+ let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r)>;
-def: InstRW<[SKLWriteResGroup62], (instrs MULX32rr)>;
+def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>;
def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup107], (instrs IMUL64m, MUL64m)>;
def: InstRW<[SKLWriteResGroup107], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>;
def: InstRW<[SKLWriteResGroup107], (instrs IMUL8m, MUL8m)>;
def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm",
"BSR(16|32|64)rm",
}
def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>;
-def SKLWriteResGroup107_32 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup107_32], (instrs IMUL32m, MUL32m)>;
-
def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup127], (instrs MULX64rm)>;
+def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>;
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 9;
"VPHSUBWYrm")>;
def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
- let Latency = 10;
+ let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup142], (instrs MULX32rm)>;
+// IMUL32m/MUL32m (not IMUL32rm) share MULX32rm's resources here, mirroring
+// the equivalent Broadwell, Haswell and SKX groups.
+def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>;
def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 10;
"VPMOVWBZ256rr(b?)(k?)(z?)",
"VPMOVWBZrr(b?)(k?)(z?)")>;
-// FIXME: IMUL32r/MUL32r should be uops lik SkylakeClient.
def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup52], (instrs IMUL32r, IMUL64r)>;
-def: InstRW<[SKXWriteResGroup52], (instrs MUL32r, MUL64r)>;
-def: InstRW<[SKXWriteResGroup52], (instrs MULX64rr)>;
+def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>;
def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 4;
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 5;
+ let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup64], (instrs MULX32rr)>;
+def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>;
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup118], (instrs IMUL64m, MUL64m)>;
def: InstRW<[SKXWriteResGroup118], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>;
def: InstRW<[SKXWriteResGroup118], (instrs IMUL8m, MUL8m)>;
def: InstRW<[SKXWriteResGroup118], (instregex "BSF(16|32|64)rm",
}
def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>;
-def SKXWriteResGroup118_32 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup118_32], (instrs IMUL32m, MUL32m)>;
-
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup142], (instrs MULX64rm)>;
+def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>;
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
"VPHSUBWYrm")>;
def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
- let Latency = 10;
+ let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup156], (instrs MULX32rm)>;
+def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>;
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
; HASWELL-LABEL: test_mulx_i32:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
-; HASWELL-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00]
-; HASWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00]
+; HASWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
+; HASWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_mulx_i32:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00]
-; BROADWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00]
+; BROADWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
+; BROADWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_mulx_i32:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00]
-; SKYLAKE-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00]
+; SKYLAKE-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
+; SKYLAKE-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; KNL-LABEL: test_mulx_i32:
; KNL: # %bb.0:
; KNL-NEXT: #APP
-; KNL-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00]
-; KNL-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00]
+; KNL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
+; KNL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
; KNL-NEXT: #NO_APP
; KNL-NEXT: retq # sched: [7:1.00]
;
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: imull %edi # sched: [4:1.00]
-; HASWELL-NEXT: imull (%rsi) # sched: [8:1.00]
+; HASWELL-NEXT: imull (%rsi) # sched: [9:1.00]
; HASWELL-NEXT: imull %edi, %edi # sched: [3:1.00]
; HASWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00]
; HASWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: imull %edi # sched: [4:1.00]
-; BROADWELL-NEXT: imull (%rsi) # sched: [8:1.00]
+; BROADWELL-NEXT: imull (%rsi) # sched: [9:1.00]
; BROADWELL-NEXT: imull %edi, %edi # sched: [3:1.00]
; BROADWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00]
; BROADWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
; SKYLAKE-LABEL: test_imul_32:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: imull %edi # sched: [5:1.00]
+; SKYLAKE-NEXT: imull %edi # sched: [4:1.00]
; SKYLAKE-NEXT: imull (%rsi) # sched: [8:1.00]
; SKYLAKE-NEXT: imull %edi, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [8:1.00]
+; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [9:1.00]
; SKYLAKE-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
; SKYLAKE-NEXT: # sched: [3:1.00]
; SKYLAKE-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: imull %edi # sched: [4:1.00]
-; SKX-NEXT: imull (%rsi) # sched: [8:1.00]
+; SKX-NEXT: imull (%rsi) # sched: [9:1.00]
; SKX-NEXT: imull %edi, %edi # sched: [3:1.00]
; SKX-NEXT: imull (%rsi), %edi # sched: [8:1.00]
; SKX-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; HASWELL-NEXT: imulq %rdi # sched: [4:1.00]
-; HASWELL-NEXT: imulq (%rsi) # sched: [8:1.00]
+; HASWELL-NEXT: imulq (%rsi) # sched: [9:1.00]
; HASWELL-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
; HASWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
; HASWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
; BROADWELL-NEXT: imulq %rdi # sched: [4:1.00]
-; BROADWELL-NEXT: imulq (%rsi) # sched: [8:1.00]
+; BROADWELL-NEXT: imulq (%rsi) # sched: [9:1.00]
; BROADWELL-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
; BROADWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
; BROADWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: imulq %rdi # sched: [4:1.00]
-; SKYLAKE-NEXT: imulq (%rsi) # sched: [8:1.00]
+; SKYLAKE-NEXT: imulq (%rsi) # sched: [9:1.00]
; SKYLAKE-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
; SKYLAKE-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
; SKYLAKE-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: imulq %rdi # sched: [4:1.00]
-; SKX-NEXT: imulq (%rsi) # sched: [8:1.00]
+; SKX-NEXT: imulq (%rsi) # sched: [9:1.00]
; SKX-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
; SKX-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
; SKX-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
; HASWELL-NEXT: mulw %si # sched: [4:1.00]
; HASWELL-NEXT: mulw (%r9) # sched: [8:1.00]
; HASWELL-NEXT: mull %edx # sched: [4:1.00]
-; HASWELL-NEXT: mull (%rax) # sched: [8:1.00]
+; HASWELL-NEXT: mull (%rax) # sched: [9:1.00]
; HASWELL-NEXT: mulq %rcx # sched: [4:1.00]
-; HASWELL-NEXT: mulq (%r10) # sched: [8:1.00]
+; HASWELL-NEXT: mulq (%r10) # sched: [9:1.00]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-NEXT: mulw %si # sched: [4:1.00]
; BROADWELL-NEXT: mulw (%r9) # sched: [8:1.00]
; BROADWELL-NEXT: mull %edx # sched: [4:1.00]
-; BROADWELL-NEXT: mull (%rax) # sched: [8:1.00]
+; BROADWELL-NEXT: mull (%rax) # sched: [9:1.00]
; BROADWELL-NEXT: mulq %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: mulq (%r10) # sched: [8:1.00]
+; BROADWELL-NEXT: mulq (%r10) # sched: [9:1.00]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-NEXT: mulb (%r8) # sched: [8:1.00]
; SKYLAKE-NEXT: mulw %si # sched: [4:1.00]
; SKYLAKE-NEXT: mulw (%r9) # sched: [8:1.00]
-; SKYLAKE-NEXT: mull %edx # sched: [5:1.00]
-; SKYLAKE-NEXT: mull (%rax) # sched: [8:1.00]
+; SKYLAKE-NEXT: mull %edx # sched: [4:1.00]
+; SKYLAKE-NEXT: mull (%rax) # sched: [9:1.00]
; SKYLAKE-NEXT: mulq %rcx # sched: [4:1.00]
-; SKYLAKE-NEXT: mulq (%r10) # sched: [8:1.00]
+; SKYLAKE-NEXT: mulq (%r10) # sched: [9:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-NEXT: mulw %si # sched: [4:1.00]
; SKX-NEXT: mulw (%r9) # sched: [8:1.00]
; SKX-NEXT: mull %edx # sched: [4:1.00]
-; SKX-NEXT: mull (%rax) # sched: [8:1.00]
+; SKX-NEXT: mull (%rax) # sched: [9:1.00]
; SKX-NEXT: mulq %rcx # sched: [4:1.00]
-; SKX-NEXT: mulq (%r10) # sched: [8:1.00]
+; SKX-NEXT: mulq (%r10) # sched: [9:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;