This fixes a couple of missing BtVer2 instructions that weren't being handled in the override.
NOTE: There are still a lot of overrides that need cleaning up!
llvm-svn: 331770
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
"mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>,
- Sched<[WriteIMul]>;
+ Sched<[WriteIMul64]>;
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMulLd>;
+ (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>;
// AX,DX = AX*[mem16]
let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMulLd>;
+ "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMulLd>;
+ "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>, SchedLoadReg<WriteIMulLd>,
+ "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
Requires<[In64BitMode]>;
}
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>,
- Sched<[WriteIMul]>;
+ Sched<[WriteIMul64]>;
let mayLoad = 1 in {
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMulLd>;
+ "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMulLd>;
+ "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMulLd>;
+ "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>, SchedLoadReg<WriteIMulLd>,
+ "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
Requires<[In64BitMode]>;
}
} // hasSideEffects
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1, SchedRW = [WriteIMul] in {
+let isCommutable = 1 in {
// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize16;
+ (X86smul_flag GR16:$src1, GR16:$src2))]>,
+ Sched<[WriteIMul]>, TB, OpSize16;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>, TB, OpSize32;
+ (X86smul_flag GR32:$src1, GR32:$src2))]>,
+ Sched<[WriteIMul]>, TB, OpSize32;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
-} // isCommutable, SchedRW
+ (X86smul_flag GR64:$src1, GR64:$src2))]>,
+ Sched<[WriteIMul64]>, TB;
+} // isCommutable
// Register-Memory Signed Integer Multiply
-let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- TB, OpSize16;
+ Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- TB, OpSize32;
+ Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- TB;
-} // SchedRW
+ Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
-let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, imm:$src2))]>,
- OpSize16;
+ Sched<[WriteIMul]>, OpSize16;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- OpSize16;
+ Sched<[WriteIMul]>, OpSize16;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, imm:$src2))]>,
- OpSize32;
+ Sched<[WriteIMul]>, OpSize32;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
- OpSize32;
+ Sched<[WriteIMul]>, OpSize32;
def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
+ Sched<[WriteIMul64]>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
-} // SchedRW
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
+ Sched<[WriteIMul64]>;
// Memory-Integer Signed Integer Multiply
-let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- OpSize16;
+ Sched<[WriteIMul.Folded]>, OpSize16;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1),
i16immSExt8:$src2))]>,
- OpSize16;
+ Sched<[WriteIMul.Folded]>, OpSize16;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- OpSize32;
+ Sched<[WriteIMul.Folded]>, OpSize32;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1),
i32immSExt8:$src2))]>,
- OpSize32;
+ Sched<[WriteIMul.Folded]>, OpSize32;
def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
- i64immSExt32:$src2))]>;
+ i64immSExt32:$src2))]>,
+ Sched<[WriteIMul64.Folded]>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
- i64immSExt8:$src2))]>;
-} // SchedRW
+ i64immSExt8:$src2))]>,
+ Sched<[WriteIMul64.Folded]>;
} // Defs = [EFLAGS]
-
-
-
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
//===----------------------------------------------------------------------===//
// MULX Instruction
//
-multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop> {
+multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V, Sched<[WriteIMul, WriteIMulH]>;
+ []>, T8XD, VEX_4V, Sched<[sched, WriteIMulH]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V, Sched<[WriteIMulLd, WriteIMulH]>;
+ []>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
}
}
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>;
let Uses = [RDX] in
- defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
}
//===----------------------------------------------------------------------===//
def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
// Arithmetic.
-defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
-defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
+defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
+defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
+defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication.
defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
def : WriteRes<WriteMove, [HWPort0156]>;
def : WriteRes<WriteZero, []>;
-defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
-defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
+defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
+defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
-defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
-defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
+defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
+defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
defm : HWWriteResPair<WriteCMOV, [HWPort06,HWPort0156], 2, [1,1], 2>; // Conditional move.
def : WriteRes<WriteSETCC, [HWPort06]>; // Setcc.
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
-defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
-defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
+defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
+defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
"SHR(8|16|32|64)m1",
"SHR(8|16|32|64)mi")>;
-def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
-
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
let NumMicroOps = 3;
def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>;
// Arithmetic.
-defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
-defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
+defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
+defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
+defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;
// Arithmetic.
-defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
-defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
+defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
+defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
+defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
def WriteMove : SchedWrite;
// Arithmetic.
-defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
-def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
-defm WriteIMul : X86SchedWritePair; // Integer multiplication.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
-def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
+defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
}
def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
-def AtomWriteIMul64 : SchedWriteRes<[AtomPort01]> {
- let Latency = 12;
- let ResourceCycles = [12];
-}
-def : InstRW<[AtomWriteIMul64], (instrs MUL64r, IMUL64r, IMUL64rr, IMUL64rm,
- MUL64m, IMUL64m)>;
-
def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
+defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
-def : WriteRes<WriteIMulH, [JALU1]> {
- let Latency = 6;
- let ResourceCycles = [4];
-}
-
// This is for simple LEAs with one or two input operands.
// FIXME: SAGU 3-operand LEA
def : WriteRes<WriteLEA, [JALU01]>;
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
defm : JWriteResIntPair<WriteBZHI, [JALU01], 1>; // NOTE: Doesn't exist on Jaguar.
-def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> {
- let Latency = 6;
- let ResourceCycles = [1, 4];
- let NumMicroOps = 2;
-}
-def JWriteIMul64Ld : SchedWriteRes<[JLAGU, JALU1, JMul]> {
- let Latency = 9;
- let ResourceCycles = [1, 1, 4];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteIMul64], (instrs MUL64r, IMUL64r)>;
-def : InstRW<[JWriteIMul64Ld], (instrs MUL64m, IMUL64m)>;
-
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
-defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
-defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
-defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
+defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
+defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
// This multiclass is for folded loads for integer units.
multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadLat = 4, int LoadUOps = 1> {
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
}
// Memory variant also uses a cycle on ZnAGU
- // adds 4 cycles to the latency.
+ // adds LoadLat cycles to the latency (default = 4).
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
- let Latency = !add(Lat, 4);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = !add(UOps, 1);
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
+defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
;
; JAG-NOOPT-LABEL: test_mul_by_6:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_6:
;
; JAG-NOOPT-LABEL: test_mul_by_7:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_7:
;
; JAG-NOOPT-LABEL: test_mul_by_10:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_10:
;
; JAG-NOOPT-LABEL: test_mul_by_11:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_11:
;
; JAG-NOOPT-LABEL: test_mul_by_12:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_12:
;
; JAG-NOOPT-LABEL: test_mul_by_13:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_13:
;
; JAG-NOOPT-LABEL: test_mul_by_14:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_14:
;
; JAG-NOOPT-LABEL: test_mul_by_15:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_15:
;
; JAG-NOOPT-LABEL: test_mul_by_17:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_17:
;
; JAG-NOOPT-LABEL: test_mul_by_18:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_18:
;
; JAG-NOOPT-LABEL: test_mul_by_19:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_19:
;
; JAG-NOOPT-LABEL: test_mul_by_20:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_20:
;
; JAG-NOOPT-LABEL: test_mul_by_21:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_21:
;
; JAG-NOOPT-LABEL: test_mul_by_22:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_22:
;
; JAG-NOOPT-LABEL: test_mul_by_23:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_23:
;
; JAG-NOOPT-LABEL: test_mul_by_24:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_24:
;
; JAG-NOOPT-LABEL: test_mul_by_25:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_25:
;
; JAG-NOOPT-LABEL: test_mul_by_26:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_26:
;
; JAG-NOOPT-LABEL: test_mul_by_27:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_27:
;
; JAG-NOOPT-LABEL: test_mul_by_28:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_28:
;
; JAG-NOOPT-LABEL: test_mul_by_29:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_29:
;
; JAG-NOOPT-LABEL: test_mul_by_30:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_30:
;
; JAG-NOOPT-LABEL: test_mul_by_31:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_by_31:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-JAG-NEXT: imulq %rcx, %rax # sched: [6:4.00]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # %bb.0:
; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [6:4.00]
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
;
; X64-SLM-LABEL: test_mul_spec:
; BTVER2-NEXT: #APP
; BTVER2-NEXT: imulq %rdi # sched: [6:4.00]
; BTVER2-NEXT: imulq (%rsi) # sched: [9:4.00]
-; BTVER2-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
-; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [6:1.00]
+; BTVER2-NEXT: imulq %rdi, %rdi # sched: [6:4.00]
+; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [9:4.00]
; BTVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [3:1.00]
+; BTVER2-NEXT: # sched: [6:4.00]
; BTVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [6:1.00]
-; BTVER2-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; BTVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [6:1.00]
+; BTVER2-NEXT: # sched: [9:4.00]
+; BTVER2-NEXT: imulq $7, %rdi, %rdi # sched: [6:4.00]
+; BTVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [9:4.00]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
# CHECK-NEXT: 2 6 1.00 * imull $7, (%rax), %edi
# CHECK-NEXT: 2 6 4.00 imulq %rdi
# CHECK-NEXT: 2 9 4.00 * imulq (%rax)
-# CHECK-NEXT: 2 3 1.00 imulq %rdi, %rdi
-# CHECK-NEXT: 2 6 1.00 * imulq (%rax), %rdi
-# CHECK-NEXT: 2 3 1.00 imulq $665536, %rdi, %rdi
-# CHECK-NEXT: 2 6 1.00 * imulq $665536, (%rax), %rdi
-# CHECK-NEXT: 2 3 1.00 imulq $7, %rdi, %rdi
-# CHECK-NEXT: 2 6 1.00 * imulq $7, (%rax), %rdi
+# CHECK-NEXT: 2 6 4.00 imulq %rdi, %rdi
+# CHECK-NEXT: 2 9 4.00 * imulq (%rax), %rdi
+# CHECK-NEXT: 2 6 4.00 imulq $665536, %rdi, %rdi
+# CHECK-NEXT: 2 9 4.00 * imulq $665536, (%rax), %rdi
+# CHECK-NEXT: 2 6 4.00 imulq $7, %rdi, %rdi
+# CHECK-NEXT: 2 9 4.00 * imulq $7, (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 incb %dil
# CHECK-NEXT: 2 5 1.00 * * incb (%rax)
# CHECK-NEXT: 1 1 0.50 incw %di
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 357.50 407.50 380.00 - - - - 214.00 46.00 158.00 - - - -
+# CHECK-NEXT: 357.50 407.50 380.00 - - - - 214.00 64.00 158.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imull $7, (%rax), %edi
# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq %rdi
# CHECK-NEXT: - 1.00 - - - - - 1.00 4.00 - - - - - imulq (%rax)
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulq %rdi, %rdi
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulq (%rax), %rdi
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulq $665536, %rdi, %rdi
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulq $665536, (%rax), %rdi
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulq $7, %rdi, %rdi
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulq $7, (%rax), %rdi
+# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq %rdi, %rdi
+# CHECK-NEXT: - 1.00 - - - - - 1.00 4.00 - - - - - imulq (%rax), %rdi
+# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq $665536, %rdi, %rdi
+# CHECK-NEXT: - 1.00 - - - - - 1.00 4.00 - - - - - imulq $665536, (%rax), %rdi
+# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq $7, %rdi, %rdi
+# CHECK-NEXT: - 1.00 - - - - - 1.00 4.00 - - - - - imulq $7, (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - incb %dil
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - incb (%rax)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - incw %di