Currently we hardcode ReadAfterLd on instructions whose register operands don't need to be available until the folded load has completed. This doesn't take into account the different load latencies of different memory operands (PR36957).
This patch adds a ReadAfterFold def to X86FoldableSchedWrite to replace ReadAfterLd, allowing us to specify the load latency at the scheduler-class level.
I've added ReadAfterVec*Ld classes that match the XMM/Scl, XMM, and YMM/ZMM WriteVecLoad classes that we currently use; we can tweak these values in future patches once this infrastructure is in place.
Differential Revision: https://reviews.llvm.org/D52886
llvm-svn: 343868
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
(From.VT (bitconvert (From.LdFrag addr:$src2))),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
//===----------------------------------------------------------------------===//
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(_.VT (X86VPermt2 _.RC:$src2,
IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
(bitconvert (_.LdFrag addr:$src3)))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
}
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(_.ScalarLdFrag addr:$src2),
imm:$cc))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
cond)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
(_.VT (bitconvert
(_.LdFrag addr:$src2))),
cond))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
(X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
[(set _.KRC:$dst,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
[(set _.KRC:$dst,(OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
[(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))]>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))]>,
- EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2))))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(bitconvert (_Src.LdFrag addr:$src2))))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2))))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
(_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(bitconvert (_Src.LdFrag addr:$src2))))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
(_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2))))))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// OpNodeMsk is the OpNode to use where element size is important. So use
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(bitconvert (_.LdFrag addr:$src2))))),
_.ImmAllZerosV)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Patterns for compare with 0 that just use the same source twice.
(_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
AVX512BIBase,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(_.VT (OpNode _.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
_.RC:$src1,
(Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
(OpNode _.RC:$src2,
(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
_.RC:$src1)>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2),
(i32 FROUND_CURRENT)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}//isCodeGenOnly = 1
}
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
(SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
- EVEX, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
(_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2)>, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(fsqrt (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(fsqrt (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(X86fsqrtRnds (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(CastInfo.VT (X86Shuf128 _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 imm:$src3)))))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
(X86Shuf128 _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(i8 imm:$src3)))))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(X86VAlign _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
AVX512FMA3Base,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (bitconvert
(VTI.LdFrag addr:$src3)))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
(VTI.VT (X86VBroadcast
(VTI.ScalarLdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
- T8PD, Sched<[sched.Folded, ReadAfterLd]>;
+ T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
(VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
// (and possibly third) value from a register.
// This is used for instructions that put the memory operands before other
// uses.
-class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Register reads (implicit or explicit).
- ReadAfterLd, ReadAfterLd]>;
+ Sched.ReadAfterFold, Sched.ReadAfterFold]>;
// Extra precision multiplication
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8.Folded>;
+ (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16.Folded>;
+ "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32.Folded>;
+ "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8.Folded>;
+ "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16.Folded>;
+ "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32.Folded>;
+ "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- Sched<[WriteIMul16Reg.Folded, ReadAfterLd]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg.Folded, WriteIMul16Reg.ReadAfterFold]>, TB, OpSize16;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- Sched<[WriteIMul32Reg.Folded, ReadAfterLd]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB, OpSize32;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- Sched<[WriteIMul64Reg.Folded, ReadAfterLd]>, TB;
+ Sched<[WriteIMul64Reg.Folded, WriteIMul64Reg.ReadAfterFold]>, TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>, SchedLoadReg<WriteDiv8.Folded>;
+ "div{b}\t$src", []>, SchedLoadReg<WriteDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16.Folded>;
+ "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>, SchedLoadReg<WriteDiv32.Folded>, OpSize32;
+ "div{l}\t$src", []>, SchedLoadReg<WriteDiv32>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>, SchedLoadReg<WriteDiv64.Folded>,
+ "div{q}\t$src", []>, SchedLoadReg<WriteDiv64>,
Requires<[In64BitMode]>;
}
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8.Folded>;
+ "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16.Folded>;
+ "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32.Folded>;
+ "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64.Folded>,
+ "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects = 0
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.RegClass:$src))]>,
- Sched<[WriteALULd, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
// BinOpMI - Instructions like "add [mem], imm".
class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.ImmOperator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI8<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.Imm8Operator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- Sched<[WriteALULd, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
// Complexity is reduced to give and with immediate a chance to match first.
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
} // SchedRW
- let mayLoad = 1, SchedRW = [WriteADCLd, ReadAfterLd] in {
+ let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // mayLoad = 1, SchedRW = [WriteADCLd]
+ } // mayLoad, SchedRW
}
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- SchedRW = [Sched.Folded, ReadAfterLd] in {
+ SchedRW = [Sched.Folded, Sched.ReadAfterFold] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinscis
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
- VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
+ VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
- Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, sched.XMM.ReadAfterFold]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
- Sched<[sched.XMM.Folded, ReadAfterLd,
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.XMM.ReadAfterFold]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, sched.YMM.ReadAfterFold]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd,
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
- ReadAfterLd]>;
+ sched.YMM.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALULd, ReadAfterLd]>;
+ VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
let Predicates = [HasBMI2] in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
}
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[], SSEPackedSingle>, PS,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, PD,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
def : Pat<(f32 (fpround FR64:$src)),
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>;
}
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let isCodeGenOnly = 1 in {
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Defs = [EFLAGS] in {
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>,
+ asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], d>, Sched<[sched.Folded, ReadAfterLd]>;
+ [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
(SrcVT (bitconvert (ld_frag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!if(Is2Addr,
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
(bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
(bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1,
(bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
// Extract
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
[(set RC:$dst,
(DstVT (OpNode (OpVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
(bitconvert (memop_frag addr:$src2)),
(i8 imm:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoBWI] in
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
(X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
(outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
VEX, VEX_L, VEX_WIG;
}
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
}
// The bit test instructions below are AVX only
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
[(set RC:$dst,
(IntId RC:$src1,
(bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def BlendCommuteImm2 : SDNodeXForm<imm, [{
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Pattern to commute if load is in first source.
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))], SSEPackedInt>, TAPD, VEX_4V,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (mem_frag addr:$src2)), XMM0))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in
def rm :SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
def rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
- Sched<[WriteCRC32.Folded, ReadAfterLd]>;
+ Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
(bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
(set VR128:$dst, (IntId VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
(int_x86_sha1rnds4 VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)),
(i8 imm:$src3)))]>, TA,
- Sched<[SchedWriteVecIMul.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecIMul.XMM.Folded,
+ SchedWriteVecIMul.XMM.ReadAfterFold]>;
defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
SchedWriteVecIMul.XMM>;
def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2), "",
[(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
- Sched<[WriteAESDecEnc.Folded, ReadAfterLd]>;
+ Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
}
}
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and
// rotating the immediate.
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
// To create a 256-bit all ones value, we should produce VCMPTRUEPS
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
(i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[varsched.Folded, ReadAfterLd]>;
+ Sched<[varsched.Folded, sched.ReadAfterFold]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
- Sched<[WriteFShuffle256Ld, ReadAfterLd]>;
+ Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX_4V;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
(bitconvert (mem_frag addr:$src2)))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
}
}
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
}
}
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>,
- Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2, NoVLX] in {
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, ReadAfterLd]>;
+ VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
+ SchedWriteVarVecShift.XMM.ReadAfterFold]>;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
(vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, ReadAfterLd]>;
+ VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
+ SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}
let Predicates = [HasAVX2, NoVLX] in {
def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
(bitconvert (MemOpFrag addr:$src2)))))]>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>, T8PD;
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
}
}
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (MemOpFrag addr:$src2)),
imm:$src3)))], SSEPackedInt>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
}
def rm : I<0xF7, MRMSrcMem4VOp3,
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX, Sched<[WriteShiftLd,
+ VEX, Sched<[WriteShift.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ WriteShift.ReadAfterFold]>;
}
}
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
(vt128 VR128:$src2))))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))),
imm:$cc)))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
+ []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
// FP division and sqrt on port 0.
def BWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/5/6 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/5/6 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 6>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// FP division and sqrt on port 0.
def HWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// FP division and sqrt on port 0.
def SBFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
+def ReadAfterVecLd : SchedRead;
+def ReadAfterVecXLd : SchedRead;
+def ReadAfterVecYLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
class X86FoldableSchedWrite : SchedWrite {
// The SchedWrite to use when a load is folded into the instruction.
SchedWrite Folded;
+ // The SchedRead to tag register operands that don't need to be ready
+ // until the folded load has completed.
+ SchedRead ReadAfterFold;
}
// Multiclass that produces a linked pair of SchedWrites.
-multiclass X86SchedWritePair {
+multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
// Register-Memory operation.
def Ld : SchedWrite;
// Register-Register operation.
def NAME : X86FoldableSchedWrite {
let Folded = !cast<SchedWrite>(NAME#"Ld");
+ let ReadAfterFold = ReadAfter;
}
}
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
-defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
-defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
-defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
-defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
-defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
-defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
-defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
-defm WriteFCmp : X86SchedWritePair; // Floating point compare.
-defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
-defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
-defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
-defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
-defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
-defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
-defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
-defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
-defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
-defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
-defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
-defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
-defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
-defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
-defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
-defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
-defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
-defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
-defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
-defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
-defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
-defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
-defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
-defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
-defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
-defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
-defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
-defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
-defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
-defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
-defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
-defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
-defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
-defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
-defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
-defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
-defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
-defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
-defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
-defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
-defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
-defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
-defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
-defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
-defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
-defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
-defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
-defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
-defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
-defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
-defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
-defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
-defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
-defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
-defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
-defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
-defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
-defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
-defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
-defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
-defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
-defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
-defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (YMZMM).
+defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
+defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
+defm WriteFAddY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
+defm WriteFAddZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
+defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double add/sub.
+defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
+defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
+defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
+defm WriteFCmp : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare.
+defm WriteFCmpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
+defm WriteFCmpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
+defm WriteFCmpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
+defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double compare.
+defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
+defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
+defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
+defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
+defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
+defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
+defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
+defm WriteFMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
+defm WriteFMul64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double multiplication.
+defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
+defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
+defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
+defm WriteFDiv : X86SchedWritePair<ReadAfterVecLd>; // Floating point division.
+defm WriteFDivX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
+defm WriteFDivY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
+defm WriteFDivZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
+defm WriteFDiv64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double division.
+defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
+defm WriteFSqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point square root.
+defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
+defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
+defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
+defm WriteFSqrt64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double square root.
+defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
+defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
+defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
+defm WriteFSqrt80 : X86SchedWritePair<ReadAfterVecLd>; // Floating point long double square root.
+defm WriteFRcp : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal estimate.
+defm WriteFRcpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
+defm WriteFRcpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
+defm WriteFRcpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
+defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal square root estimate.
+defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
+defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
+defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
+defm WriteFMA : X86SchedWritePair<ReadAfterVecLd>; // Fused Multiply Add.
+defm WriteFMAX : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
+defm WriteFMAY : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
+defm WriteFMAZ : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
+defm WriteDPPD : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
+defm WriteDPPS : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
+defm WriteDPPSY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
+defm WriteDPPSZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
+defm WriteFSign : X86SchedWritePair<ReadAfterVecLd>; // Floating point fabs/fchs.
+defm WriteFRnd : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
+defm WriteFRndY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
+defm WriteFRndZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
+defm WriteFLogic : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
+defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
+defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
+defm WriteFTest : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
+defm WriteFTestY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
+defm WriteFTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
+defm WriteFShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
+defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
+defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
+defm WriteFVarShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
+defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
+defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
+defm WriteFBlend : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
+defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
+defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
+defm WriteFVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
+defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
+defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
-defm WriteFHAdd : X86SchedWritePair;
-defm WriteFHAddY : X86SchedWritePair;
-defm WriteFHAddZ : X86SchedWritePair;
-defm WritePHAdd : X86SchedWritePair;
-defm WritePHAddX : X86SchedWritePair;
-defm WritePHAddY : X86SchedWritePair;
-defm WritePHAddZ : X86SchedWritePair;
+defm WriteFHAdd : X86SchedWritePair<ReadAfterVecXLd>;
+defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAdd : X86SchedWritePair<ReadAfterVecLd>;
+defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
+defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
-defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
-defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
-defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
-defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
-defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
-defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
-defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
-defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
-defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
-defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
-defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
-defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
-defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
-defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
-defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
-defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
-defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
-defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
-defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
-defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
-defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
-defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
-defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
-defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
-defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
-defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
-defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
-defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
-defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
-defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
-defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
-defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
-defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
-defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
-defm WriteBlend : X86SchedWritePair; // Vector blends.
-defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
-defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
-defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
-defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
-defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
-defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
-defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
-defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
-defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
-defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
-defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
-defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
-defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
+defm WriteVecALU : X86SchedWritePair<ReadAfterVecLd>; // Vector integer ALU op, no logicals.
+defm WriteVecALUX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
+defm WriteVecALUY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
+defm WriteVecALUZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
+defm WriteVecLogic : X86SchedWritePair<ReadAfterVecLd>; // Vector integer and/or/xor logicals.
+defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
+defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
+defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
+defm WriteVecTest : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
+defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
+defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
+defm WriteVecShift : X86SchedWritePair<ReadAfterVecLd>; // Vector integer shifts (default).
+defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
+defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
+defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
+defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>; // Vector integer immediate shifts (default).
+defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
+defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
+defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
+defm WriteVecIMul : X86SchedWritePair<ReadAfterVecLd>; // Vector integer multiply (default).
+defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
+defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
+defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
+defm WritePMULLD : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
+defm WritePMULLDY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
+defm WritePMULLDZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
+defm WriteShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector shuffles.
+defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
+defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
+defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
+defm WriteVarShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector variable shuffles.
+defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
+defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
+defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
+defm WriteBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
+defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
+defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
+defm WriteVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
+defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
+defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
+defm WritePSADBW : X86SchedWritePair<ReadAfterVecLd>; // Vector PSADBW.
+defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
+defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
+defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
+defm WriteMPSAD : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
+defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
+defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
+defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
-defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
-defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
-defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
-defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
-
-defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
-defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
-defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
-defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
-
-defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
-defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
-defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
-defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
-
-defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
-defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
-defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
-defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
-
-defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
-defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
-defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
-defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
-
-defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
-defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
-defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
-defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
-
-defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
-defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
-defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
+defm WriteCvtSD2I : X86SchedWritePair<ReadAfterVecLd>; // Double -> Integer.
+defm WriteCvtPD2I : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
+defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
+defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
+
+defm WriteCvtSS2I : X86SchedWritePair<ReadAfterVecLd>; // Float -> Integer.
+defm WriteCvtPS2I : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
+defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
+defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
+
+defm WriteCvtI2SD : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Double.
+defm WriteCvtI2PD : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
+defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
+defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
+
+defm WriteCvtI2SS : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Float.
+defm WriteCvtI2PS : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
+defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
+defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
+
+defm WriteCvtSS2SD : X86SchedWritePair<ReadAfterVecLd>; // Float -> Double size conversion.
+defm WriteCvtPS2PD : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
+defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
+defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
+
+defm WriteCvtSD2SS : X86SchedWritePair<ReadAfterVecLd>; // Double -> Float size conversion.
+defm WriteCvtPD2PS : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
+defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
+defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
+
+defm WriteCvtPH2PS : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
+defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
+defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
-defm WriteCRC32 : X86SchedWritePair;
+defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm WritePCmpIStrM : X86SchedWritePair;
+defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Mask
-defm WritePCmpEStrM : X86SchedWritePair;
+defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Implicit Length Strings, Return Index
-defm WritePCmpIStrI : X86SchedWritePair;
+defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Index
-defm WritePCmpEStrI : X86SchedWritePair;
+defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
// AES instructions.
-defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
-defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
-defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
+defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
+defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
+defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
// Carry-less multiplication instructions.
-defm WriteCLMul : X86SchedWritePair;
+defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
def WriteSystem : SchedWrite;
// AVX2.
-defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
-defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
-defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
-defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
-defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
-defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
-defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
+defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
+defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
+defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
+defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
+defm WriteVarVecShift : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
+defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
+defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody use.
def WriteMicrocoded : SchedWrite;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available until 5
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// Integer division issued on ALU2.
def ZnDivider : ProcResource<1>;
-// 4 Cycles load-to use Latency is captured
+// 4 Cycles integer load-to use Latency is captured
def : ReadAdvance<ReadAfterLd, 4>;
+// 8 Cycles vector load-to use Latency is captured
+def : ReadAdvance<ReadAfterVecLd, 8>;
+def : ReadAdvance<ReadAfterVecXLd, 8>;
+def : ReadAdvance<ReadAfterVecYLd, 8>;
+
// The Integer PRF for Zen is 168 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
# BDWELL-NEXT: Total Cycles: 17
# BDWELL-NEXT: Total uOps: 3
-# BTVER2-NEXT: Total Cycles: 27
+# BTVER2-NEXT: Total Cycles: 25
# BTVER2-NEXT: Total uOps: 2
-# HASWELL-NEXT: Total Cycles: 20
+# HASWELL-NEXT: Total Cycles: 19
# HASWELL-NEXT: Total uOps: 3
-# SANDY-NEXT: Total Cycles: 21
+# SANDY-NEXT: Total Cycles: 20
# SANDY-NEXT: Total uOps: 3
-# SKYLAKE-NEXT: Total Cycles: 19
+# SKYLAKE-NEXT: Total Cycles: 18
# SKYLAKE-NEXT: Total uOps: 3
-# ZNVER1-NEXT: Total Cycles: 24
+# ZNVER1-NEXT: Total Cycles: 20
# ZNVER1-NEXT: Total uOps: 2
# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: Block RThroughput: 5.0
# BTVER2: Dispatch Width: 2
-# BTVER2-NEXT: uOps Per Cycle: 0.07
-# BTVER2-NEXT: IPC: 0.07
+# BTVER2-NEXT: uOps Per Cycle: 0.08
+# BTVER2-NEXT: IPC: 0.08
# BTVER2-NEXT: Block RThroughput: 19.0
# HASWELL: Dispatch Width: 4
-# HASWELL-NEXT: uOps Per Cycle: 0.15
-# HASWELL-NEXT: IPC: 0.10
+# HASWELL-NEXT: uOps Per Cycle: 0.16
+# HASWELL-NEXT: IPC: 0.11
# HASWELL-NEXT: Block RThroughput: 7.0
# SANDY: Dispatch Width: 4
-# SANDY-NEXT: uOps Per Cycle: 0.14
+# SANDY-NEXT: uOps Per Cycle: 0.15
# SANDY-NEXT: IPC: 0.10
# SANDY-NEXT: Block RThroughput: 14.0
# SKYLAKE: Dispatch Width: 6
-# SKYLAKE-NEXT: uOps Per Cycle: 0.16
+# SKYLAKE-NEXT: uOps Per Cycle: 0.17
# SKYLAKE-NEXT: IPC: 0.11
# SKYLAKE-NEXT: Block RThroughput: 3.0
# ZNVER1: Dispatch Width: 4
-# ZNVER1-NEXT: uOps Per Cycle: 0.08
-# ZNVER1-NEXT: IPC: 0.08
+# ZNVER1-NEXT: uOps Per Cycle: 0.10
+# ZNVER1-NEXT: IPC: 0.10
# ZNVER1-NEXT: Block RThroughput: 1.0
# ALL: Timeline view:
# BDWELL-NEXT: Index 0123456789
# BTVER2-NEXT: 0123456789
-# BTVER2-NEXT: Index 0123456789 0123456
+# BTVER2-NEXT: Index 0123456789 01234
-# HASWELL-NEXT: 0123456789
+# HASWELL-NEXT: 012345678
# HASWELL-NEXT: Index 0123456789
# SANDY-NEXT: 0123456789
-# SANDY-NEXT: Index 0123456789 0
+# SANDY-NEXT: Index 0123456789
-# SKYLAKE-NEXT: 012345678
+# SKYLAKE-NEXT: 01234567
# SKYLAKE-NEXT: Index 0123456789
# ZNVER1-NEXT: 0123456789
-# ZNVER1-NEXT: Index 0123456789 0123
+# ZNVER1-NEXT: Index 0123456789
# BDWELL: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1
# BDWELL-NEXT: [0,1] D======eeeeeeeeER vaddps (%rax), %xmm1, %xmm1
-# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1
-# BTVER2-NEXT: [0,1] D================eeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# BTVER2-NEXT: [0,1] D==============eeeeeeeeER vaddps (%rax), %xmm1, %xmm1
-# HASWELL: [0,0] DeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
-# HASWELL-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+# HASWELL: [0,0] DeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# HASWELL-NEXT: [0,1] D=======eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
-# SANDY: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
-# SANDY-NEXT: [0,1] D=========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+# SANDY: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# SANDY-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
-# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1
-# SKYLAKE-NEXT: [0,1] D======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1
+# SKYLAKE-NEXT: [0,1] D=====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
-# ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1
-# ZNVER1-NEXT: [0,1] D===========eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+# ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# ZNVER1-NEXT: [0,1] D=======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: 0. 1 1.0 1.0 0.0 vdivps %xmm0, %xmm1, %xmm1
# BDWELL-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
-# BTVER2-NEXT: 1. 1 17.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
-# HASWELL-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
-# SANDY-NEXT: 1. 1 10.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
-# SKYLAKE-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
-# ZNVER1-NEXT: 1. 1 12.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# BTVER2-NEXT: 1. 1 15.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# HASWELL-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# SANDY-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# SKYLAKE-NEXT: 1. 1 6.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# ZNVER1-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
-# BDWELL-NEXT: Total Cycles: 208
+# BDWELL-NEXT: Total Cycles: 142
# BDWELL-NEXT: Total uOps: 500
-# HASWELL-NEXT: Total Cycles: 308
+# HASWELL-NEXT: Total Cycles: 143
# HASWELL-NEXT: Total uOps: 500
# SKYLAKE-NEXT: Total Cycles: 803
# SKYLAKE-NEXT: Total uOps: 500
-# ZNVER1-NEXT: Total Cycles: 407
+# ZNVER1-NEXT: Total Cycles: 110
# ZNVER1-NEXT: Total uOps: 400
# BDWELL: Dispatch Width: 4
-# BDWELL-NEXT: uOps Per Cycle: 2.40
-# BDWELL-NEXT: IPC: 1.92
+# BDWELL-NEXT: uOps Per Cycle: 3.52
+# BDWELL-NEXT: IPC: 2.82
# BDWELL-NEXT: Block RThroughput: 1.3
# HASWELL: Dispatch Width: 4
-# HASWELL-NEXT: uOps Per Cycle: 1.62
-# HASWELL-NEXT: IPC: 1.30
+# HASWELL-NEXT: uOps Per Cycle: 3.50
+# HASWELL-NEXT: IPC: 2.80
# HASWELL-NEXT: Block RThroughput: 1.3
# SKYLAKE: Dispatch Width: 6
# SKYLAKE-NEXT: Block RThroughput: 0.8
# ZNVER1: Dispatch Width: 4
-# ZNVER1-NEXT: uOps Per Cycle: 0.98
-# ZNVER1-NEXT: IPC: 0.98
+# ZNVER1-NEXT: uOps Per Cycle: 3.64
+# ZNVER1-NEXT: IPC: 3.64
# ZNVER1-NEXT: Block RThroughput: 1.0
# ALL: Timeline view:
# BDWELL-NEXT: 0123456789
-# BDWELL-NEXT: Index 0123456789 01234567
+# BDWELL-NEXT: Index 0123456789 01
-# HASWELL-NEXT: 0123456789 01234567
-# HASWELL-NEXT: Index 0123456789 0123456789
+# HASWELL-NEXT: 0123456789
+# HASWELL-NEXT: Index 0123456789 012
# SKYLAKE-NEXT: 0123456789 0123456789 0123456789 01234
# SKYLAKE-NEXT: Index 0123456789 0123456789 0123456789 0123456789
-# ZNVER1-NEXT: 0123456789 0123456789
-# ZNVER1-NEXT: Index 0123456789 0123456789 0123456
-
-# BDWELL: [0,0] DeER . . . . . . addl $1, %edx
-# BDWELL-NEXT: [0,1] DeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [0,2] DeE------R. . . . . addq $32, %r8
-# BDWELL-NEXT: [0,3] .DeE-----R. . . . . cmpl %edi, %edx
-# BDWELL-NEXT: [1,0] .DeE-----R. . . . . addl $1, %edx
-# BDWELL-NEXT: [1,1] .D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [1,2] . DeE------R . . . . addq $32, %r8
-# BDWELL-NEXT: [1,3] . DeE------R . . . . cmpl %edi, %edx
-# BDWELL-NEXT: [2,0] . DeE------R . . . . addl $1, %edx
-# BDWELL-NEXT: [2,1] . D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [2,2] . DeE-------R . . . . addq $32, %r8
-# BDWELL-NEXT: [2,3] . DeE-------R . . . . cmpl %edi, %edx
-# BDWELL-NEXT: [3,0] . DeE------R . . . . addl $1, %edx
-# BDWELL-NEXT: [3,1] . D==eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [3,2] . DeE--------R . . . addq $32, %r8
-# BDWELL-NEXT: [3,3] . DeE-------R . . . cmpl %edi, %edx
-# BDWELL-NEXT: [4,0] . DeE-------R . . . addl $1, %edx
-# BDWELL-NEXT: [4,1] . D===eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [4,2] . .DeE--------R . . . addq $32, %r8
-# BDWELL-NEXT: [4,3] . .DeE--------R . . . cmpl %edi, %edx
-# BDWELL-NEXT: [5,0] . .DeE--------R . . . addl $1, %edx
-# BDWELL-NEXT: [5,1] . . D===eeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [5,2] . . DeE---------R. . . addq $32, %r8
-# BDWELL-NEXT: [5,3] . . DeE---------R. . . cmpl %edi, %edx
-# BDWELL-NEXT: [6,0] . . DeE--------R. . . addl $1, %edx
-# BDWELL-NEXT: [6,1] . . D====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [6,2] . . DeE----------R . . addq $32, %r8
-# BDWELL-NEXT: [6,3] . . DeE---------R . . cmpl %edi, %edx
-# BDWELL-NEXT: [7,0] . . DeE---------R . . addl $1, %edx
-# BDWELL-NEXT: [7,1] . . D=====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [7,2] . . DeE----------R . . addq $32, %r8
-# BDWELL-NEXT: [7,3] . . DeE----------R . . cmpl %edi, %edx
-# BDWELL-NEXT: [8,0] . . DeE----------R . . addl $1, %edx
-# BDWELL-NEXT: [8,1] . . .D=====eeeeeeeER . vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [8,2] . . .DeE-----------R . addq $32, %r8
-# BDWELL-NEXT: [8,3] . . .DeE-----------R . cmpl %edi, %edx
-# BDWELL-NEXT: [9,0] . . . DeE----------R . addl $1, %edx
-# BDWELL-NEXT: [9,1] . . . D======eeeeeeeER vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: [9,2] . . . DeE------------R addq $32, %r8
-# BDWELL-NEXT: [9,3] . . . DeE-----------R cmpl %edi, %edx
-
-# HASWELL: [0,0] DeER . . . . . . . . addl $1, %edx
-# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [0,2] DeE-------R . . . . . . addq $32, %r8
-# HASWELL-NEXT: [0,3] .DeE------R . . . . . . cmpl %edi, %edx
-# HASWELL-NEXT: [1,0] .DeE------R . . . . . . addl $1, %edx
-# HASWELL-NEXT: [1,1] .D==eeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [1,2] . DeE--------R . . . . . . addq $32, %r8
-# HASWELL-NEXT: [1,3] . DeE--------R . . . . . . cmpl %edi, %edx
-# HASWELL-NEXT: [2,0] . DeE--------R . . . . . . addl $1, %edx
-# HASWELL-NEXT: [2,1] . D===eeeeeeeeER . . . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [2,2] . DeE----------R . . . . . addq $32, %r8
-# HASWELL-NEXT: [2,3] . DeE----------R . . . . . cmpl %edi, %edx
-# HASWELL-NEXT: [3,0] . DeE---------R . . . . . addl $1, %edx
-# HASWELL-NEXT: [3,1] . D=====eeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [3,2] . DeE------------R. . . . . addq $32, %r8
-# HASWELL-NEXT: [3,3] . DeE-----------R. . . . . cmpl %edi, %edx
-# HASWELL-NEXT: [4,0] . DeE-----------R. . . . . addl $1, %edx
-# HASWELL-NEXT: [4,1] . D=======eeeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [4,2] . .DeE-------------R . . . . addq $32, %r8
-# HASWELL-NEXT: [4,3] . .DeE-------------R . . . . cmpl %edi, %edx
-# HASWELL-NEXT: [5,0] . .DeE-------------R . . . . addl $1, %edx
-# HASWELL-NEXT: [5,1] . . D========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [5,2] . . DeE---------------R . . . addq $32, %r8
-# HASWELL-NEXT: [5,3] . . DeE---------------R . . . cmpl %edi, %edx
-# HASWELL-NEXT: [6,0] . . DeE--------------R . . . addl $1, %edx
-# HASWELL-NEXT: [6,1] . . D==========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [6,2] . . DeE-----------------R . . . addq $32, %r8
-# HASWELL-NEXT: [6,3] . . DeE----------------R . . . cmpl %edi, %edx
-# HASWELL-NEXT: [7,0] . . DeE----------------R . . . addl $1, %edx
-# HASWELL-NEXT: [7,1] . . D============eeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [7,2] . . DeE------------------R . . addq $32, %r8
-# HASWELL-NEXT: [7,3] . . DeE------------------R . . cmpl %edi, %edx
-# HASWELL-NEXT: [8,0] . . DeE------------------R . . addl $1, %edx
-# HASWELL-NEXT: [8,1] . . .D=============eeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [8,2] . . .DeE--------------------R. . addq $32, %r8
-# HASWELL-NEXT: [8,3] . . .DeE--------------------R. . cmpl %edi, %edx
-# HASWELL-NEXT: [9,0] . . . DeE-------------------R. . addl $1, %edx
-# HASWELL-NEXT: [9,1] . . . D===============eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: [9,2] . . . DeE----------------------R addq $32, %r8
-# HASWELL-NEXT: [9,3] . . . DeE---------------------R cmpl %edi, %edx
+# ZNVER1-NEXT: 0123456789
+# ZNVER1-NEXT: Index 0123456789
+
+# BDWELL: [0,0] DeER . . . .. addl $1, %edx
+# BDWELL-NEXT: [0,1] DeeeeeeeER. . .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [0,2] DeE------R. . .. addq $32, %r8
+# BDWELL-NEXT: [0,3] .DeE-----R. . .. cmpl %edi, %edx
+# BDWELL-NEXT: [1,0] .DeE-----R. . .. addl $1, %edx
+# BDWELL-NEXT: [1,1] .DeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [1,2] . DeE-----R . .. addq $32, %r8
+# BDWELL-NEXT: [1,3] . DeE-----R . .. cmpl %edi, %edx
+# BDWELL-NEXT: [2,0] . DeE-----R . .. addl $1, %edx
+# BDWELL-NEXT: [2,1] . DeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [2,2] . DeE------R . .. addq $32, %r8
+# BDWELL-NEXT: [2,3] . DeE------R . .. cmpl %edi, %edx
+# BDWELL-NEXT: [3,0] . DeE-----R . .. addl $1, %edx
+# BDWELL-NEXT: [3,1] . DeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [3,2] . DeE------R . .. addq $32, %r8
+# BDWELL-NEXT: [3,3] . DeE-----R . .. cmpl %edi, %edx
+# BDWELL-NEXT: [4,0] . DeE-----R . .. addl $1, %edx
+# BDWELL-NEXT: [4,1] . DeeeeeeeER. .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [4,2] . .DeE-----R. .. addq $32, %r8
+# BDWELL-NEXT: [4,3] . .DeE-----R. .. cmpl %edi, %edx
+# BDWELL-NEXT: [5,0] . .DeE-----R. .. addl $1, %edx
+# BDWELL-NEXT: [5,1] . . DeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [5,2] . . DeE------R .. addq $32, %r8
+# BDWELL-NEXT: [5,3] . . DeE------R .. cmpl %edi, %edx
+# BDWELL-NEXT: [6,0] . . DeE-----R .. addl $1, %edx
+# BDWELL-NEXT: [6,1] . . DeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [6,2] . . DeE------R .. addq $32, %r8
+# BDWELL-NEXT: [6,3] . . DeE-----R .. cmpl %edi, %edx
+# BDWELL-NEXT: [7,0] . . DeE-----R .. addl $1, %edx
+# BDWELL-NEXT: [7,1] . . DeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [7,2] . . DeE-----R .. addq $32, %r8
+# BDWELL-NEXT: [7,3] . . DeE-----R .. cmpl %edi, %edx
+# BDWELL-NEXT: [8,0] . . DeE-----R .. addl $1, %edx
+# BDWELL-NEXT: [8,1] . . .DeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [8,2] . . .DeE------R. addq $32, %r8
+# BDWELL-NEXT: [8,3] . . .DeE------R. cmpl %edi, %edx
+# BDWELL-NEXT: [9,0] . . . DeE-----R. addl $1, %edx
+# BDWELL-NEXT: [9,1] . . . DeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [9,2] . . . DeE------R addq $32, %r8
+# BDWELL-NEXT: [9,3] . . . DeE-----R cmpl %edi, %edx
+
+# HASWELL: [0,0] DeER . . . . . addl $1, %edx
+# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [0,2] DeE-------R . . . addq $32, %r8
+# HASWELL-NEXT: [0,3] .DeE------R . . . cmpl %edi, %edx
+# HASWELL-NEXT: [1,0] .DeE------R . . . addl $1, %edx
+# HASWELL-NEXT: [1,1] .DeeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [1,2] . DeE------R . . . addq $32, %r8
+# HASWELL-NEXT: [1,3] . DeE------R . . . cmpl %edi, %edx
+# HASWELL-NEXT: [2,0] . DeE------R . . . addl $1, %edx
+# HASWELL-NEXT: [2,1] . DeeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [2,2] . DeE-------R . . . addq $32, %r8
+# HASWELL-NEXT: [2,3] . DeE-------R . . . cmpl %edi, %edx
+# HASWELL-NEXT: [3,0] . DeE------R . . . addl $1, %edx
+# HASWELL-NEXT: [3,1] . DeeeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [3,2] . DeE-------R. . . addq $32, %r8
+# HASWELL-NEXT: [3,3] . DeE------R. . . cmpl %edi, %edx
+# HASWELL-NEXT: [4,0] . DeE------R. . . addl $1, %edx
+# HASWELL-NEXT: [4,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [4,2] . .DeE------R . . addq $32, %r8
+# HASWELL-NEXT: [4,3] . .DeE------R . . cmpl %edi, %edx
+# HASWELL-NEXT: [5,0] . .DeE------R . . addl $1, %edx
+# HASWELL-NEXT: [5,1] . . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [5,2] . . DeE-------R . . addq $32, %r8
+# HASWELL-NEXT: [5,3] . . DeE-------R . . cmpl %edi, %edx
+# HASWELL-NEXT: [6,0] . . DeE------R . . addl $1, %edx
+# HASWELL-NEXT: [6,1] . . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [6,2] . . DeE-------R . . addq $32, %r8
+# HASWELL-NEXT: [6,3] . . DeE------R . . cmpl %edi, %edx
+# HASWELL-NEXT: [7,0] . . DeE------R . . addl $1, %edx
+# HASWELL-NEXT: [7,1] . . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [7,2] . . DeE------R. . addq $32, %r8
+# HASWELL-NEXT: [7,3] . . DeE------R. . cmpl %edi, %edx
+# HASWELL-NEXT: [8,0] . . DeE------R. . addl $1, %edx
+# HASWELL-NEXT: [8,1] . . .DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [8,2] . . .DeE-------R. addq $32, %r8
+# HASWELL-NEXT: [8,3] . . .DeE-------R. cmpl %edi, %edx
+# HASWELL-NEXT: [9,0] . . . DeE------R. addl $1, %edx
+# HASWELL-NEXT: [9,1] . . . DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [9,2] . . . DeE-------R addq $32, %r8
+# HASWELL-NEXT: [9,3] . . . DeE------R cmpl %edi, %edx
# SKYLAKE: [0,0] DeER . . . . . . . . . . . . . . . addl $1, %edx
# SKYLAKE-NEXT: [0,1] DeeeeeeeeER . . . . . . . . . . . . . vpaddd (%r8), %ymm0, %ymm0
# SKYLAKE-NEXT: [8,3] . . D=eE--------------------------------------------------------------R cmpl %edi, %edx
# SKYLAKE-NEXT: [9,0] . . D=eE--------------------------------------------------------------R addl $1, %edx
-# ZNVER1: [0,0] DeER . . . . . . . . .. addl $1, %edx
-# ZNVER1-NEXT: [0,1] DeeeeeeeeER . . . . . . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [0,2] DeE-------R . . . . . . .. addq $32, %r8
-# ZNVER1-NEXT: [0,3] D=eE------R . . . . . . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [1,0] .DeE------R . . . . . . .. addl $1, %edx
-# ZNVER1-NEXT: [1,1] .D===eeeeeeeeER. . . . . . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [1,2] .DeE----------R. . . . . . .. addq $32, %r8
-# ZNVER1-NEXT: [1,3] .D=eE---------R. . . . . . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [2,0] . DeE---------R. . . . . . .. addl $1, %edx
-# ZNVER1-NEXT: [2,1] . D======eeeeeeeeER . . . . . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [2,2] . DeE-------------R . . . . . .. addq $32, %r8
-# ZNVER1-NEXT: [2,3] . D=eE------------R . . . . . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [3,0] . DeE------------R . . . . . .. addl $1, %edx
-# ZNVER1-NEXT: [3,1] . D=========eeeeeeeeER . . . . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [3,2] . DeE----------------R . . . . .. addq $32, %r8
-# ZNVER1-NEXT: [3,3] . D=eE---------------R . . . . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [4,0] . DeE---------------R . . . . .. addl $1, %edx
-# ZNVER1-NEXT: [4,1] . D============eeeeeeeeER . . . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [4,2] . DeE-------------------R . . . .. addq $32, %r8
-# ZNVER1-NEXT: [4,3] . D=eE------------------R . . . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [5,0] . DeE------------------R . . . .. addl $1, %edx
-# ZNVER1-NEXT: [5,1] . D===============eeeeeeeeER . . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [5,2] . DeE----------------------R . . .. addq $32, %r8
-# ZNVER1-NEXT: [5,3] . D=eE---------------------R . . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [6,0] . .DeE---------------------R . . .. addl $1, %edx
-# ZNVER1-NEXT: [6,1] . .D==================eeeeeeeeER. . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [6,2] . .DeE-------------------------R. . .. addq $32, %r8
-# ZNVER1-NEXT: [6,3] . .D=eE------------------------R. . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [7,0] . . DeE------------------------R. . .. addl $1, %edx
-# ZNVER1-NEXT: [7,1] . . D=====================eeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [7,2] . . DeE----------------------------R . .. addq $32, %r8
-# ZNVER1-NEXT: [7,3] . . D=eE---------------------------R . .. cmpl %edi, %edx
-# ZNVER1-NEXT: [8,0] . . DeE---------------------------R . .. addl $1, %edx
-# ZNVER1-NEXT: [8,1] . . D========================eeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [8,2] . . DeE-------------------------------R .. addq $32, %r8
-# ZNVER1-NEXT: [8,3] . . D=eE------------------------------R .. cmpl %edi, %edx
-# ZNVER1-NEXT: [9,0] . . DeE------------------------------R .. addl $1, %edx
-# ZNVER1-NEXT: [9,1] . . D===========================eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: [9,2] . . DeE----------------------------------R addq $32, %r8
-# ZNVER1-NEXT: [9,3] . . D=eE---------------------------------R cmpl %edi, %edx
+# ZNVER1: [0,0] DeER . . . . addl $1, %edx
+# ZNVER1-NEXT: [0,1] DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [0,2] DeE-------R . . addq $32, %r8
+# ZNVER1-NEXT: [0,3] D=eE------R . . cmpl %edi, %edx
+# ZNVER1-NEXT: [1,0] .DeE------R . . addl $1, %edx
+# ZNVER1-NEXT: [1,1] .DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [1,2] .DeE-------R . . addq $32, %r8
+# ZNVER1-NEXT: [1,3] .D=eE------R . . cmpl %edi, %edx
+# ZNVER1-NEXT: [2,0] . DeE------R . . addl $1, %edx
+# ZNVER1-NEXT: [2,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [2,2] . DeE-------R . . addq $32, %r8
+# ZNVER1-NEXT: [2,3] . D=eE------R . . cmpl %edi, %edx
+# ZNVER1-NEXT: [3,0] . DeE------R . . addl $1, %edx
+# ZNVER1-NEXT: [3,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [3,2] . DeE-------R . . addq $32, %r8
+# ZNVER1-NEXT: [3,3] . D=eE------R . . cmpl %edi, %edx
+# ZNVER1-NEXT: [4,0] . DeE------R . . addl $1, %edx
+# ZNVER1-NEXT: [4,1] . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [4,2] . DeE-------R. . addq $32, %r8
+# ZNVER1-NEXT: [4,3] . D=eE------R. . cmpl %edi, %edx
+# ZNVER1-NEXT: [5,0] . DeE------R. . addl $1, %edx
+# ZNVER1-NEXT: [5,1] . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [5,2] . DeE-------R . addq $32, %r8
+# ZNVER1-NEXT: [5,3] . D=eE------R . cmpl %edi, %edx
+# ZNVER1-NEXT: [6,0] . .DeE------R . addl $1, %edx
+# ZNVER1-NEXT: [6,1] . .DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [6,2] . .DeE-------R . addq $32, %r8
+# ZNVER1-NEXT: [6,3] . .D=eE------R . cmpl %edi, %edx
+# ZNVER1-NEXT: [7,0] . . DeE------R . addl $1, %edx
+# ZNVER1-NEXT: [7,1] . . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [7,2] . . DeE-------R . addq $32, %r8
+# ZNVER1-NEXT: [7,3] . . D=eE------R . cmpl %edi, %edx
+# ZNVER1-NEXT: [8,0] . . DeE------R . addl $1, %edx
+# ZNVER1-NEXT: [8,1] . . DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [8,2] . . DeE-------R. addq $32, %r8
+# ZNVER1-NEXT: [8,3] . . D=eE------R. cmpl %edi, %edx
+# ZNVER1-NEXT: [9,0] . . DeE------R. addl $1, %edx
+# ZNVER1-NEXT: [9,1] . . DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [9,2] . . DeE-------R addq $32, %r8
+# ZNVER1-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL: [0] [1] [2] [3]
-# BDWELL-NEXT: 0. 10 1.0 0.4 6.9 addl $1, %edx
-# BDWELL-NEXT: 1. 10 4.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
-# BDWELL-NEXT: 2. 10 1.0 0.4 8.7 addq $32, %r8
-# BDWELL-NEXT: 3. 10 1.0 0.0 8.3 cmpl %edi, %edx
+# BDWELL-NEXT: 0. 10 1.0 0.4 4.5 addl $1, %edx
+# BDWELL-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: 2. 10 1.0 0.4 5.7 addq $32, %r8
+# BDWELL-NEXT: 3. 10 1.0 0.0 5.3 cmpl %edi, %edx
-# HASWELL-NEXT: 0. 10 1.0 0.4 11.4 addl $1, %edx
-# HASWELL-NEXT: 1. 10 8.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
-# HASWELL-NEXT: 2. 10 1.0 0.4 14.2 addq $32, %r8
-# HASWELL-NEXT: 3. 10 1.0 0.0 13.8 cmpl %edi, %edx
+# HASWELL-NEXT: 0. 10 1.0 0.4 5.4 addl $1, %edx
+# HASWELL-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: 2. 10 1.0 0.4 6.7 addq $32, %r8
+# HASWELL-NEXT: 3. 10 1.0 0.0 6.3 cmpl %edi, %edx
# SKYLAKE-NEXT: 0. 10 1.9 0.1 30.6 addl $1, %edx
# SKYLAKE-NEXT: 1. 10 32.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
# SKYLAKE-NEXT: 2. 10 1.0 0.1 38.5 addq $32, %r8
# SKYLAKE-NEXT: 3. 10 2.0 0.0 37.5 cmpl %edi, %edx
-# ZNVER1-NEXT: 0. 10 1.0 0.1 16.2 addl $1, %edx
-# ZNVER1-NEXT: 1. 10 14.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
-# ZNVER1-NEXT: 2. 10 1.0 0.1 20.5 addq $32, %r8
-# ZNVER1-NEXT: 3. 10 2.0 0.0 19.5 cmpl %edi, %edx
+# ZNVER1-NEXT: 0. 10 1.0 0.1 5.4 addl $1, %edx
+# ZNVER1-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: 2. 10 1.0 0.1 7.0 addq $32, %r8
+# ZNVER1-NEXT: 3. 10 2.0 0.0 6.0 cmpl %edi, %edx