}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
- SchedWriteVecMoveLS>, PD;
+ SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
- SchedWriteFMoveLS>, PD, VEX_W;
+ SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
- SchedWriteFMoveLS>, PS;
+ SchedWriteFMoveLSNT>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
- Sched<[SchedWriteVecMoveLS.MMX.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
let AddedComplexity = 15 in
let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
-let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
addr:$dst)]>, VEX, VEX_WIG;
} // SchedRW
-let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
addr:$dst)]>, VEX, VEX_WIG,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)]>, VEX, VEX_L, VEX_WIG,
- Sched<[SchedWriteVecMoveLS.YMM.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates
-let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW
-let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in
+let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
-let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
+let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
"movntss\t{$src, $dst|$dst, $src}", []>, XS;
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [BWPort237, BWPort4]>;
-def : WriteRes<WriteMove, [BWPort0156]>;
+defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1,1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [BWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [BWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
}
def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
// Store_addr on 237.
// Store_data on 4.
-def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
-def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; }
-def : WriteRes<WriteMove, [HWPort0156]>;
-def : WriteRes<WriteZero, []>;
+defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
+def : WriteRes<WriteZero, []>;
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [HWPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [HWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [HWPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
}
def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
// 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;
-def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
-def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
-def : WriteRes<WriteMove, [SBPort015]>;
-def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>;
-def : WriteRes<WriteMove, [SKLPort0156]>;
+defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [SKLPort237, SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [SKLPort237, SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [SKLPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKLPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SKLPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SKLPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
}
def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
-def : WriteRes<WriteMove, [SKXPort0156]>;
+defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
"KMOV(B|D|Q|W)mk",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"VMOV(H|L)(PD|PS)Z128mr(b?)",
"(V?)MOV(H|L)(PD|PS)mr",
}
// Loads, stores, and moves, not folded with other operations.
-def WriteLoad : SchedWrite;
-def WriteStore : SchedWrite;
-def WriteMove : SchedWrite;
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteStoreNT : SchedWrite;
+def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
+def WriteFStoreNT : SchedWrite;
+def WriteFStoreNTX : SchedWrite;
+def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
+def WriteVecLoadNT : SchedWrite;
+def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
+def WriteVecStoreNT : SchedWrite;
+def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
+def WriteFMoveLSNT
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
+def WriteFMoveLSNTX
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
+def WriteFMoveLSNTY
+ : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
+def SchedWriteFMoveLSNT
+ : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
+ WriteFMoveLSNTY, WriteFMoveLSNTY>;
+
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
+def WriteVecMoveLSNT
+ : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
+def WriteVecMoveLSNTX
+ : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
+def WriteVecMoveLSNTY
+ : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
+def SchedWriteVecMoveLSNT
+ : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
+ WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
+
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [AtomPort0]>;
-def : WriteRes<WriteStore, [AtomPort0]>;
-def : WriteRes<WriteMove, [AtomPort01]>;
+def : WriteRes<WriteLoad, [AtomPort0]>;
+def : WriteRes<WriteStore, [AtomPort0]>;
+def : WriteRes<WriteStoreNT, [AtomPort0]>;
+def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
def : WriteRes<WriteFStoreY, [AtomPort0]>;
+def : WriteRes<WriteFStoreNT, [AtomPort0]>;
+def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
+def : WriteRes<WriteFStoreNTY, [AtomPort0]>;
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
def : WriteRes<WriteVecLoadY, [AtomPort0]>;
+def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
+def : WriteRes<WriteVecLoadNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
def : WriteRes<WriteVecStoreY, [AtomPort0]>;
+def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
+def : WriteRes<WriteVecStoreNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
-def : WriteRes<WriteStore, [JSAGU]>;
-def : WriteRes<WriteMove, [JALU01]>;
+def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteStore, [JSAGU]>;
+def : WriteRes<WriteStoreNT, [JSAGU]>;
+def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
-def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 2;
-}
-def : InstRW<[JWriteVMOVNTDQSt], (instrs MMX_MOVNTQmr, MOVNTDQmr, VMOVNTDQmr)>;
-
-def JWriteVMOVNTDQYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 2;
- let ResourceCycles = [2, 2, 2];
-}
-def : InstRW<[JWriteVMOVNTDQYSt], (instrs VMOVNTDQYmr)>;
-
-def JWriteMOVNTSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 3;
-}
-def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>;
-
-def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 3;
- let ResourceCycles = [2, 2, 2];
-}
-def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTPDYmr, VMOVNTPSYmr)>;
-
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
-def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
-def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
-def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
-def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copy pasted from WriteStore/Load.
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
// operation in codegen
def : WriteRes<WriteRMW, [ZnAGU]>;
-def : WriteRes<WriteStore, [ZnAGU]>;
-def : WriteRes<WriteMove, [ZnALU]>;
-def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
+def : WriteRes<WriteStore, [ZnAGU]>;
+def : WriteRes<WriteStoreNT, [ZnAGU]>;
+def : WriteRes<WriteMove, [ZnALU]>;
+def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
}
def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
-// MOVNTSS/MOVNTSD
-def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> {
- let Latency = 8;
-}
-def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>;
-
//-- SHA instructions --//
// SHA256MSG2
def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;