//
multiclass avx2_pmovmask<string OpcodeStr,
Intrinsic IntLd128, Intrinsic IntLd256,
- Intrinsic IntSt128, Intrinsic IntSt256> {
+ Intrinsic IntSt128, Intrinsic IntSt256,
+ X86SchedWriteMaskMove schedX,
+ X86SchedWriteMaskMove schedY> {
def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[WriteVecMaskedLoad]>;
+ VEX_4V, Sched<[schedX.RM]>;
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
+ VEX_4V, VEX_L, Sched<[schedY.RM]>;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[WriteVecMaskedStore]>;
+ VEX_4V, Sched<[schedX.MR]>;
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
+ VEX_4V, VEX_L, Sched<[schedY.MR]>;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
int_x86_avx2_maskload_d,
int_x86_avx2_maskload_d_256,
int_x86_avx2_maskstore_d,
- int_x86_avx2_maskstore_d_256>;
+ int_x86_avx2_maskstore_d_256,
+ WriteVecMaskMove32, WriteVecMaskMove32Y>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskload_q,
int_x86_avx2_maskload_q_256,
int_x86_avx2_maskstore_q,
- int_x86_avx2_maskstore_q_256>, VEX_W;
+ int_x86_avx2_maskstore_q_256,
+ WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT> {
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
-def WriteVecMaskedStore : SchedWrite;
-def WriteVecMaskedStoreY : SchedWrite;
+def WriteVecMaskedStore32 : SchedWrite;
+def WriteVecMaskedStore64 : SchedWrite;
+def WriteVecMaskedStore32Y : SchedWrite;
+def WriteVecMaskedStore64Y : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
: X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>;
def WriteFMaskMove64Y
: X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>;
+def WriteVecMaskMove32
+ : X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore32>;
+def WriteVecMaskMove64
+ : X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore64>;
+def WriteVecMaskMove32Y
+ : X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore32Y>;
+def WriteVecMaskMove64Y
+ : X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore64Y>;
// Vector width wrappers.
def SchedWriteFAdd
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
-def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
-defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
-defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
-defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
-def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
-def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore32, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore32Y, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore64, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
defm : X86WriteRes<WriteVecStoreY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [Zn2AGU], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [Zn2AGU,Zn2FPU01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;