This removes all the WriteFBlend/WriteFVarBlend InstRW overrides - some WriteFVarShuffle remain to be fixed.
llvm-svn: 331065
}
multiclass blendmask_dq<bits<8> opc, string OpcodeStr,
- X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite sched128,
+ X86FoldableSchedWrite sched256,
AVX512VLVectorVTInfo VTInfo> {
- defm Z : WriteFVarBlendask <opc, OpcodeStr, sched, VTInfo.info512>,
- WriteFVarBlendask_rmb <opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512;
+ defm Z : WriteFVarBlendask <opc, OpcodeStr, sched256, VTInfo.info512>,
+ WriteFVarBlendask_rmb <opc, OpcodeStr, sched256, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info256>,
- WriteFVarBlendask_rmb<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256;
- defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info128>,
- WriteFVarBlendask_rmb<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128;
+ defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info256>,
+ WriteFVarBlendask_rmb<opc, OpcodeStr, sched256, VTInfo.info256>, EVEX_V256;
+ defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched128, VTInfo.info128>,
+ WriteFVarBlendask_rmb<opc, OpcodeStr, sched128, VTInfo.info128>, EVEX_V128;
}
}
multiclass blendmask_bw<bits<8> opc, string OpcodeStr,
- X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite sched128,
+ X86FoldableSchedWrite sched256,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasBWI] in
- defm Z : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512;
+ defm Z : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
- defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256;
- defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128;
+ defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info256>, EVEX_V256;
+ defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched128, VTInfo.info128>, EVEX_V128;
}
}
-defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", WriteFVarBlend, avx512vl_f32_info>;
-defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", WriteFVarBlend, avx512vl_f64_info>, VEX_W;
-defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", WriteVarBlend, avx512vl_i32_info>;
-defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", WriteVarBlend, avx512vl_i64_info>, VEX_W;
-defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", WriteVarBlend, avx512vl_i8_info>;
-defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", WriteVarBlend, avx512vl_i16_info>, VEX_W;
+defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", WriteFVarBlend, WriteFVarBlendY,
+ avx512vl_f32_info>;
+defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", WriteFVarBlend, WriteFVarBlendY,
+ avx512vl_f64_info>, VEX_W;
+defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", WriteVarBlend, WriteVarBlend,
+ avx512vl_i32_info>;
+defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", WriteVarBlend, WriteVarBlend,
+ avx512vl_i64_info>, VEX_W;
+defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", WriteVarBlend, WriteVarBlend,
+ avx512vl_i8_info>;
+defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", WriteVarBlend, WriteVarBlend,
+ avx512vl_i16_info>, VEX_W;
//===----------------------------------------------------------------------===//
// Compare Instructions
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
- X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _,
+ X86FoldableSchedWrite sched128,
+ X86FoldableSchedWrite sched256,
+ AVX512VLVectorVTInfo _,
AVX512VLVectorVTInfo Ctrl> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched,
+ defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched256,
_.info512, Ctrl.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
- defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched,
+ defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched128,
_.info128, Ctrl.info128>, EVEX_V128;
- defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched,
+ defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched256,
_.info256, Ctrl.info256>, EVEX_V256;
}
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
- defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, WriteFVarShuffle, _, Ctrl>;
+ defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, WriteFVarShuffle,
+ WriteFVarShuffleY, _, Ctrl>;
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, WriteFShuffle, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
VR256, loadv8f32, f256mem, 0, SSEPackedSingle,
- WriteFBlend, BlendCommuteImm8>,
+ WriteFBlendY, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
VR128, loadv2f64, f128mem, 0, SSEPackedDouble,
VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
VR256, loadv4f64, f256mem, 0, SSEPackedDouble,
- WriteFBlend, BlendCommuteImm4>,
+ WriteFBlendY, BlendCommuteImm4>,
VEX_4V, VEX_L, VEX_WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
VR128, loadv2i64, i128mem, 0, SSEPackedInt,
WriteFVarBlend>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
loadv4f64, int_x86_avx_blendv_pd_256,
- WriteFVarBlend>, VEX_L;
+ WriteFVarBlendY>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
WriteFVarBlend>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
loadv8f32, int_x86_avx_blendv_ps_256,
- WriteFVarBlend>, VEX_L;
+ WriteFVarBlendY>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
loadv2i64, int_x86_sse41_pblendvb,
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
X86MemOperand x86memop_i, PatFrag i_frag,
- ValueType f_vt, ValueType i_vt> {
+ ValueType f_vt, ValueType i_vt,
+ X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite varsched> {
let Predicates = [HasAVX, NoVLX] in {
def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
- Sched<[WriteFVarShuffle]>;
+ Sched<[varsched]>;
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
(i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[WriteFVarShuffleLd, ReadAfterLd]>;
+ Sched<[varsched.Folded, ReadAfterLd]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
- Sched<[WriteFShuffle]>;
+ Sched<[sched]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
- Sched<[WriteFShuffleLd]>;
+ Sched<[sched.Folded]>;
}// Predicates = [HasAVX, NoVLX]
}
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- loadv2i64, v4f32, v4i32>;
+ loadv2i64, v4f32, v4i32, WriteFShuffle,
+ WriteFVarShuffle>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- loadv4i64, v8f32, v8i32>, VEX_L;
+ loadv4i64, v8f32, v8i32, WriteFShuffle,
+ WriteFVarShuffleY>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- loadv2i64, v2f64, v2i64>;
+ loadv2i64, v2f64, v2i64, WriteFShuffle,
+ WriteFVarShuffle>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- loadv4i64, v4f64, v4i64>, VEX_L;
+ loadv4i64, v4f64, v4i64, WriteFShuffle,
+ WriteFVarShuffleY>, VEX_L;
}
//===----------------------------------------------------------------------===//
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
-defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
-defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
+defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles.
+defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
+defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
+defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
+defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
let Latency = 4;
"VPALIGNRYrmi",
"VPBLENDWYrmi",
"VPERMILPDYmi",
- "VPERMILPDYrm",
"VPERMILPSYmi",
- "VPERMILPSYrm",
"VPSHUFBYrm",
"VPSHUFDYmi",
"VPSHUFHWYmi",
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup77], (instregex "VBLENDPDYrmi",
- "VBLENDPSYrmi",
- "VPANDNYrm",
+def: InstRW<[BWWriteResGroup77], (instregex "VPANDNYrm",
"VPANDYrm",
"VPBLENDDYrmi",
"VPORYrm",
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup94], (instregex "VBLENDVPDYrm",
- "VBLENDVPSYrm",
- "VMASKMOVPDYrm",
+def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VPBLENDVBYrm",
"VPMASKMOVDYrm",
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
-defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
+defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
let Latency = 5;
"(V?)PACKUSWBrm",
"(V?)PALIGNRrmi",
"VPERMILPDmi",
- "VPERMILPDrm",
"VPERMILPSmi",
- "VPERMILPSrm",
"(V?)PSHUFBrm",
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
"VPALIGNRYrmi",
"VPBLENDWYrmi",
"VPERMILPDYmi",
- "VPERMILPDYrm",
"VPERMILPSYmi",
- "VPERMILPSYrm",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17_2], (instregex "VBLENDPDYrmi",
- "VBLENDPSYrmi",
- "VPANDNYrm",
+def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDNYrm",
"VPANDYrm",
"VPBLENDDYrmi",
"VPORYrm",
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup36_1], (instregex "VBLENDVPDYrm",
- "VBLENDVPSYrm",
- "VMASKMOVPDYrm",
+def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VPBLENDVBYrm",
"VPMASKMOVDYrm",
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
+defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
// Vector integer operations.
"VUNPCKLPDYrm",
"VUNPCKLPSYrm")>;
-def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort05]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi",
- "VBLENDPSYrmi")>;
-
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
let NumMicroOps = 3;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm",
- "VBLENDVPSYrm",
- "VMASKMOVPDYrm",
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm")>;
def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
+defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
+defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends.
def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi",
- "VBLENDPSYrmi",
- "VMASKMOVPDYrm",
+def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VPADDBYrm",
"VPADDDYrm",
}
def: InstRW<[SKLWriteResGroup124], (instregex "(V?)DPPDrri")>;
-def SKLWriteResGroup125 : SchedWriteRes<[SKLPort23,SKLPort015]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPDYrm",
- "VBLENDVPSYrm")>;
-
def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 3;
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
+defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
+defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
+defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends.
def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
let Latency = 6;
"VBLENDMPDZrm(b?)",
"VBLENDMPSZ256rm(b?)",
"VBLENDMPSZrm(b?)",
- "VBLENDPDYrmi",
- "VBLENDPSYrmi",
"VBROADCASTF32X2Z256m(b?)",
"VBROADCASTF32X2Zm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
}
def: InstRW<[SKXWriteResGroup139], (instregex "(V?)DPPDrri")>;
-def SKXWriteResGroup140 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKXWriteResGroup140], (instregex "VBLENDVPDYrm",
- "VBLENDVPSYrm")>;
-
def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 3;
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
+defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
+defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
+defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarShuffleY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMAS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMAY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
+defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
+defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
+defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn't exist on Jaguar.
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
-def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> {
- let Latency = 3;
- let ResourceCycles = [2, 6];
- let NumMicroOps = 6;
-}
-def : InstRW<[JWriteVPERMY], (instrs VBLENDVPDYrr, VBLENDVPSYrr, VPERMILPDYrr, VPERMILPSYrr)>;
-
-def JWriteVPERMYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 8;
- let ResourceCycles = [2, 2, 6];
- let NumMicroOps = 6;
-}
-def : InstRW<[JWriteVPERMYLd, ReadAfterLd], (instrs VBLENDVPDYrm, VBLENDVPSYrm, VPERMILPDYrm, VPERMILPSYrm)>;
-
def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> {
let ResourceCycles = [2, 2];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
- VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
+def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
VUNPCKLPDYrr, VUNPCKLPSYrr)>;
let ResourceCycles = [2, 2, 2];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
- VMOVDDUPYrm, VMOVSHDUPYrm,
+def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm,
VMOVSLDUPYrm, VPERMILPDYmi,
VPERMILPSYmi, VSHUFPDYrmi,
VSHUFPSYrmi, VUNPCKHPDYrm,
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
// AVX/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
+defm : SLMWriteResPair<WriteFBlendY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarBlendY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAS, [ZnFPU03], 5>;