This removes all the AND/ANDN/OR/XOR PS/PD InstRW overrides.
llvm-svn: 331051
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite schedY,
bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
- sched, IsCommutable>, EVEX_V512, PS,
+ schedY, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
- sched, IsCommutable>, EVEX_V512, PD, VEX_W,
+ schedY, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
sched, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
- sched, IsCommutable>, EVEX_V256, PS,
+ schedY, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
sched, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
- sched, IsCommutable>, EVEX_V256, PD, VEX_W,
+ schedY, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
- WriteFAdd, 1>,
+ WriteFAdd, WriteFAdd, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, WriteFMul, 1>,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
+ WriteFMul, WriteFMul, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, WriteFMul>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
+ WriteFAdd, WriteFAdd>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
+ WriteFDiv, WriteFDiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>,
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
+ WriteFCmp, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>,
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
+ WriteFCmp, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
let isCodeGenOnly = 1 in {
- defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>;
- defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>;
-}
-defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFLogic, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFLogic, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, WriteFLogic, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFLogic, 1>;
+ defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
+ WriteFCmp, WriteFCmp, 1>;
+ defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
+ WriteFCmp, WriteFCmp, 1>;
+}
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 1>;
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 0>;
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 1>;
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
//===----------------------------------------------------------------------===//
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
- WriteFShuffle>;
+ WriteFShuffle, WriteFShuffle>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
- WriteFShuffle>;
+ WriteFShuffle, WriteFShuffle>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
WriteShuffle, HasBWI>;
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched,
list<dag> pat_rr, list<dag> pat_rm,
bit Is2Addr = 1> {
let isCommutable = 1, hasSideEffects = 0 in
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rr, d>,
- Sched<[WriteFLogic]>;
+ Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
- Sched<[WriteFLogic.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>;
}
SDNode OpNode> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem,
+ !strconcat(OpcodeStr, "ps"), f256mem, WriteFLogicY,
[], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem,
+ !strconcat(OpcodeStr, "pd"), f256mem, WriteFLogicY,
[], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem,
+ !strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic,
[], [], 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem,
+ !strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic,
[], [], 0>, PD, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem,
+ !strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic,
[], []>, PS;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem,
+ !strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic,
[], []>, PD;
}
}
defm : BWWriteResPair<WriteFMAS, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
-defm : BWWriteResPair<WriteFLogic, [BWPort5], 1>; // Floating point and/or/xor logicals.
+defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
+defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VORPDYrm",
- "VORPSYrm",
- "VPACKSSDWYrm",
+def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm",
"VPACKSSWBYrm",
"VPACKUSDWYrm",
"VPACKUSWBYrm",
"VUNPCKHPDYrm",
"VUNPCKHPSYrm",
"VUNPCKLPDYrm",
- "VUNPCKLPSYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VUNPCKLPSYrm")>;
def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 7;
defm : HWWriteResPair<WriteFMAS, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
-defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VORPDYrm",
- "VORPSYrm",
- "VPACKSSDWYrm",
+def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm",
"VPACKSSWBYrm",
"VPACKUSDWYrm",
"VPACKUSWBYrm",
"VUNPCKHPDYrm",
"VUNPCKHPSYrm",
"VUNPCKLPDYrm",
- "VUNPCKLPSYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VUNPCKLPSYrm")>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VORPDYrm",
- "VORPSYrm",
- "VPERM2F128rm",
+def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
"VPERMILPDYmi",
"VPERMILPDYrm",
"VPERMILPSYmi",
"VUNPCKHPDYrm",
"VUNPCKHPSYrm",
"VUNPCKLPDYrm",
- "VUNPCKLPSYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VUNPCKLPSYrm")>;
def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
defm : SKLWriteResPair<WriteFMAS, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
-defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VBLENDPDYrmi",
+def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi",
"VBLENDPSYrmi",
"VMASKMOVPDYrm",
"VMASKMOVPSYrm",
- "VORPDYrm",
- "VORPSYrm",
"VPADDBYrm",
"VPADDDYrm",
"VPADDQYrm",
"VPSUBDYrm",
"VPSUBQYrm",
"VPSUBWYrm",
- "VPXORYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VPXORYrm")>;
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 8;
defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm",
- "VANDNPDZ256rm(b?)",
- "VANDNPDZrm(b?)",
- "VANDNPSYrm",
- "VANDNPSZ256rm(b?)",
- "VANDNPSZrm(b?)",
- "VANDPDYrm",
- "VANDPDZ256rm(b?)",
- "VANDPDZrm(b?)",
- "VANDPSYrm",
- "VANDPSZ256rm(b?)",
- "VANDPSZrm(b?)",
- "VBLENDMPDZ256rm(b?)",
+def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
"VBLENDMPDZrm(b?)",
"VBLENDMPSZ256rm(b?)",
"VBLENDMPSZrm(b?)",
"VMOVUPDZrm(b?)",
"VMOVUPSZ256rm(b?)",
"VMOVUPSZrm(b?)",
- "VORPDYrm",
- "VORPDZ256rm(b?)",
- "VORPDZrm(b?)",
- "VORPSYrm",
- "VORPSZ256rm(b?)",
- "VORPSZrm(b?)",
"VPADDBYrm",
"VPADDBZ256rm(b?)",
"VPADDBZrm(b?)",
"VPXORDZrm(b?)",
"VPXORQZ256rm(b?)",
"VPXORQZrm(b?)",
- "VPXORYrm",
- "VXORPDYrm",
- "VXORPDZ256rm(b?)",
- "VXORPDZrm(b?)",
- "VXORPSYrm",
- "VXORPSZ256rm(b?)",
- "VXORPSZrm(b?)")>;
+ "VPXORYrm")>;
def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 8;
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM).
-defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
-defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
+defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
+defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
+defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
+defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
}
}
+multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [2], int UOps = 2> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+ // Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
+ let Latency = !add(Lat, 5);
+ let ResourceCycles = !listconcat([2], Res);
+ let NumMicroOps = UOps;
+ }
+}
+
// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
+defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
-def JWriteFLogicY: SchedWriteRes<[JFPU01, JFPX]> {
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFLogicY], (instrs VORPDYrr, VORPSYrr,
- VXORPDYrr, VXORPSYrr,
- VANDPDYrr, VANDPSYrr,
- VANDNPDYrr, VANDNPSYrr)>;
-
-def JWriteFLogicYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFLogicYLd, ReadAfterLd], (instrs VORPDYrm, VORPSYrm,
- VXORPDYrm, VXORPSYrm,
- VANDPDYrm, VANDPSYrm,
- VANDNPDYrm, VANDNPSYrm)>;
-
def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> {
let Latency = 12;
let ResourceCycles = [2, 6, 6];
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: orq_broadcast:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: orq_broadcast:
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; GENERIC-LABEL: andd512fold:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andd512fold:
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; GENERIC-LABEL: andqbrst:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andqbrst:
define <16 x float> @test_fxor(<16 x float> %a) {
; GENERIC-LABEL: test_fxor:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_fxor:
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; GENERIC-LABEL: test_fxor_8f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_fxor_8f32:
define <8 x double> @fabs_v8f64(<8 x double> %p)
; GENERIC-LABEL: fabs_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fabs_v8f64:
define <16 x float> @fabs_v16f32(<16 x float> %p)
; GENERIC-LABEL: fabs_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fabs_v16f32:
; GENERIC-LABEL: test_x86_fnmsub_ps_z:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;