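Split WriteFHAdd into separate XMM and YMM/ZMM scheduler classes by adding WriteFHAddY, and use it for the 256-bit VHADD/VHSUB PS/PD instructions.
This removes all the HADD/HSUB PS/PD InstRW overrides from the per-CPU scheduler models.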
llvm-svn: 331054
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, WriteFHAdd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, WriteFHAdd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, WriteFHAdd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, WriteFHAdd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
}
}
////////////////////////////////////////////////////////////////////////////////
defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3>;
+defm : BWWriteResPair<WriteFHAddY, [BWPort1,BWPort5], 5, [1,2], 3, 6>;
defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3>;
// Remaining instrs.
}
def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
-def BWWriteResGroup129 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[BWWriteResGroup129], (instregex "VHADDPDYrm",
- "VHADDPSYrm",
- "VHSUBPDYrm",
- "VHSUBPSYrm")>;
-
def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 6;
////////////////////////////////////////////////////////////////////////////////
defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
+defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
//=== Floating Point XMM and YMM Instructions ===//
}
def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>;
-def HWWriteResGroup96_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[HWWriteResGroup96_1], (instregex "VHADDPDYrm",
- "VHADDPSYrm",
- "VHSUBPDYrm",
- "VHSUBPSYrm")>;
-
def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 4;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
-defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 6>;
+defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
+defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
+defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 6>;
////////////////////////////////////////////////////////////////////////////////
// String instructions.
}
def: InstRW<[SBWriteResGroup112], (instregex "(V?)DPPS(Y?)rri")>;
-def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup113], (instregex "VH(ADD|SUB)(PD|PS)Yrm")>;
-
def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 13;
let NumMicroOps = 3;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SKLWriteResPair<WriteFHAdd, [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
+defm : SKLWriteResPair<WriteFHAdd, [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
+defm : SKLWriteResPair<WriteFHAddY, [SKLPort5,SKLPort01], 6, [2,1], 3, 7>;
defm : SKLWriteResPair<WritePHAdd, [SKLPort15], 1>;
// Remaining instrs.
}
def: InstRW<[SKLWriteResGroup164], (instregex "(V?)DPPS(Y?)rri")>;
-def SKLWriteResGroup165 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
- let Latency = 13;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup165], (instregex "VHADDPDYrm",
- "VHADDPSYrm",
- "VHSUBPDYrm",
- "VHSUBPSYrm")>;
-
def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
+defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
+defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort15], 1>;
// Remaining instrs.
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPDYrm",
- "VHADDPSYrm",
- "VHSUBPDYrm",
- "VHSUBPSYrm",
- "VPERMI2W128rm(b?)",
+def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
"VPERMT2W128rm(b?)")>;
def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
-defm WritePHAdd : X86SchedWritePair;
+defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
+defm WritePHAdd : X86SchedWritePair;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
+defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
+defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
-def JWriteFHAddY: SchedWriteRes<[JFPU0, JFPA]> {
- let Latency = 3;
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFHAddY], (instrs VHADDPDYrr, VHADDPSYrr, VHSUBPDYrr, VHSUBPSYrr)>;
-
-def JWriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
- let Latency = 8;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFHAddYLd, ReadAfterLd], (instrs VHADDPDYrm, VHADDPSYrm,
- VHSUBPDYrm, VHSUBPSYrm)>;
-
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
+defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
// String instructions.
def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
defm : ZnWriteResFpuPair<WriteFHAdd, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFHAddY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;