This also fixes some of the ReadAfterLd issues that were caused by the InstRW overrides.
llvm-svn: 330544
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
-defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2]>; // Fp vector variable blends.
+defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
defm : BWWriteResPair<WriteBlend, [BWPort15], 1>; // Vector blends.
-defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2]>; // Vector variable blends.
+defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5>; // Vector PSADBW.
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[BWWriteResGroup11], (instregex "BLENDVPDrr0",
- "BLENDVPSrr0",
- "MMX_PINSRWrr",
- "PBLENDVBrr0",
- "VBLENDVPD(Y?)rr",
- "VBLENDVPS(Y?)rr",
- "VPBLENDVB(Y?)rr",
+def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWrr",
"(V?)PINSRBrr",
"(V?)PINSRDrr",
"(V?)PINSRQrr",
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup79], (instregex "BLENDVPDrm0",
- "BLENDVPSrm0",
- "MMX_PACKSSDWirm",
+def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
"MMX_PACKSSWBirm",
"MMX_PACKUSWBirm",
- "PBLENDVBrm0",
- "VBLENDVPDrm",
- "VBLENDVPSrm",
"VMASKMOVPDrm",
"VMASKMOVPSrm",
- "VPBLENDVBrm",
"VPMASKMOVDrm",
"VPMASKMOVQrm")>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
-defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2]>;
+defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
// Vector integer operations.
def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>;
defm : HWWriteResPair<WriteBlend, [HWPort15], 1>;
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
-defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2]>;
+defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[HWWriteResGroup27], (instregex "BLENDVPDrr0",
- "BLENDVPSrr0",
- "MMX_PINSRWrr",
- "PBLENDVBrr0",
- "VBLENDVPD(Y?)rr",
- "VBLENDVPS(Y?)rr",
- "VPBLENDVB(Y?)rr",
+def: InstRW<[HWWriteResGroup27], (instregex "MMX_PINSRWrr",
"(V?)PINSRBrr",
"(V?)PINSRDrr",
"(V?)PINSRQrr",
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup36], (instregex "BLENDVPDrm0",
- "BLENDVPSrm0",
- "PBLENDVBrm0",
- "VBLENDVPDrm",
- "VBLENDVPSrm",
- "VMASKMOVPDrm",
+def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDrm",
"VMASKMOVPSrm",
- "VPBLENDVBrm",
"VPMASKMOVDrm",
"VPMASKMOVQrm")>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>;
-defm : SBWriteResPair<WriteFVarBlend, [SBPort0, SBPort5], 2>;
+defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
// Vector integer operations.
def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>;
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1>;
-defm : SBWriteResPair<WriteVarBlend, [SBPort1, SBPort5], 2>;
+defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5>;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0",
- "BLENDVPSrr0",
- "ROL(8|16|32|64)r1",
+def: InstRW<[SBWriteResGroup9], (instregex "ROL(8|16|32|64)r1",
"ROL(8|16|32|64)ri",
"ROR(8|16|32|64)r1",
"ROR(8|16|32|64)ri",
- "SET(A|BE)r",
- "VBLENDVPD(Y?)rr",
- "VBLENDVPS(Y?)rr")>;
-
-def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup10], (instregex "PBLENDVBrr0",
- "VPBLENDVBrr")>;
+ "SET(A|BE)r")>;
def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0",
- "BLENDVPSrm0",
- "VBLENDVPDrm",
- "VBLENDVPSrm",
- "VMASKMOVPDrm",
+def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm",
"VMASKMOVPSrm")>;
-def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrm0",
- "VPBLENDVBrm")>;
-
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
let NumMicroOps = 3;
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector variable shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1>; // Floating point vector blends.
-defm : SKLWriteResPair<WriteFVarBlend, [SKLPort5], 2, [2]>; // Fp vector variable blends.
+defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles.
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector variable shuffles.
defm : SKLWriteResPair<WriteBlend, [SKLPort15], 1>; // Vector blends.
-defm : SKLWriteResPair<WriteVarBlend, [SKLPort5], 2, [2]>; // Vector variable blends.
+defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
"ROR(8|16|32|64)ri",
"SET(A|BE)r")>;
-def SKLWriteResGroup16 : SchedWriteRes<[SKLPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKLWriteResGroup16], (instregex "BLENDVPDrr0",
- "BLENDVPSrr0",
- "PBLENDVBrr0",
- "VBLENDVPD(Y?)rr",
- "VBLENDVPS(Y?)rr",
- "VPBLENDVB(Y?)rr")>;
-
def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
"VXORPDYrm",
"VXORPSYrm")>;
-def SKLWriteResGroup111 : SchedWriteRes<[SKLPort23,SKLPort015]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup111], (instregex "BLENDVPDrm0",
- "BLENDVPSrm0",
- "PBLENDVBrm0",
- "VBLENDVPDrm",
- "VBLENDVPSrm",
- "VPBLENDVB(Y?)rm")>;
-
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 4;
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1>; // Floating point vector blends.
-defm : SKXWriteResPair<WriteFVarBlend, [SKXPort5], 2, [2]>; // Fp vector variable blends.
+defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles.
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteBlend, [SKXPort15], 1>; // Vector blends.
-defm : SKXWriteResPair<WriteVarBlend, [SKXPort5], 2, [2]>; // Vector variable blends.
+defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1,1], 1, 6>; // Vector PSADBW. NOTE(review): [1,1] gives two resource-cycle entries for the single port [SKXPort5] - confirm intended values.
"ROR(8|16|32|64)ri",
"SET(A|BE)r")>;
-def SKXWriteResGroup16 : SchedWriteRes<[SKXPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKXWriteResGroup16], (instregex "BLENDVPDrr0",
- "BLENDVPSrr0",
- "PBLENDVBrr0",
- "VBLENDVPDYrr",
- "VBLENDVPDrr",
- "VBLENDVPSYrr",
- "VBLENDVPSrr",
- "VPBLENDVBYrr",
- "VPBLENDVBrr")>;
-
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
"VXORPSZ256rm(b?)",
"VXORPSZrm(b?)")>;
-def SKXWriteResGroup122 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKXWriteResGroup122], (instregex "BLENDVPDrm0",
- "BLENDVPSrm0",
- "PBLENDVBrm0",
- "VBLENDVPDrm",
- "VBLENDVPSrm",
- "VPBLENDVBYrm",
- "VPBLENDVBrm")>;
-
def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 4;
; GENERIC-LABEL: test_pblendvb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pblendvb:
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=SANDY
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=IVY
vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# SANDY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
-# SANDY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# IVY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
-# IVY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
-# HASWELL-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# HASWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# HASWELL-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
-# BDWELL-NEXT: [0,1] D===eeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm1
-# SKYLAKE-NEXT: [0,1] D====eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm1
+# SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
-# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
-# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=SANDY
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=IVY
vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# SANDY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
-# SANDY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# IVY: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
-# IVY-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# IVY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# IVY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
-# HASWELL-NEXT: [0,1] D===eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# HASWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# HASWELL-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2
-# BDWELL-NEXT: [0,1] D===eeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm2
-# SKYLAKE-NEXT: [0,1] D====eeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm2
+# SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
-# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
-# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
-# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3