The znver1/znver2 PBLENDW overrides shouldn't need 2 uops for the xmm case (but znver1 should still double-pump for the ymm case).
Found with the help of D138359.
//=== Integer MMX and XMM Instructions ===//
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
-def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
- let Latency = 2;
-}
def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> {
let Latency = 8;
let NumMicroOps = 2;
}
-def ZnWriteFPU013Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> {
- let Latency = 9;
- let NumMicroOps = 2;
-}
-
-// PBLENDW.
-// x,m,i / v,v,m,i
-def : InstRW<[ZnWriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
-// y,m,i
-def : InstRW<[ZnWriteFPU013LdY], (instrs VPBLENDWYrmi)>;
def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]> ;
def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> {
//=== Integer MMX and XMM Instructions ===//
def Zn2WriteFPU013 : SchedWriteRes<[Zn2FPU013]> ;
-def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]> ;
def Zn2WriteFPU013m : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
let Latency = 8;
let NumMicroOps = 2;
}
-def Zn2WriteFPU013Ld : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-def Zn2WriteFPU013LdY : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-// PBLENDW.
-// x,m,i / v,v,m,i
-def : InstRW<[Zn2WriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
-// y,m,i
-def : InstRW<[Zn2WriteFPU013LdY], (instrs VPBLENDWYrmi)>;
def Zn2WriteFPU01 : SchedWriteRes<[Zn2FPU01]> ;
def Zn2WriteFPU01Y : SchedWriteRes<[Zn2FPU01]> {
# CHECK-NEXT: 1 1 1.00 vpblendvb %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpblendw $11, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 8 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 8 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpcmpeqb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 1 2.00 vpblendvb %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vpblendvb %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 1 0.67 vpblendw $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 9 0.50 * vpblendw $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.67 * vpblendw $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpbroadcastb %xmm0, %xmm0
# CHECK-NEXT: 2 8 1.00 * vpbroadcastb (%rax), %xmm0
# CHECK-NEXT: 2 2 1.00 vpbroadcastb %xmm0, %ymm0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 66.50 66.50 - - - - - 119.83 238.83 158.00 66.33 -
+# CHECK-NEXT: 66.50 66.50 - - - - - 120.17 239.17 158.00 66.67 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - 2.00 - - - - vpblendvb %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 2.00 - - - - vpblendvb %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 0.67 0.67 - 0.67 - vpblendw $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.33 0.33 - 0.33 - vpblendw $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.67 0.67 - 0.67 - vpblendw $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpbroadcastb %xmm0, %xmm0
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vpbroadcastb (%rax), %xmm0
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vpbroadcastb %xmm0, %ymm0
# CHECK-NEXT: 1 1 1.00 pblendvb %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pblendw $11, %xmm0, %xmm2
-# CHECK-NEXT: 2 8 0.50 * pblendw $11, (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.50 * pblendw $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pcmpeqq %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * pcmpeqq (%rax), %xmm2
# CHECK-NEXT: 1 2 2.00 pextrb $1, %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 vpblendvb %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpblendw $11, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 8 0.33 * vpblendw $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 8 0.33 * vpblendw $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpcmpeqb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpblendvb %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpblendvb %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vpblendw $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 8 0.33 * vpblendw $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 8 0.33 * vpblendw $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpbroadcastb %xmm0, %xmm0
# CHECK-NEXT: 2 8 1.00 * vpbroadcastb (%rax), %xmm0
# CHECK-NEXT: 1 2 0.50 vpbroadcastb %xmm0, %ymm0
# CHECK-NEXT: 1 1 1.00 pblendvb %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pblendw $11, %xmm0, %xmm2
-# CHECK-NEXT: 2 8 0.33 * pblendw $11, (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.33 * pblendw $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pcmpeqq %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.33 * pcmpeqq (%rax), %xmm2
# CHECK-NEXT: 1 2 2.00 pextrb $1, %xmm0, %ecx