This appears to be a slowdown vs Skylake (from which the model was copied) - confirmed with uops.info / instlatx64
Noticed because D138359 was reporting that many of the PACKS overrides were redundant, when they were in fact incorrect
"(V?)PALIGNR(Y|Z128|Z256)?rri",
"(V?)PERMIL(PD|PS)(Y|Z128|Z256)?ri",
"(V?)PERMIL(PD|PS)(Y|Z128|Z256)?rr",
- "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
"(V?)UNPCK(L|H)(PD|PS)(Y|Z128|Z256)?rr")>;
def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
"VALIGND(Z|Z128|Z256)rri",
"VALIGNQ(Z|Z128|Z256)rri",
"VPBROADCAST(B|W)rr",
+ "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
"(V?)PALIGNR(Z128)?rmi",
"(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
"(V?)PERMIL(PD|PS)(Z128)?rm",
- "(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
"(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;
def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort01]> {
"(V?)PALIGNR(Y|Z256)rmi",
"(V?)PERMIL(PD|PS)(Y|Z256)m(b?)i",
"(V?)PERMIL(PD|PS)(Y|Z256)rm",
- "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
"(V?)UNPCK(L|H)(PD|PS)(Y|Z256)rm")>;
def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
VPBROADCASTWYrm,
"VPCMPQZ128rmi(b?)",
"VPCMPU(B|D|Q|W)Z128rmi(b?)",
"VPCMPWZ128rmi(b?)",
+ "(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
"VPTESTMBZ128rm(b?)",
"VPTESTMDZ128rm(b?)",
"VPTESTMQZ128rm(b?)",
"VPCMPU(B|D|Q|W)Z256rmi(b?)",
"VPCMPU(B|D|Q|W)Zrmi(b?)",
"VPCMPW(Z|Z256)rmi(b?)",
+ "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
"VPTESTM(B|D|Q|W)Z256rm(b?)",
"VPTESTM(B|D|Q|W)Zrm(b?)",
"VPTESTNM(B|D|Q|W)Z256rm(b?)",
# CHECK-NEXT: 2 7 0.50 * vpabsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpabsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * vpabsw (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 vpackssdw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 7 1.00 * vpackssdw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vpacksswb %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 7 1.00 * vpacksswb (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vpackusdw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 7 1.00 * vpackusdw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vpackuswb %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 7 1.00 * vpackuswb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vpackssdw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 1.00 * vpackssdw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vpacksswb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 1.00 * vpacksswb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vpackusdw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 1.00 * vpackusdw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vpackuswb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 1.00 * vpackuswb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpaddb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vpaddb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 8 0.50 * vpabsd (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vpabsw %ymm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpabsw (%rax), %ymm2
-# CHECK-NEXT: 1 1 1.00 vpackssdw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 1 1.00 vpacksswb %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 1 1.00 vpackusdw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 1 1.00 vpackuswb %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vpackssdw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vpacksswb %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vpackusdw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vpackuswb %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vpaddb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpaddb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vpaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 1.00 * vpabsw (%rax), %zmm19 {%k1}
# CHECK-NEXT: 1 1 1.00 vpabsw %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpabsw (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackssdw %zmm16, %zmm17, %zmm19
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %zmm17, %zmm19
-# CHECK-NEXT: 1 1 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpacksswb %zmm16, %zmm17, %zmm19
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %zmm17, %zmm19
-# CHECK-NEXT: 1 1 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackusdw %zmm16, %zmm17, %zmm19
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %zmm17, %zmm19
-# CHECK-NEXT: 1 1 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackuswb %zmm16, %zmm17, %zmm19
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %zmm17, %zmm19
-# CHECK-NEXT: 1 1 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackssdw %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %zmm17, %zmm19
+# CHECK-NEXT: 1 3 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpacksswb %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %zmm17, %zmm19
+# CHECK-NEXT: 1 3 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackusdw %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %zmm17, %zmm19
+# CHECK-NEXT: 1 3 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackuswb %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %zmm17, %zmm19
+# CHECK-NEXT: 1 3 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vpaddb %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 8 0.50 * vpaddb (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 1 0.33 vpaddb %zmm16, %zmm17, %zmm19 {%k1}
# CHECK-NEXT: 2 8 0.50 * vpabsw (%rax), %ymm19 {%k1}
# CHECK-NEXT: 1 1 0.50 vpabsw %ymm16, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpabsw (%rax), %ymm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackssdw %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 7 1.00 * vpackssdw (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 1 1.00 vpackssdw %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 7 1.00 * vpackssdw (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 7 1.00 * vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackssdw %ymm16, %ymm17, %ymm19
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %ymm17, %ymm19
-# CHECK-NEXT: 1 1 1.00 vpackssdw %ymm16, %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpacksswb %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 7 1.00 * vpacksswb (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 1 1.00 vpacksswb %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 7 1.00 * vpacksswb (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 7 1.00 * vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpacksswb %ymm16, %ymm17, %ymm19
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %ymm17, %ymm19
-# CHECK-NEXT: 1 1 1.00 vpacksswb %ymm16, %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackusdw %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 7 1.00 * vpackusdw (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 1 1.00 vpackusdw %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 7 1.00 * vpackusdw (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 7 1.00 * vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackusdw %ymm16, %ymm17, %ymm19
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %ymm17, %ymm19
-# CHECK-NEXT: 1 1 1.00 vpackusdw %ymm16, %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackuswb %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 7 1.00 * vpackuswb (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 1 1.00 vpackuswb %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 7 1.00 * vpackuswb (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 7 1.00 * vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 1.00 vpackuswb %ymm16, %ymm17, %ymm19
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %ymm17, %ymm19
-# CHECK-NEXT: 1 1 1.00 vpackuswb %ymm16, %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT: 1 1 1.00 vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackssdw %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 10 1.00 * vpackssdw (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 3 1.00 vpackssdw %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vpackssdw (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackssdw %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %ymm17, %ymm19
+# CHECK-NEXT: 1 3 1.00 vpackssdw %ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpacksswb %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 10 1.00 * vpacksswb (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 3 1.00 vpacksswb %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vpacksswb (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpacksswb %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %ymm17, %ymm19
+# CHECK-NEXT: 1 3 1.00 vpacksswb %ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackusdw %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 10 1.00 * vpackusdw (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 3 1.00 vpackusdw %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vpackusdw (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackusdw %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %ymm17, %ymm19
+# CHECK-NEXT: 1 3 1.00 vpackusdw %ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackuswb %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 10 1.00 * vpackuswb (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 3 1.00 vpackuswb %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vpackuswb (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vpackuswb %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %ymm17, %ymm19
+# CHECK-NEXT: 1 3 1.00 vpackuswb %ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %ymm17, %ymm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 1 0.33 vpaddb %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 2 7 0.50 * vpaddb (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 1 0.33 vpaddb %xmm16, %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 2 9 0.50 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 orpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * orpd (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 packssdw %xmm0, %xmm2
-# CHECK-NEXT: 2 7 1.00 * packssdw (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 packsswb %xmm0, %xmm2
-# CHECK-NEXT: 2 7 1.00 * packsswb (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 packuswb %xmm0, %xmm2
-# CHECK-NEXT: 2 7 1.00 * packuswb (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 packssdw %xmm0, %xmm2
+# CHECK-NEXT: 2 10 1.00 * packssdw (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 packsswb %xmm0, %xmm2
+# CHECK-NEXT: 2 10 1.00 * packsswb (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 packuswb %xmm0, %xmm2
+# CHECK-NEXT: 2 10 1.00 * packuswb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 paddb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 paddd %xmm0, %xmm2
# CHECK-NEXT: 1 6 0.50 * movntdqa (%rax), %xmm2
# CHECK-NEXT: 2 4 2.00 mpsadbw $1, %xmm0, %xmm2
# CHECK-NEXT: 3 10 2.00 * mpsadbw $1, (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 packusdw %xmm0, %xmm2
-# CHECK-NEXT: 2 7 1.00 * packusdw (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 packusdw %xmm0, %xmm2
+# CHECK-NEXT: 2 10 1.00 * packusdw (%rax), %xmm2
# CHECK-NEXT: 2 2 0.67 pblendvb %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 8 0.67 * pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pblendw $11, %xmm0, %xmm2