We still use i8 for the load/store type, so we need to convert to/from i16 around the mask type.
By doing this we get an i8->i16 extload which we can then pattern match to a KMOVW if the access is aligned.
llvm-svn: 350989
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
- StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), StoredVal,
+ StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(MVT::v16i1), StoredVal,
DAG.getIntPtrConstant(0, dl));
- StoredVal = DAG.getBitcast(MVT::i8, StoredVal);
+ StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+ StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
// Replace chain users with the new chain.
assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");
- SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
- DAG.getBitcast(MVT::v8i1, NewLd),
- DAG.getIntPtrConstant(0, dl));
- return DAG.getMergeValues({Extract, NewLd.getValue(1)}, dl);
+ SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
+ DAG.getBitcast(MVT::v16i1, Val),
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
// Nothing useful we can do without SSE2 shuffles.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
+ def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
+ (KMOVWkm addr:$src)>;
}
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 4(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 6(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 14(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 12(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 30(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 28(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 24(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 62(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 60(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 56(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 4(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 6(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 14(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 12(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 30(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 28(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 24(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 62(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 60(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movzbl 56(%rdi), %eax
-; AVX512NOTDQ-NEXT: kmovd %eax, %k1
+; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; X86-LABEL: unpckbw_test:
; X86: ## %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k0 ## encoding: [0xc5,0xf8,0x92,0xc0]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ## encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
define <8 x i64> @load_8i1(<8 x i1>* %a) {
; KNL-LABEL: load_8i1:
; KNL: ## %bb.0:
-; KNL-NEXT: movzbl (%rdi), %eax
-; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
;
; AVX512BW-LABEL: load_8i1:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
+; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-LABEL: load_2i1:
; KNL: ## %bb.0:
-; KNL-NEXT: movzbl (%rdi), %eax
-; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
;
; AVX512BW-LABEL: load_2i1:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
+; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-LABEL: load_4i1:
; KNL: ## %bb.0:
-; KNL-NEXT: movzbl (%rdi), %eax
-; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
;
; AVX512BW-LABEL: load_4i1:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
+; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %ecx
-; X86-NEXT: kmovw %ecx, %k0
-; X86-NEXT: movzbl (%eax), %eax
-; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: kmovw (%ecx), %k0
+; X86-NEXT: kmovw (%eax), %k1
; X86-NEXT: korw %k1, %k0, %k0
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
;
; X64-LABEL: select05_mem:
; X64: # %bb.0:
-; X64-NEXT: movzbl (%rsi), %eax
-; X64-NEXT: kmovw %eax, %k0
-; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: kmovw %eax, %k1
+; X64-NEXT: kmovw (%rsi), %k0
+; X64-NEXT: kmovw (%rdi), %k1
; X64-NEXT: korw %k1, %k0, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %ecx
-; X86-NEXT: kmovw %ecx, %k0
-; X86-NEXT: movzbl (%eax), %eax
-; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: kmovw (%ecx), %k0
+; X86-NEXT: kmovw (%eax), %k1
; X86-NEXT: kandw %k1, %k0, %k0
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
;
; X64-LABEL: select06_mem:
; X64: # %bb.0:
-; X64-NEXT: movzbl (%rsi), %eax
-; X64-NEXT: kmovw %eax, %k0
-; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: kmovw %eax, %k1
+; X64-NEXT: kmovw (%rsi), %k0
+; X64-NEXT: kmovw (%rdi), %k1
; X64-NEXT: kandw %k1, %k0, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_2i1_to_2i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_2i1_to_2i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_2i1_to_2i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_2i1_to_2i64:
; X32-SSE2: # %bb.0: # %entry
; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_4i1_to_4i32:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_4i1_to_4i32:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_4i1_to_4i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_4i1_to_4i32:
; X32-SSE2: # %bb.0: # %entry
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_4i1_to_4i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_4i1_to_4i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_4i1_to_4i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_4i1_to_4i64:
; X32-SSE2: # %bb.0: # %entry
;
; AVX512F-LABEL: load_sext_8i1_to_8i16:
; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: kmovw (%rdi), %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
;
; AVX512BW-LABEL: load_sext_8i1_to_8i16:
; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k0
+; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_8i1_to_8i32:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_8i1_to_8i32:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_8i1_to_8i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
; X32-SSE2: # %bb.0: # %entry
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_2i1_to_2i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_2i1_to_2i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_2i1_to_2i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_2i1_to_2i64:
; X32-SSE2: # %bb.0: # %entry
; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_4i1_to_4i32:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_4i1_to_4i32:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_4i1_to_4i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_4i1_to_4i32:
; X32-SSE2: # %bb.0: # %entry
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_4i1_to_4i64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_4i1_to_4i64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_4i1_to_4i64:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_4i1_to_4i64:
; X32-SSE2: # %bb.0: # %entry
;
; AVX512F-LABEL: load_sext_8i1_to_8i16:
; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: kmovw (%rdi), %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
;
; AVX512BW-LABEL: load_sext_8i1_to_8i16:
; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k0
+; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_sext_8i1_to_8i32:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movzbl (%rdi), %eax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: load_sext_8i1_to_8i32:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: load_sext_8i1_to_8i32:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw (%rdi), %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
; X32-SSE2: # %bb.0: # %entry