This makes X86ISD::VSEXT more similar to ISD::SIGN_EXTEND and ISD::ZERO_EXTEND.
I'm hoping to replace X86ISD::VSEXT/VZEXT with target independent nodes. Making the target specific nodes similar to the target independent nodes helps minimize test diffs in that patch.
llvm-svn: 346539
}
break;
}
+ case X86ISD::VSEXT: {
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (OriginalDemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(X86ISD::VZEXT, SDLoc(Op),
+ Op.getValueType(), Src));
+
+ break;
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x32:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x32:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8mem_to_16x32:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
;
; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8mem_to_16x32:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
;
; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8_to_16x32_mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
;
; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8_to_16x32_mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
;
; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x64:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x64mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16mem_to_8x32:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
;
; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x16mem_to_8x32mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
;
; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x32mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
;
; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16mem_to_16x32:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
;
; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x16mem_to_16x32mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
;
; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16_to_16x32mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
;
; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16mem_to_8x64:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
;
; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x16mem_to_8x64mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
;
; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x64mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
;
; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x32mem_to_8x64:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
;
; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x32mem_to_8x64mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
;
; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x32_to_8x64mask:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
;
; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; KNL-LABEL: trunc_16i8_to_16i1:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512DQNOBW-LABEL: trunc_16i8_to_16i1:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQNOBW-NEXT: kmovw %k0, %eax
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; KNL-LABEL: trunc_8i16_to_8i1:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
; AVX512DQNOBW-NEXT: kmovw %k0, %eax
; KNL-LABEL: zext_32xi1_to_32xi8:
; KNL: # %bb.0:
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
-; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
-; AVX512DQNOBW-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQNOBW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQNOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
-; AVX512DQNOBW-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQNOBW-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQNOBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fadd <16 x float> %x, %a2
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmsub_ps_z:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %x, %a2
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmadd_ps_z:
; ALL: ## %bb.0:
-; ALL-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %a2, %x
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmsub_ps_z:
; ALL: ## %bb.0:
-; ALL-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmadd_pd_z:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
+; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
%x = fmul <8 x double> %a0, %a1
%res = fadd <8 x double> %x, %a2
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmsub_pd_z:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0
+; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
%x = fmul <8 x double> %a0, %a1
%res = fsub <8 x double> %x, %a2
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL-LABEL: test_x86_fmsub_213:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
+; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; ALL-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0
+; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
; ALL-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a1
define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_231_m:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmsub132sd (%rdi), %xmm1, %xmm0
+; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; ALL-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a2
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test231_br:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmadd132ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
+; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; ALL-NEXT: retq
%b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b2 = fadd <16 x float> %b1, %a2
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test213_br:
; ALL: ## %bb.0:
-; ALL-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
+; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
; ALL-NEXT: retq
%b1 = fmul <16 x float> %a1, %a2
%b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd132_ps:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
-; KNL-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
-; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; SKX-NEXT: retq
%a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
%x = fmul <16 x float> %a0, %a2
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd231_ps:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
-; KNL-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
-; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
%a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd213_ps:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
-; KNL-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
-; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
%a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
-; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vmovdqa (%rsp), %ymm0
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm4
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpmovsxbd %xmm3, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: orl %edx, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: orq %rcx, %rax
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: shll $16, %esi
; KNL-NEXT: orl %ecx, %esi
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm4
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpmovsxbd %xmm3, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: orl %edx, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: orq %rcx, %rax
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: shll $16, %esi
; KNL-NEXT: orl %ecx, %esi
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %edx
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL: ## %bb.0: ## %allocas
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512DQ-LABEL: test7:
; AVX512DQ: ## %bb.0: ## %allocas
-; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB18_1
; KNL-NEXT: ## %bb.2:
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: jmp LBB18_3
; KNL-NEXT: LBB18_1:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: LBB18_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB18_1
; AVX512DQ-NEXT: ## %bb.2:
-; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQ-NEXT: jmp LBB18_3
; AVX512DQ-NEXT: LBB18_1:
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: LBB18_3:
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512DQ-LABEL: store_v8i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: knotb %k0, %k0
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
;
; AVX512DQ-LABEL: store_v16i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
; KNL-LABEL: store_8i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512DQ-LABEL: store_8i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
; KNL-LABEL: store_8i1_1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512DQ-LABEL: store_8i1_1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
; KNL-LABEL: store_16i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
;
; AVX512DQ-LABEL: store_16i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
; KNL-LABEL: store_32i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: kmovw %k1, 2(%rdi)
;
; AVX512DQ-LABEL: store_32i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
; KNL-LABEL: store_32i1_1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: kmovw %k1, 2(%rdi)
;
; AVX512DQ-LABEL: store_32i1_1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
;
; KNL-LABEL: store_64i1:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
-; KNL-NEXT: vpmovsxbd %xmm3, %zmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3
; KNL-NEXT: kmovw %k3, 6(%rdi)
;
; AVX512DQ-LABEL: store_64i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
-; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2
-; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3
; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
; AVX512F-LABEL: test24:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; AVX512F-LABEL: test_store_16i64:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1}
define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; AVX512F-LABEL: test_store_16f64:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovupd %zmm1, (%rdi) {%k1}
define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; AVX512F-LABEL: test_load_16i64:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1}
define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; AVX512F-LABEL: test_load_16f64:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
; AVX512F-LABEL: test_load_32f64:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5
-; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero,xmm5[8],zero,zero,zero,xmm5[9],zero,zero,zero,xmm5[10],zero,zero,zero,xmm5[11],zero,zero,zero,xmm5[12],zero,zero,zero,xmm5[13],zero,zero,zero,xmm5[14],zero,zero,zero,xmm5[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5
; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2}
define <16 x i16> @pr31515(<16 x i1> %a, <16 x i1> %b, <16 x i16> %c) nounwind {
; X86-LABEL: pr31515:
; X86: # %bb.0:
-; X86-NEXT: vpmovsxbd %xmm1, %zmm1
+; X86-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; X86-NEXT: vpslld $31, %zmm1, %zmm1
-; X86-NEXT: vpmovsxbd %xmm0, %zmm0
+; X86-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: vpslld $31, %zmm0, %zmm0
; X86-NEXT: vptestmd %zmm0, %zmm0, %k1
; X86-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
;
; X64-LABEL: pr31515:
; X64: # %bb.0:
-; X64-NEXT: vpmovsxbd %xmm1, %zmm1
+; X64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; X64-NEXT: vpslld $31, %zmm1, %zmm1
-; X64-NEXT: vpmovsxbd %xmm0, %zmm0
+; X64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: vpslld $31, %zmm0, %zmm0
; X64-NEXT: vptestmd %zmm0, %zmm0, %k1
; X64-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
; KNL-LABEL: trunc_wb_256_mem:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-LABEL: usat_trunc_wb_256_mem:
; KNL: ## %bb.0:
; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL: ## %bb.0:
; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: vpmovdb %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; KNL-LABEL: test43:
; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
+; KNL-NEXT: vpmovzxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd2]
+; KNL-NEXT: ## zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x27,0xca]
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
;
; KNL-LABEL: test3:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; KNL-LABEL: test7:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
;
; KNL-LABEL: test8:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
;
; KNL-LABEL: test9:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
;
; KNL-LABEL: test18:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k2
define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: test_gather_16i32:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm2
;
; KNL_32-LABEL: test_gather_16i32:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
;
; SKX-LABEL: test_gather_16i32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm2
;
; SKX_32-LABEL: test_gather_16i32:
; SKX_32: # %bb.0:
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; KNL_64-LABEL: test_gather_16i64:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
;
; SKX-LABEL: test_gather_16i64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
; KNL_64-LABEL: test_gather_16f32:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm2
;
; KNL_32-LABEL: test_gather_16f32:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
;
; SKX-LABEL: test_gather_16f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm2
;
; SKX_32-LABEL: test_gather_16f32:
; SKX_32: # %bb.0:
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; KNL_64-LABEL: test_gather_16f64:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
;
; SKX-LABEL: test_gather_16f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: test_scatter_16i32:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
;
; KNL_32-LABEL: test_scatter_16i32:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
;
; SKX-LABEL: test_scatter_16i32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
;
; SKX_32-LABEL: test_scatter_16i32:
; SKX_32: # %bb.0:
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; KNL_64-LABEL: test_scatter_16i64:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
;
; SKX-LABEL: test_scatter_16i64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
; KNL_64-LABEL: test_scatter_16f32:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
;
; KNL_32-LABEL: test_scatter_16f32:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
;
; SKX-LABEL: test_scatter_16f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
;
; SKX_32-LABEL: test_scatter_16f32:
; SKX_32: # %bb.0:
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; KNL_64-LABEL: test_scatter_16f64:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
-; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
;
; SKX-LABEL: test_scatter_16f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
-; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
; AVX512F-LABEL: test11b:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1}
;
; AVX512F-LABEL: test11c:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z}
;
; AVX512F-LABEL: test11d:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX2-NEXT: vpmovsxwq %xmm2, %ymm2
; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; AVX2-NEXT: vpmuldq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm2, %ymm1
; AVX2-NEXT: retq
; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
; AVX256VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2],xmm2[3],xmm1[4],xmm2[5,6,7]
; AVX256VL-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[6,7,12,13,4,5,8,9,6,7,14,15,14,15,0,1]
-; AVX256VL-NEXT: vpmovsxwd %xmm3, %ymm3
+; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; AVX256VL-NEXT: vpslld $31, %ymm3, %ymm3
; AVX256VL-NEXT: vptestmd %ymm3, %ymm3, %k1
; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
; AVX256VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[6,7,12,13,2,3,14,15,6,7,6,7,14,15,0,1]
; AVX256VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3],xmm2[4],xmm1[5],xmm2[6,7]
-; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0
; AVX256VL-NEXT: kunpckbw %k1, %k0, %k0
; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]
; AVX256VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
-; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm2
+; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX256VL-NEXT: vpslld $31, %ymm2, %ymm2
; AVX256VL-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX256VL-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0
; AVX256VL-NEXT: kunpckbw %k1, %k0, %k0
; KNL-32-NEXT: pushl %esi
; KNL-32-NEXT: .cfi_def_cfa_offset 8
; KNL-32-NEXT: .cfi_offset %esi, -8
-; KNL-32-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax
;
; AVX512F-LABEL: trunc_v16i16_to_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwd (%rdi), %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rsi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_v16i16_to_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovsxwd (%rdi), %zmm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rsi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
define <16 x float> @test_vshuff32x4_512_mask(<16 x float> %x, <16 x float> %x1, <16 x float> %y, <16 x i1> %mask) nounwind {
; AVX512F-LABEL: test_vshuff32x4_512_mask:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512F-NEXT: vpmovd2m %zmm3, %k1
; AVX512F-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
define <16 x i32> @test_vshufi32x4_512_mask(<16 x i32> %x, <16 x i32> %x1, <16 x i32> %y, <16 x i1> %mask) nounwind {
; AVX512F-LABEL: test_vshufi32x4_512_mask:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512F-NEXT: vpmovd2m %zmm3, %k1
; AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
; ALL-LABEL: test_vshuff64x2_512_maskz:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovsxwq %xmm2, %zmm2
+; ALL-NEXT: vpmovzxwq {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
; ALL-NEXT: vpsllq $63, %zmm2, %zmm2
; ALL-NEXT: vptestmq %zmm2, %zmm2, %k1
; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
; ALL-LABEL: test_vshufi64x2_512_mask:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovsxwq %xmm2, %zmm2
+; ALL-NEXT: vpmovzxwq {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
; ALL-NEXT: vpsllq $63, %zmm2, %zmm2
; ALL-NEXT: vptestmq %zmm2, %zmm2, %k1
; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
;
; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX512F-32-NEXT: vpsllq $63, %zmm1, %zmm1
; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
;
; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX512F-32-NEXT: vpsllq $63, %zmm1, %zmm1
; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
;
; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
;
; AVX512F-LABEL: trunc_add_const_v16i16_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
;
; AVX512DQ-LABEL: trunc_add_const_v16i16_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512F-LABEL: trunc_and_const_v16i16_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
;
; AVX512DQ-LABEL: trunc_and_const_v16i16_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512F-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
;
; AVX512DQ-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512F-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
;
; AVX512DQ-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512F-LABEL: trunc16i16_16i8:
; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc16i16_16i8:
; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX512F-LABEL: trunc16i16_16i8_ashr:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512VL-LABEL: trunc16i16_16i8_ashr:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpsraw $8, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX512F-LABEL: trunc16i16_16i8_lshr:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512VL-LABEL: trunc16i16_16i8_lshr:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX512F-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rsi)
; AVX512F-NEXT: vpmovdb %zmm1, 16(%rsi)
; AVX512F-NEXT: vzeroupper
; AVX512VL-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rsi)
; AVX512VL-NEXT: vpmovdb %zmm1, 16(%rsi)
; AVX512VL-NEXT: vzeroupper
; AVX512NOBW-LABEL: vselect_packss_v16i16:
; AVX512NOBW: # %bb.0:
; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512NOBW-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512NOBW-NEXT: vzeroupper