(V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
- : !Subtarget.hasSSE2() ? MVT::v4f32
- : MaskVT;
+ if (MaskEltSize == 16)
+ SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+ else
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
}
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
- : !Subtarget.hasSSE2() ? MVT::v4f32
- : MaskVT;
+ if (MaskEltSize == 16)
+ SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+ else
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
for.end: ; preds = %for.body.preheader, %entry
ret void
}
+
+; PR52561 regression test: a <16 x i32> add/and chain compiled with
+; "min-legal-vector-width"="256" / "prefer-vector-width"="256", so the 512-bit
+; vectors are split into 256-bit (ymm) operations in the checks below.
+; The `and` keeps only the low 16 bits of lane 0 (lanes 1..14 are poison), and
+; the expected lowering materializes this as a zeroed register plus a scalar
+; f16-sized move (vpxor + vmovsh) — exercising the f16 VZEXT_MOVL type choice.
+; NOTE(review): the X64/X86 CHECK lines look autogenerated
+; (update_llc_test_checks.py); regenerate rather than hand-editing them.
+define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
+; X64-LABEL: pr52561:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastd {{.*#+}} ymm4 = [112,112,112,112,112,112,112,112]
+; X64-NEXT: vpaddd %ymm4, %ymm2, %ymm2
+; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X64-NEXT: vpaddd %ymm4, %ymm3, %ymm2
+; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: pr52561:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-32, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1
+; X86-NEXT: vpbroadcastd {{.*#+}} ymm3 = [112,112,112,112,112,112,112,112]
+; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm2
+; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm1
+; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
+; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %1 = add <16 x i32> %a, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+ %2 = add <16 x i32> %1, %b
+ %3 = and <16 x i32> %2, <i32 65535, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 65535>
+ ret <16 x i32> %3
+}