if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk))
return SDValue();
+ // If the source is a concatenation whose upper half is undef, bitcast just
+ // the lower (SETCC) half to the half-width integer type and any-extend the
+ // scalar result; the dropped upper bits were undef to begin with.
+ SmallVector<SDValue, 4> SubSrcOps;
+ if (collectConcatOps(Src.getNode(), SubSrcOps, DAG) &&
+ SubSrcOps.size() == 2) {
+ SDValue LowerOp = SubSrcOps[0];
+ SDValue UpperOp = SubSrcOps[1];
+ if (LowerOp.getOpcode() == ISD::SETCC && UpperOp.isUndef()) {
+ // Recurse with the half-sized integer result type; ANY_EXTEND is legal
+ // here because the caller only relies on the low HalfVT bits.
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+ if (SDValue V = combineBitcastvxi1(DAG, HalfVT, LowerOp, DL, Subtarget))
+ return DAG.getNode(ISD::ANY_EXTEND, DL, VT, V);
+ }
+ }
+
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
// v8f64. So all legal 128-bit and 256-bit vectors are covered except for
// v8i16 and v16i16.
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: packssdw %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: psllw $15, %xmm0
-; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
+; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
+; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12: # %bb.0:
; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX12-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX12-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT: vpmovmskb %xmm0, %eax
+; AVX12-NEXT: vmovmskps %xmm0, %eax
+; AVX12-NEXT: xorl $15, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;