DAG.getConstant(NotMask, DL, VT));
}
+ // Fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2))
+ // iff pow2splat(c1).
+ // Use KnownBits to prove that exactly one bit is set in every element of
+ // c1, then shift that bit up to the msb (the only bit MOVMSK reads).
+ // NOTE: Unlike the movmsk(icmp_eq(and(x,c1),0)) fold below, 'pow2 or zero'
+ // is NOT sufficient here: for a zero element and(x,0) == 0 is always true
+ // (sign bit set) whereas shl(and(x,0),c2) is zero (sign bit clear).
+ // TODO: Merge with the movmsk(icmp_eq(and(x,c1),0)) fold below?
+ if (Src.getOpcode() == X86ISD::PCMPEQ &&
+     Src.getOperand(0).getOpcode() == ISD::AND &&
+     Src.getOperand(1) == Src.getOperand(0).getOperand(1)) {
+   KnownBits KnownSrc = DAG.computeKnownBits(Src.getOperand(1));
+   // countMaxPopulation() == 1 alone still admits an all-zero element, so
+   // additionally require the single candidate bit to be known one.
+   if (KnownSrc.countMaxPopulation() == 1 &&
+       KnownSrc.countMinPopulation() == 1) {
+     SDLoc DL(N);
+     MVT ShiftVT = SrcVT;
+     // Shift the masked value and(x,c1), so only the tested bit can reach
+     // the msb of each element.
+     SDValue ShiftSrc = Src.getOperand(0);
+     if (ShiftVT.getScalarType() == MVT::i8) {
+       // vXi8 shifts - we only care about the signbit so can use PSLLW.
+       ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+       ShiftSrc = DAG.getBitcast(ShiftVT, ShiftSrc);
+     }
+     // The known leading zeros of c1 give the distance from the set bit to
+     // the element msb.
+     unsigned ShiftAmt = KnownSrc.countMinLeadingZeros();
+     ShiftSrc = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT,
+                                           ShiftSrc, ShiftAmt, DAG);
+     ShiftSrc = DAG.getBitcast(SrcVT, ShiftSrc);
+     return DAG.getNode(X86ISD::MOVMSK, DL, VT, ShiftSrc);
+   }
+ }
+
// Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
// iff pow2splat(c1).
// Use KnownBits to determine if only a single bit is non-zero
define i1 @trunc_v4i32_cmp(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v4i32_cmp:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: sete %al
define i1 @trunc_v16i8_cmp(<16 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v16i8_cmp:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: psllw $7, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT: setne %al
; SSE2-SSSE3-LABEL: trunc_v8i132_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: setne %al
; SSE2-SSSE3-LABEL: trunc_v32i8_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: psllw $7, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT: sete %al