    // TODO convert SrcUndef to KnownUndef.
    break;
  }
-  case X86ISD::KSHIFTL:
-  case X86ISD::KSHIFTR: {
+  case X86ISD::KSHIFTL: {
    SDValue Src = Op.getOperand(0);
    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
    unsigned ShiftAmt = Amt->getZExtValue();
-    bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
-    APInt DemandedSrc =
-        ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
+    if (ShiftAmt == 0)
+      return TLO.CombineTo(Op, Src);
+
+    // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+    // single shift. We can do this if the bottom bits (which are shifted
+    // out) are never demanded.
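+    // For example, with 16 lanes, kshiftl(kshiftr(X, 3), 5) produces the
+    // same value as kshiftl(X, 2) in every lane except the bottom 5.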
+    if (Src.getOpcode() == X86ISD::KSHIFTR) {
+      if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
+        unsigned C1 = Src.getConstantOperandVal(1);
+        unsigned NewOpc = X86ISD::KSHIFTL;
+        int Diff = ShiftAmt - C1;
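+        // If the inner right shift is the larger of the two, the net
+        // effect is a right shift by the difference.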
+        if (Diff < 0) {
+          Diff = -Diff;
+          NewOpc = X86ISD::KSHIFTR;
+        }
+
+        SDLoc dl(Op);
+        SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
+      }
+    }
+
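+    // Only the lanes of Src that shift into demanded lanes of Op matter.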
+    APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;
-    if (ShiftLeft) {
-      KnownUndef = KnownUndef.shl(ShiftAmt);
-      KnownZero = KnownZero.shl(ShiftAmt);
-      KnownZero.setLowBits(ShiftAmt);
-    } else {
-      KnownUndef = KnownUndef.lshr(ShiftAmt);
-      KnownZero = KnownZero.lshr(ShiftAmt);
-      KnownZero.setHighBits(ShiftAmt);
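+    // The bottom ShiftAmt lanes of the result are zeros shifted in by the
+    // left shift.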
+    KnownUndef = KnownUndef.shl(ShiftAmt);
+    KnownZero = KnownZero.shl(ShiftAmt);
+    KnownZero.setLowBits(ShiftAmt);
+    break;
+  }
+  case X86ISD::KSHIFTR: {
+    SDValue Src = Op.getOperand(0);
+    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+    unsigned ShiftAmt = Amt->getZExtValue();
+
+    if (ShiftAmt == 0)
+      return TLO.CombineTo(Op, Src);
+
+    // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+    // single shift. We can do this if the top bits (which are shifted
+    // out) are never demanded.
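+    // For example, with 16 lanes, kshiftr(kshiftl(X, 3), 5) produces the
+    // same value as kshiftr(X, 2) in every lane except the top 5.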
+    if (Src.getOpcode() == X86ISD::KSHIFTL) {
+      if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
+        unsigned C1 = Src.getConstantOperandVal(1);
+        unsigned NewOpc = X86ISD::KSHIFTR;
+        int Diff = ShiftAmt - C1;
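+        // If the inner left shift is the larger of the two, the net
+        // effect is a left shift by the difference.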
+        if (Diff < 0) {
+          Diff = -Diff;
+          NewOpc = X86ISD::KSHIFTL;
+        }
+
+        SDLoc dl(Op);
+        SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
+      }
    }
+
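+    // Only the lanes of Src that shift into demanded lanes of Op matter.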
+    APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
+    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+                                   Depth + 1))
+      return true;
+
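+    // The top ShiftAmt lanes of the result are zeros shifted in by the
+    // right shift.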
+    KnownUndef = KnownUndef.lshr(ShiftAmt);
+    KnownZero = KnownZero.lshr(ShiftAmt);
+    KnownZero.setHighBits(ShiftAmt);
    break;
  }
  case X86ISD::CVTSI2P:
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
-; KNL-NEXT: kshiftlw $15, %k2, %k2
-; KNL-NEXT: kshiftrw $1, %k2, %k2
+; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
-; KNL-NEXT: kshiftlw $15, %k3, %k3
-; KNL-NEXT: kshiftrw $1, %k3, %k3
+; KNL-NEXT: kshiftlw $14, %k3, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
-; KNL-NEXT: kshiftlw $15, %k4, %k4
-; KNL-NEXT: kshiftrw $1, %k4, %k4
+; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $1, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
-; KNL-NEXT: kshiftlw $15, %k4, %k4
-; KNL-NEXT: kshiftrw $1, %k4, %k4
+; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
-; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
-; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
+; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
-; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
-; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
+; AVX512DQNOBW-NEXT: kshiftlw $14, %k3, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
-; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
-; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
+; AVX512DQNOBW-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftlw $1, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
-; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
-; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
+; AVX512DQNOBW-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k0, %k3
; KNL-NEXT: kxorw %k1, %k3, %k1
-; KNL-NEXT: kshiftlw $15, %k1, %k1
-; KNL-NEXT: kshiftrw $9, %k1, %k1
+; KNL-NEXT: kshiftlw $6, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $9, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k0
; SKX-NEXT: kshiftrw $9, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k0, %k3
; SKX-NEXT: kxorb %k1, %k3, %k1
-; SKX-NEXT: kshiftlb $7, %k1, %k1
-; SKX-NEXT: kshiftrb $1, %k1, %k1
+; SKX-NEXT: kshiftlb $6, %k1, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k0
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
+; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
-; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
-; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
+; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
; X86-NEXT: kshiftrw $9, %k1, %k1
; X86-NEXT: kshiftrb $6, %k0, %k3
; X86-NEXT: kxorb %k1, %k3, %k1
-; X86-NEXT: kshiftlb $7, %k1, %k1
-; X86-NEXT: kshiftrb $1, %k1, %k1
+; X86-NEXT: kshiftlb $6, %k1, %k1
; X86-NEXT: kxorb %k1, %k0, %k0
; X86-NEXT: kshiftlb $1, %k0, %k0
; X86-NEXT: kshiftrb $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
-; KNL-NEXT: kshiftlw $15, %k2, %k2
-; KNL-NEXT: kshiftrw $1, %k2, %k2
+; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
-; KNL-NEXT: kshiftlw $15, %k3, %k3
-; KNL-NEXT: kshiftrw $1, %k3, %k3
+; KNL-NEXT: kshiftlw $14, %k3, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
-; KNL-NEXT: kshiftlw $15, %k4, %k4
-; KNL-NEXT: kshiftrw $1, %k4, %k4
+; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $1, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
-; KNL-NEXT: kshiftlw $15, %k4, %k4
-; KNL-NEXT: kshiftrw $1, %k4, %k4
+; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
-; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
-; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
+; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
-; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
-; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
+; AVX512DQ-NEXT: kshiftlw $14, %k3, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
-; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
-; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
+; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
; AVX512DQ-NEXT: kshiftlw $1, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
-; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
-; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
+; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k0
-; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; AVX512-NEXT: kxorw %k0, %k2, %k2
; AVX512-NEXT: kshiftrw $2, %k2, %k3
; AVX512-NEXT: kxorw %k1, %k3, %k1
-; AVX512-NEXT: kshiftlw $15, %k1, %k1
-; AVX512-NEXT: kshiftrw $13, %k1, %k1
+; AVX512-NEXT: kshiftlw $2, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $13, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kxorw %k0, %k2, %k2
; AVX512-NEXT: kshiftrw $2, %k2, %k3
; AVX512-NEXT: kxorw %k1, %k3, %k1
-; AVX512-NEXT: kshiftlw $15, %k1, %k1
-; AVX512-NEXT: kshiftrw $13, %k1, %k1
+; AVX512-NEXT: kshiftlw $2, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $13, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1