SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
- if (ShAmt &&
- (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
+ if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
- EVT ShiftVT = Op1.getValueType();
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ EVT ShiftVT = Op1.getValueType();
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
- if (ShAmt &&
- (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
SDValue Op2 = Op.getOperand(2);
bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op2)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
;
; AVX2ORLATER-LABEL: non_splat_minus_one_divisor_2:
; AVX2ORLATER: # %bb.0:
-; AVX2ORLATER-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX2ORLATER-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; AVX2ORLATER-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; AVX2ORLATER-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX2ORLATER-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
;
; XOP-LABEL: non_splat_minus_one_divisor_2:
; XOP: # %bb.0:
-; XOP-NEXT: vpsrad $31, %xmm0, %xmm1
-; XOP-NEXT: vpshld {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpshad {{.*}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]