From: Sanjay Patel Date: Wed, 31 Aug 2016 21:57:21 +0000 (+0000) Subject: [InstCombine] allow icmp (div X, Y), C folds for splat constant vectors X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=541aef4661dc19aa7eff6324756ab5f9124239de;p=platform%2Fupstream%2Fllvm.git [InstCombine] allow icmp (div X, Y), C folds for splat constant vectors Converting all of the overflow ops to APInt looked risky, so I've left that as a TODO. llvm-svn: 280299 --- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 91ea83e..a564771 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -35,11 +35,6 @@ using namespace PatternMatch; // How many times is a select replaced by one of its operands? STATISTIC(NumSel, "Number of select opts"); -// Initialization Routines - -static ConstantInt *getOne(Constant *C) { - return ConstantInt::get(cast(C->getType()), 1); -} static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { return cast(ConstantExpr::getExtractElement(V, Idx)); @@ -2001,33 +1996,28 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, assert(!(DivIsSigned && C2->isAllOnesValue()) && "The overflow computation will fail."); - // FIXME: These checks restrict all folds under here to scalar types. - ConstantInt *RHS = dyn_cast(Cmp.getOperand(1)); - if (!RHS) - return nullptr; - - ConstantInt *DivRHS = dyn_cast(Div->getOperand(1)); - if (!DivRHS) - return nullptr; + // TODO: We could do all of the computations below using APInt. + Constant *CmpRHS = cast(Cmp.getOperand(1)); + Constant *DivRHS = cast(Div->getOperand(1)); - // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C2=C. We solve for X by multiplying C2 (DivRHS) and - // C (CI). By solving for X we can turn this into a range check - // instead of computing a divide. - Constant *Prod = ConstantExpr::getMul(RHS, DivRHS); + // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation of + // form X / C2 = C. We solve for X by multiplying C2 (DivRHS) and C (CmpRHS). + // By solving for X, we can turn this into a range check instead of computing + // a divide. + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); - // Determine if the product overflows by seeing if the product is - // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : - ConstantExpr::getUDiv(Prod, DivRHS)) != RHS; + // Determine if the product overflows by seeing if the product is not equal to + // the divide. Make sure we do the same kind of divide as in the LHS + // instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) + : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; - // Get the ICmp opcode ICmpInst::Predicate Pred = Cmp.getPredicate(); // If the division is known to be exact, then there is no remainder from the // divide, so the covered range size is unit, otherwise it is the divisor. - ConstantInt *RangeSize = Div->isExact() ? getOne(Prod) : DivRHS; + Constant *RangeSize = + Div->isExact() ? ConstantInt::get(Div->getType(), 1) : DivRHS; // Figure out the interval that is being checked. For example, a comparison // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). @@ -2048,7 +2038,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, // to the same result value. HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); } - } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. + } else if (C2->isStrictlyPositive()) { // Divisor is > 0. if (*C == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); @@ -2063,17 +2053,17 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { - ConstantInt *DivNeg =cast(ConstantExpr::getNeg(RangeSize)); + Constant *DivNeg = ConstantExpr::getNeg(RangeSize); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; } } - } else if (DivRHS->isNegative()) { // Divisor is < 0. + } else if (C2->isNegative()) { // Divisor is < 0. if (Div->isExact()) - RangeSize = cast(ConstantExpr::getNeg(RangeSize)); + RangeSize = ConstantExpr::getNeg(RangeSize); if (*C == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) LoBound = AddOne(RangeSize); - HiBound = cast(ConstantExpr::getNeg(RangeSize)); + HiBound = ConstantExpr::getNeg(RangeSize); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN @@ -2108,9 +2098,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); return replaceInstUsesWith( - Cmp, insertRangeTest(X, cast(LoBound)->getValue(), - cast(HiBound)->getValue(), - DivIsSigned, true)); + Cmp, insertRangeTest(X, LoBound->getUniqueInteger(), + HiBound->getUniqueInteger(), DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return replaceInstUsesWith(Cmp, Builder->getTrue()); @@ -2120,10 +2109,10 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); - return replaceInstUsesWith( - Cmp, insertRangeTest(X, cast(LoBound)->getValue(), - cast(HiBound)->getValue(), - DivIsSigned, false)); + return replaceInstUsesWith(Cmp, + insertRangeTest(X, LoBound->getUniqueInteger(), + HiBound->getUniqueInteger(), + DivIsSigned, false)); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. diff --git a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll index 69b7c54..ffcfe26 100644 --- a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll +++ b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll @@ -14,11 +14,10 @@ define i1 @test(i32 %tmp6) { ret i1 %1 } -; FIXME: Vectors should fold the same way. define <2 x i1> @test_vec(<2 x i32> %tmp6) { ; CHECK-LABEL: @test_vec( -; CHECK-NEXT: [[TMP7:%.*]] = sdiv <2 x i32> %tmp6, -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP6_OFF:%.*]] = add <2 x i32> %tmp6, +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[TMP6_OFF]], ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %tmp7 = sdiv <2 x i32> %tmp6, diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index ece332a..a037607 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -72,12 +72,11 @@ define i1 @test7(i32 %A) { ret i1 %C } -; FIXME: Vectors should fold the same way. define <2 x i1> @test7vec(<2 x i32> %A) { ; CHECK-LABEL: @test7vec( -; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B]], -; CHECK-NEXT: ret <2 x i1> [[C]] +; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> %A, +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A_OFF]], +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %B = udiv <2 x i32> %A, %C = icmp eq <2 x i32> %B, @@ -95,11 +94,9 @@ define i1 @test8(i8 %A) { ret i1 %C } -; FIXME: Vectors should fold the same way. define <2 x i1> @test8vec(<2 x i8> %A) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[B:%.*]] = udiv <2 x i8> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[B]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> %A, ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i8> %A, @@ -118,11 +115,9 @@ define i1 @test9(i8 %A) { ret i1 %C } -; FIXME: Vectors should fold the same way. define <2 x i1> @test9vec(<2 x i8> %A) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[B:%.*]] = udiv <2 x i8> %A, -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[B]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> %A, ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i8> %A, diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll index 530d665..f93bc8e 100644 --- a/llvm/test/Transforms/InstCombine/exact.ll +++ b/llvm/test/Transforms/InstCombine/exact.ll @@ -159,8 +159,8 @@ define i1 @udiv_icmp1(i64 %X) { define <2 x i1> @udiv_icmp1_vec(<2 x i64> %X) { ; CHECK-LABEL: @udiv_icmp1_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i64> %X, -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> %X, zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = udiv exact <2 x i64> %X, %B = icmp ne <2 x i64> %A, zeroinitializer @@ -177,10 +177,11 @@ define i1 @udiv_icmp2(i64 %X) { ret i1 %B } +; FIXME: missing vector fold for ult 1 -> eq 0 define <2 x i1> @udiv_icmp2_vec(<2 x i64> %X) { ; CHECK-LABEL: @udiv_icmp2_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i64> %X, -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = udiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, zeroinitializer @@ -197,12 +198,11 @@ define i1 @sdiv_icmp1(i64 %X) { ret i1 %B } -; FIXME: Vectors should fold too. +; FIXME: missing vector fold for ult 1 -> eq 0 define <2 x i1> @sdiv_icmp1_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp1_vec( -; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = sdiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, zeroinitializer @@ -219,12 +219,10 @@ define i1 @sdiv_icmp2(i64 %X) { ret i1 %B } -; FIXME: Vectors should fold too. define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp2_vec( -; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = sdiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, @@ -241,12 +239,10 @@ define i1 @sdiv_icmp3(i64 %X) { ret i1 %B } -; FIXME: Vectors should fold too. define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp3_vec( -; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = sdiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, @@ -263,12 +259,11 @@ define i1 @sdiv_icmp4(i64 %X) { ret i1 %B } -; FIXME: Vectors should fold too. +; FIXME: missing vector fold for ult 1 -> eq 0 define <2 x i1> @sdiv_icmp4_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp4_vec( -; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = sdiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, zeroinitializer @@ -285,12 +280,10 @@ define i1 @sdiv_icmp5(i64 %X) { ret i1 %B } -; FIXME: Vectors should fold too. define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp5_vec( -; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = sdiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, @@ -307,12 +300,10 @@ define i1 @sdiv_icmp6(i64 %X) { ret i1 %B } -; FIXME: Vectors should fold too. define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp6_vec( -; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, +; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %A = sdiv exact <2 x i64> %X, %B = icmp eq <2 x i64> %A, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 93e0cdf..9ad8250 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -282,11 +282,9 @@ define i1 @test23(i32 %x) { ret i1 %i4 } -; FIXME: Vectors should fold too. define <2 x i1> @test23vec(<2 x i32> %x) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[I3:%.*]] = sdiv <2 x i32> %x, -; CHECK-NEXT: [[I4:%.*]] = icmp eq <2 x i32> [[I3]], +; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> %x, ; CHECK-NEXT: ret <2 x i1> [[I4]] ; %i3 = sdiv <2 x i32> %x,