From af91d1f81e8e407c5bad925067981fc6f770efa1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 15 Sep 2016 21:35:30 +0000 Subject: [PATCH] [InstCombine] allow icmp (shr/shl) folds for vectors These 2 helper functions were already using APInt internally, so just change the API and caller to allow folds for splats. The scalar regression tests look quite thorough, so I just added a couple of tests to prove that vectors are handled too. These folds should be grouped with the other cmp+shift folds though. That can be an NFC follow-up. llvm-svn: 281663 --- .../Transforms/InstCombine/InstCombineCompares.cpp | 41 ++++++++++------------ .../Transforms/InstCombine/InstCombineInternal.h | 4 +-- llvm/test/Transforms/InstCombine/icmp-shr.ll | 10 ++++++ llvm/test/Transforms/InstCombine/icmp.ll | 10 ++++++ 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9604448a..c86486c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1177,8 +1177,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI, /// (icmp eq/ne A, Log2(const2/const1)) -> /// (icmp eq/ne A, Log2(const2) - Log2(const1)). Instruction *InstCombiner::foldICmpCstShrConst(ICmpInst &I, Value *Op, Value *A, - ConstantInt *CI1, - ConstantInt *CI2) { + const APInt &AP1, + const APInt &AP2) { assert(I.isEquality() && "Cannot fold icmp gt/lt"); auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { @@ -1187,9 +1187,6 @@ Instruction *InstCombiner::foldICmpCstShrConst(ICmpInst &I, Value *Op, Value *A, return new ICmpInst(Pred, LHS, RHS); }; - const APInt &AP1 = CI1->getValue(); - const APInt &AP2 = CI2->getValue(); - // Don't bother doing any work for cases which InstSimplify handles. 
if (AP2 == 0) return nullptr; @@ -1238,8 +1235,8 @@ Instruction *InstCombiner::foldICmpCstShrConst(ICmpInst &I, Value *Op, Value *A, /// Handle "(icmp eq/ne (shl const2, A), const1)" -> /// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)). Instruction *InstCombiner::foldICmpCstShlConst(ICmpInst &I, Value *Op, Value *A, - ConstantInt *CI1, - ConstantInt *CI2) { + const APInt &AP1, + const APInt &AP2) { assert(I.isEquality() && "Cannot fold icmp gt/lt"); auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { @@ -1248,9 +1245,6 @@ Instruction *InstCombiner::foldICmpCstShlConst(ICmpInst &I, Value *Op, Value *A, return new ICmpInst(Pred, LHS, RHS); }; - const APInt &AP1 = CI1->getValue(); - const APInt &AP2 = CI2->getValue(); - // Don't bother doing any work for cases which InstSimplify handles. if (AP2 == 0) return nullptr; @@ -1258,8 +1252,9 @@ Instruction *InstCombiner::foldICmpCstShlConst(ICmpInst &I, Value *Op, Value *A, unsigned AP2TrailingZeros = AP2.countTrailingZeros(); if (!AP1 && AP2TrailingZeros != 0) - return getICmp(I.ICMP_UGE, A, - ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros)); + return getICmp( + I.ICMP_UGE, A, + ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros)); if (AP1 == AP2) return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); @@ -1408,26 +1403,26 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { } } - // FIXME: Use m_APInt to allow folds for splat constants. 
- ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1)); - if (!CI) - return nullptr; - if (Cmp.isEquality()) { - ConstantInt *CI2; - if (match(X, m_AShr(m_ConstantInt(CI2), m_Value(A))) || - match(X, m_LShr(m_ConstantInt(CI2), m_Value(A)))) { + const APInt *C2; + if (match(X, m_AShr(m_APInt(C2), m_Value(A))) || + match(X, m_LShr(m_APInt(C2), m_Value(A)))) { // (icmp eq/ne (ashr/lshr const2, A), const1) - if (Instruction *Inst = foldICmpCstShrConst(Cmp, X, A, CI, CI2)) + if (Instruction *Inst = foldICmpCstShrConst(Cmp, X, A, *C, *C2)) return Inst; } - if (match(X, m_Shl(m_ConstantInt(CI2), m_Value(A)))) { + if (match(X, m_Shl(m_APInt(C2), m_Value(A)))) { // (icmp eq/ne (shl const2, A), const1) - if (Instruction *Inst = foldICmpCstShlConst(Cmp, X, A, CI, CI2)) + if (Instruction *Inst = foldICmpCstShlConst(Cmp, X, A, *C, *C2)) return Inst; } } + // FIXME: Use m_APInt to allow folds for splat constants. + ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1)); + if (!CI) + return nullptr; + // Canonicalize icmp instructions based on dominating conditions. 
BasicBlock *Parent = Cmp.getParent(); BasicBlock *Dom = Parent->getSinglePredecessor(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 5b256cd..d214939 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -549,9 +549,9 @@ private: Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, Constant *RHSC); Instruction *foldICmpCstShrConst(ICmpInst &I, Value *Op, Value *A, - ConstantInt *CI1, ConstantInt *CI2); + const APInt &C1, const APInt &C2); Instruction *foldICmpCstShlConst(ICmpInst &I, Value *Op, Value *A, - ConstantInt *CI1, ConstantInt *CI2); + const APInt &C1, const APInt &C2); Instruction *foldICmpAddOpConst(Instruction &ICI, Value *X, ConstantInt *CI, ICmpInst::Predicate Pred); Instruction *foldICmpWithCastAndCast(ICmpInst &ICI); diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index 0b0ef98..af608f3 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -13,6 +13,16 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) { ret i1 %cmp } +define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { +; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %a, +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %shr = lshr <2 x i8> , %a + %cmp = icmp eq <2 x i8> %shr, zeroinitializer + ret <2 x i1> %cmp +} + define i1 @ashr_eq_msb_low_second_zero(i8 %a) { ; CHECK-LABEL: @ashr_eq_msb_low_second_zero( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %a, 6 diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index a2b5707..da5913e 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2194,6 +2194,16 @@ define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) { ret i1 %cmp } +define <2 x i1> 
@shl_ap1_zero_ap2_non_zero_2_vec(<2 x i32> %a) { +; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> %a, +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %shl = shl <2 x i32> , %a + %cmp = icmp eq <2 x i32> %shl, zeroinitializer + ret <2 x i1> %cmp +} + define i1 @shl_ap1_zero_ap2_non_zero_4(i32 %a) { ; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4( ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %a, 30 -- 2.7.4