From: Chenbing Zheng Date: Mon, 20 Jun 2022 02:55:47 +0000 (+0800) Subject: [InstCombine] add vector support for (A >> C) == (B >> C) --> (A^B) u< (1 << C) X-Git-Tag: upstream/15.0.7~4197 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0eff6c6ba81c4d5f2a4b60fbc0f44c35529065c5;p=platform%2Fupstream%2Fllvm.git [InstCombine] add vector support for (A >> C) == (B >> C) --> (A^B) u< (1 << C) Reviewed By: spatel, RKSimon Differential Revision: https://reviews.llvm.org/D127398 --- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c4bfdcf..afcd0be 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4620,18 +4620,21 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { // (A >> C) == (B >> C) --> (A^B) u< (1 << C) // For lshr and ashr pairs. - if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && - match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || - (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && - match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { - unsigned TypeBits = Cst1->getBitWidth(); - unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); + const APInt *AP1, *AP2; + if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) && + match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) || + (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) && + match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) { + if (AP1 != AP2) + return nullptr; + unsigned TypeBits = AP1->getBitWidth(); + unsigned ShAmt = AP1->getLimitedValue(TypeBits); if (ShAmt < TypeBits && ShAmt != 0) { ICmpInst::Predicate NewPred = Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); - return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal)); + return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal)); } } diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll index 7b56a7b..206fe25 100644 --- a/llvm/test/Transforms/InstCombine/compare-signs.ll +++ b/llvm/test/Transforms/InstCombine/compare-signs.ll @@ -47,12 +47,10 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ret i32 %t3 } -; TODO this should optimize but doesn't due to missing vector support in InstCombiner::foldICmpEquality. define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]] +; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -65,9 +63,8 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { define <2 x i32> @test3vec_undef1(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]] +; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -80,9 +77,8 @@ define <2 x i32> @test3vec_undef1(<2 x i32> %a, <2 x i32> %b) nounwind readnone define <2 x i32> @test3vec_undef2(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_undef2( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]] +; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ;