From 6dc87004fab4b34076e6f0f8505316e40b9ea2fb Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 13 Sep 2018 20:33:12 +0000 Subject: [PATCH] [InstCombine] Inefficient pattern for high-bits checking 2 (PR38708) Summary: It is sometimes important to check that some newly-computed value is non-negative and only n bits wide (where n is a variable.) There are many ways to check that: https://godbolt.org/z/o4RB8D The last variant seems best? (I'm sure there are some other variations i haven't thought of..) More complicated, canonical pattern: https://rise4fun.com/Alive/uhA We do need to have two `switch()`'es like this, to not mismatch the swappable predicates. https://bugs.llvm.org/show_bug.cgi?id=38708 Reviewers: spatel, craig.topper, RKSimon Reviewed By: spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D52001 llvm-svn: 342173 --- .../Transforms/InstCombine/InstCombineCompares.cpp | 55 ++++++++++++++-------- ...and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll | 34 ++++++------- ...and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll | 34 ++++++------- 3 files changed, 64 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d0673d9..a295f65 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4624,34 +4624,51 @@ static Instruction *canonicalizeICmpBool(ICmpInst &I, } // Transform pattern like: -// (1 << Y) u<= X -// (1 << Y) u> X +// (1 << Y) u<= X or ~(-1 << Y) u< X +// (1 << Y) u> X or ~(-1 << Y) u>= X // Into: // (X l>> Y) != 0 // (X l>> Y) == 0 static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp, InstCombiner::BuilderTy &Builder) { - ICmpInst::Predicate Pred; + ICmpInst::Predicate Pred, NewPred; Value *X, *Y; - if (!match(&Cmp, - m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) - return nullptr; + if (match(&Cmp, + m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) { + // We want X to be the icmp's second operand, so swap predicate if it isn't. + if (Cmp.getOperand(0) == X) + Pred = Cmp.getSwappedPredicate(); - // We want X to be the icmp's second operand, so swap predicate if it is not. - if (Cmp.getOperand(0) == X) - Pred = Cmp.getSwappedPredicate(); + switch (Pred) { + case ICmpInst::ICMP_ULE: + NewPred = ICmpInst::ICMP_NE; + break; + case ICmpInst::ICMP_UGT: + NewPred = ICmpInst::ICMP_EQ; + break; + default: + return nullptr; + } + } else if (match(&Cmp, + m_c_ICmp(Pred, + m_OneUse(m_Not(m_Shl(m_AllOnes(), m_Value(Y)))), + m_Value(X)))) { + // We want X to be the icmp's second operand, so swap predicate if it isn't. + if (Cmp.getOperand(0) == X) + Pred = Cmp.getSwappedPredicate(); - ICmpInst::Predicate NewPred; - switch (Pred) { - case ICmpInst::ICMP_ULE: - NewPred = ICmpInst::ICMP_NE; - break; - case ICmpInst::ICMP_UGT: - NewPred = ICmpInst::ICMP_EQ; - break; - default: + switch (Pred) { + case ICmpInst::ICMP_ULT: + NewPred = ICmpInst::ICMP_NE; + break; + case ICmpInst::ICMP_UGE: + NewPred = ICmpInst::ICMP_EQ; + break; + default: + return nullptr; + } + } else return nullptr; - } Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits"); Constant *Zero = Constant::getNullValue(NewX->getType()); diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index a48198b..a326846 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -14,9 +14,8 @@ define i1 @p0(i8 %val, i8 %bits) { ; CHECK-LABEL: @p0( -; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %t0 = shl i8 -1, %bits @@ -31,9 +30,8 @@ define i1 @p0(i8 %val, i8 %bits) { define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = shl <2 x i8> , %bits @@ -44,9 +42,8 @@ define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <3 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> , %bits @@ -57,9 +54,8 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <3 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> , %bits @@ -70,9 +66,8 @@ define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef2( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <3 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> , %bits @@ -89,10 +84,9 @@ declare i8 @gen8() define i1 @c0(i8 %bits) { ; CHECK-LABEL: @c0( -; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 ; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8() -; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[VAL]], [[T1]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %t0 = shl i8 -1, %bits @@ -128,8 +122,8 @@ define i1 @oneuse0(i8 %val, i8 %bits) { ; CHECK-LABEL: @oneuse0( ; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %t0 = shl i8 -1, %bits diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index d81d69e..f97d243 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -14,9 +14,8 @@ define i1 @p0(i8 %val, i8 %bits) { ; CHECK-LABEL: @p0( -; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %t0 = shl i8 -1, %bits @@ -31,9 +30,8 @@ define i1 @p0(i8 %val, i8 %bits) { define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = shl <2 x i8> , %bits @@ -44,9 +42,8 @@ define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <3 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> , %bits @@ -57,9 +54,8 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <3 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> , %bits @@ -70,9 +66,8 @@ define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef2( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <3 x i8> [[T0]], -; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> , %bits @@ -89,10 +84,9 @@ declare i8 @gen8() define i1 @c0(i8 %bits) { ; CHECK-LABEL: @c0( -; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 ; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8() -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[VAL]], [[T1]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %t0 = shl i8 -1, %bits @@ -128,8 +122,8 @@ define i1 @oneuse0(i8 %val, i8 %bits) { ; CHECK-LABEL: @oneuse0( ; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]] +; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %t0 = shl i8 -1, %bits -- 2.7.4