From d398d4a39ecef00c718e3a2cada0582fc35f46db Mon Sep 17 00:00:00 2001 From: Sanjay Patel <spatel@rotateright.com> Date: Wed, 24 Aug 2016 22:22:06 +0000 Subject: [PATCH] [InstCombine] use m_APInt to allow icmp eq/ne (shr X, C2), C folds for splat constant vectors llvm-svn: 279677 --- .../Transforms/InstCombine/InstCombineCompares.cpp | 35 ++++++++++++---------- llvm/test/Transforms/InstCombine/apint-shift.ll | 13 ++++---- llvm/test/Transforms/InstCombine/cast.ll | 4 +-- llvm/test/Transforms/InstCombine/icmp.ll | 20 +++++++++++++ llvm/test/Transforms/InstCombine/shift.ll | 17 ++++------- 5 files changed, 51 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 0d23ea0..e1b2350 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1988,17 +1988,14 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0) return new ICmpInst(Pred, X, Cmp.getOperand(1)); - // FIXME: This check restricts all folds under here to scalar types. - // Handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast<ConstantInt>(Shr->getOperand(1)); - if (!ShAmt) + const APInt *ShiftAmt; + if (!match(Shr->getOperand(1), m_APInt(ShiftAmt))) return nullptr; - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = C->getBitWidth(); - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + // Check that the shift amount is in range. If not, don't perform undefined + // shifts. When the shift is visited it will be simplified. 
+ unsigned TypeBits = C->getBitWidth(); + unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) return nullptr; @@ -2015,6 +2012,11 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) return nullptr; + // FIXME: This check restricts this fold to scalar types. + ConstantInt *ShAmt = dyn_cast<ConstantInt>(Shr->getOperand(1)); + if (!ShAmt) + return nullptr; + // Revisit the shift (to delete it). Worklist.Add(Shr); @@ -2041,6 +2043,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, return Res; } + // Handle equality comparisons of shift-by-constant. + // If the comparison constant changes with the shift, the comparison cannot // succeed (bits of the comparison constant cannot match the shifted value). // This should be known by InstSimplify and already be folded to true/false. @@ -2051,15 +2055,14 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // Check if the bits shifted out are known to be zero. If so, we can compare // against the unshifted value: // (X & 4) >> 1 == 2 --> (X & 4) == 4. - ConstantInt *ShiftedCmpRHS = Builder->getInt(*C << ShAmtVal); - if (Shr->hasOneUse() && Shr->isExact()) - return new ICmpInst(Pred, X, ShiftedCmpRHS); - + Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), *C << ShAmtVal); if (Shr->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = Builder->getInt(Val); + if (Shr->isExact()) + return new ICmpInst(Pred, X, ShiftedCmpRHS); + // Otherwise strength reduce the shift into an 'and'. 
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(Shr->getType(), Val); Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask"); return new ICmpInst(Pred, And, ShiftedCmpRHS); } diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll index 01d83f3..3e1699a 100644 --- a/llvm/test/Transforms/InstCombine/apint-shift.ll +++ b/llvm/test/Transforms/InstCombine/apint-shift.ll @@ -230,11 +230,10 @@ define i1 @test17(i106 %A) { ret i1 %C } -; FIXME: Vectors should fold too. define <2 x i1> @test17vec(<2 x i106> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i106> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i106> %A, +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = lshr <2 x i106> %A, @@ -261,11 +260,9 @@ define i1 @test19(i37 %A) { ret i1 %C } -; FIXME: Vectors should fold too. define <2 x i1> @test19vec(<2 x i37> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[B:%.*]] = ashr <2 x i37> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i37> [[B]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i37> %A, ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i37> %A, @@ -286,8 +283,8 @@ define i1 @test19a(i39 %A) { ; FIXME: Vectors should fold too. 
define <2 x i1> @test19a_vec(<2 x i39> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[B:%.*]] = ashr <2 x i39> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i39> [[B]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i39> %A, +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i39> [[B_MASK]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i39> %A, diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 9837ca9e..b4f14e1 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -414,11 +414,9 @@ define i1 @test36(i32 %a) { ret i1 %d } -; FIXME: The trunc is removed, but the icmp+lshr fold is missing. define <2 x i1> @test36vec(<2 x i32> %a) { ; CHECK-LABEL: @test36vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> %a, -; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[B]], zeroinitializer +; CHECK-NEXT: [[D:%.*]] = icmp sgt <2 x i32> %a, ; CHECK-NEXT: ret <2 x i1> [[D]] ; %b = lshr <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 780529c..a8d55b1 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -518,6 +518,26 @@ define <2 x i1> @test40vec(<2 x i32> %X, <2 x i32> %Y) { ret <2 x i1> %B } +define i1 @shr_exact(i132 %x) { +; CHECK-LABEL: @shr_exact( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i132 %x, 32 +; CHECK-NEXT: ret i1 [[CMP]] +; + %sh = ashr exact i132 %x, 4 + %cmp = icmp eq i132 %sh, 2 + ret i1 %cmp +} + +define <2 x i1> @shr_exact_vec(<2 x i132> %x) { +; CHECK-LABEL: @shr_exact_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i132> %x, +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %sh = lshr exact <2 x i132> %x, + %cmp = icmp ne <2 x i132> %sh, + ret <2 x i1> %cmp +} + ; PR9343 #3 define i1 @test41(i32 %X, i32 %Y) { ; CHECK-LABEL: @test41( diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 537e5cd..a91038f 100644 --- 
a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -320,11 +320,10 @@ define i1 @test17(i32 %A) { ret i1 %C } -; FIXME: Vectors should fold the same way. define <2 x i1> @test17vec(<2 x i32> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> %A, +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = lshr <2 x i32> %A, @@ -353,11 +352,9 @@ define i1 @test19(i32 %A) { ret i1 %C } -; FIXME: Vectors should fold the same way. define <2 x i1> @test19vec(<2 x i32> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[B:%.*]] = ashr <2 x i32> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> %A, ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -379,8 +376,8 @@ define i1 @test19a(i32 %A) { ; FIXME: Vectors should fold the same way. define <2 x i1> @test19a_vec(<2 x i32> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[B:%.*]] = ashr <2 x i32> %A, -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> %A, +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -506,11 +503,9 @@ define i1 @test28(i8 %x) { ret i1 %cmp } -; FIXME: Vectors should fold the same way. define <2 x i1> @test28vec(<2 x i8> %x) { ; CHECK-LABEL: @test28vec( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[SHR]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shr = lshr <2 x i8> %x, -- 2.7.4