From b0761a0c1ba8ec77d3704d2450d481bc25e60a9d Mon Sep 17 00:00:00 2001
From: David Majnemer
Date: Wed, 21 Dec 2016 19:21:59 +0000
Subject: [PATCH] Revert "[InstCombine] New opportunities for FoldAndOfICmp
 and FoldXorOfICmp"

This reverts commit r289813, it caused PR31449.

llvm-svn: 290266
---
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp |  99 +---------
 .../Transforms/InstCombine/InstCombineInternal.h   |   1 -
 llvm/test/Transforms/InstCombine/and-or-icmps.ll   | 204 ---------------------
 3 files changed, 2 insertions(+), 302 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 52b611f..a59b43d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -733,44 +733,6 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
   return nullptr;
 }
 
-namespace {
-
-struct BitGroupCheck {
-  // Does the Cmp check that the bits in the group are nonzero?
-  bool CheckIfSet {false};
-  // The mask that identifies the bitgroup in question.
-  const APInt *Mask {nullptr};
-};
-}
-/// For an ICMP where the RHS is zero, we want to check if the ICMP is equivalent to
-/// comparing a group of bits in an integer value against zero.
-BitGroupCheck isAnyBitSet(Value *LHS, ICmpInst::Predicate CC) {
-
-  BitGroupCheck BGC;
-  auto *Inst = dyn_cast<Instruction>(LHS);
-
-  if (!Inst || Inst->getOpcode() != Instruction::And)
-    return BGC;
-
-  // TODO Currently this does not work for vectors.
-  ConstantInt *Mask;
-  if (!match(LHS, m_And(m_Value(), m_ConstantInt(Mask))))
-    return BGC;
-  // At this point we know that the LHS of the ICMP is an "and" of a value with a
-  // constant. We also know that the RHS is zero. That means we are checking
-  // whether a certain group of bits in a given integer value are all zero or at
-  // least one of them is set to one.
-  if (CC == ICmpInst::ICMP_EQ)
-    BGC.CheckIfSet = false;
-  else if (CC == ICmpInst::ICMP_NE)
-    BGC.CheckIfSet = true;
-  else
-    return BGC;
-
-  BGC.Mask = &Mask->getValue();
-  return BGC;
-}
-
 /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp.
 /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
 /// If \p Inverted is true then the check is for the inverted range, e.g.
@@ -827,32 +789,6 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
   return Builder->CreateICmp(NewPred, Input, RangeEnd);
 }
 
-Value *InstCombiner::FoldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
-
-  Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
-  // TODO The lines below do not work for vectors. ConstantInt is scalar.
-  auto *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
-  auto *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
-  if (!LHSCst || !RHSCst)
-    return nullptr;
-  ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
-
-  // E.g. (icmp ne %x, 0) ^ (icmp ne %y, 0) => icmp ne %x, %y if the following
-  // conditions hold:
-  // 1- (%x = and %a, %mask) and (%y = and %b, %mask)
-  // 2- %mask is a power of 2.
-  if (RHSCst->isZero() && LHSCst == RHSCst) {
-
-    BitGroupCheck BGC1 = isAnyBitSet(Val, LHSCC);
-    BitGroupCheck BGC2 = isAnyBitSet(Val2, RHSCC);
-    if (BGC1.Mask && BGC2.Mask && BGC1.CheckIfSet == BGC2.CheckIfSet &&
-        *BGC1.Mask == *BGC2.Mask && BGC1.Mask->isPowerOf2()) {
-      return Builder->CreateICmp(ICmpInst::ICMP_NE, Val2, Val);
-    }
-  }
-  return nullptr;
-}
-
 /// Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -935,29 +871,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     }
   }
 
-  // E.g. (icmp eq %x, 0) & (icmp ne %y, 0) => icmp ult %x, %y if the following
-  // conditions hold:
-  // 1- (%x = and %a, %mask1) and (%y = and %b, %mask2)
-  // 2- Let %t be the smallest power of 2 where %mask1 & %t != 0. Then for any
-  //    %s that is a power of 2 and %s & %mask2 != 0, we must have %s <= %t.
-  //    For example, if %mask1 = 24 and %mask2 = 16, setting %s = 16 and %t = 8
-  //    violates condition (2) above, so this optimization cannot be applied.
-  if (RHSCst->isZero() && LHSCst == RHSCst) {
-    BitGroupCheck BGC1 = isAnyBitSet(Val, LHSCC);
-    BitGroupCheck BGC2 = isAnyBitSet(Val2, RHSCC);
-
-    if (BGC1.Mask && BGC2.Mask && (BGC1.CheckIfSet != BGC2.CheckIfSet)) {
-      if (!BGC1.CheckIfSet &&
-          BGC1.Mask->countTrailingZeros() >=
-              BGC2.Mask->getBitWidth() - BGC2.Mask->countLeadingZeros() - 1)
-        return Builder->CreateICmp(ICmpInst::ICMP_ULT, Val, Val2);
-      else if (!BGC2.CheckIfSet &&
-               BGC2.Mask->countTrailingZeros() >=
-                   BGC1.Mask->getBitWidth() - BGC1.Mask->countLeadingZeros() - 1)
-        return Builder->CreateICmp(ICmpInst::ICMP_ULT, Val2, Val);
-    }
-  }
-
   // From here on, we only handle:
   // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
   if (Val != Val2) return nullptr;
@@ -2808,16 +2721,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       match(Op1, m_Not(m_Specific(A))))
     return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
 
+  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) {
-
-      // E.g. if we have xor (icmp eq %A, 0), (icmp eq %B, 0)
-      // and we know both A and B are either 8 (power of 2) or 0
-      // we can simplify to (icmp ne A, B).
-      if (Value *Res = FoldXorOfICmps(LHS, RHS))
-        return replaceInstUsesWith(I, Res);
-
-      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
       if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
         if (LHS->getOperand(0) == RHS->getOperand(1) &&
             LHS->getOperand(1) == RHS->getOperand(0))
@@ -2832,7 +2738,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                                                      Builder));
       }
     }
-  }
 
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index c30598a..d13b94c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -243,7 +243,6 @@ public:
   Instruction *visitFDiv(BinaryOperator &I);
   Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
   Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
-  Value *FoldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
   Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *visitAnd(BinaryOperator &I);
   Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index daa8129..3903472 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -51,207 +51,3 @@ define i1 @test(i32 %tmp1030) {
   ret i1 %tmp1042
 }
 
-; The last three instructions (ignoring ret) are equivalent to %val2 < %val1.
-define i1 @test2(i32 %a, i32 %b) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; The last three instructions (ignoring ret) are equivalent to %val2 < %val1.
-define i1 @test3(i32 %a, i32 %b) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.a, %cmp.b
-  ret i1 %and
-}
-
-; The last three instructions (ignoring ret) are equivalent to %val2 < %val1.
-define i1 @test4(i32 %a, i32 %b) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 15
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 15
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.a, %cmp.b
-  ret i1 %and
-}
-
-; The last three instructions (ignoring ret) are equivalent to %val2 < %val1.
-define i1 @test5(i32 %a, i32 %b) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 15
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 15
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; An optimization like those of the previous tests is not possible here.
-; For example, if %b = 8 and %a = 16, we have %val2 = 8 and
-; %val1 = 16, so %val2 < %val1 but %and == 0.
-define i1 @test6(i32 %a, i32 %b) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 16
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i32 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp eq i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i32 %a, 16
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; %a and %b have different widths, so the optimization is not possible.
-define i1 @test7(i16 %a, i32 %b) {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i16 %a, 15
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i16 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp eq i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i16 %a, 15
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i16 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; The last three instructions can be simplified to checking %val1 != %val2.
-; After that, other transformations change the code further.
-define i1 @test8(i32 %a, i32 %b) {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[TMP3]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp ne i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Operands of the and instructions must be identical powers of 2; otherwise
-; a simplification like that of the previous test case is not possible.
-define i1 @test9(i32 %a, i32 %b) {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 24
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i32 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp ne i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = xor i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i32 %a, 24
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp ne i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; The last three instructions are equivalent to checking %val1 != %val2.
-; After making this change, other transformations further change the code.
-define i1 @test10(i32 %a, i32 %b) {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[TMP3]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp eq i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Cannot be simplified because %a and %b have different widths.
-define i1 @test11(i16 %a, i32 %b) {
-; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i16 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i16 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp ne i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = xor i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i16 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i16 %val1, 0
-  %cmp.b = icmp ne i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Similar to @test8 except that the icmp instructions use ugt here instead of ne.
-define i1 @test12(i32 %a, i32 %b) {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[TMP3]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ugt i32 %val1, 0
-  %cmp.b = icmp ugt i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Similar to @test3 except that the first icmp uses ugt instead of ne.
-define i1 @test13(i32 %a, i32 %b) {
-; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ugt i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.a, %cmp.b
-  ret i1 %and
-}
-- 
2.7.4
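
The patch above removes, among other things, an XOR-of-icmps fold that is only valid when both compares mask the inputs with the same power-of-two constant. Below is a minimal standalone C++ sketch, not part of the patch, that brute-forces the equivalence behind @test8 and the non-equivalence behind @test9; the 8-bit width, the helper name foldHoldsForMask, and the mask values 8 and 24 are assumptions chosen for illustration.

```cpp
// Brute-force check of the XOR fold that the reverted commit implemented:
//   (icmp ne (and %a, M), 0) ^ (icmp ne (and %b, M), 0)
//     -->  icmp ne (and %a, M), (and %b, M)
// Illustrative sketch only, not LLVM code: the i8 width and the two mask
// values below are arbitrary choices made for the demonstration.
#include <cstdint>
#include <cstdio>

static bool foldHoldsForMask(uint8_t Mask) {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = A & Mask, Y = B & Mask;
      bool Original = (X != 0) ^ (Y != 0); // xor of the two icmps
      bool Folded = (X != Y);              // proposed replacement
      if (Original != Folded)
        return false;
    }
  }
  return true;
}

int main() {
  // Mask 8 is a power of two, so the fold is expected to hold (as in @test8).
  // Mask 24 is not: %a = 8, %b = 16 gives xor = false but (8 != 16) = true,
  // which is why @test9 must not be folded.
  printf("mask  8: %s\n", foldHoldsForMask(8) ? "fold holds" : "fold breaks");
  printf("mask 24: %s\n", foldHoldsForMask(24) ? "fold holds" : "fold breaks");
  return 0;
}
```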