From 7091a743b41d531ffcce207cca2439b3567271a2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 7 Aug 2017 18:10:39 +0000
Subject: [PATCH] [InstCombine] Support (X | C1) & C2 --> (X & C2^(C1&C2)) |
 (C1&C2) for vector splats

Note the original code I deleted incorrectly listed this as
(X | C1) & C2 --> (X & C2^(C1&C2)) | C1, which is only valid if C1 is a
subset of C2. This relied on SimplifyDemandedBits to remove any extra
bits from C1 before we got to that code.

My new implementation avoids relying on that behavior so that it can be
naively verified with alive.

Differential Revision: https://reviews.llvm.org/D36384

llvm-svn: 310272
---
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 31 +++++++++++-----------
 llvm/test/Transforms/InstCombine/or.ll             | 31 ++++++++++++++++++++++
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1699a0c..db8e6ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -126,21 +126,6 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
 
   switch (Op->getOpcode()) {
   default: break;
-  case Instruction::Or:
-    if (Op->hasOneUse()){
-      ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
-      if (TogetherCI && !TogetherCI->isZero()){
-        // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
-        // NOTE: This reduces the number of bits set in the & mask, which
-        // can expose opportunities for store narrowing.
-        Together = ConstantExpr::getXor(AndRHS, Together);
-        Value *And = Builder.CreateAnd(X, Together);
-        And->takeName(Op);
-        return BinaryOperator::CreateOr(And, OpRHS);
-      }
-    }
-
-    break;
   case Instruction::Add:
     if (Op->hasOneUse()) {
       // Adding a one to a single bit bit-field should be turned into an XOR
@@ -1223,6 +1208,22 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       return BinaryOperator::CreateXor(And, NewC);
     }
 
+    const APInt *OrC;
+    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_APInt(OrC))))) {
+      // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2)
+      // NOTE: This reduces the number of bits set in the & mask, which
+      // can expose opportunities for store narrowing for scalars.
+      // NOTE: SimplifyDemandedBits should have already removed bits from C1
+      // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
+      // above, but this feels safer.
+      APInt Together = *C & *OrC;
+      Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(),
+                                                         Together ^ *C));
+      And->takeName(Op0);
+      return BinaryOperator::CreateOr(And, ConstantInt::get(I.getType(),
+                                                            Together));
+    }
+
     // If the mask is only needed on one incoming arm, push the 'and' op up.
     if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
         match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index fb56449..6ae52d4 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -268,6 +268,21 @@ define i32 @test30(i32 %A) {
   ret i32 %E
 }
 
+define <2 x i32> @test30vec(<2 x i32> %A) {
+; CHECK-LABEL: @test30vec(
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i32> [[A:%.*]],
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A]],
+; CHECK-NEXT:    [[D:%.*]] = or <2 x i32> [[B]],
+; CHECK-NEXT:    [[E:%.*]] = or <2 x i32> [[D]], [[C]]
+; CHECK-NEXT:    ret <2 x i32> [[E]]
+;
+  %B = or <2 x i32> %A,
+  %C = and <2 x i32> %A,
+  %D = and <2 x i32> %B,
+  %E = or <2 x i32> %D, %C
+  ret <2 x i32> %E
+}
+
 ; PR4216
 define i64 @test31(i64 %A) {
 ; CHECK-LABEL: @test31(
@@ -285,6 +300,22 @@ define i64 @test31(i64 %A) {
   ret i64 %F
 }
 
+define <2 x i64> @test31vec(<2 x i64> %A) {
+; CHECK-LABEL: @test31vec(
+; CHECK-NEXT:    [[E:%.*]] = and <2 x i64> [[A:%.*]],
+; CHECK-NEXT:    [[F:%.*]] = or <2 x i64> [[E]],
+; CHECK-NEXT:    ret <2 x i64> [[F]]
+;
+  %B = or <2 x i64> %A,
+  %D = and <2 x i64> %B,
+
+  %C = or <2 x i64> %A,
+  %E = and <2 x i64> %C,
+
+  %F = or <2 x i64> %D, %E
+  ret <2 x i64> %F
+}
+
 ; codegen is mature enough to handle vector selects.
 define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191) {
 ; CHECK-LABEL: @test32(
-- 
2.7.4
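
For readers who want a quick empirical check of the rewrite without running Alive, here is a minimal standalone sketch (not part of the patch, and not LLVM code) that brute-forces the identity (X | C1) & C2 == (X & (C2 ^ (C1 & C2))) | (C1 & C2) over pseudo-random 32-bit values. The trial count and RNG seed are arbitrary choices.

// Standalone sanity check for the rewrite performed by the patch:
//   (X | C1) & C2  -->  (X & (C2 ^ (C1 & C2))) | (C1 & C2)
// The older form that the patch removes ORs in C1 instead of (C1 & C2),
// which only holds when C1 is a subset of C2; that case is checked too.
// Trial count and seed are arbitrary; this is not part of the LLVM change.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <random>

int main() {
  std::mt19937 Gen(12345); // fixed seed for reproducibility
  for (int I = 0; I < 1000000; ++I) {
    uint32_t X = Gen(), C1 = Gen(), C2 = Gen();
    uint32_t Original = (X | C1) & C2;
    uint32_t Together = C1 & C2; // bits of C1 that survive the mask C2
    uint32_t Rewritten = (X & (C2 ^ Together)) | Together;
    assert(Original == Rewritten && "rewrite must be bit-identical");
    // The old form is only equivalent when C1 has no bits outside C2.
    if ((C1 & ~C2) == 0)
      assert(Original == ((X & (C2 ^ Together)) | C1));
  }
  std::printf("all trials passed\n");
  return 0;
}

The check mirrors what the new comment in visitAnd says: ANDing with C2 ^ (C1 & C2) clears exactly the bits that the OR constant will set anyway, so ORing (C1 & C2) back in reproduces the original value while using a smaller AND mask.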