From 90a36346bc9a42d7f98851501708fee1c630a47a Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Fri, 14 Sep 2018 22:23:35 +0000
Subject: [PATCH] [InstCombine] refactor mul narrowing folds; NFCI

Similar to rL342278:
The test diffs are all cosmetic due to the change in value naming, but
I'm including that to show that the new code does perform these folds
rather than something else in instcombine.

D52075 should be able to use this code too rather than duplicating all
of the logic.

llvm-svn: 342292
---
 .../Transforms/InstCombine/InstCombineAddSub.cpp   | 41 +-----
 .../Transforms/InstCombine/InstCombineInternal.h   | 13 +++-
 .../InstCombine/InstCombineMulDivRem.cpp           | 73 +---------------------
 .../InstCombine/InstructionCombining.cpp           | 45 +++++++++++++
 llvm/test/Transforms/InstCombine/narrow-math.ll    | 48 +++++++-------
 5 files changed, 84 insertions(+), 136 deletions(-)
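For illustration, a sketch of the zext-zext case on IR (hypothetical
values and names, not taken from the tests): when known bits prove the
product fits in the narrow type,

  %lo0 = lshr i32 %a, 16
  %lo1 = lshr i32 %b, 16
  %zx0 = zext i32 %lo0 to i64
  %zx1 = zext i32 %lo1 to i64
  %mul = mul i64 %zx0, %zx1

both inputs are known to be less than 2^16, so the product is at most
(2^16 - 1)^2 < 2^32 and the multiply can be done in 32 bits with no
unsigned wrap:

  %narrow = mul nuw i32 %lo0, %lo1
  %mul = zext i32 %narrow to i64
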
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 838ef3a..acb62b6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1032,45 +1032,6 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
   return BinaryOperator::CreateNot(NotMask, I.getName());
 }
 
-/// Try to narrow the width of an 'add' if at least 1 operand is an extend of
-/// of a value. This requires a potentially expensive known bits check to make
-/// sure the narrow op does not overflow.
-Instruction *InstCombiner::narrowAddIfNoOverflow(BinaryOperator &I) {
-  // We need at least one extended operand.
-  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-  Value *X;
-  bool IsSext = match(LHS, m_SExt(m_Value(X)));
-  if (!IsSext && !match(LHS, m_ZExt(m_Value(X))))
-    return nullptr;
-
-  // If both operands are the same extension from the same source type and we
-  // can eliminate at least one (hasOneUse), this might work.
-  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
-  Value *Y;
-  if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
-        cast<Operator>(RHS)->getOpcode() == CastOpc &&
-        (LHS->hasOneUse() || RHS->hasOneUse()))) {
-    // If that did not match, see if the RHS is a constant. Truncating and
-    // extending must produce the same constant.
-    Constant *WideC;
-    if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC)))
-      return nullptr;
-    Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
-    if (ConstantExpr::getCast(CastOpc, NarrowC, I.getType()) != WideC)
-      return nullptr;
-    Y = NarrowC;
-  }
-  // Both operands have narrow versions. Last step: the math must not overflow
-  // in the narrow width.
-  if (!willNotOverflowAdd(X, Y, I, IsSext))
-    return nullptr;
-
-  // add (ext X), (ext Y) --> ext (add X, Y)
-  // add (ext X), C --> ext (add X, C')
-  Value *NarrowAdd = Builder.CreateAdd(X, Y, "narrow", !IsSext, IsSext);
-  return CastInst::Create(CastOpc, NarrowAdd, I.getType());
-}
-
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1230,7 +1191,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     }
   }
 
-  if (Instruction *Ext = narrowAddIfNoOverflow(I))
+  if (Instruction *Ext = narrowMathIfNoOverflow(I))
     return Ext;
 
   // (add (xor A, B) (and A, B)) --> (or A, B)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 1462660..e5d0ac9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -538,13 +538,24 @@ private:
                     : willNotOverflowUnsignedMul(LHS, RHS, CxtI);
   }
 
+  bool willNotOverflow(BinaryOperator::BinaryOps Opcode, const Value *LHS,
+                       const Value *RHS, const Instruction &CxtI,
+                       bool IsSigned) const {
+    switch (Opcode) {
+    case Instruction::Add: return willNotOverflowAdd(LHS, RHS, CxtI, IsSigned);
+    case Instruction::Sub: return willNotOverflowSub(LHS, RHS, CxtI, IsSigned);
+    case Instruction::Mul: return willNotOverflowMul(LHS, RHS, CxtI, IsSigned);
+    default: llvm_unreachable("Unexpected opcode for overflow query");
+    }
+  }
+
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
   Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
   Instruction *narrowBinOp(TruncInst &Trunc);
   Instruction *narrowMaskedBinOp(BinaryOperator &And);
-  Instruction *narrowAddIfNoOverflow(BinaryOperator &I);
+  Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
   Instruction *narrowRotate(TruncInst &Trunc);
   Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ee21363..d76351f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -322,77 +322,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1)
     return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0);
 
-  // Check for (mul (sext x), y), see if we can merge this into an
-  // integer mul followed by a sext.
-  if (SExtInst *Op0Conv = dyn_cast<SExtInst>(Op0)) {
-    // (mul (sext x), cst) --> (sext (mul x, cst'))
-    if (auto *Op1C = dyn_cast<Constant>(Op1)) {
-      if (Op0Conv->hasOneUse()) {
-        Constant *CI =
-            ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
-        if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
-            willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
-          // Insert the new, smaller mul.
-          Value *NewMul =
-              Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
-          return new SExtInst(NewMul, I.getType());
-        }
-      }
-    }
-
-    // (mul (sext x), (sext y)) --> (sext (mul int x, y))
-    if (SExtInst *Op1Conv = dyn_cast<SExtInst>(Op1)) {
-      // Only do this if x/y have the same type, if at last one of them has a
-      // single use (so we don't increase the number of sexts), and if the
-      // integer mul will not overflow.
-      if (Op0Conv->getOperand(0)->getType() ==
-              Op1Conv->getOperand(0)->getType() &&
-          (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
-          willNotOverflowSignedMul(Op0Conv->getOperand(0),
-                                   Op1Conv->getOperand(0), I)) {
-        // Insert the new integer mul.
-        Value *NewMul = Builder.CreateNSWMul(
-            Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
-        return new SExtInst(NewMul, I.getType());
-      }
-    }
-  }
-
-  // Check for (mul (zext x), y), see if we can merge this into an
-  // integer mul followed by a zext.
-  if (auto *Op0Conv = dyn_cast<ZExtInst>(Op0)) {
-    // (mul (zext x), cst) --> (zext (mul x, cst'))
-    if (auto *Op1C = dyn_cast<Constant>(Op1)) {
-      if (Op0Conv->hasOneUse()) {
-        Constant *CI =
-            ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
-        if (ConstantExpr::getZExt(CI, I.getType()) == Op1C &&
-            willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) {
-          // Insert the new, smaller mul.
-          Value *NewMul =
-              Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
-          return new ZExtInst(NewMul, I.getType());
-        }
-      }
-    }
-
-    // (mul (zext x), (zext y)) --> (zext (mul int x, y))
-    if (auto *Op1Conv = dyn_cast<ZExtInst>(Op1)) {
-      // Only do this if x/y have the same type, if at last one of them has a
-      // single use (so we don't increase the number of zexts), and if the
-      // integer mul will not overflow.
-      if (Op0Conv->getOperand(0)->getType() ==
-              Op1Conv->getOperand(0)->getType() &&
-          (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
-          willNotOverflowUnsignedMul(Op0Conv->getOperand(0),
-                                     Op1Conv->getOperand(0), I)) {
-        // Insert the new integer mul.
-        Value *NewMul = Builder.CreateNUWMul(
-            Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
-        return new ZExtInst(NewMul, I.getType());
-      }
-    }
-  }
+  if (Instruction *Ext = narrowMathIfNoOverflow(I))
+    return Ext;
 
   bool Changed = false;
   if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 99874b3..d29cf93 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1446,6 +1446,51 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
   return nullptr;
 }
 
+/// Try to narrow the width of a binop if at least 1 operand is an extend
+/// of a value. This requires a potentially expensive known bits check to make
+/// sure the narrow op does not overflow.
+Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
+  // We need at least one extended operand.
+  Value *LHS = BO.getOperand(0), *RHS = BO.getOperand(1);
+  Value *X;
+  bool IsSext = match(LHS, m_SExt(m_Value(X)));
+  if (!IsSext && !match(LHS, m_ZExt(m_Value(X))))
+    return nullptr;
+
+  // If both operands are the same extension from the same source type and we
+  // can eliminate at least one (hasOneUse), this might work.
+  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
+  Value *Y;
+  if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
+        cast<Operator>(RHS)->getOpcode() == CastOpc &&
+        (LHS->hasOneUse() || RHS->hasOneUse()))) {
+    // If that did not match, see if we have a suitable constant operand.
+    // Truncating and extending must produce the same constant.
+    Constant *WideC;
+    if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC)))
+      return nullptr;
+    Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
+    if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
+      return nullptr;
+    Y = NarrowC;
+  }
+  // Both operands have narrow versions. Last step: the math must not overflow
+  // in the narrow width.
+  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
+    return nullptr;
+
+  // bo (ext X), (ext Y) --> ext (bo X, Y)
+  // bo (ext X), C --> ext (bo X, C')
+  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
+  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
+    if (IsSext)
+      NewBinOp->setHasNoSignedWrap();
+    else
+      NewBinOp->setHasNoUnsignedWrap();
+  }
+  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
+}
+
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value *, 8> Ops(GEP.op_begin(), GEP.op_end());
   Type *GEPType = GEP.getType();
diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll
index 5badaa6..62ed5cb 100644
--- a/llvm/test/Transforms/InstCombine/narrow-math.ll
+++ b/llvm/test/Transforms/InstCombine/narrow-math.ll
@@ -155,8 +155,8 @@ define i64 @test3(i32 %V) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !0
 ; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !0
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nuw nsw i32 [[CALL1]], [[CALL2]]
-; CHECK-NEXT:    [[ADD:%.*]] = zext i32 [[MULCONV]] to i64
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT:    [[ADD:%.*]] = zext i32 [[NARROW]] to i64
 ; CHECK-NEXT:    ret i64 [[ADD]]
 ;
   %call1 = call i32 @callee(), !range !0
@@ -332,8 +332,8 @@ define <2 x i64> @test7_vec(<2 x i32> %V) {
 define i64 @test8(i32 %V) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 [[V:%.*]], 16
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw i32 [[ASHR]], 32767
-; CHECK-NEXT:    [[MUL:%.*]] = sext i32 [[MULCONV]] to i64
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw i32 [[ASHR]], 32767
+; CHECK-NEXT:    [[MUL:%.*]] = sext i32 [[NARROW]] to i64
 ; CHECK-NEXT:    ret i64 [[MUL]]
 ;
   %ashr = ashr i32 %V, 16
@@ -345,8 +345,8 @@ define i64 @test8(i32 %V) {
 define <2 x i64> @test8_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test8_splat(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 32767>
-; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 32767>
+; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
@@ -358,8 +358,8 @@ define <2 x i64> @test8_splat(<2 x i32> %V) {
 define <2 x i64> @test8_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test8_vec(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]],
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw <2 x i32> [[ASHR]],
-; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]],
+; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %ashr = ashr <2 x i32> %V,
@@ -371,8 +371,8 @@ define <2 x i64> @test8_vec(<2 x i32> %V) {
 define <2 x i64> @test8_vec2(<2 x i32> %V) {
 ; CHECK-LABEL: @test8_vec2(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]],
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw <2 x i32> [[ASHR]],
-; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]],
+; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %ashr = ashr <2 x i32> %V,
@@ -384,8 +384,8 @@ define <2 x i64> @test8_vec2(<2 x i32> %V) {
 define i64 @test9(i32 %V) {
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 [[V:%.*]], 16
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw i32 [[ASHR]], -32767
-; CHECK-NEXT:    [[MUL:%.*]] = sext i32 [[MULCONV]] to i64
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw i32 [[ASHR]], -32767
+; CHECK-NEXT:    [[MUL:%.*]] = sext i32 [[NARROW]] to i64
 ; CHECK-NEXT:    ret i64 [[MUL]]
 ;
   %ashr = ashr i32 %V, 16
@@ -397,8 +397,8 @@ define i64 @test9(i32 %V) {
 define <2 x i64> @test9_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test9_splat(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 -32767, i32 -32767>
-; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 -32767, i32 -32767>
+; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
@@ -410,8 +410,8 @@ define <2 x i64> @test9_splat(<2 x i32> %V) {
 define <2 x i64> @test9_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test9_vec(
 ; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]],
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw <2 x i32> [[ASHR]],
-; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]],
+; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %ashr = ashr <2 x i32> %V,
@@ -423,8 +423,8 @@ define <2 x i64> @test9_vec(<2 x i32> %V) {
 define i64 @test10(i32 %V) {
 ; CHECK-LABEL: @test10(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[V:%.*]], 16
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nuw i32 [[LSHR]], 65535
-; CHECK-NEXT:    [[MUL:%.*]] = zext i32 [[MULCONV]] to i64
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw i32 [[LSHR]], 65535
+; CHECK-NEXT:    [[MUL:%.*]] = zext i32 [[NARROW]] to i64
 ; CHECK-NEXT:    ret i64 [[MUL]]
 ;
   %lshr = lshr i32 %V, 16
@@ -436,8 +436,8 @@ define i64 @test10(i32 %V) {
 define <2 x i64> @test10_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test10_splat(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nuw <2 x i32> [[LSHR]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[MUL:%.*]] = zext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %lshr = lshr <2 x i32> %V, <i32 16, i32 16>
@@ -449,8 +449,8 @@ define <2 x i64> @test10_splat(<2 x i32> %V) {
 define <2 x i64> @test10_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test10_vec(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]],
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nuw <2 x i32> [[LSHR]],
-; CHECK-NEXT:    [[MUL:%.*]] = zext <2 x i32> [[MULCONV]] to <2 x i64>
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]],
+; CHECK-NEXT:    [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
   %lshr = lshr <2 x i32> %V,
@@ -479,8 +479,8 @@ define i64 @test12(i32 %V) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !1
 ; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !1
-; CHECK-NEXT:    [[MULCONV:%.*]] = mul nsw i32 [[CALL1]], [[CALL2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[MULCONV]] to i64
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[NARROW]] to i64
 ; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %call1 = call i32 @callee(), !range !1
--
2.7.4
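A companion sketch for the constant-operand path, again on hypothetical
IR with made-up value names: "bo (ext X), C --> ext (bo X, C')" only
fires when C survives the truncate/extend round trip and known bits
prove the narrow op cannot overflow. For example, assuming the sign-bit
analysis succeeds:

  %hi = ashr i16 %x, 8                 ; at least 9 sign bits: [-128, 127]
  %sx = sext i16 %hi to i32
  %mul = mul i32 %sx, 42               ; 42 == sext(trunc(42) to i16)
-->
  %narrow = mul nsw i16 %hi, 42        ; product is in [-5376, 5334]
  %mul = sext i16 %narrow to i32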