From 2da738167886d4a56a74d351f9586f309c1bfb2e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 15 Sep 2018 18:54:10 +0000
Subject: [PATCH] [InstCombine] Support (sub (sext x), (sext y)) --> (sext
 (sub x, y)) and (sub (zext x), (zext y)) --> (zext (sub x, y))

Summary:
If the sub doesn't overflow in the original type we can move it above the
sext/zext.

This is similar to what we do for add. The overflow checking for sub is
currently weaker than add, so the test cases are constructed for what is
supported.

Reviewers: spatel

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D52075

llvm-svn: 342335
---
 .../Transforms/InstCombine/InstCombineAddSub.cpp |   3 +
 .../InstCombine/InstructionCombining.cpp         |  25 ++--
 llvm/test/Transforms/InstCombine/narrow-math.ll  | 135 +++++++++++++++++++++
 3 files changed, 156 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index acb62b6..910ec83 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1697,6 +1697,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     return SelectInst::Create(Cmp, Neg, A);
   }
 
+  if (Instruction *Ext = narrowMathIfNoOverflow(I))
+    return Ext;
+
   bool Changed = false;
   if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
     Changed = true;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d29cf93..5d5a9b2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1451,29 +1451,40 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
 /// sure the narrow op does not overflow.
 Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
   // We need at least one extended operand.
-  Value *LHS = BO.getOperand(0), *RHS = BO.getOperand(1);
+  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
+
+  // If this is a sub, we swap the operands since we always want an extension
+  // on the RHS. The LHS can be an extension or a constant.
+  if (BO.getOpcode() == Instruction::Sub)
+    std::swap(Op0, Op1);
+
   Value *X;
-  bool IsSext = match(LHS, m_SExt(m_Value(X)));
-  if (!IsSext && !match(LHS, m_ZExt(m_Value(X))))
+  bool IsSext = match(Op0, m_SExt(m_Value(X)));
+  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
     return nullptr;
 
   // If both operands are the same extension from the same source type and we
   // can eliminate at least one (hasOneUse), this might work.
   CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
   Value *Y;
-  if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
-        cast<CastInst>(RHS)->getOpcode() == CastOpc &&
-        (LHS->hasOneUse() || RHS->hasOneUse()))) {
+  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
+        cast<CastInst>(Op1)->getOpcode() == CastOpc &&
+        (Op0->hasOneUse() || Op1->hasOneUse()))) {
     // If that did not match, see if we have a suitable constant operand.
     // Truncating and extending must produce the same constant.
     Constant *WideC;
-    if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC)))
+    if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
       return nullptr;
     Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
     if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
       return nullptr;
     Y = NarrowC;
   }
+
+  // Swap back now that we found our operands.
+  if (BO.getOpcode() == Instruction::Sub)
+    std::swap(X, Y);
+
   // Both operands have narrow versions. Last step: the math must not overflow
   // in the narrow width.
   if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll
index 62ed5cb..8caf93d 100644
--- a/llvm/test/Transforms/InstCombine/narrow-math.ll
+++ b/llvm/test/Transforms/InstCombine/narrow-math.ll
@@ -491,5 +491,140 @@ define i64 @test12(i32 %V) {
   ret i64 %add
 }
 
+define i64 @test13(i32 %V) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !2
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !3
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT:    [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !2
+  %call2 = call i32 @callee(), !range !3
+  %sext1 = sext i32 %call1 to i64
+  %sext2 = sext i32 %call2 to i64
+  %sub = sub i64 %sext1, %sext2
+  ret i64 %sub
+}
+
+define i64 @test14(i32 %V) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !2
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT:    [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !2
+  %call2 = call i32 @callee(), !range !0
+  %zext1 = zext i32 %call1 to i64
+  %zext2 = zext i32 %call2 to i64
+  %sub = sub i64 %zext1, %zext2
+  ret i64 %sub
+}
+
+define i64 @test15(i32 %V) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nsw i32 8, [[ASHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %ashr = ashr i32 %V, 1
+  %sext = sext i32 %ashr to i64
+  %sub = sub i64 8, %sext
+  ret i64 %sub
+}
+
+define <2 x i64> @test15vec(<2 x i32> %V) {
+; CHECK-LABEL: @test15vec(
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nsw <2 x i32> <i32 8, i32 8>, [[ASHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = sext <2 x i32> [[SUBCONV]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[SUB]]
+;
+  %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+  %sext = sext <2 x i32> %ashr to <2 x i64>
+  %sub = sub <2 x i64> <i64 8, i64 8>, %sext
+  ret <2 x i64> %sub
+}
+
+define i64 @test16(i32 %V) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nuw i32 -2, [[LSHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %lshr = lshr i32 %V, 1
+  %zext = zext i32 %lshr to i64
+  %sub = sub i64 4294967294, %zext
+  ret i64 %sub
+}
+
+define <2 x i64> @test16vec(<2 x i32> %V) {
+; CHECK-LABEL: @test16vec(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nuw <2 x i32> <i32 -2, i32 -2>, [[LSHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i32> [[SUBCONV]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[SUB]]
+;
+  %lshr = lshr <2 x i32> %V, <i32 1, i32 1>
+  %zext = zext <2 x i32> %lshr to <2 x i64>
+  %sub = sub <2 x i64> <i64 4294967294, i64 4294967294>, %zext
+  ret <2 x i64> %sub
+}
+
+; Negative test. Both have the same range so we can't guarantee the subtract
+; won't wrap.
+define i64 @test17(i32 %V) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[SEXT1:%.*]] = zext i32 [[CALL1]] to i64
+; CHECK-NEXT:    [[SEXT2:%.*]] = zext i32 [[CALL2]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !0
+  %call2 = call i32 @callee(), !range !0
+  %sext1 = zext i32 %call1 to i64
+  %sext2 = zext i32 %call2 to i64
+  %sub = sub i64 %sext1, %sext2
+  ret i64 %sub
+}
+
+; Negative test. LHS is large positive 32-bit number. Range of callee can
+; cause overflow.
+define i64 @test18(i32 %V) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT:    [[SEXT1:%.*]] = sext i32 [[CALL1]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i64 2147481648, [[SEXT1]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !1
+  %sext1 = sext i32 %call1 to i64
+  %sub = sub i64 2147481648, %sext1
+  ret i64 %sub
+}
+
+; Negative test. LHS is large negative 32-bit number. Range of callee can
+; cause overflow.
+define i64 @test19(i32 %V) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[CALL1]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i64 -2147481648, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !0
+  %sext1 = sext i32 %call1 to i64
+  %sub = sub i64 -2147481648, %sext1
+  ret i64 %sub
+}
+
 !0 = !{ i32 0, i32 2000 }
 !1 = !{ i32 -2000, i32 0 }
+!2 = !{ i32 -512, i32 -255 }
+!3 = !{ i32 -128, i32 0 }
-- 
2.7.4