From 2a88d00cf250534f31c706bc832f0f6386c28ef3 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 9 Nov 2021 12:31:15 -0500
Subject: [PATCH] [InstCombine] fold sub-of-umax to 0-usubsat

Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)

I'm not sure if this is really an improvement in IR because
we probably have better recognition/analysis for min/max,
but this lines up with the fold we do for the icmp+select
idiom and removes another diff from D98152.

This is similar to the previous fold in the code that was
added with:
83c2fb9f66be
baa6a851308d

https://alive2.llvm.org/ce/z/5MrVB9
---
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 6 ++++++
 llvm/test/Transforms/InstCombine/sub-minmax.ll        | 8 ++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3e0cc54..8cc6495 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2175,6 +2175,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     return replaceInstUsesWith(
         I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
 
+  // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
+  if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
+    Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
+    return BinaryOperator::CreateNeg(USub);
+  }
+
   // C - ctpop(X) => ctpop(~X) if C is bitwidth
   if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) &&
       match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))
diff --git a/llvm/test/Transforms/InstCombine/sub-minmax.ll b/llvm/test/Transforms/InstCombine/sub-minmax.ll
index 6b58b16..5b1100d 100644
--- a/llvm/test/Transforms/InstCombine/sub-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/sub-minmax.ll
@@ -534,8 +534,8 @@ define i8 @umax_sub_op0_use(i8 %x, i8 %y) {
 
 define i8 @umax_sub_op1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_sub_op1(
-; CHECK-NEXT:    [[U:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[Y]], [[U]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = sub i8 0, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %u = call i8 @llvm.umax.i8(i8 %x, i8 %y)
@@ -545,8 +545,8 @@ define i8 @umax_sub_op1(i8 %x, i8 %y) {
 
 define <2 x i8> @umax_sub_op1_vec_commute(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @umax_sub_op1_vec_commute(
-; CHECK-NEXT:    [[U:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[Y]], [[U]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %u = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %y, <2 x i8> %x)
-- 
2.7.4