From 8296c6c96f7bcd1ec2b7a87808ad9ba7931bae9d Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 19 Dec 2016 16:28:53 +0000 Subject: [PATCH] [InstCombine] add folds for icmp (smax X, Y), X This is a follow-up to: https://reviews.llvm.org/rL289855 (D27531) llvm-svn: 290111 --- llvm/include/llvm/IR/PatternMatch.h | 11 ++++- .../Transforms/InstCombine/InstCombineCompares.cpp | 52 +++++++++++++++------- llvm/test/Transforms/InstCombine/smax-icmp.ll | 48 +++++--------------- 3 files changed, 56 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 9b18850..f22acba6 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1349,15 +1349,22 @@ m_c_Xor(const LHS &L, const RHS &R) { return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); } -// TODO: Add the related SMax, UMax, UMin commuted matchers. +// TODO: Add the related UMax and UMin commuted matchers. /// Matches an SMin with LHS and RHS in either order. -template<typename LHS, typename RHS> +template <typename LHS, typename RHS> inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>, MaxMin_match<ICmpInst, RHS, LHS, smin_pred_ty>> m_c_SMin(const LHS &L, const RHS &R) { return m_CombineOr(m_SMin(L, R), m_SMin(R, L)); } +/// Matches an SMax with LHS and RHS in either order. +template <typename LHS, typename RHS> +inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>, + MaxMin_match<ICmpInst, RHS, LHS, smax_pred_ty>> +m_c_SMax(const LHS &L, const RHS &R) { + return m_CombineOr(m_SMax(L, R), m_SMax(R, L)); +} } // end namespace PatternMatch } // end namespace llvm diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6c4f4a1..b986d80 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3030,37 +3030,55 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { return nullptr; } -/// Fold icmp Pred smin(X, Y), X. -static Instruction *foldICmpWithSMin(ICmpInst &Cmp) { +/// Fold icmp Pred smin|smax(X, Y), X. 
+static Instruction *foldICmpWithSMinMax(ICmpInst &Cmp) { ICmpInst::Predicate Pred = Cmp.getPredicate(); Value *Op0 = Cmp.getOperand(0); Value *X = Cmp.getOperand(1); - // TODO: This should be expanded to handle smax/umax/umin. + // TODO: This should be expanded to handle umax/umin. // Canonicalize minimum operand to LHS of the icmp. - if (match(X, m_c_SMin(m_Specific(Op0), m_Value()))) { + if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) || + match(X, m_c_SMax(m_Specific(Op0), m_Value()))) { std::swap(Op0, X); Pred = Cmp.getSwappedPredicate(); } Value *Y; - if (!match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) + if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) { + // smin(X, Y) == X --> X <= Y + // smin(X, Y) >= X --> X <= Y + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE) + return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); + + // smin(X, Y) != X --> X > Y + // smin(X, Y) < X --> X > Y + if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT) + return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); + + // These cases should be handled in InstSimplify: + // smin(X, Y) <= X --> true + // smin(X, Y) > X --> false return nullptr; + } + if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) { + // smax(X, Y) == X --> X >= Y + // smax(X, Y) <= X --> X >= Y + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE) + return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); - // smin(X, Y) == X --> X <= Y - // smin(X, Y) >= X --> X <= Y - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE) - return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); + // smax(X, Y) != X --> X < Y + // smax(X, Y) > X --> X < Y + if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT) + return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); - // smin(X, Y) != X --> X > Y - // smin(X, Y) < X --> X > Y - if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT) - return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); + // These cases should be handled in InstSimplify: + // smax(X, Y) >= X --> true + // 
smax(X, Y) < X --> false + return nullptr; + } - // These cases should be handled in InstSimplify: - // smin(X, Y) <= X --> true - // smin(X, Y) > X --> false return nullptr; } @@ -4311,7 +4329,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpBinOp(I)) return Res; - if (Instruction *Res = foldICmpWithSMin(I)) + if (Instruction *Res = foldICmpWithSMinMax(I)) return Res; { diff --git a/llvm/test/Transforms/InstCombine/smax-icmp.ll b/llvm/test/Transforms/InstCombine/smax-icmp.ll index 1462a96..e64626f 100644 --- a/llvm/test/Transforms/InstCombine/smax-icmp.ll +++ b/llvm/test/Transforms/InstCombine/smax-icmp.ll @@ -12,9 +12,7 @@ define i1 @eq_smax1(i32 %x, i32 %y) { ; CHECK-LABEL: @eq_smax1( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %x, %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[SEL]], %x +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %x, %y @@ -27,9 +25,7 @@ define i1 @eq_smax1(i32 %x, i32 %y) { define i1 @eq_smax2(i32 %x, i32 %y) { ; CHECK-LABEL: @eq_smax2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %y, %x -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 %x -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[SEL]], %x +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %y, %x @@ -43,9 +39,7 @@ define i1 @eq_smax2(i32 %x, i32 %y) { define i1 @eq_smax3(i32 %a, i32 %y) { ; CHECK-LABEL: @eq_smax3( ; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], [[SEL]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization @@ -60,9 +54,7 @@ define i1 @eq_smax3(i32 %a, i32 %y) { define i1 @eq_smax4(i32 %a, i32 %y) { ; 
CHECK-LABEL: @eq_smax4( ; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 [[X]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], [[SEL]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization @@ -76,9 +68,7 @@ define i1 @eq_smax4(i32 %a, i32 %y) { define i1 @sle_smax1(i32 %x, i32 %y) { ; CHECK-LABEL: @sle_smax1( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %x, %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[SEL]], %x +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %x, %y @@ -91,9 +81,7 @@ define i1 @sle_smax1(i32 %x, i32 %y) { define i1 @sle_smax2(i32 %x, i32 %y) { ; CHECK-LABEL: @sle_smax2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %y, %x -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 %x -; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[SEL]], %x +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %y, %x @@ -107,9 +95,7 @@ define i1 @sle_smax2(i32 %x, i32 %y) { define i1 @sle_smax3(i32 %a, i32 %y) { ; CHECK-LABEL: @sle_smax3( ; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], [[SEL]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization @@ -124,9 +110,7 @@ define i1 @sle_smax3(i32 %a, i32 %y) { define i1 @sle_smax4(i32 %a, i32 %y) { ; CHECK-LABEL: @sle_smax4( ; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 [[X]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp 
sge i32 [[X]], [[SEL]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization @@ -140,9 +124,7 @@ define i1 @sle_smax4(i32 %a, i32 %y) { define i1 @ne_smax1(i32 %x, i32 %y) { ; CHECK-LABEL: @ne_smax1( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %x, %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[SEL]], %x +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %x, %y @@ -169,9 +151,7 @@ define i1 @ne_smax2(i32 %x, i32 %y) { define i1 @ne_smax3(i32 %a, i32 %y) { ; CHECK-LABEL: @ne_smax3( ; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X]], [[SEL]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization @@ -200,9 +180,7 @@ define i1 @ne_smax4(i32 %a, i32 %y) { define i1 @sgt_smax1(i32 %x, i32 %y) { ; CHECK-LABEL: @sgt_smax1( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %x, %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[SEL]], %x +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %x, %y @@ -229,9 +207,7 @@ define i1 @sgt_smax2(i32 %x, i32 %y) { define i1 @sgt_smax3(i32 %a, i32 %y) { ; CHECK-LABEL: @sgt_smax3( ; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], %y -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], [[SEL]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], %y ; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization -- 2.7.4