From cfcc42bdc2b880a3036337ac259f329ae56d8c22 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 13 Nov 2016 20:04:52 +0000 Subject: [PATCH] [ValueTracking] recognize even more variants of smin/smax Similar to: https://reviews.llvm.org/rL285499 https://reviews.llvm.org/rL286318 We can't minimally expose this in IR tests because we don't have min/max intrinsics, but the difference is visible in codegen because SelectionDAGBuilder::visitSelect() uses matchSelectPattern(). We're not canonicalizing these patterns in IR (yet), so I don't expect there to be any regressions as noted here: http://lists.llvm.org/pipermail/llvm-dev/2016-November/106868.html llvm-svn: 286776 --- llvm/lib/Analysis/ValueTracking.cpp | 20 ++++++++++++++++++++ llvm/test/CodeGen/X86/vec_minmax_match.ll | 24 ++++++++++++------------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5a6ca94..18117f6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3861,6 +3861,26 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; + // Z = X -nsw Y + // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0) + // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0) + if (match(TrueVal, m_Zero()) && + match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) { + LHS = TrueVal; + RHS = FalseVal; + return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; + } + + // Z = X -nsw Y + // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0) + // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0) + if (match(FalseVal, m_Zero()) && + match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) { + LHS = TrueVal; + RHS = FalseVal; + return {Pred == CmpInst::ICMP_SGT ? 
SPF_SMAX : SPF_SMIN, SPNB_NA, false}; + } + const APInt *C1; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; diff --git a/llvm/test/CodeGen/X86/vec_minmax_match.ll b/llvm/test/CodeGen/X86/vec_minmax_match.ll index 6293a61..af4410a 100644 --- a/llvm/test/CodeGen/X86/vec_minmax_match.ll +++ b/llvm/test/CodeGen/X86/vec_minmax_match.ll @@ -37,9 +37,9 @@ define <4 x i32> @smin_vec2(<4 x i32> %x) { define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: smin_vec3: ; CHECK: # BB#0: -; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq ; %sub = sub nsw <4 x i32> %x, %y @@ -53,9 +53,9 @@ define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @smin_vec4(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: smin_vec4: ; CHECK: # BB#0: -; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq ; %sub = sub nsw <4 x i32> %x, %y @@ -97,9 +97,9 @@ define <4 x i32> @smax_vec2(<4 x i32> %x) { define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: smax_vec3: ; CHECK: # BB#0: -; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq ; %sub = sub nsw <4 x i32> %x, %y @@ -113,9 +113,9 @@ define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @smax_vec4(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: smax_vec4: ; CHECK: # BB#0: -; CHECK-NEXT: vpsubd %xmm1, %xmm0, 
%xmm2 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq ; %sub = sub nsw <4 x i32> %x, %y -- 2.7.4