From fb32eea1b4b0f7bf2261d23250f6d0667a5b21a7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 19 Jan 2017 22:41:22 +0000 Subject: [PATCH] [SelectionDAG] Improve knownbits handling of UMIN/UMAX (PR31293) This patch improves the knownbits logic for unsigned integer min/max opcodes. For UMIN we know that the result will have the maximum of the inputs' known leading zero bits in the result, similarly for UMAX the maximum of the inputs' leading one bits. This is particularly useful for simplifying clamping patterns, e.g. as SSE doesn't have a uitofp instruction we want to use sitofp instead where possible and for that we need to confirm that the top bit is not set. Differential Revision: https://reviews.llvm.org/D28853 llvm-svn: 292528 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 +++++++++++++++++++++++--- llvm/test/CodeGen/X86/known-bits-vector.ll | 14 ++-------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index be3eef9..d7b4b42 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2687,12 +2687,42 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne2.byteSwap(); break; } - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: + case ISD::UMIN: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMIN - we know that the result will have the maximum of the + // known zero leading bits of the inputs. 
+ unsigned LeadZero = KnownZero.countLeadingOnes(); + LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadZero); + break; + } case ISD::UMAX: { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMAX - we know that the result will have the maximum of the + // known one leading bits of the inputs. + unsigned LeadOne = KnownOne.countLeadingOnes(); + LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownOne |= APInt::getHighBitsSet(BitWidth, LeadOne); + break; + } + case ISD::SMIN: + case ISD::SMAX: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); // If we don't know any bits, early out. if (!KnownOne && !KnownZero) break; diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll index 19b2811..77e3c53 100644 --- a/llvm/test/CodeGen/X86/known-bits-vector.ll +++ b/llvm/test/CodeGen/X86/known-bits-vector.ll @@ -481,22 +481,14 @@ define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) { ; X32: # BB#0: ; X32-NEXT: vpminud {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vpsrld $16, %xmm0, %xmm0 -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: knownbits_umin_shuffle_uitofp: ; X64: # BB#0: ; X64-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X64-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vpsrld $16, %xmm0, %xmm0 -; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> ) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> @@ -511,14 +503,12 @@ define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) { ; X32: # BB#0: ; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2] -; X32-NEXT: vpsrad $31, %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: knownbits_umax_shuffle_ashr: ; X64: # BB#0: ; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2] -; X64-NEXT: vpsrad $31, %xmm0, %xmm0 ; X64-NEXT: retq %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> ) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> -- 2.7.4