From: Simon Pilgrim
Date: Mon, 13 Jan 2020 17:56:15 +0000 (+0000)
Subject: [SelectionDAG] ComputeNumSignBits add getValidMaximumShiftAmountConstant() for ISD::SHL support
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c6fcd5d115b62280669719c5ead436904c93d6cb;p=platform%2Fupstream%2Fllvm.git

[SelectionDAG] ComputeNumSignBits add getValidMaximumShiftAmountConstant() for ISD::SHL support

Allows us to handle non-uniform SHL shifts and determine the minimum number of sign bits remaining (based on the maximum shift amount value).
---

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 54899ab..1b81e7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2449,6 +2449,32 @@ getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
   return MinShAmt;
 }
 
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the maximum value.
+static const APInt *
+getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+  unsigned BitWidth = V.getScalarValueSizeInBits();
+  auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+  if (!BV)
+    return nullptr;
+  const APInt *MaxShAmt = nullptr;
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (!DemandedElts[i])
+      continue;
+    auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+    if (!SA)
+      return nullptr;
+    // Shifting more than the bitwidth is not valid.
+    const APInt &ShAmt = SA->getAPIntValue();
+    if (ShAmt.uge(BitWidth))
+      return nullptr;
+    if (MaxShAmt && MaxShAmt->uge(ShAmt))
+      continue;
+    MaxShAmt = &ShAmt;
+  }
+  return MaxShAmt;
+}
+
 /// Determine which bits of Op are known to be either zero or one and return
 /// them in Known. For vectors, the known bits are those that are shared by
 /// every vector element.
@@ -3621,6 +3647,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
       Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
       if (ShAmt->ult(Tmp))
         return Tmp - ShAmt->getZExtValue();
+    } else if (const APInt *ShAmt =
+                   getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+      Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+      if (ShAmt->ult(Tmp))
+        return Tmp - ShAmt->getZExtValue();
     }
     break;
   case ISD::AND:
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 8f662c1..1d2afeb 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -257,45 +257,30 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
 define <2 x double> @signbits_sext_shl_sitofp(<2 x i16> %a0) nounwind {
 ; X86-LABEL: signbits_sext_shl_sitofp:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $32, %esp
 ; X86-NEXT:    vpmovsxwq %xmm0, %xmm0
 ; X86-NEXT:    vpsllq $5, %xmm0, %xmm1
 ; X86-NEXT:    vpsllq $11, %xmm0, %xmm0
-; X86-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; X86-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT:    fildll {{[0-9]+}}(%esp)
-; X86-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NEXT:    fildll {{[0-9]+}}(%esp)
-; X86-NEXT:    fstpl (%esp)
-; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-AVX1-LABEL: signbits_sext_shl_sitofp:
 ; X64-AVX1:       # %bb.0:
 ; X64-AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpsllq $11, %xmm0, %xmm1
-; X64-AVX1-NEXT:    vpsllq $5, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-AVX1-NEXT:    vcvtsi2sd %eax, %xmm2, %xmm0
-; X64-AVX1-NEXT:    vcvtdq2pd %xmm1, %xmm1
-; X64-AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX1-NEXT:    vpsllq $5, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpsllq $11, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: signbits_sext_shl_sitofp:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    vpmovsxwq %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-AVX2-NEXT:    vcvtsi2sd %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; X64-AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-AVX2-NEXT:    retq
   %1 = sext <2 x i16> %a0 to <2 x i64>
   %2 = shl <2 x i64> %1,
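
Note: the sign-bit arithmetic the new code path performs can be illustrated standalone. The sketch below is plain C++, not LLVM code (numSignBitsAfterShl is a made-up name for illustration, and the plain unsigned arithmetic stands in for APInt); it mirrors the rule that a vector SHL with per-lane constant shift amounts keeps at least (source sign bits) - (maximum shift amount) sign bits, provided every shift amount is below the element bit width.

#include <algorithm>
#include <cassert>
#include <vector>

// Hypothetical standalone helper (not an LLVM API): mirrors the sign-bit
// arithmetic of the new SHL path in ComputeNumSignBits. A vector SHL with
// per-lane constant shift amounts keeps at least
//   (sign bits of the source) - (maximum shift amount)
// sign bits, provided every shift amount is below the element bit width.
unsigned numSignBitsAfterShl(unsigned SrcSignBits,
                             const std::vector<unsigned> &ShiftAmts,
                             unsigned BitWidth) {
  unsigned MaxShAmt = 0;
  for (unsigned ShAmt : ShiftAmts) {
    // getValidMaximumShiftAmountConstant() bails out (returns nullptr) for
    // out-of-range shift amounts; model that as a precondition here.
    assert(ShAmt < BitWidth && "shifting by >= bitwidth is not valid");
    MaxShAmt = std::max(MaxShAmt, ShAmt);
  }
  // Mirrors: if (ShAmt->ult(Tmp)) return Tmp - ShAmt->getZExtValue();
  // If the maximum shift could destroy every sign bit, we learn nothing and
  // fall back to the trivial answer of 1 (the sign bit itself).
  if (MaxShAmt < SrcSignBits)
    return SrcSignBits - MaxShAmt;
  return 1;
}

int main() {
  // The test case above: sext <2 x i16> to <2 x i64> yields 64 - 16 + 1 = 49
  // sign bits; shl <2 x i64> by <5, 11> keeps at least 49 - 11 = 38 of them.
  // Since 38 >= 33, each i64 lane equals the sign extension of its low 32
  // bits, which is what lets the sitofp lower to vcvtdq2pd in the checks
  // above instead of going through scalar i64-to-f64 conversions.
  assert(numSignBitsAfterShl(49, {5, 11}, 64) == 38);
  return 0;
}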