From 5cee232be4d4b41453806298054269bcc3b0ddda Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Fri, 28 Oct 2016 04:01:12 +0000
Subject: [PATCH] Revert "[DAGCombiner] Add vector demanded elements support to computeKnownBits"

This seems to have increased LTO compile time beyond 2x of previous builds.
See http://lab.llvm.org:8080/green/job/clang-stage2-configure-Rlto/10676/

llvm-svn: 285381
---
 llvm/include/llvm/CodeGen/SelectionDAG.h       |  16 +---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 124 +++---------------------
 llvm/test/CodeGen/X86/avx-vperm2x128.ll        |   6 +-
 llvm/test/CodeGen/X86/known-bits-vector.ll     |  50 +++++++---
 4 files changed, 59 insertions(+), 137 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 5b53554..6cc1f50 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1256,22 +1256,12 @@ public:
     const;
 
   /// Determine which bits of Op are known to be either zero or one and return
-  /// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
-  /// those that are shared by every vector element.
-  /// Targets can implement the computeKnownBitsForTargetNode method in the
-  /// TargetLowering class to allow target nodes to be understood.
+  /// them in the KnownZero/KnownOne bitsets. Targets can implement the
+  /// computeKnownBitsForTargetNode method in the TargetLowering class to allow
+  /// target nodes to be understood.
   void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
                         unsigned Depth = 0) const;
 
-  /// Determine which bits of Op are known to be either zero or one and return
-  /// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
-  /// us to only collect the known bits that are shared by the requested vector
-  /// elements.
-  /// Targets can implement the computeKnownBitsForTargetNode method in the
-  /// TargetLowering class to allow target nodes to be understood.
-  void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
-                        const APInt &DemandedElts, unsigned Depth = 0) const;
-
   /// Test if the given value is known to have exactly one bit set. This differs
   /// from computeKnownBits in that it doesn't necessarily determine which bit
   /// is set.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cb37583..c2c7bb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2007,26 +2007,9 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
 }
 
 /// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
-/// those that are shared by every vector element.
+/// them in the KnownZero/KnownOne bitsets.
 void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
                                     APInt &KnownOne, unsigned Depth) const {
-  EVT VT = Op.getValueType();
-  APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
-                           : APInt(1, 1);
-  computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
-}
-
-/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
-/// us to only collect the known bits that are shared by the requested vector
-/// elements.
-/// TODO: We only support DemandedElts on a few opcodes so far, the remainder
-/// should be added when they become necessary.
-void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
-                                    APInt &KnownOne, const APInt &DemandedElts,
-                                    unsigned Depth) const {
   unsigned BitWidth = Op.getScalarValueSizeInBits();
 
   KnownZero = KnownOne = APInt(BitWidth, 0);   // Don't know anything.
@@ -2034,10 +2017,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     return;  // Limit search depth.
 
   APInt KnownZero2, KnownOne2;
-  unsigned NumElts = DemandedElts.getBitWidth();
-
-  if (DemandedElts == APInt(NumElts, 0))
-    return;  // No demanded elts, better to assume we don't know anything.
 
   switch (Op.getOpcode()) {
   case ISD::Constant:
@@ -2046,15 +2025,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     KnownZero = ~KnownOne;
     break;
   case ISD::BUILD_VECTOR:
-    // Collect the known bits that are shared by every demanded vector element.
-    assert(NumElts == Op.getValueType().getVectorNumElements() &&
-           "Unexpected vector size");
+    // Collect the known bits that are shared by every vector element.
     KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
-    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
-      if (!DemandedElts[i])
-        continue;
-
-      SDValue SrcOp = Op.getOperand(i);
+    for (SDValue SrcOp : Op->ops()) {
       computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
 
       // BUILD_VECTOR can implicitly truncate sources, we must handle this.
@@ -2065,72 +2038,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
         KnownZero2 = KnownZero2.trunc(BitWidth);
       }
 
-      // Known bits are the values that are shared by every demanded element.
-      KnownOne &= KnownOne2;
-      KnownZero &= KnownZero2;
-    }
-    break;
-  case ISD::VECTOR_SHUFFLE: {
-    // Collect the known bits that are shared by every vector element referenced
-    // by the shuffle.
-    APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
-    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
-    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
-    assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
-    for (unsigned i = 0; i != NumElts; ++i) {
-      int M = SVN->getMaskElt(i);
-      if (M < 0) {
-        // For UNDEF elements, we don't know anything about the common state of
-        // the shuffle result.
-        // FIXME: Is this too pessimistic?
-        KnownZero = KnownOne = APInt(BitWidth, 0);
-        break;
-      }
-      if (!DemandedElts[i])
-        continue;
-
-      if ((unsigned)M < NumElts)
-        DemandedLHS.setBit((unsigned)M % NumElts);
-      else
-        DemandedRHS.setBit((unsigned)M % NumElts);
-    }
-    // Known bits are the values that are shared by every demanded element.
-    if (DemandedLHS != APInt(NumElts, 0)) {
-      SDValue LHS = Op.getOperand(0);
-      computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1);
+      // Known bits are the values that are shared by every element.
+      // TODO: support per-element known bits.
       KnownOne &= KnownOne2;
       KnownZero &= KnownZero2;
     }
-    if (DemandedRHS != APInt(NumElts, 0)) {
-      SDValue RHS = Op.getOperand(1);
-      computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1);
-      KnownOne &= KnownOne2;
-      KnownZero &= KnownZero2;
-    }
-    break;
-  }
-  case ISD::EXTRACT_SUBVECTOR: {
-    // If we know the element index, just demand that subvector elements,
-    // otherwise demand them all.
-    SDValue Src = Op.getOperand(0);
-    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
-    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
-      // Offset the demanded elts by the subvector index.
-      uint64_t Idx = SubIdx->getZExtValue();
-      APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
-      computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1);
-    } else {
-      computeKnownBits(Src, KnownZero, KnownOne, Depth + 1);
-    }
     break;
-  }
   case ISD::AND:
     // If either the LHS or the RHS are Zero, the result is zero.
-    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne &= KnownOne2;
@@ -2289,8 +2206,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     if (NewBits.getBoolValue())
       InputDemandedBits |= InSignBit;
 
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownOne &= InputDemandedBits;
     KnownZero &= InputDemandedBits;
 
@@ -2337,8 +2253,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
     KnownZero = KnownZero.trunc(InBits);
     KnownOne = KnownOne.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
     KnownZero |= NewBits;
@@ -2351,8 +2266,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
 
     KnownZero = KnownZero.trunc(InBits);
     KnownOne = KnownOne.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
 
     // Note if the sign bit is known to be zero or one.
     bool SignBitKnownZero = KnownZero.isNegative();
@@ -2525,28 +2439,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // At the moment we keep this simple and skip tracking the specific
     // element. This way we get the lowest common denominator for all elements
    // of the vector.
-    SDValue InVec = Op.getOperand(0);
-    SDValue EltNo = Op.getOperand(1);
-    EVT VecVT = InVec.getValueType();
+    // TODO: get information for given vector element
     const unsigned BitWidth = Op.getValueSizeInBits();
-    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
-    const unsigned NumSrcElts = VecVT.getVectorNumElements();
+    const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
     // anything about the extended bits.
     if (BitWidth > EltBitWidth) {
       KnownZero = KnownZero.trunc(EltBitWidth);
       KnownOne = KnownOne.trunc(EltBitWidth);
     }
-    ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
-    if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
-      // If we know the element index, just demand that vector element.
-      unsigned Idx = ConstEltNo->getZExtValue();
-      APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
-      computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1);
-    } else {
-      // Unknown element index, so ignore DemandedElts and demand them all.
-      computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
-    }
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     if (BitWidth > EltBitWidth) {
       KnownZero = KnownZero.zext(BitWidth);
       KnownOne = KnownOne.zext(BitWidth);
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index f7f54a0..740fd77 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -465,9 +465,11 @@ define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_67zz:
 ; AVX1:       ## BB#0:
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_67zz:
diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll
index 142fbed..e124eda 100644
--- a/llvm/test/CodeGen/X86/known-bits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-bits-vector.ll
@@ -5,14 +5,16 @@
 define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_extract_sext:
 ; X32:       # BB#0:
-; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    vpextrw $0, %xmm0, %eax
+; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT:    vmovd %xmm0, %eax
+; X32-NEXT:    cwtl
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_mask_extract_sext:
 ; X64:       # BB#0:
-; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    vpextrw $0, %xmm0, %eax
+; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vmovd %xmm0, %eax
+; X64-NEXT:    cwtl
 ; X64-NEXT:    retq
   %1 = and <8 x i16> %a0,
   %2 = extractelement <8 x i16> %1, i32 0
@@ -29,10 +31,17 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
 ; X32-NEXT:    subl $16, %esp
 ; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X32-NEXT:    vpextrd $1, %xmm0, %eax
 ; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    setns %cl
 ; X32-NEXT:    fildll {{[0-9]+}}(%esp)
+; X32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
 ; X32-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-NEXT:    flds {{[0-9]+}}(%esp)
+; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovss %xmm0, (%esp)
+; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl
@@ -42,7 +51,18 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
 ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
 ; X64-NEXT:    vmovq %xmm0, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    js .LBB1_1
+; X64-NEXT:  # BB#2:
+; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB1_1:
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    shrq %rcx
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    orq %rcx, %rax
 ; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = and <2 x i64> %a0,
   %2 = extractelement <2 x i64> %1, i32 0
@@ -54,15 +74,15 @@ define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_shuffle_sext:
 ; X32:       # BB#0:
 ; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_mask_shuffle_sext:
 ; X64:       # BB#0:
 ; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = and <8 x i16> %a0,
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32>
@@ -75,14 +95,22 @@ define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
 ; X32:       # BB#0:
 ; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
+; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_mask_shuffle_uitofp:
 ; X64:       # BB#0:
 ; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
+; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
 ; X64-NEXT:    retq
   %1 = and <4 x i32> %a0,
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
-- 
2.7.4
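
Background sketch (not part of the patch): the functionality being reverted lets callers say which vector lanes they actually care about, so known-zero/known-one bits are intersected only over the demanded elements instead of over every lane. The small, self-contained C++ program below is only a rough illustration of that accumulation using plain 16-bit masks rather than llvm::APInt; the names KnownBits16 and knownBitsForDemandedElts and the sample lane values are hypothetical and merely mirror the removed BUILD_VECTOR handling, not any LLVM API.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustration only: plain 16-bit masks stand in for llvm::APInt.
struct KnownBits16 {
  uint16_t Zero; // bits known to be 0 in every demanded element
  uint16_t One;  // bits known to be 1 in every demanded element
};

// Intersect known bits over the elements selected by DemandedElts
// (bit i set => element i is demanded), mirroring the reverted
// BUILD_VECTOR handling.
KnownBits16 knownBitsForDemandedElts(const std::vector<uint16_t> &Elts,
                                     uint32_t DemandedElts) {
  KnownBits16 Known = {0xFFFF, 0xFFFF}; // start fully known, like getAllOnesValue
  if (DemandedElts == 0)
    return {0, 0}; // as in the reverted code: no demanded elts, assume nothing is known
  for (size_t i = 0; i != Elts.size(); ++i) {
    if (!(DemandedElts & (1u << i)))
      continue; // undemanded lanes cannot pessimize the result
    Known.One &= Elts[i];   // stays known-one only if set in every demanded lane
    Known.Zero &= ~Elts[i]; // stays known-zero only if clear in every demanded lane
  }
  return Known;
}

int main() {
  std::vector<uint16_t> V = {0x000F, 0x00FF, 0xF00F};
  KnownBits16 K = knownBitsForDemandedElts(V, 0x5); // demand lanes 0 and 2 only
  std::printf("KnownOne=0x%04X KnownZero=0x%04X\n",
              (unsigned)K.One, (unsigned)K.Zero);   // prints 0x000F and 0x0FF0
  return 0;
}

With all three lanes demanded, the intersection in this toy example proves only bits 8..11 zero; restricting the demand to lanes 0 and 2 recovers bits 4..11. Losing that per-lane precision is what the X86 test updates above reflect once the demanded-elements support is reverted.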