From: Simon Pilgrim Date: Thu, 31 Oct 2019 11:21:39 +0000 (+0000) Subject: [X86][SSE] Convert computeZeroableShuffleElements to emit KnownUndef and KnownZero X-Git-Tag: llvmorg-11-init~5374 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a780b94cd1b03c3642a2662ea59dd6cfa7d6b81f;p=platform%2Fupstream%2Fllvm.git [X86][SSE] Convert computeZeroableShuffleElements to emit KnownUndef and KnownZero --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ee80d81..13d62dc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6732,7 +6732,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); - assert((VT.getSizeInBits() % Mask.size()) == 0 && + assert((VT.getSizeInBits() % Size) == 0 && "Illegal split of shuffle value type"); unsigned EltSizeInBits = VT.getSizeInBits() / Size; @@ -10423,9 +10423,12 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, const SDLoc &DL, /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// as many lanes with this technique as possible to simplify the remaining /// shuffle. -static APInt computeZeroableShuffleElements(ArrayRef Mask, - SDValue V1, SDValue V2) { - APInt Zeroable(Mask.size(), 0); +static void computeZeroableShuffleElements(ArrayRef Mask, + SDValue V1, SDValue V2, + APInt &KnownUndef, APInt &KnownZero) { + int Size = Mask.size(); + KnownUndef = KnownZero = APInt::getNullValue(Size); + V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); @@ -10433,14 +10436,18 @@ static APInt computeZeroableShuffleElements(ArrayRef Mask, bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); int VectorSizeInBits = V1.getValueSizeInBits(); - int ScalarSizeInBits = VectorSizeInBits / Mask.size(); + int ScalarSizeInBits = VectorSizeInBits / Size; assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); - for (int i = 0, Size = Mask.size(); i < Size; ++i) { + for (int i = 0; i < Size; ++i) { int M = Mask[i]; // Handle the easy cases. - if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { - Zeroable.setBit(i); + if (M < 0) { + KnownUndef.setBit(i); + continue; + } + if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { + KnownZero.setBit(i); continue; } @@ -10457,20 +10464,20 @@ static APInt computeZeroableShuffleElements(ArrayRef Mask, if ((Size % V.getNumOperands()) == 0) { int Scale = Size / V->getNumOperands(); SDValue Op = V.getOperand(M / Scale); - if (Op.isUndef() || X86::isZeroNode(Op)) - Zeroable.setBit(i); + if (Op.isUndef()) + KnownUndef.setBit(i); + if (X86::isZeroNode(Op)) + KnownZero.setBit(i); else if (ConstantSDNode *Cst = dyn_cast(Op)) { APInt Val = Cst->getAPIntValue(); - Val.lshrInPlace((M % Scale) * ScalarSizeInBits); - Val = Val.getLoBits(ScalarSizeInBits); + Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits); if (Val == 0) - Zeroable.setBit(i); + KnownZero.setBit(i); } else if (ConstantFPSDNode *Cst = dyn_cast(Op)) { APInt Val = Cst->getValueAPF().bitcastToAPInt(); - Val.lshrInPlace((M % Scale) * ScalarSizeInBits); - Val = Val.getLoBits(ScalarSizeInBits); + Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits); if (Val == 0) - Zeroable.setBit(i); + KnownZero.setBit(i); } continue; } @@ -10479,18 +10486,20 @@ static APInt computeZeroableShuffleElements(ArrayRef Mask, // elements must be UNDEF or ZERO. if ((V.getNumOperands() % Size) == 0) { int Scale = V->getNumOperands() / Size; - bool AllZeroable = true; + bool AllUndef = true; + bool AllZero = true; for (int j = 0; j < Scale; ++j) { SDValue Op = V.getOperand((M * Scale) + j); - AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op)); + AllUndef &= Op.isUndef(); + AllZero &= X86::isZeroNode(Op); } - if (AllZeroable) - Zeroable.setBit(i); + if (AllUndef) + KnownUndef.setBit(i); + if (AllZero) + KnownZero.setBit(i); continue; } } - - return Zeroable; } // The Shuffle result is as follow: @@ -17077,7 +17086,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. - APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2); + APInt KnownUndef, KnownZero; + computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero); + + APInt Zeroable = KnownUndef | KnownZero; if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL);