From 7324e845a44fe010dc712feebba7b2c0c0715694 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 24 Jul 2014 15:39:55 +0000 Subject: [PATCH] AArch64: refactor ReconstructShuffle function Quite a bit of cruft had accumulated as we realised the various different cases it had to handle and squeezed them in where possible. This refactoring mostly flattens the logic and special-cases. The result is slightly longer, but I think clearer. Should be no functionality change. llvm-svn: 213867 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 233 +++++++++++++----------- 1 file changed, 124 insertions(+), 109 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 88ec06f..e186c10 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4138,10 +4138,30 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); - SmallVector SourceVecs; - SmallVector MinElts; - SmallVector MaxElts; + struct ShuffleSourceInfo { + SDValue Vec; + unsigned MinElt; + unsigned MaxElt; + + // We may insert some combination of BITCASTs and VEXT nodes to force Vec to + // be compatible with the shuffle we intend to construct. As a result + // ShuffleVec will be some sliding window into the original Vec. + SDValue ShuffleVec; + + // Code should guarantee that element i in Vec starts at element "WindowBase + // + i * WindowScale in ShuffleVec". + int WindowBase; + int WindowScale; + + bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } + ShuffleSourceInfo(SDValue Vec) + : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), + WindowScale(1) {} + }; + // First gather all vectors used as an immediate source for this BUILD_VECTOR + // node. + SmallVector Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.getOpcode() == ISD::UNDEF) @@ -4152,158 +4172,153 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, return SDValue(); } - // Record this extraction against the appropriate vector if possible... + // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); - unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); - bool FoundSource = false; - for (unsigned j = 0; j < SourceVecs.size(); ++j) { - if (SourceVecs[j] == SourceVec) { - if (MinElts[j] > EltNo) - MinElts[j] = EltNo; - if (MaxElts[j] < EltNo) - MaxElts[j] = EltNo; - FoundSource = true; - break; - } - } + auto Source = std::find(Sources.begin(), Sources.end(), SourceVec); + if (Source == Sources.end()) + Sources.push_back(ShuffleSourceInfo(SourceVec)); - // Or record a new source if not... - if (!FoundSource) { - SourceVecs.push_back(SourceVec); - MinElts.push_back(EltNo); - MaxElts.push_back(EltNo); - } + // Update the minimum and maximum lane number seen. + unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); + Source->MinElt = std::min(Source->MinElt, EltNo); + Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors - // involved. - if (SourceVecs.size() > 2) + // are involved. + if (Sources.size() > 2) return SDValue(); // Find out the smallest element size among result and two sources, and use // it as element size to build the shuffle_vector. EVT SmallestEltTy = VT.getVectorElementType(); - for (unsigned i = 0; i < SourceVecs.size(); ++i) { - EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType(); + for (auto &Source : Sources) { + EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); if (SrcEltTy.bitsLT(SmallestEltTy)) { SmallestEltTy = SrcEltTy; } } unsigned ResMultiplier = VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits(); - int VEXTOffsets[2] = { 0, 0 }; - int OffsetMultipliers[2] = { 1, 1 }; NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); - SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; - - // This loop extracts the usage patterns of the source vectors - // and prepares appropriate SDValues for a shuffle if possible. - for (unsigned i = 0; i < SourceVecs.size(); ++i) { - unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements(); - SDValue CurSource = SourceVecs[i]; - if (SourceVecs[i].getValueType().getVectorElementType() != - ShuffleVT.getVectorElementType()) { - // As ShuffleVT holds smallest element size, it may hit here only if - // the element type of SourceVecs is bigger than that of ShuffleVT. - // Adjust the element size of SourceVecs to match ShuffleVT, and record - // the multipliers. - EVT CastVT = EVT::getVectorVT( - *DAG.getContext(), ShuffleVT.getVectorElementType(), - SourceVecs[i].getValueSizeInBits() / - ShuffleVT.getVectorElementType().getSizeInBits()); - - CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]); - OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts; - NumSrcElts *= OffsetMultipliers[i]; - MaxElts[i] *= OffsetMultipliers[i]; - MinElts[i] *= OffsetMultipliers[i]; - } - if (CurSource.getValueType() == ShuffleVT) { - // No VEXT necessary - ShuffleSrcs[i] = CurSource; - VEXTOffsets[i] = 0; + // If the source vector is too wide or too narrow, we may nevertheless be able + // to construct a compatible shuffle either by concatenating it with UNDEF or + // extracting a suitable range of elements. + for (auto &Src : Sources) { + EVT SrcVT = Src.ShuffleVec.getValueType(); + + if (SrcVT.getSizeInBits() == VT.getSizeInBits()) continue; - } else if (NumSrcElts < NumElts) { + + // This stage of the search produces a source with the same element type as + // the original, but with a total width matching the BUILD_VECTOR output. + EVT EltVT = SrcVT.getVectorElementType(); + EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VT.getSizeInBits() / EltVT.getSizeInBits()); + + if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { + assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits()); // We can pad out the smaller vector for free, so if it's part of a // shuffle... - ShuffleSrcs[i] = - DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource, - DAG.getUNDEF(CurSource.getValueType())); + Src.ShuffleVec = + DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, + DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; } - // Since only 64-bit and 128-bit vectors are legal on ARM and - // we've eliminated the other cases... - assert(NumSrcElts == 2 * NumElts && - "unexpected vector sizes in ReconstructShuffle"); + assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits()); - if (MaxElts[i] - MinElts[i] >= NumElts) { + if (Src.MaxElt - Src.MinElt >= NumElts) { // Span too large for a VEXT to cope return SDValue(); } - if (MinElts[i] >= NumElts) { + if (Src.MinElt >= NumElts) { // The extraction can just take the second half - VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, - CurSource, DAG.getIntPtrConstant(NumElts)); - } else if (MaxElts[i] < NumElts) { + Src.ShuffleVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getIntPtrConstant(NumElts)); + Src.WindowBase = -NumElts; + } else if (Src.MaxElt < NumElts) { // The extraction can just take the first half - VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, - CurSource, DAG.getIntPtrConstant(0)); + Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + Src.ShuffleVec, DAG.getIntPtrConstant(0)); } else { // An actual VEXT is needed - VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, - CurSource, DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT, - CurSource, DAG.getIntPtrConstant(NumElts)); - unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); - ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1, + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + Src.ShuffleVec, DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getIntPtrConstant(NumElts)); + unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1); + + Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); + Src.WindowBase = -Src.MinElt; } } - SmallVector Mask; - unsigned VTEltSize = VT.getVectorElementType().getSizeInBits(); + // Another possible incompatibility occurs from the vector element types. We + // can fix this by bitcasting the source vectors to the same type we intend + // for the shuffle. + for (auto &Src : Sources) { + EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); + if (SrcEltTy == SmallestEltTy) + continue; + assert(ShuffleVT.getVectorElementType() == SmallestEltTy); + Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); + Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); + Src.WindowBase *= Src.WindowScale; + } + // Final sanity check before we try to actually produce a shuffle. + DEBUG( + for (auto Src : Sources) + assert(Src.ShuffleVec.getValueType() == ShuffleVT); + ); + + // The stars all align, our next step is to produce the mask for the shuffle. + SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); + int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); - int SourceNum = 1; - unsigned LanePartNum = 0; - int ExtractElt; - if (Entry.getOpcode() != ISD::UNDEF) { - // Check how many parts of source lane should be inserted. - SDValue ExtractVec = Entry.getOperand(0); - if (ExtractVec == SourceVecs[0]) - SourceNum = 0; - ExtractElt = cast(Entry.getOperand(1))->getSExtValue(); - unsigned ExtEltSize = - ExtractVec.getValueType().getVectorElementType().getSizeInBits(); - unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize; - LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits(); - } + if (Entry.getOpcode() == ISD::UNDEF) + continue; - for (unsigned j = 0; j != ResMultiplier; ++j) { - if (j < LanePartNum) - Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] + - NumElts * SourceNum - VEXTOffsets[SourceNum] + j); - else - Mask.push_back(-1); - } + auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0)); + int EltNo = cast(Entry.getOperand(1))->getSExtValue(); + + // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit + // trunc. So only std::min(SrcBits, DestBits) actually get defined in this + // segment. + EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); + int BitsDefined = std::min(OrigEltTy.getSizeInBits(), + VT.getVectorElementType().getSizeInBits()); + int LanesDefined = BitsDefined / BitsPerShuffleLane; + + // This source is expected to fill ResMultiplier lanes of the final shuffle, + // starting at the appropriate offset. + int *LaneMask = &Mask[i * ResMultiplier]; + + int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; + ExtractBase += NumElts * (Src - Sources.begin()); + for (int j = 0; j < LanesDefined; ++j) + LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... - if (isShuffleMaskLegal(Mask, ShuffleVT)) { - SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0], - ShuffleSrcs[1], &Mask[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); - } + if (!isShuffleMaskLegal(Mask, ShuffleVT)) + return SDValue(); - return SDValue(); + SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; + for (unsigned i = 0; i < Sources.size(); ++i) + ShuffleOps[i] = Sources[i].ShuffleVec; + + SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], + ShuffleOps[1], &Mask[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } // check if an EXT instruction can handle the shuffle mask when the -- 2.7.4