From 1c048ab6ba2321be0ae4a6dd8052b12f21963943 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2017 16:09:34 +0000 Subject: [PATCH] [X86][SSE] Extract elements from narrower shuffle masks. Add support for widening narrow shuffle masks so we can directly extract from the relevant input vector of the shuffle. llvm-svn: 298616 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++++++++----------- llvm/test/CodeGen/X86/known-signbits-vector.ll | 7 ++---- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f4685c1..d7fe86c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29165,9 +29165,10 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(0); SDValue Idx = N->getOperand(1); + EVT VT = N->getValueType(0); EVT SrcVT = Src.getValueType(); EVT SrcSVT = SrcVT.getVectorElementType(); - EVT VT = N->getValueType(0); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); // Don't attempt this for boolean mask vectors or unknown extraction indices. if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx)) @@ -29179,21 +29180,27 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) return SDValue(); - // At the moment we can only narrow a shuffle mask to handle extractions - // of smaller scalars. - // TODO - investigate support for wider shuffle masks with known upper - // undef/zero elements for implicit zero-extension. - unsigned NumMaskElts = Mask.size(); - if ((SrcVT.getVectorNumElements() % NumMaskElts) != 0) - return SDValue(); - - int Scale = SrcVT.getVectorNumElements() / NumMaskElts; - if (Scale != 1) { - SmallVector<int, 16> ScaledMask; - scaleShuffleMask(Scale, Mask, ScaledMask); - Mask = ScaledMask; + // Attempt to narrow/widen the shuffle mask to the correct size. 
+ if (Mask.size() != NumSrcElts) { + if ((NumSrcElts % Mask.size()) == 0) { + SmallVector<int, 16> ScaledMask; + int Scale = NumSrcElts / Mask.size(); + scaleShuffleMask(Scale, Mask, ScaledMask); + Mask = std::move(ScaledMask); + } else if ((Mask.size() % NumSrcElts) == 0) { + SmallVector<int, 16> WidenedMask; + while (Mask.size() > NumSrcElts && + canWidenShuffleElements(Mask, WidenedMask)) + Mask = std::move(WidenedMask); + // TODO - investigate support for wider shuffle masks with known upper + // undef/zero elements for implicit zero-extension. + } } + // Check if narrowing/widening failed. + if (Mask.size() != NumSrcElts) + return SDValue(); + int SrcIdx = Mask[N->getConstantOperandVal(1)]; SDLoc dl(N); diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll index 6922bf0..f2b2d34 100644 --- a/llvm/test/CodeGen/X86/known-signbits-vector.ll +++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll @@ -76,11 +76,8 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind { ; X32-LABEL: signbits_ashr_extract_sitofp: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: vpsrad $31, %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; X32-NEXT: vmovd %xmm0, %eax -; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; X32-NEXT: vpextrd $1, %xmm0, %eax +; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X32-NEXT: vmovss %xmm0, (%esp) ; X32-NEXT: flds (%esp) ; X32-NEXT: popl %eax -- 2.7.4