From 3e2de767f621bf277dfdb9337bf87e49ae53d3a8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 20 Sep 2018 13:10:22 +0000 Subject: [PATCH] [X86][SSE] Remove UNPCKL(SHUFFLE)->UNPCKH custom combine This can be achieved more generally by combineX86ShufflesRecursively. llvm-svn: 342645 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 34 --------------------------------- 1 file changed, 34 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e48dcaf..2da9222 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31087,40 +31087,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; - case X86ISD::UNPCKL: { - // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in - // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE - // moves upper half elements into the lower half part. For example: - // - // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1, - // undef:v16i8 - // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2 - // - // will be combined to: - // - // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1 - - // This is only for 128-bit vectors. From SSE4.1 onward this combine may not - // happen due to advanced instructions. 
- if (!VT.is128BitVector()) - return SDValue(); - - auto Op0 = N.getOperand(0); - auto Op1 = N.getOperand(1); - if (Op0.isUndef() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE) { - ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask(); - - unsigned NumElts = VT.getVectorNumElements(); - SmallVector<int, 8> ExpectedMask(NumElts, -1); - std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2, - NumElts / 2); - - auto ShufOp = Op1.getOperand(0); - if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask)) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp); - } - return SDValue(); - } case X86ISD::MOVSD: case X86ISD::MOVSS: { SDValue N0 = N.getOperand(0); -- 2.7.4