From 0a429f040e338a1f126bb80e263918d8f8641ff2 Mon Sep 17 00:00:00 2001 From: Tony Jiang Date: Wed, 24 May 2017 23:48:29 +0000 Subject: [PATCH] [PowerPC] Fix a performance bug for PPC::XXSLDWI. There are some VectorShuffle Nodes in SDAG which can be selected to XXSLDWI instruction, this patch recognizes them and does the selection to improve the PPC performance. llvm-svn: 303822 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 88 +++++- llvm/lib/Target/PowerPC/PPCISelLowering.h | 6 +- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2 +- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 4 + .../CodeGen/PowerPC/p8altivec-shuffles-pred.ll | 2 +- .../CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll | 64 ++--- llvm/test/CodeGen/PowerPC/pr27078.ll | 8 +- llvm/test/CodeGen/PowerPC/vec_sldwi.ll | 307 +++++++++++++++++++++ 8 files changed, 440 insertions(+), 41 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/vec_sldwi.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e65b1f1..2f64a3f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1596,9 +1596,8 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - unsigned &InsertAtByte, bool &Swap, bool IsLE) { // Check that the mask is shuffling words +static bool isWordShuffleMask(ShuffleVectorSDNode *N) { for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); unsigned B1 = N->getMaskElt(i*4+1); @@ -1610,6 +1609,14 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, return false; } + return true; +} + +bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + if (!isWordShuffleMask(N)) + return false; + // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; 
@@ -1680,6 +1687,69 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, return false; } +bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + // Ensure each byte index of the word is consecutive. + if (!isWordShuffleMask(N)) + return false; + + // Now we look at mask elements 0,4,8,12, which are the beginning of words. + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + assert(M0 < 4 && "Indexing into an undef vector?"); + if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) + return false; + + ShiftElts = IsLE ? (4 - M0) % 4 : M0; + Swap = false; + return true; + } + + // Ensure each word index of the ShuffleVector Mask is consecutive. + if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) + return false; + + if (IsLE) { + if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 3 left elements of the second vector + // (or if there is no shift to be done at all). + Swap = false; + ShiftElts = (8 - M0) % 8; + } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 3 left elements of the first vector + // (or if we're shifting by 4 - thereby simply swapping the vectors). 
+ Swap = true; + ShiftElts = (4 - M0) % 4; + } + + return true; + } else { // BE + if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 4 elements of the first vector. + Swap = false; + ShiftElts = M0; + } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 4 elements of the right vector. + Swap = true; + ShiftElts = M0 - 4; + } + + return true; + } +} + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -7679,6 +7749,20 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } + + if (Subtarget.hasVSX() && + PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); + + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index acb7794..2f9eb95 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -450,7 +450,11 @@ namespace llvm { /// a VMRGEW or VMRGOW instruction bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG); - + /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXSLDWI instruction. 
+ bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 0766cfe..26b99ec 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -46,7 +46,7 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, ]>; def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, - SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> + SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 35a9cda..a4a8865 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1066,6 +1066,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; +// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and +// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the latter one is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; + // Selects. 
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; diff --git a/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll index 329f5bb..de930af 100644 --- a/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll +++ b/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll @@ -21,7 +21,7 @@ entry: ret <16 x i8> %strided.vec ; CHECK-LABEL: @test2 -; CHECK: vsldoi 2, 2, 2, 12 +; CHECK: xxsldwi 34, 34, 34, 3 ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll index 2e625f2..fe34bcb 100644 --- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -6,7 +6,7 @@ define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -45,7 +45,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -54,7 +54,7 @@ entry: define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -93,7 +93,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: 
xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -102,7 +102,7 @@ entry: define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -141,7 +141,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -150,7 +150,7 @@ entry: define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -189,7 +189,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -198,7 +198,7 @@ entry: define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -237,7 +237,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; 
CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -246,7 +246,7 @@ entry: define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -285,7 +285,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -294,7 +294,7 @@ entry: define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -333,7 +333,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -342,7 +342,7 @@ entry: define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -381,7 +381,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = 
shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -546,7 +546,7 @@ entry: define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -585,7 +585,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -594,7 +594,7 @@ entry: define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -633,7 +633,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -642,7 +642,7 @@ entry: define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -681,7 +681,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector 
<4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -690,7 +690,7 @@ entry: define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -729,7 +729,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -738,7 +738,7 @@ entry: define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -777,7 +777,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins @@ -786,7 +786,7 @@ entry: define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -825,7 +825,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x i32> %b, <4 x i32> 
%a, <4 x i32> ret <4 x i32> %vecins @@ -834,7 +834,7 @@ entry: define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -873,7 +873,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins @@ -882,7 +882,7 @@ entry: define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -921,7 +921,7 @@ entry: ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins diff --git a/llvm/test/CodeGen/PowerPC/pr27078.ll b/llvm/test/CodeGen/PowerPC/pr27078.ll index b100e3a..d97008e 100644 --- a/llvm/test/CodeGen/PowerPC/pr27078.ll +++ b/llvm/test/CodeGen/PowerPC/pr27078.ll @@ -9,11 +9,11 @@ define <4 x float> @bar(float* %p, float* %q) { %6 = shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> ret <4 x float> %6 -; CHECK: vsldoi +; CHECK: xxsldwi ; CHECK-NEXT: vmrghw ; CHECK-NEXT: vmrglw -; CHECK-NEXT: vsldoi -; CHECK-NEXT: vsldoi -; CHECK-NEXT: vsldoi +; CHECK-NEXT: xxsldwi +; CHECK-NEXT: xxsldwi +; CHECK-NEXT: xxsldwi ; CHECK-NEXT: blr } diff --git a/llvm/test/CodeGen/PowerPC/vec_sldwi.ll 
b/llvm/test/CodeGen/PowerPC/vec_sldwi.ll new file mode 100644 index 0000000..01537d1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec_sldwi.ll @@ -0,0 +1,307 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-BE + +; Possible LE ShuffleVector masks (Case 1): +; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2) +; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1) +; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0) +; which targets at: +; xxsldwi a, b, 0 +; xxsldwi a, b, 1 +; xxsldwi a, b, 2 +; xxsldwi a, b, 3 +; Possible LE Swap ShuffleVector masks (Case 2): +; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7) +; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6) +; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5) +; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4) +; which targets at: +; xxsldwi b, a, 0 +; xxsldwi b, a, 1 +; xxsldwi b, a, 2 +; xxsldwi b, a, 3 +; Possible LE ShuffleVector masks when a == b, b is undef (Case 3): +; ShuffleVector((vector int)a, vector(int)a, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2) +; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1) +; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0) +; which targets at: +; xxsldwi a, a, 0 +; xxsldwi a, a, 1 +; xxsldwi a, a, 2 +; xxsldwi a, a, 3 + +; Possible BE ShuffleVector masks (Case 4): +; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4) +; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5) +; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6) +; which targets at: +; xxsldwi a, b, 0 +; xxsldwi a, b, 1 +; xxsldwi a, b, 2 +; xxsldwi a, b, 3 +; Possible BE Swap ShuffleVector 
masks (Case 5): +; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7) +; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0) +; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1) +; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2) +; which targets at: +; xxsldwi b, a, 0 +; xxsldwi b, a, 1 +; xxsldwi b, a, 2 +; xxsldwi b, a, 3 +; Possible BE ShuffleVector masks when a == b, b is undef (Case 6): +; ShuffleVector((vector int)a, vector(int)a, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0) +; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1) +; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2) +; which targets at: +; xxsldwi a, a, 0 +; xxsldwi a, a, 1 +; xxsldwi a, a, 2 +; xxsldwi a, a, 3 + +define <4 x i32> @check_le_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_0 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_1 +; CHECK-LE: xxsldwi 34, 34, 35, 1 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_2 +; CHECK-LE: xxsldwi 34, 34, 35, 2 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_3 +; CHECK-LE: xxsldwi 34, 34, 35, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: 
@check_le_swap_vec_sldwi_va_vb_0 +; CHECK-LE; vmr 2, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_1 +; CHECK-LE: xxsldwi 34, 35, 34, 1 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_2 +; CHECK-LE: xxsldwi 34, 35, 34, 2 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_3 +; CHECK-LE: xxsldwi 34, 35, 34, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_0(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_0 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_1(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_le_vec_sldwi_va_undef_1 +; CHECK-LE: xxsldwi 34, 34, 34, 1 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_2(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_2 +; CHECK-LE: xxswapd 34, 34 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_3(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_3 +; CHECK-LE: xxsldwi 34, 34, 34, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> 
%VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_0 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_1 +; CHECK-BE: xxsldwi 34, 34, 35, 1 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_2 +; CHECK-BE: xxsldwi 34, 34, 35, 2 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_3 +; CHECK-BE: xxsldwi 34, 34, 35, 3 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_0 +; CHECK-LE; vmr 2, 3 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_1 +; CHECK-BE: xxsldwi 34, 35, 34, 1 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_2 +; CHECK-BE: xxsldwi 34, 35, 34, 2 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: 
@check_be_swap_vec_sldwi_va_vb_3 +; CHECK-BE: xxsldwi 34, 35, 34, 3 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_0(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_be_vec_sldwi_va_undef_0 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_1(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_1 +; CHECK-BE: xxsldwi 34, 34, 34, 1 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_2(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_2 +; CHECK-BE: xxswapd 34, 34 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_3(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_3 +; CHECK-BE: xxsldwi 34, 34, 34, 3 +; CHECK-BE: blr +} + +; More test cases to test different types of vector inputs +define <16 x i8> @test_le_vec_sldwi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) { + entry: + %0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> + ret <16 x i8> %0 +; CHECK-LE-LABEL: @test_le_vec_sldwi_v16i8_v16i8 +; CHECK-LE: xxsldwi 34, 34, 35, 1 +; CHECK-LE: blr +} + +define <8 x i16> @test_le_vec_sldwi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) { + entry: + %0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> + ret <8 x i16> %0 +; CHECK-LE-LABEL: @test_le_vec_sldwi_v8i16_v8i16 +; CHECK-LE: xxsldwi 34, 34, 35, 1 +; CHECK-LE: blr +} + +; Note here xxpermdi 34, 34, 35, 2 <=> xxsldwi 34, 34, 35, 2 +define <2 x i64> @test_be_vec_sldwi_v2i64_v2i64(<2 x i64> %VA, <2 x i64> %VB) { + entry: + %0 = shufflevector <2 x i64> %VA, <2 x i64> %VB,<2 x i32> + ret <2 x i64> %0 +; CHECK-LE-LABEL: @test_be_vec_sldwi_v2i64_v2i64 +; 
CHECK-LE: xxpermdi 34, 34, 35, 2 +; CHECK-LE: blr +} -- 2.7.4