STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
+// Forward declaration of the shuffle-mask predicate that lowerToVINSERTH calls
+// (as isNByteElemShuffleMask(N, 2, 1)) to check that a shuffle operates on
+// half-words.
+// NOTE(review): the parameters are unnamed here; presumably the unsigned is
+// the element width in bytes and the int is the step between consecutive
+// bytes of an element -- confirm against the definition.
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
+/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
+/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
+/// SDValue.
+///
+/// The shuffle is accepted when it is a half-word shuffle in which seven of
+/// the eight result half-words keep their original position within one source
+/// vector and the remaining one is the inserted element. On success the
+/// returned node is a v16i8 BITCAST of a PPCISD::VECINSERT; a PPCISD::VECSHL
+/// of the source vector is emitted first when the inserted half-word has to be
+/// shifted into place.
+///
+/// \param N   The shuffle node to inspect; its two operands and its mask are
+///            read.
+/// \param DAG The SelectionDAG used to create the replacement nodes.
+/// \returns The replacement value, or an empty SDValue() if the shuffle does
+///          not match.
+SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
+                                           SelectionDAG &DAG) const {
+  const unsigned NumHalfWords = 8;
+  const unsigned BytesInVector = NumHalfWords * 2;
+  // Check that the shuffle is on half-words.
+  if (!isNByteElemShuffleMask(N, 2, 1))
+    return SDValue();
+
+  const bool IsLE = Subtarget.isLittleEndian();
+  SDLoc dl(N);
+  SDValue V1 = N->getOperand(0);
+  SDValue V2 = N->getOperand(1);
+  unsigned ShiftElts = 0, InsertAtByte = 0;
+  bool Swap = false;
+
+  // Shifts required to get the half-word we want at element 3.
+  static const unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
+  static const unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
+
+  uint32_t Mask = 0;
+  const uint32_t OriginalOrderLow = 0x1234567;
+  const uint32_t OriginalOrderHigh = 0x89ABCDEF;
+  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
+  // 32-bit space, only need 4-bit nibbles per element. Half-word 0 ends up in
+  // the most significant nibble.
+  for (unsigned i = 0; i < NumHalfWords; ++i) {
+    const unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
+    Mask |= static_cast<uint32_t>(N->getMaskElt(i * 2) / 2) << MaskShift;
+  }
+
+  // For each mask element, find out if we're just inserting something
+  // from V2 into V1 or vice versa.  Possible permutations inserting an element
+  // from V2 into V1:
+  //   X, 1, 2, 3, 4, 5, 6, 7
+  //   0, X, 2, 3, 4, 5, 6, 7
+  //   0, 1, X, 3, 4, 5, 6, 7
+  //   0, 1, 2, X, 4, 5, 6, 7
+  //   0, 1, 2, 3, X, 5, 6, 7
+  //   0, 1, 2, 3, 4, X, 6, 7
+  //   0, 1, 2, 3, 4, 5, X, 7
+  //   0, 1, 2, 3, 4, 5, 6, X
+  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
+
+  // If both vector operands for the shuffle are the same vector, the mask
+  // will contain only elements from the first one and the second one will be
+  // undef. V2 is not modified inside the search loop, so test it once here.
+  const bool IsUnary = V2.isUndef();
+  // In the single-input case no shift is emitted, so only the half-word that
+  // already sits at this element can be inserted.
+  const unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
+
+  bool FoundCandidate = false;
+  // Go through the mask of half-words to find an element that's being moved
+  // from one vector to the other.
+  for (unsigned i = 0; i < NumHalfWords; ++i) {
+    const unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
+    const uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
+    // Build the nibble mask in an unsigned type: for i == 0 the shift is 28
+    // and a plain int 0xF would be shifted into the sign bit.
+    const uint32_t MaskOtherElts = ~(static_cast<uint32_t>(0xF) << MaskShift);
+
+    if (IsUnary) {
+      // Skip if not the correct element or mask of other elements don't equal
+      // to our expected order. ShiftElts and Swap keep their initial values
+      // (0 / false) on this path.
+      if (MaskOneElt != VINSERTHSrcElem ||
+          (Mask & MaskOtherElts) != (OriginalOrderLow & MaskOtherElts))
+        continue;
+    } else { // If both operands are defined.
+      // Target order is [8,15] if the current mask is between [0,7].
+      const bool EltFromV1 = MaskOneElt < NumHalfWords;
+      const uint32_t TargetOrder =
+          EltFromV1 ? OriginalOrderHigh : OriginalOrderLow;
+      // Skip if mask of other elements don't equal our expected order.
+      if ((Mask & MaskOtherElts) != (TargetOrder & MaskOtherElts))
+        continue;
+      // We only need the last 3 bits for the number of shifts.
+      ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
+                       : BigEndianShifts[MaskOneElt & 0x7];
+      // When the inserted element comes from V1, V2 is the vector being
+      // inserted into, so the operands trade roles below.
+      Swap = EltFromV1;
+    }
+
+    InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
+    FoundCandidate = true;
+    break;
+  }
+
+  if (!FoundCandidate)
+    return SDValue();
+
+  // Candidate found, construct the proper SDAG sequence with VINSERTH,
+  // optionally with VECSHL if shift is required.
+  if (Swap)
+    std::swap(V1, V2);
+  if (V2.isUndef())
+    V2 = V1;
+  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+  if (ShiftElts) {
+    // Double ShiftElts because we're left shifting on v16i8 type.
+    V2 = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
+                     DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
+  }
+  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
+                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
+  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+}
+
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+ if (Subtarget.hasP9Altivec()) {
+ SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
+ if (NewISDNode)
+ return NewISDNode;
+ }
if (Subtarget.hasVSX() &&
PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
--- /dev/null
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; The following testcases take one halfword element from the second vector and
+; insert it at various locations in the first vector.
+; Mask index 8 at position 0: result halfword 0 is %b[0], every other halfword
+; keeps its position in %a. Both runs expect a vsldoi on %b then a vinserth.
+define <8 x i16> @shuffle_vector_halfword_0_8(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_0_8
+; CHECK: vsldoi 3, 3, 3, 8
+; CHECK: vinserth 2, 3, 14
+; CHECK-BE-LABEL: shuffle_vector_halfword_0_8
+; CHECK-BE: vsldoi 3, 3, 3, 10
+; CHECK-BE: vinserth 2, 3, 0
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 15 at position 1: result halfword 1 is %b[7], every other halfword
+; keeps its position in %a.
+define <8 x i16> @shuffle_vector_halfword_1_15(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_1_15
+; CHECK: vsldoi 3, 3, 3, 10
+; CHECK: vinserth 2, 3, 12
+; CHECK-BE-LABEL: shuffle_vector_halfword_1_15
+; CHECK-BE: vsldoi 3, 3, 3, 8
+; CHECK-BE: vinserth 2, 3, 2
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 15, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 9 at position 2: result halfword 2 is %b[1], every other halfword
+; keeps its position in %a.
+define <8 x i16> @shuffle_vector_halfword_2_9(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_2_9
+; CHECK: vsldoi 3, 3, 3, 6
+; CHECK: vinserth 2, 3, 10
+; CHECK-BE-LABEL: shuffle_vector_halfword_2_9
+; CHECK-BE: vsldoi 3, 3, 3, 12
+; CHECK-BE: vinserth 2, 3, 4
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 13 at position 3: result halfword 3 is %b[5], every other halfword
+; keeps its position in %a.
+define <8 x i16> @shuffle_vector_halfword_3_13(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_3_13
+; CHECK: vsldoi 3, 3, 3, 14
+; CHECK: vinserth 2, 3, 8
+; CHECK-BE-LABEL: shuffle_vector_halfword_3_13
+; CHECK-BE: vsldoi 3, 3, 3, 4
+; CHECK-BE: vinserth 2, 3, 6
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 10 at position 4: result halfword 4 is %b[2], every other halfword
+; keeps its position in %a.
+define <8 x i16> @shuffle_vector_halfword_4_10(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_4_10
+; CHECK: vsldoi 3, 3, 3, 4
+; CHECK: vinserth 2, 3, 6
+; CHECK-BE-LABEL: shuffle_vector_halfword_4_10
+; CHECK-BE: vsldoi 3, 3, 3, 14
+; CHECK-BE: vinserth 2, 3, 8
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 14 at position 5: result halfword 5 is %b[6], every other halfword
+; keeps its position in %a.
+define <8 x i16> @shuffle_vector_halfword_5_14(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_5_14
+; CHECK: vsldoi 3, 3, 3, 12
+; CHECK: vinserth 2, 3, 4
+; CHECK-BE-LABEL: shuffle_vector_halfword_5_14
+; CHECK-BE: vsldoi 3, 3, 3, 6
+; CHECK-BE: vinserth 2, 3, 10
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 14, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 11 at position 6: result halfword 6 is %b[3], every other halfword
+; keeps its position in %a. The BE checks only look for the vinserth; no
+; vsldoi line is listed for that run.
+define <8 x i16> @shuffle_vector_halfword_6_11(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_6_11
+; CHECK: vsldoi 3, 3, 3, 2
+; CHECK: vinserth 2, 3, 2
+; CHECK-BE-LABEL: shuffle_vector_halfword_6_11
+; CHECK-BE: vinserth 2, 3, 12
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 11, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 12 at position 7: result halfword 7 is %b[4], every other halfword
+; keeps its position in %a. The LE checks only look for the vinserth; no
+; vsldoi line is listed for that run.
+define <8 x i16> @shuffle_vector_halfword_7_12(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_7_12
+; CHECK: vinserth 2, 3, 0
+; CHECK-BE-LABEL: shuffle_vector_halfword_7_12
+; CHECK-BE: vsldoi 3, 3, 3, 2
+; CHECK-BE: vinserth 2, 3, 14
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
+ ret <8 x i16> %vecins
+}
+
+; Unlike the tests above, this one inserts from the first vector into the
+; second: mask index 1 at position 0 makes result halfword 0 equal to %a[1],
+; and every other halfword keeps its position in %b. The checks expect the
+; vinserth to target register 3, followed by a vmr into register 2.
+define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_8_1
+; CHECK: vsldoi 2, 2, 2, 6
+; CHECK: vinserth 3, 2, 14
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_8_1
+; CHECK-BE: vsldoi 2, 2, 2, 12
+; CHECK-BE: vinserth 3, 2, 0
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; The following testcases (and shuffle_vector_halfword_8_1 above) take one
+; halfword element from the first vector and insert it at various locations in
+; the second vector.
+; Mask index 7 at position 1: result halfword 1 is %a[7], every other halfword
+; keeps its position in %b.
+define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_9_7
+; CHECK: vsldoi 2, 2, 2, 10
+; CHECK: vinserth 3, 2, 12
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_9_7
+; CHECK-BE: vsldoi 2, 2, 2, 8
+; CHECK-BE: vinserth 3, 2, 2
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 7, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 4 at position 2: result halfword 2 is %a[4], every other halfword
+; keeps its position in %b. The LE checks list no vsldoi line for this case.
+define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_10_4
+; CHECK: vinserth 3, 2, 10
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_10_4
+; CHECK-BE: vsldoi 2, 2, 2, 2
+; CHECK-BE: vinserth 3, 2, 4
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 4, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 2 at position 3: result halfword 3 is %a[2], every other halfword
+; keeps its position in %b.
+define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_11_2
+; CHECK: vsldoi 2, 2, 2, 4
+; CHECK: vinserth 3, 2, 8
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_11_2
+; CHECK-BE: vsldoi 2, 2, 2, 14
+; CHECK-BE: vinserth 3, 2, 6
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 2, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 6 at position 4: result halfword 4 is %a[6], every other halfword
+; keeps its position in %b.
+define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_12_6
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: vinserth 3, 2, 6
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_12_6
+; CHECK-BE: vsldoi 2, 2, 2, 6
+; CHECK-BE: vinserth 3, 2, 8
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 6, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 3 at position 5: result halfword 5 is %a[3], every other halfword
+; keeps its position in %b. The BE checks list no vsldoi line for this case.
+define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_13_3
+; CHECK: vsldoi 2, 2, 2, 2
+; CHECK: vinserth 3, 2, 4
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_13_3
+; CHECK-BE: vinserth 3, 2, 10
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 5 at position 6: result halfword 6 is %a[5], every other halfword
+; keeps its position in %b.
+define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_14_5
+; CHECK: vsldoi 2, 2, 2, 14
+; CHECK: vinserth 3, 2, 2
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_14_5
+; CHECK-BE: vsldoi 2, 2, 2, 4
+; CHECK-BE: vinserth 3, 2, 12
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 5, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; Mask index 0 at position 7: result halfword 7 is %a[0], every other halfword
+; keeps its position in %b.
+define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_15_0
+; CHECK: vsldoi 2, 2, 2, 8
+; CHECK: vinserth 3, 2, 0
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_15_0
+; CHECK-BE: vsldoi 2, 2, 2, 10
+; CHECK-BE: vinserth 3, 2, 14
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
+ ret <8 x i16> %vecins
+}
+
+; The following testcases use the same vector in both arguments of the
+; shufflevector. If halfword element 3 in BE mode (or 4 in LE mode) is the one
+; we're attempting to insert, then we can use the vector insert instruction.
+; Both operands are %a. Mask index 4 at position 0 copies %a[4] into result
+; halfword 0. The LE run expects a vinserth; the BE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_0_4
+; CHECK: vinserth 2, 2, 14
+; CHECK-BE-LABEL: shuffle_vector_halfword_0_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 3 at position 1 copies %a[3] into result
+; halfword 1. The BE run expects a vinserth; the LE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_1_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_1_3
+; CHECK-BE: vinserth 2, 2, 2
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 3, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 3 at position 2 copies %a[3] into result
+; halfword 2. The BE run expects a vinserth; the LE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_2_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_2_3
+; CHECK-BE: vinserth 2, 2, 4
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 4 at position 3 copies %a[4] into result
+; halfword 3. The LE run expects a vinserth; the BE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_3_4
+; CHECK: vinserth 2, 2, 8
+; CHECK-BE-LABEL: shuffle_vector_halfword_3_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 3 at position 4 copies %a[3] into result
+; halfword 4. The BE run expects a vinserth; the LE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_4_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_4_3
+; CHECK-BE: vinserth 2, 2, 8
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 3 at position 5 copies %a[3] into result
+; halfword 5. The BE run expects a vinserth; the LE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_5_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_5_3
+; CHECK-BE: vinserth 2, 2, 10
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 3, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 4 at position 6 copies %a[4] into result
+; halfword 6. The LE run expects a vinserth; the BE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_6_4
+; CHECK: vinserth 2, 2, 2
+; CHECK-BE-LABEL: shuffle_vector_halfword_6_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 4, i32 7>
+ ret <8 x i16> %vecins
+}
+
+; Both operands are %a. Mask index 4 at position 7 copies %a[4] into result
+; halfword 7. The LE run expects a vinserth; the BE run must not emit one.
+define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_7_4
+; CHECK: vinserth 2, 2, 0
+; CHECK-BE-LABEL: shuffle_vector_halfword_7_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
+ ret <8 x i16> %vecins
+}
+