/// Test whether \p V has a splatted value.
bool isSplatValue(SDValue V, bool AllowUndefs = false);
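+ /// If \p V is a splatted value, return the source vector and set
+ /// \p SplatIndex to the index of the splatted element within that vector.
+ /// Returns an empty SDValue if \p V is not recognized as a splat.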
+ SDValue getSplatSourceVector(SDValue V, int &SplatIndex);
+
+ /// If \p V is a splat vector, return its scalar source operand by extracting
+ /// that element from the source vector. Returns an empty SDValue if \p V is
+ /// not recognized as a splat.
+ SDValue getSplatValue(SDValue V);
+
/// Match a binop + shuffle pyramid that represents a horizontal reduction
/// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node /p
/// Extract. The reduction must use one of the opcodes listed in /p
/// If \p V is not a bitcasted one-use value, it is returned as-is.
SDValue peekThroughOneUseBitcasts(SDValue V);
+/// Return the non-extracted vector source operand of \p V if it exists.
+/// If \p V is not an extracted subvector, it is returned as-is.
+SDValue peekThroughExtractSubvectors(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V);
(AllowUndefs || !UndefElts);
}
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+ V = peekThroughExtractSubvectors(V); // Work on the vector V was extracted from, if any.
+
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); // Demand every element of V.
+ if (isSplatValue(V, DemandedElts, UndefElts)) {
+ // Every demanded element is UNDEF: report an UNDEF vector with index 0.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); // Lowest element index that is not undef.
+ return V;
+ }
+ break; // Not a splat: fall through to the empty SDValue below.
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Shuffle nodes are handled directly so that the shuffle's own source
+ // operand is returned rather than the shuffle itself.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts; // Element position within the selected operand.
+ return V.getOperand(Idx / NumElts); // Idx / NumElts picks which shuffle operand supplies it.
+ }
+ }
+
+ return SDValue(); // No splat source found.
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+ int SplatIdx; // Written by getSplatSourceVector on every path that returns a source vector.
+ if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ getIntPtrConstant(SplatIdx, SDLoc(V))); // Extract the splatted scalar from its source vector.
+ return SDValue(); // V was not recognized as a splat.
+}
+
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
return V;
}
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+ while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) // Loop so nested extracts are stripped as well.
+ V = V.getOperand(0); // Step to the vector being extracted from.
+ return V; // Unchanged if V was never an EXTRACT_SUBVECTOR.
+}
+
bool llvm::isBitwiseNot(SDValue V) {
if (V.getOpcode() != ISD::XOR)
return false;
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
-static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
- while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
- V = V.getOperand(0);
- return V;
-}
-
static const Constant *getTargetConstantFromNode(SDValue Op) {
Op = peekThroughBitcasts(Op);
return SDValue();
}
-// If V is a splat value, return the source vector and splat index;
-static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
- V = peekThroughEXTRACT_SUBVECTORs(V);
-
- EVT VT = V.getValueType();
- unsigned Opcode = V.getOpcode();
- switch (Opcode) {
- default: {
- APInt UndefElts;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
- if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
- // Handle case where all demanded elements are UNDEF.
- if (DemandedElts.isSubsetOf(UndefElts)) {
- SplatIdx = 0;
- return DAG.getUNDEF(VT);
- }
- SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
- return V;
- }
- break;
- }
- case ISD::VECTOR_SHUFFLE: {
- // Check if this is a shuffle node doing a splat.
- // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
- // getTargetVShiftNode currently struggles without the splat source.
- auto *SVN = cast<ShuffleVectorSDNode>(V);
- if (!SVN->isSplat())
- break;
- int Idx = SVN->getSplatIndex();
- int NumElts = V.getValueType().getVectorNumElements();
- SplatIdx = Idx % NumElts;
- return V.getOperand(Idx / NumElts);
- }
- }
-
- return SDValue();
-}
-
-static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
- SelectionDAG &DAG) {
- int SplatIdx;
- if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- SrcVector.getValueType().getScalarType(), SrcVector,
- DAG.getIntPtrConstant(SplatIdx, dl));
- return SDValue();
-}
-
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
- if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+ if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v4i64:
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
; AVX512BW-LABEL: splatvar_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512BW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT: vpsllw %xmm2, %zmm4, %zmm2
; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm4, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm4, %zmm2
; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
; AVX512VLBW-NEXT: vpandq %zmm2, %zmm3, %zmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; X32-AVX1-LABEL: splatvar_shift_v8i32:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; X32-AVX1-LABEL: splatvar_shift_v16i16:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
;
; X32-AVX1-LABEL: splatvar_shift_v32i8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X32-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; X32-AVX1-LABEL: splatvar_shift_v8i32:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; X32-AVX1-LABEL: splatvar_shift_v16i16:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
;
; X32-AVX1-LABEL: splatvar_shift_v32i8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X32-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; X32-AVX1-LABEL: splatvar_shift_v8i32:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; X32-AVX1-LABEL: splatvar_shift_v16i16:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
;
; X32-AVX1-LABEL: splatvar_shift_v32i8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X32-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X32-AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3