From: Simon Pilgrim Date: Fri, 4 Dec 2020 11:06:21 +0000 (+0000) Subject: [X86] Let VBMI2 non-VLX targets still use funnel shifts instructions X-Git-Tag: llvmorg-13-init~4381 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=df1ddc42345356f575088c4c80f9cf54d1461e6f;p=platform%2Fupstream%2Fllvm.git [X86] Let VBMI2 non-VLX targets still use funnel shifts instructions --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 13a2d8b..9bfd7ca 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1709,7 +1709,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasVBMI2()) { - for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { + for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, + MVT::v16i16, MVT::v8i32, MVT::v4i64, + MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } @@ -1879,15 +1881,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); } - if (Subtarget.hasVBMI2()) { - // TODO: Make these legal even without VLX? - for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::FSHL, VT, Custom); - setOperationAction(ISD::FSHR, VT, Custom); - } - } - setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); @@ -19453,15 +19446,29 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, if (IsFSHR) std::swap(Op0, Op1); + // With AVX512, but not VLX we need to widen to get a 512-bit result type. + if (!Subtarget.hasVLX() && !VT.is512BitVector()) { + Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512); + Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512); + } + + SDValue Funnel; APInt APIntShiftAmt; + MVT ResultVT = Op0.getSimpleValueType(); if (X86::isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits()); - return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, - Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); - } - - return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, - Op0, Op1, Amt); + Funnel = + DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0, + Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); + } else { + if (!Subtarget.hasVLX() && !VT.is512BitVector()) + Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512); + Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, + ResultVT, Op0, Op1, Amt); + } + if (!Subtarget.hasVLX() && !VT.is512BitVector()) + Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits()); + return Funnel; } assert( (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index d760ebd..9b34604 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -128,13 +128,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512VBMI2-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v2i64: @@ -336,13 +335,12 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] -; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512VBMI2-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v4i32: @@ -633,14 +631,11 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -1177,14 +1172,12 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm3 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63] -; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2 -; AVX512VBMI2-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v2i64: @@ -1389,15 +1382,12 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm3 -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [31,31,31,31] -; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3 -; AVX512VBMI2-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvd %xmm3, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX512VBMI2-NEXT: vpslld %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i32: @@ -1657,15 +1647,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm3 -; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3 -; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VBMI2-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -2468,9 +2454,12 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,14] +; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v2i64: @@ -2606,9 +2595,12 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7] +; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v4i32: @@ -2710,13 +2702,11 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7] -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8] -; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -3005,9 +2995,11 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlq $50, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllq $14, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64: @@ -3079,9 +3071,11 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrld $28, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpslld $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32: @@ -3151,9 +3145,11 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $9, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 3ca4333..d673bde 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -98,13 +98,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrlq $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v4i64: @@ -255,13 +253,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrld $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VBMI2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v8i32: @@ -457,14 +453,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; ; AVX512VBMI2-LABEL: var_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v16i16: @@ -901,14 +894,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm3 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllq %xmm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63] -; AVX512VBMI2-NEXT: vpandn %ymm2, %ymm3, %ymm2 -; AVX512VBMI2-NEXT: vpsrlq $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvq %ymm2, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64: @@ -1064,15 +1054,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrld $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX512VBMI2-NEXT: vpslld %xmm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32: @@ -1249,14 +1235,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm3 -; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: @@ -1939,9 +1922,11 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,14,50,60] +; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v4i64: @@ -2035,9 +2020,11 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11] +; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v8i32: @@ -2135,13 +2122,11 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; ; AVX512VBMI2-LABEL: constant_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] -; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v16i16: @@ -2449,9 +2434,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlq $50, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllq $14, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64: @@ -2533,9 +2519,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrld $28, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpslld $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32: @@ -2617,9 +2604,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $9, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 7fc12f3..bee4986 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -128,13 +128,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VBMI2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v2i64: @@ -337,13 +336,12 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31] -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VBMI2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpslld $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v4i32: @@ -633,14 +631,11 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -1164,14 +1159,12 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm3 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63] -; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsrlq %xmm2, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2 -; AVX512VBMI2-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v2i64: @@ -1351,15 +1344,12 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm3 -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [31,31,31,31] -; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX512VBMI2-NEXT: vpsrld %xmm2, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2 -; AVX512VBMI2-NEXT: vpslld $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i32: @@ -1583,15 +1573,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm3 -; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3 -; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VBMI2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -2084,9 +2070,12 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,14] +; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v2i64: @@ -2223,9 +2212,12 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7] +; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v4i32: @@ -2347,12 +2339,10 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; AVX512VBMI2-LABEL: constant_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7] -; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8] -; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -2635,9 +2625,11 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlq $14, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllq $50, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64: @@ -2709,9 +2701,11 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrld $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpslld $28, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32: @@ -2781,9 +2775,11 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $7, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $9, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index 942a318..a1ab521 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -98,13 +98,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VBMI2-NEXT: vpsllq $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v4i64: @@ -256,13 +254,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) ; ; AVX512VBMI2-LABEL: var_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VBMI2-NEXT: vpslld $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v8i32: @@ -459,14 +455,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; ; AVX512VBMI2-LABEL: var_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v16i16: @@ -890,14 +883,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm3 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpsrlq %xmm2, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63] -; AVX512VBMI2-NEXT: vpandn %ymm2, %ymm3, %ymm2 -; AVX512VBMI2-NEXT: vpsllq $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64: @@ -1037,15 +1027,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] -; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4 -; AVX512VBMI2-NEXT: vpslld $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpsllvd %ymm4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX512VBMI2-NEXT: vpsrld %xmm2, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i32: @@ -1213,14 +1199,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm3 -; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: @@ -1586,9 +1569,11 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,14,50,60] +; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v4i64: @@ -1683,9 +1668,11 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; ; AVX512VBMI2-LABEL: constant_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11] +; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v8i32: @@ -1785,12 +1772,10 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; AVX512VBMI2-LABEL: constant_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] -; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v16i16: @@ -2087,9 +2072,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlq $14, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllq $50, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64: @@ -2171,9 +2157,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrld $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpslld $28, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32: @@ -2255,9 +2242,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $7, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $9, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16: